Why can't I mmap (MAP_FIXED) the highest virtual page in a 32-bit Linux process on a 64-bit kernel?

While trying to test "Is it allowed to access memory that spans the zero boundary in x86?" in user space on Linux, I wrote a 32-bit test program that tries to map the lowest and highest pages of the 32-bit virtual address space.

After running `echo 0 | sudo tee /proc/sys/vm/mmap_min_addr`, I can map page zero, but I don't know why I can't map -4096, i.e. `(void*)0xfffff000`, the highest page. Why does `mmap2((void*)-4096)` return `-ENOMEM`?

strace ./a.out 
execve("./a.out", ["./a.out"], 0x7ffe08827c10 /* 65 vars */) = 0
strace: [ Process PID=1407 runs in 32 bit mode. ]
....
mmap2(0xfffff000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0

Also, which check in linux/mm/mmap.c rejects it, and why is it designed that way? Is this part of making sure that creating a pointer to one past the end of an object doesn't wrap around and break pointer comparisons, given that ISO C and C++ allow creating a one-past-the-end pointer but otherwise no pointers outside of objects?


I'm running under a 64-bit kernel (4.12.8-2-ARCH on Arch Linux), so 32-bit user space has the entire 4 GiB available. (Unlike 64-bit code on a 64-bit kernel, or a 32-bit kernel, where the 2:2 or 3:1 user/kernel split would make the highest page a kernel address.)

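I haven't tried this from a minimal static executable (no CRT startup code or libc, just asm), because I don't think it would make a difference. None of the system calls made by the CRT startup code look suspicious.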


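While stopped at a breakpoint, I checked /proc/PID/maps. The top page isn't already in use. The stack ends at 0xffffe000, just below the top of the address space, but the top page itself is unmapped.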

00000000-00001000 rw-p 00000000 00:00 0             ### the mmap(0) result
08048000-08049000 r-xp 00000000 00:15 3120510                 /home/peter/src/SO/a.out
08049000-0804a000 r--p 00000000 00:15 3120510                 /home/peter/src/SO/a.out
0804a000-0804b000 rw-p 00001000 00:15 3120510                 /home/peter/src/SO/a.out
f7d81000-f7f3a000 r-xp 00000000 00:15 1511498                 /usr/lib32/libc-2.25.so
f7f3a000-f7f3c000 r--p 001b8000 00:15 1511498                 /usr/lib32/libc-2.25.so
f7f3c000-f7f3d000 rw-p 001ba000 00:15 1511498                 /usr/lib32/libc-2.25.so
f7f3d000-f7f40000 rw-p 00000000 00:00 0 
f7f7c000-f7f7e000 rw-p 00000000 00:00 0 
f7f7e000-f7f81000 r--p 00000000 00:00 0                       [vvar]
f7f81000-f7f83000 r-xp 00000000 00:00 0                       [vdso]
f7f83000-f7fa6000 r-xp 00000000 00:15 1511499                 /usr/lib32/ld-2.25.so
f7fa6000-f7fa7000 r--p 00022000 00:15 1511499                 /usr/lib32/ld-2.25.so
f7fa7000-f7fa8000 rw-p 00023000 00:15 1511499                 /usr/lib32/ld-2.25.so
fffdd000-ffffe000 rw-p 00000000 00:00 0                       [stack]

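Are there VMA regions that don't show up in maps but still make the kernel reject the address? I looked at the occurrences of ENOMEM in linux/mm/mmap.c, but it's a lot of code to read, so maybe I missed something. Is something reserving a range of high addresses, or is it because the address is next to the stack?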

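(Making the system calls in the other order doesn't help; PAGE_ALIGN and similar macros are written carefully to avoid wrapping around before masking, so that wasn't a likely cause anyway.)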


Full source, compiled with `gcc -O3 -fno-pie -no-pie -m32 address-wrap.c`:

#include <sys/mman.h>

//void *mmap(void *addr, size_t len, int prot, int flags,
//           int fildes, off_t off);

/*
 * Request two fixed anonymous private mappings of 4096 bytes each: first the
 * page at (void*)-4096, then the page at address 0.
 *
 * Returns 2 when the first (high) mapping failed; otherwise returns the
 * unsigned value read from the start of the high page.
 */
int main(void) {
    const int prot = PROT_READ | PROT_WRITE;
    const int flags = MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS;

    /* -4096 converted to a pointer is the last page-aligned address. */
    volatile unsigned *top_page =
        mmap((void *)-4096L, 4096, prot, flags, -1, 0);

    /* Page zero is mapped only for its side effect; the pointer is unused. */
    volatile unsigned *zero_page =
        mmap((void *)0, 4096, prot, flags, -1, 0);
    (void)zero_page;

    if (top_page == MAP_FAILED) {
        return 2;
    }
    return *top_page;
}

(I left out the part that tried to deref (int*)-2, because it just segfaults when mmap fails.)

+4
1

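The mmap system call eventually calls either do_mmap or do_brk_flags, which do the actual work of satisfying the memory allocation request. These functions, in turn, call get_unmapped_area. That function contains the checks which ensure that memory cannot be allocated beyond the user address space, whose size is defined by TASK_SIZE. Quoting from the kernel source: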

 * There are a few constraints that determine this:
 *
 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
 * address, then that syscall will enter the kernel with a
 * non-canonical return address, and SYSRET will explode dangerously.
 * We avoid this particular problem by preventing anything executable
 * from being mapped at the maximum canonical address.
 *
 * On AMD CPUs in the Ryzen family, there a nasty bug in which the
 * CPUs malfunction if they execute code from the highest canonical page.
 * They'll speculate right off the end of the canonical space, and
 * bad things happen.  This is worked around in the same way as the
 * Intel problem.

/* 2^__VIRTUAL_MASK_SHIFT minus one page: the topmost page is excluded. */
#define TASK_SIZE_MAX   ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)

/*
 * 0xc0000000 when the current task's personality has ADDR_LIMIT_3GB set,
 * otherwise 0xFFFFe000 (4 GiB minus 0x2000).
 */
#define IA32_PAGE_OFFSET    ((current->personality & ADDR_LIMIT_3GB) ? \
                    0xc0000000 : 0xFFFFe000)

/*
 * Limit for the current thread: IA32_PAGE_OFFSET when the TIF_ADDR32 thread
 * flag is set, TASK_SIZE_MAX otherwise.
 */
#define TASK_SIZE       (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)

On processors that support 48-bit virtual addresses, __VIRTUAL_MASK_SHIFT is 47.

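Note that TASK_SIZE depends on whether the current process is 32-bit on a 32-bit kernel, 32-bit on a 64-bit kernel, or 64-bit on a 64-bit kernel.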

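As the quoted comment explains, the topmost page is kept out of the user address space because of problems on 64-bit Intel and AMD CPUs.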

- . 0xFFFFe000 , , . , 64- . ? SYSCALL - , CALL? ? 32- 32- ?

+2

All Articles