Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

uml: runtime host VMSPLIT detection

Calculate TASK_SIZE at run-time by figuring out the host's VMSPLIT - this is
needed on i386 if UML is to run on hosts with varying VMSPLITs without
recompilation.

TASK_SIZE is now defined in terms of a variable, task_size. This gets rid of
an include of pgtable.h from processor.h, which can cause include loops.

On i386, task_size is calculated early in boot by probing the address space in
a binary search to figure out where the boundary between usable and non-usable
memory is. This tries to make sure that a page that is considered to be in
userspace is, or can be made, read-write. I'm concerned about a system-global
VDSO page in kernel memory being hit and considered to be a userspace page.

On x86_64, task_size is just the old value of CONFIG_TOP_ADDR.

A bunch of config variables are gone now. CONFIG_TOP_ADDR is directly replaced
by TASK_SIZE. NEST_LEVEL is gone since the relocation of the stubs makes it
irrelevant. All the HOST_VMSPLIT stuff is gone. All references to these in
arch/um/Makefile are also gone.

I noticed and fixed a missing extern in os.h when adding os_get_task_size.

Note: This has been revised to fix the 32-bit UML on 64-bit host bug that
Miklos ran into.

Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Jeff Dike and committed by
Linus Torvalds
536788fe 2f569afd

+153 -75
-11
arch/um/Kconfig
··· 203 203 depends on SMP 204 204 default "32" 205 205 206 - config NEST_LEVEL 207 - int "Nesting level" 208 - default "0" 209 - help 210 - This is set to the number of layers of UMLs that this UML will be run 211 - in. Normally, this is zero, meaning that it will run directly on the 212 - host. Setting it to one will build a UML that can run inside a UML 213 - that is running on the host. Generally, if you intend this UML to run 214 - inside another UML, set CONFIG_NEST_LEVEL to one more than the host 215 - UML. 216 - 217 206 config HIGHMEM 218 207 bool "Highmem support (EXPERIMENTAL)" 219 208 depends on !64BIT && EXPERIMENTAL
-37
arch/um/Kconfig.i386
··· 23 23 bool 24 24 default y 25 25 26 - choice 27 - prompt "Host memory split" 28 - default HOST_VMSPLIT_3G 29 - help 30 - This is needed when the host kernel on which you run has a non-default 31 - (like 2G/2G) memory split, instead of the customary 3G/1G. If you did 32 - not recompile your own kernel but use the default distro's one, you can 33 - safely accept the "Default split" option. 34 - 35 - It can be enabled on recent (>=2.6.16-rc2) vanilla kernels via 36 - CONFIG_VM_SPLIT_*, or on previous kernels with special patches (-ck 37 - patchset by Con Kolivas, or other ones) - option names match closely the 38 - host CONFIG_VM_SPLIT_* ones. 39 - 40 - A lower setting (where 1G/3G is lowest and 3G/1G is higher) will 41 - tolerate even more "normal" host kernels, but an higher setting will be 42 - stricter. 43 - 44 - So, if you do not know what to do here, say 'Default split'. 45 - 46 - config HOST_VMSPLIT_3G 47 - bool "Default split (3G/1G user/kernel host split)" 48 - config HOST_VMSPLIT_3G_OPT 49 - bool "3G/1G user/kernel host split (for full 1G low memory)" 50 - config HOST_VMSPLIT_2G 51 - bool "2G/2G user/kernel host split" 52 - config HOST_VMSPLIT_1G 53 - bool "1G/3G user/kernel host split" 54 - endchoice 55 - 56 - config TOP_ADDR 57 - hex 58 - default 0xB0000000 if HOST_VMSPLIT_3G_OPT 59 - default 0x78000000 if HOST_VMSPLIT_2G 60 - default 0x40000000 if HOST_VMSPLIT_1G 61 - default 0xC0000000 62 - 63 26 config 3_LEVEL_PGTABLES 64 27 bool "Three-level pagetables (EXPERIMENTAL)" 65 28 default n
-4
arch/um/Kconfig.x86_64
··· 15 15 bool 16 16 default y 17 17 18 - config TOP_ADDR 19 - hex 20 - default 0x7fc0000000 21 - 22 18 config 3_LEVEL_PGTABLES 23 19 bool 24 20 default y
-11
arch/um/Makefile
··· 79 79 KBUILD_CFLAGS += $(KERNEL_DEFINES) 80 80 KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time,) 81 81 82 - # These are needed for clean and mrproper, since in that case .config is not 83 - # included; the values here are meaningless 84 - 85 - CONFIG_NEST_LEVEL ?= 0 86 - 87 - SIZE = ($(CONFIG_NEST_LEVEL) * 0x20000000) 88 - 89 82 PHONY += linux 90 83 91 84 all: linux ··· 112 119 113 120 CONFIG_KERNEL_STACK_ORDER ?= 2 114 121 STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] ) 115 - 116 - ifndef START 117 - START = $(shell echo $$[ $(TOP_ADDR) - $(SIZE) ] ) 118 - endif 119 122 120 123 CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \ 121 124 -DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE)
-3
arch/um/defconfig
··· 56 56 CONFIG_UML_X86=y 57 57 # CONFIG_64BIT is not set 58 58 CONFIG_SEMAPHORE_SLEEPERS=y 59 - # CONFIG_HOST_2G_2G is not set 60 - CONFIG_TOP_ADDR=0xc0000000 61 59 # CONFIG_3_LEVEL_PGTABLES is not set 62 60 CONFIG_ARCH_HAS_SC_SIGNALS=y 63 61 CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y ··· 79 81 # CONFIG_HPPFS is not set 80 82 CONFIG_MCONSOLE=y 81 83 CONFIG_MAGIC_SYSRQ=y 82 - CONFIG_NEST_LEVEL=0 83 84 # CONFIG_HIGHMEM is not set 84 85 CONFIG_KERNEL_STACK_ORDER=0 85 86
+2
arch/um/include/as-layout.h
··· 57 57 extern unsigned long _unprotected_end; 58 58 extern unsigned long brk_start; 59 59 60 + extern unsigned long host_task_size; 61 + 60 62 extern int linux_main(int argc, char **argv); 61 63 62 64 extern void (*sig_info[])(int, struct uml_pt_regs *);
+4 -1
arch/um/include/os.h
··· 295 295 extern int os_arch_prctl(int pid, int code, unsigned long *addr); 296 296 297 297 /* tty.c */ 298 - int get_pty(void); 298 + extern int get_pty(void); 299 + 300 + /* sys-$ARCH/task_size.c */ 301 + extern unsigned long os_get_task_size(void); 299 302 300 303 #endif
+1 -1
arch/um/kernel/exec.c
··· 25 25 26 26 ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data); 27 27 ret = ret || unmap(&current->mm->context.id, STUB_END, 28 - TASK_SIZE - STUB_END, 1, &data); 28 + host_task_size - STUB_END, 1, &data); 29 29 if (ret) { 30 30 printk(KERN_ERR "flush_thread - clearing address space failed, " 31 31 "err = %d\n", ret);
+14 -2
arch/um/kernel/um_arch.c
··· 241 241 }; 242 242 243 243 /* Set during early boot */ 244 + unsigned long task_size; 245 + EXPORT_SYMBOL(task_size); 246 + 247 + unsigned long host_task_size; 248 + 244 249 unsigned long brk_start; 245 250 unsigned long end_iomem; 246 251 EXPORT_SYMBOL(end_iomem); ··· 271 266 } 272 267 if (have_root == 0) 273 268 add_arg(DEFAULT_COMMAND_LINE); 269 + 270 + host_task_size = os_get_task_size(); 271 + /* 272 + * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps 273 + * out 274 + */ 275 + task_size = host_task_size & PGDIR_MASK; 274 276 275 277 /* OS sanity checks that need to happen before the kernel runs */ 276 278 os_early_checks(); ··· 315 303 316 304 highmem = 0; 317 305 iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; 318 - max_physmem = CONFIG_TOP_ADDR - uml_physmem - iomem_size - MIN_VMALLOC; 306 + max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; 319 307 320 308 /* 321 309 * Zones have to begin on a 1 << MAX_ORDER page boundary, ··· 347 335 } 348 336 349 337 virtmem_size = physmem_size; 350 - avail = CONFIG_TOP_ADDR - start_vm; 338 + avail = TASK_SIZE - start_vm; 351 339 if (physmem_size > avail) 352 340 virtmem_size = avail; 353 341 end_vm = start_vm + virtmem_size;
+1 -1
arch/um/os-Linux/sys-i386/Makefile
··· 3 3 # Licensed under the GPL 4 4 # 5 5 6 - obj-y = registers.o signal.o tls.o 6 + obj-y = registers.o signal.o task_size.o tls.o 7 7 8 8 USER_OBJS := $(obj-y) 9 9
+120
arch/um/os-Linux/sys-i386/task_size.c
··· 1 + #include <stdio.h> 2 + #include <stdlib.h> 3 + #include <signal.h> 4 + #include <sys/mman.h> 5 + #include "longjmp.h" 6 + #include "kern_constants.h" 7 + 8 + static jmp_buf buf; 9 + 10 + static void segfault(int sig) 11 + { 12 + longjmp(buf, 1); 13 + } 14 + 15 + static int page_ok(unsigned long page) 16 + { 17 + unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT); 18 + unsigned long n = ~0UL; 19 + void *mapped = NULL; 20 + int ok = 0; 21 + 22 + /* 23 + * First see if the page is readable. If it is, it may still 24 + * be a VDSO, so we go on to see if it's writable. If not 25 + * then try mapping memory there. If that fails, then we're 26 + * still in the kernel area. As a sanity check, we'll fail if 27 + * the mmap succeeds, but gives us an address different from 28 + * what we wanted. 29 + */ 30 + if (setjmp(buf) == 0) 31 + n = *address; 32 + else { 33 + mapped = mmap(address, UM_KERN_PAGE_SIZE, 34 + PROT_READ | PROT_WRITE, 35 + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 36 + if (mapped == MAP_FAILED) 37 + return 0; 38 + if (mapped != address) 39 + goto out; 40 + } 41 + 42 + /* 43 + * Now, is it writeable? If so, then we're in user address 44 + * space. If not, then try mprotecting it and try the write 45 + * again. 46 + */ 47 + if (setjmp(buf) == 0) { 48 + *address = n; 49 + ok = 1; 50 + goto out; 51 + } else if (mprotect(address, UM_KERN_PAGE_SIZE, 52 + PROT_READ | PROT_WRITE) != 0) 53 + goto out; 54 + 55 + if (setjmp(buf) == 0) { 56 + *address = n; 57 + ok = 1; 58 + } 59 + 60 + out: 61 + if (mapped != NULL) 62 + munmap(mapped, UM_KERN_PAGE_SIZE); 63 + return ok; 64 + } 65 + 66 + unsigned long os_get_task_size(void) 67 + { 68 + struct sigaction sa, old; 69 + unsigned long bottom = 0; 70 + /* 71 + * A 32-bit UML on a 64-bit host gets confused about the VDSO at 72 + * 0xffffe000. It is mapped, is readable, can be reprotected writeable 73 + * and written. However, exec discovers later that it can't be 74 + * unmapped. 
So, just set the highest address to be checked to just 75 + * below it. This might waste some address space on 4G/4G 32-bit 76 + * hosts, but shouldn't hurt otherwise. 77 + */ 78 + unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT; 79 + unsigned long test; 80 + 81 + printf("Locating the top of the address space ... "); 82 + fflush(stdout); 83 + 84 + /* 85 + * We're going to be longjmping out of the signal handler, so 86 + * SA_DEFER needs to be set. 87 + */ 88 + sa.sa_handler = segfault; 89 + sigemptyset(&sa.sa_mask); 90 + sa.sa_flags = SA_NODEFER; 91 + sigaction(SIGSEGV, &sa, &old); 92 + 93 + if (!page_ok(bottom)) { 94 + fprintf(stderr, "Address 0x%x no good?\n", 95 + bottom << UM_KERN_PAGE_SHIFT); 96 + exit(1); 97 + } 98 + 99 + /* This could happen with a 4G/4G split */ 100 + if (page_ok(top)) 101 + goto out; 102 + 103 + do { 104 + test = bottom + (top - bottom) / 2; 105 + if (page_ok(test)) 106 + bottom = test; 107 + else 108 + top = test; 109 + } while (top - bottom > 1); 110 + 111 + out: 112 + /* Restore the old SIGSEGV handling */ 113 + sigaction(SIGSEGV, &old, NULL); 114 + 115 + top <<= UM_KERN_PAGE_SHIFT; 116 + printf("0x%x\n", top); 117 + fflush(stdout); 118 + 119 + return top; 120 + }
+1 -1
arch/um/os-Linux/sys-x86_64/Makefile
··· 3 3 # Licensed under the GPL 4 4 # 5 5 6 - obj-y = registers.o prctl.o signal.o 6 + obj-y = registers.o prctl.o signal.o task_size.o 7 7 8 8 USER_OBJS := $(obj-y) 9 9
+5
arch/um/os-Linux/sys-x86_64/task_size.c
/*
 * Return the top of the usable address space on x86_64.
 *
 * No probing is done; the value is the constant that CONFIG_TOP_ADDR used
 * to provide.  The function takes no arguments, matching the prototype in
 * os.h and the way it is called.
 */
unsigned long os_get_task_size(void)
{
	/* The old value of CONFIG_TOP_ADDR */
	return 0x7fc0000000UL;
}
+2 -1
include/asm-um/fixmap.h
··· 1 1 #ifndef __UM_FIXMAP_H 2 2 #define __UM_FIXMAP_H 3 3 4 + #include <asm/processor.h> 4 5 #include <asm/system.h> 5 6 #include <asm/kmap_types.h> 6 7 #include <asm/archparam.h> ··· 58 57 * at the top of mem.. 59 58 */ 60 59 61 - #define FIXADDR_TOP (CONFIG_TOP_ADDR - 2 * PAGE_SIZE) 60 + #define FIXADDR_TOP (TASK_SIZE - 2 * PAGE_SIZE) 62 61 #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) 63 62 #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) 64 63
+3 -2
include/asm-um/processor-generic.h
··· 11 11 struct task_struct; 12 12 13 13 #include "asm/ptrace.h" 14 - #include "asm/pgtable.h" 15 14 #include "registers.h" 16 15 #include "sysdep/archsetjmp.h" 17 16 ··· 91 92 /* 92 93 * User space process size: 3GB (default). 93 94 */ 94 - #define TASK_SIZE (CONFIG_TOP_ADDR & PGDIR_MASK) 95 + extern unsigned long task_size; 96 + 97 + #define TASK_SIZE (task_size) 95 98 96 99 #undef STACK_TOP 97 100 #undef STACK_TOP_MAX