Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: remove process stub VMA

This mostly reverts the old commit 3963333fe676 ("uml: cover stubs
with a VMA") which had added a VMA to the existing PTEs. However,
there's no real reason to have the PTEs in the first place and the
VMA cannot be 'fixed' in place, which leads to bugs: userspace
could try to unmap it and would then be forcefully killed, or
similar. Also, this leaves a bit of an ugly hole in userspace's
address space.

Simplify all this: just install the stub code/page at the top of
the (inner) address space, i.e. put it just above TASK_SIZE. The
pages are simply hard-coded to be mapped in the userspace process
we use to implement an mm context, and they're out of reach of the
inner mmap/munmap/mprotect etc. since they're above TASK_SIZE.

Getting rid of the VMA also stops vma_merge() from hitting one of
its VM_WARN_ON()s, which we previously triggered by installing a
VMA even though the code assumes the stack VMA is the first one.

It also removes a lockdep warning about mmap_sem usage since we no
longer have uml_setup_stubs() and thus no longer need to do any
manipulation that would require mmap_sem in activate_mm().

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

Authored by Johannes Berg, committed by Richard Weinberger
bfc58e2b 9f0b4807

+11 -139
+1
arch/um/include/asm/Kbuild
··· 25 25 generic-y += trace_clock.h 26 26 generic-y += word-at-a-time.h 27 27 generic-y += kprobes.h 28 + generic-y += mm_hooks.h
+1 -28
arch/um/include/asm/mmu_context.h
··· 10 10 #include <linux/mm_types.h> 11 11 #include <linux/mmap_lock.h> 12 12 13 + #include <asm/mm_hooks.h> 13 14 #include <asm/mmu.h> 14 - 15 - extern void uml_setup_stubs(struct mm_struct *mm); 16 - /* 17 - * Needed since we do not use the asm-generic/mm_hooks.h: 18 - */ 19 - static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) 20 - { 21 - uml_setup_stubs(mm); 22 - return 0; 23 - } 24 - extern void arch_exit_mmap(struct mm_struct *mm); 25 - static inline void arch_unmap(struct mm_struct *mm, 26 - unsigned long start, unsigned long end) 27 - { 28 - } 29 - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, 30 - bool write, bool execute, bool foreign) 31 - { 32 - /* by default, allow everything */ 33 - return true; 34 - } 35 - 36 - /* 37 - * end asm-generic/mm_hooks.h functions 38 - */ 39 15 40 16 extern void force_flush_all(void); 41 17 ··· 23 47 * when the new ->mm is used for the first time. 24 48 */ 25 49 __switch_mm(&new->context.id); 26 - mmap_write_lock_nested(new, SINGLE_DEPTH_NESTING); 27 - uml_setup_stubs(new); 28 - mmap_write_unlock(new); 29 50 } 30 51 31 52 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+2 -1
arch/um/include/shared/as-layout.h
··· 20 20 * 'UL' and other type specifiers unilaterally. We 21 21 * use the following macros to deal with this. 22 22 */ 23 - #define STUB_START 0x100000UL 23 + #define STUB_START stub_start 24 24 #define STUB_CODE STUB_START 25 25 #define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE) 26 26 #define STUB_END (STUB_DATA + UM_KERN_PAGE_SIZE) ··· 46 46 extern unsigned long brk_start; 47 47 48 48 extern unsigned long host_task_size; 49 + extern unsigned long stub_start; 49 50 50 51 extern int linux_main(int argc, char **argv); 51 52 extern void uml_finishsetup(void);
+1 -3
arch/um/kernel/exec.c
··· 26 26 27 27 arch_flush_thread(&current->thread.arch); 28 28 29 - ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data); 30 - ret = ret || unmap(&current->mm->context.id, STUB_END, 31 - host_task_size - STUB_END, 1, &data); 29 + ret = unmap(&current->mm->context.id, 0, TASK_SIZE, 1, &data); 32 30 if (ret) { 33 31 printk(KERN_ERR "flush_thread - clearing address space failed, " 34 32 "err = %d\n", ret);
-87
arch/um/kernel/skas/mmu.c
··· 14 14 #include <os.h> 15 15 #include <skas.h> 16 16 17 - static int init_stub_pte(struct mm_struct *mm, unsigned long proc, 18 - unsigned long kernel) 19 - { 20 - pgd_t *pgd; 21 - p4d_t *p4d; 22 - pud_t *pud; 23 - pmd_t *pmd; 24 - pte_t *pte; 25 - 26 - pgd = pgd_offset(mm, proc); 27 - 28 - p4d = p4d_alloc(mm, pgd, proc); 29 - if (!p4d) 30 - goto out; 31 - 32 - pud = pud_alloc(mm, p4d, proc); 33 - if (!pud) 34 - goto out_pud; 35 - 36 - pmd = pmd_alloc(mm, pud, proc); 37 - if (!pmd) 38 - goto out_pmd; 39 - 40 - pte = pte_alloc_map(mm, pmd, proc); 41 - if (!pte) 42 - goto out_pte; 43 - 44 - *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); 45 - *pte = pte_mkread(*pte); 46 - return 0; 47 - 48 - out_pte: 49 - pmd_free(mm, pmd); 50 - out_pmd: 51 - pud_free(mm, pud); 52 - out_pud: 53 - p4d_free(mm, p4d); 54 - out: 55 - return -ENOMEM; 56 - } 57 - 58 17 int init_new_context(struct task_struct *task, struct mm_struct *mm) 59 18 { 60 19 struct mm_context *from_mm = NULL; ··· 55 96 free_page(to_mm->id.stack); 56 97 out: 57 98 return ret; 58 - } 59 - 60 - void uml_setup_stubs(struct mm_struct *mm) 61 - { 62 - int err, ret; 63 - 64 - ret = init_stub_pte(mm, STUB_CODE, 65 - (unsigned long) __syscall_stub_start); 66 - if (ret) 67 - goto out; 68 - 69 - ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack); 70 - if (ret) 71 - goto out; 72 - 73 - mm->context.stub_pages[0] = virt_to_page(__syscall_stub_start); 74 - mm->context.stub_pages[1] = virt_to_page(mm->context.id.stack); 75 - 76 - /* dup_mmap already holds mmap_lock */ 77 - err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START, 78 - VM_READ | VM_MAYREAD | VM_EXEC | 79 - VM_MAYEXEC | VM_DONTCOPY | VM_PFNMAP, 80 - mm->context.stub_pages); 81 - if (err) { 82 - printk(KERN_ERR "install_special_mapping returned %d\n", err); 83 - goto out; 84 - } 85 - return; 86 - 87 - out: 88 - force_sigsegv(SIGSEGV); 89 - } 90 - 91 - void arch_exit_mmap(struct mm_struct *mm) 92 - { 93 - pte_t *pte; 94 - 95 - pte = 
virt_to_pte(mm, STUB_CODE); 96 - if (pte != NULL) 97 - pte_clear(mm, STUB_CODE, pte); 98 - 99 - pte = virt_to_pte(mm, STUB_DATA); 100 - if (pte == NULL) 101 - return; 102 - 103 - pte_clear(mm, STUB_DATA, pte); 104 99 } 105 100 106 101 void destroy_context(struct mm_struct *mm)
-15
arch/um/kernel/tlb.c
··· 125 125 struct host_vm_op *last; 126 126 int fd = -1, ret = 0; 127 127 128 - if (virt + len > STUB_START && virt < STUB_END) 129 - return -EINVAL; 130 - 131 128 if (hvc->userspace) 132 129 fd = phys_mapping(phys, &offset); 133 130 else ··· 162 165 struct host_vm_op *last; 163 166 int ret = 0; 164 167 165 - if (addr + len > STUB_START && addr < STUB_END) 166 - return -EINVAL; 167 - 168 168 if (hvc->index != 0) { 169 169 last = &hvc->ops[hvc->index - 1]; 170 170 if ((last->type == MUNMAP) && ··· 188 194 { 189 195 struct host_vm_op *last; 190 196 int ret = 0; 191 - 192 - if (addr + len > STUB_START && addr < STUB_END) 193 - return -EINVAL; 194 197 195 198 if (hvc->index != 0) { 196 199 last = &hvc->ops[hvc->index - 1]; ··· 223 232 224 233 pte = pte_offset_kernel(pmd, addr); 225 234 do { 226 - if ((addr >= STUB_START) && (addr < STUB_END)) 227 - continue; 228 - 229 235 r = pte_read(*pte); 230 236 w = pte_write(*pte); 231 237 x = pte_exec(*pte); ··· 465 477 struct mm_id *mm_id; 466 478 467 479 address &= PAGE_MASK; 468 - 469 - if (address >= STUB_START && address < STUB_END) 470 - goto kill; 471 480 472 481 pgd = pgd_offset(mm, address); 473 482 if (!pgd_present(*pgd))
+5
arch/um/kernel/um_arch.c
··· 249 249 } 250 250 251 251 /* Set during early boot */ 252 + unsigned long stub_start; 252 253 unsigned long task_size; 253 254 EXPORT_SYMBOL(task_size); 254 255 ··· 284 283 add_arg(DEFAULT_COMMAND_LINE_CONSOLE); 285 284 286 285 host_task_size = os_get_top_address(); 286 + /* reserve two pages for the stubs */ 287 + host_task_size -= 2 * PAGE_SIZE; 288 + stub_start = host_task_size; 289 + 287 290 /* 288 291 * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps 289 292 * out
-4
arch/um/os-Linux/skas/process.c
··· 251 251 signal(SIGTERM, SIG_DFL); 252 252 signal(SIGWINCH, SIG_IGN); 253 253 254 - /* 255 - * This has a pte, but it can't be mapped in with the usual 256 - * tlb_flush mechanism because this is part of that mechanism 257 - */ 258 254 fd = phys_mapping(to_phys(__syscall_stub_start), &offset); 259 255 addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, 260 256 PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
+1 -1
arch/x86/um/os-Linux/task_size.c
··· 145 145 unsigned long os_get_top_address(void) 146 146 { 147 147 /* The old value of CONFIG_TOP_ADDR */ 148 - return 0x7fc0000000; 148 + return 0x7fc0002000; 149 149 } 150 150 151 151 #endif