Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: rework userspace stubs to not hard-code stub location

The userspace stubs mostly have a stack (and in the case of the
syscall stub we can just set their stack pointer) that points to
the location of the stub data page already.

Rework the stubs to use the stack pointer to derive the start of
the data page, rather than requiring it to be hard-coded.

In the clone stub, also integrate the int3 into the stack remap,
since we really must not use the stack while we remap it.

This prepares for putting the stub at a variable location that's
not part of the normal address space of the userspace processes
running inside the UML machine.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

authored by

Johannes Berg and committed by
Richard Weinberger
9f0b4807 84b2789d

+75 -48
+4 -12
arch/um/include/shared/as-layout.h
··· 20 20 * 'UL' and other type specifiers unilaterally. We 21 21 * use the following macros to deal with this. 22 22 */ 23 - 24 - #ifdef __ASSEMBLY__ 25 - #define _UML_AC(X, Y) (Y) 26 - #else 27 - #define __UML_AC(X, Y) (X(Y)) 28 - #define _UML_AC(X, Y) __UML_AC(X, Y) 29 - #endif 30 - 31 - #define STUB_START _UML_AC(, 0x100000) 32 - #define STUB_CODE _UML_AC((unsigned long), STUB_START) 33 - #define STUB_DATA _UML_AC((unsigned long), STUB_CODE + UM_KERN_PAGE_SIZE) 34 - #define STUB_END _UML_AC((unsigned long), STUB_DATA + UM_KERN_PAGE_SIZE) 23 + #define STUB_START 0x100000UL 24 + #define STUB_CODE STUB_START 25 + #define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE) 26 + #define STUB_END (STUB_DATA + UM_KERN_PAGE_SIZE) 35 27 36 28 #ifndef __ASSEMBLY__ 37 29
+6
arch/um/include/shared/common-offsets.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 /* for use by sys-$SUBARCH/kernel-offsets.c */ 3 + #include <stub-data.h> 3 4 4 5 DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); 5 6 ··· 44 43 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT 45 44 DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT); 46 45 #endif 46 + 47 + /* for stub */ 48 + DEFINE(UML_STUB_FIELD_OFFSET, offsetof(struct stub_data, offset)); 49 + DEFINE(UML_STUB_FIELD_CHILD_ERR, offsetof(struct stub_data, child_err)); 50 + DEFINE(UML_STUB_FIELD_FD, offsetof(struct stub_data, fd));
+1 -2
arch/um/kernel/skas/clone.c
··· 41 41 goto done; 42 42 } 43 43 44 - remap_stack(data->fd, data->offset); 45 - goto done; 44 + remap_stack_and_trap(); 46 45 47 46 done: 48 47 trap_myself();
+2
arch/um/os-Linux/skas/mem.c
··· 40 40 syscall_regs[REGS_IP_INDEX] = STUB_CODE + 41 41 ((unsigned long) batch_syscall_stub - 42 42 (unsigned long) __syscall_stub_start); 43 + syscall_regs[REGS_SP_INDEX] = STUB_DATA; 44 + 43 45 return 0; 44 46 } 45 47
+22 -11
arch/x86/um/shared/sysdep/stub_32.h
··· 7 7 #define __SYSDEP_STUB_H 8 8 9 9 #include <asm/ptrace.h> 10 + #include <generated/asm-offsets.h> 10 11 11 - #define STUB_SYSCALL_RET EAX 12 12 #define STUB_MMAP_NR __NR_mmap2 13 13 #define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT) 14 14 ··· 77 77 __asm("int3"); 78 78 } 79 79 80 - static inline void remap_stack(int fd, unsigned long offset) 80 + static void inline remap_stack_and_trap(void) 81 81 { 82 - __asm__ volatile ("movl %%eax,%%ebp ; movl %0,%%eax ; int $0x80 ;" 83 - "movl %7, %%ebx ; movl %%eax, (%%ebx)" 84 - : : "g" (STUB_MMAP_NR), "b" (STUB_DATA), 85 - "c" (UM_KERN_PAGE_SIZE), 86 - "d" (PROT_READ | PROT_WRITE), 87 - "S" (MAP_FIXED | MAP_SHARED), "D" (fd), 88 - "a" (offset), 89 - "i" (&((struct stub_data *) STUB_DATA)->child_err) 90 - : "memory"); 82 + __asm__ volatile ( 83 + "movl %%esp,%%ebx ;" 84 + "andl %0,%%ebx ;" 85 + "movl %1,%%eax ;" 86 + "movl %%ebx,%%edi ; addl %2,%%edi ; movl (%%edi),%%edi ;" 87 + "movl %%ebx,%%ebp ; addl %3,%%ebp ; movl (%%ebp),%%ebp ;" 88 + "int $0x80 ;" 89 + "addl %4,%%ebx ; movl %%eax, (%%ebx) ;" 90 + "int $3" 91 + : : 92 + "g" (~(UM_KERN_PAGE_SIZE - 1)), 93 + "g" (STUB_MMAP_NR), 94 + "g" (UML_STUB_FIELD_FD), 95 + "g" (UML_STUB_FIELD_OFFSET), 96 + "g" (UML_STUB_FIELD_CHILD_ERR), 97 + "c" (UM_KERN_PAGE_SIZE), 98 + "d" (PROT_READ | PROT_WRITE), 99 + "S" (MAP_FIXED | MAP_SHARED) 100 + : 101 + "memory"); 91 102 } 92 103 93 104 #endif
+24 -12
arch/x86/um/shared/sysdep/stub_64.h
··· 7 7 #define __SYSDEP_STUB_H 8 8 9 9 #include <sysdep/ptrace_user.h> 10 + #include <generated/asm-offsets.h> 10 11 11 - #define STUB_SYSCALL_RET PT_INDEX(RAX) 12 12 #define STUB_MMAP_NR __NR_mmap 13 13 #define MMAP_OFFSET(o) (o) 14 14 ··· 82 82 __asm("int3"); 83 83 } 84 84 85 - static inline void remap_stack(long fd, unsigned long offset) 85 + static inline void remap_stack_and_trap(void) 86 86 { 87 - __asm__ volatile ("movq %4,%%r10 ; movq %5,%%r8 ; " 88 - "movq %6, %%r9; " __syscall "; movq %7, %%rbx ; " 89 - "movq %%rax, (%%rbx)": 90 - : "a" (STUB_MMAP_NR), "D" (STUB_DATA), 91 - "S" (UM_KERN_PAGE_SIZE), 92 - "d" (PROT_READ | PROT_WRITE), 93 - "g" (MAP_FIXED | MAP_SHARED), "g" (fd), 94 - "g" (offset), 95 - "i" (&((struct stub_data *) STUB_DATA)->child_err) 96 - : __syscall_clobber, "r10", "r8", "r9" ); 87 + __asm__ volatile ( 88 + "movq %0,%%rax ;" 89 + "movq %%rsp,%%rdi ;" 90 + "andq %1,%%rdi ;" 91 + "movq %2,%%r10 ;" 92 + "movq %%rdi,%%r8 ; addq %3,%%r8 ; movq (%%r8),%%r8 ;" 93 + "movq %%rdi,%%r9 ; addq %4,%%r9 ; movq (%%r9),%%r9 ;" 94 + __syscall ";" 95 + "movq %%rsp,%%rdi ; andq %1,%%rdi ;" 96 + "addq %5,%%rdi ; movq %%rax, (%%rdi) ;" 97 + "int3" 98 + : : 99 + "g" (STUB_MMAP_NR), 100 + "g" (~(UM_KERN_PAGE_SIZE - 1)), 101 + "g" (MAP_FIXED | MAP_SHARED), 102 + "g" (UML_STUB_FIELD_FD), 103 + "g" (UML_STUB_FIELD_OFFSET), 104 + "g" (UML_STUB_FIELD_CHILD_ERR), 105 + "S" (UM_KERN_PAGE_SIZE), 106 + "d" (PROT_READ | PROT_WRITE) 107 + : 108 + __syscall_clobber, "r10", "r8", "r9"); 97 109 } 98 110 99 111 #endif
+11 -6
arch/x86/um/stub_32.S
··· 5 5 6 6 .globl batch_syscall_stub 7 7 batch_syscall_stub: 8 - /* load pointer to first operation */ 9 - mov $(STUB_DATA+8), %esp 10 - 8 + /* %esp comes in as "top of page" */ 9 + mov %esp, %ecx 10 + /* %esp has pointer to first operation */ 11 + add $8, %esp 11 12 again: 12 13 /* load length of additional data */ 13 14 mov 0x0(%esp), %eax 14 15 15 16 /* if(length == 0) : end of list */ 16 17 /* write possible 0 to header */ 17 - mov %eax, STUB_DATA+4 18 + mov %eax, 0x4(%ecx) 18 19 cmpl $0, %eax 19 20 jz done 20 21 21 22 /* save current pointer */ 22 - mov %esp, STUB_DATA+4 23 + mov %esp, 0x4(%ecx) 23 24 24 25 /* skip additional data */ 25 26 add %eax, %esp ··· 39 38 /* execute syscall */ 40 39 int $0x80 41 40 41 + /* restore top of page pointer in %ecx */ 42 + mov %esp, %ecx 43 + andl $(~UM_KERN_PAGE_SIZE) + 1, %ecx 44 + 42 45 /* check return value */ 43 46 pop %ebx 44 47 cmp %ebx, %eax ··· 50 45 51 46 done: 52 47 /* save return value */ 53 - mov %eax, STUB_DATA 48 + mov %eax, (%ecx) 54 49 55 50 /* stop */ 56 51 int3
+2 -3
arch/x86/um/stub_64.S
··· 4 4 .section .__syscall_stub, "ax" 5 5 .globl batch_syscall_stub 6 6 batch_syscall_stub: 7 - mov $(STUB_DATA), %rbx 8 - /* load pointer to first operation */ 9 - mov %rbx, %rsp 7 + /* %rsp has the pointer to first operation */ 8 + mov %rsp, %rbx 10 9 add $0x10, %rsp 11 10 again: 12 11 /* load length of additional data */
+3 -2
arch/x86/um/stub_segv.c
··· 11 11 void __attribute__ ((__section__ (".__syscall_stub"))) 12 12 stub_segv_handler(int sig, siginfo_t *info, void *p) 13 13 { 14 + int stack; 14 15 ucontext_t *uc = p; 16 + struct faultinfo *f = (void *)(((unsigned long)&stack) & ~(UM_KERN_PAGE_SIZE - 1)); 15 17 16 - GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA), 17 - &uc->uc_mcontext); 18 + GET_FAULTINFO_FROM_MC(*f, &uc->uc_mcontext); 18 19 trap_myself(); 19 20 } 20 21