Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: Implement kernel side of SECCOMP based process handling

This adds the kernel side of the seccomp-based process handling.

Co-authored-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin@sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Link: https://patch.msgid.link/20250602130052.545733-6-benjamin@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

authored by

Benjamin Berg
Johannes Berg
and committed by
Johannes Berg
406d17c6 8420e08f

+459 -138
+2
arch/um/include/shared/common-offsets.h
··· 16 16 DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); 17 17 18 18 DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES); 19 + 20 + DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE);
+1 -1
arch/um/include/shared/os.h
··· 286 286 287 287 /* skas/process.c */ 288 288 extern int is_skas_winch(int pid, int fd, void *data); 289 - extern int start_userspace(unsigned long stub_stack); 289 + extern int start_userspace(struct mm_id *mm_id); 290 290 extern void userspace(struct uml_pt_regs *regs); 291 291 extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); 292 292 extern void switch_threads(jmp_buf *me, jmp_buf *you);
+4 -1
arch/um/include/shared/skas/stub-data.h
··· 17 17 #define FUTEX_IN_KERN 1 18 18 19 19 struct stub_init_data { 20 + int seccomp; 21 + 20 22 unsigned long stub_start; 21 23 22 24 int stub_code_fd; ··· 26 24 int stub_data_fd; 27 25 unsigned long stub_data_offset; 28 26 29 - unsigned long segv_handler; 27 + unsigned long signal_handler; 28 + unsigned long signal_restorer; 30 29 }; 31 30 32 31 #define STUB_NEXT_SYSCALL(s) \
+2 -4
arch/um/kernel/skas/mmu.c
··· 40 40 list_add(&mm->context.list, &mm_list); 41 41 } 42 42 43 - new_id->pid = start_userspace(stack); 44 - if (new_id->pid < 0) { 45 - ret = new_id->pid; 43 + ret = start_userspace(new_id); 44 + if (ret < 0) 46 45 goto out_free; 47 - } 48 46 49 47 /* Ensure the new MM is clean and nothing unwanted is mapped */ 50 48 unmap(new_id, 0, STUB_START);
+130 -11
arch/um/kernel/skas/stub_exe.c
··· 3 3 #include <asm/unistd.h> 4 4 #include <sysdep/stub.h> 5 5 #include <stub-data.h> 6 + #include <linux/filter.h> 7 + #include <linux/seccomp.h> 8 + #include <generated/asm-offsets.h> 6 9 7 10 void _start(void); 8 11 ··· 28 25 } sa = { 29 26 /* Need to set SA_RESTORER (but the handler never returns) */ 30 27 .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000, 31 - /* no need to mask any signals */ 32 - .sa_mask = 0, 33 28 }; 34 29 35 30 /* set a nice name */ ··· 35 34 36 35 /* Make sure this process dies if the kernel dies */ 37 36 stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL); 37 + 38 + /* Needed in SECCOMP mode (and safe to do anyway) */ 39 + stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 38 40 39 41 /* read information from STDIN and close it */ 40 42 res = stub_syscall3(__NR_read, 0, ··· 67 63 stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE; 68 64 stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0); 69 65 70 - /* register SIGSEGV handler */ 71 - sa.sa_handler_ = (void *) init_data.segv_handler; 72 - res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0, 73 - sizeof(sa.sa_mask)); 74 - if (res != 0) 75 - stub_syscall1(__NR_exit, 13); 66 + /* register signal handlers */ 67 + sa.sa_handler_ = (void *) init_data.signal_handler; 68 + sa.sa_restorer = (void *) init_data.signal_restorer; 69 + if (!init_data.seccomp) { 70 + /* In ptrace mode, the SIGSEGV handler never returns */ 71 + sa.sa_mask = 0; 76 72 77 - stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); 73 + res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, 74 + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 75 + if (res != 0) 76 + stub_syscall1(__NR_exit, 13); 77 + } else { 78 + /* SECCOMP mode uses rt_sigreturn, need to mask all signals */ 79 + sa.sa_mask = ~0ULL; 78 80 79 - stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); 81 + res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, 82 + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 83 + 
if (res != 0) 84 + stub_syscall1(__NR_exit, 14); 80 85 81 - stub_syscall1(__NR_exit, 14); 86 + res = stub_syscall4(__NR_rt_sigaction, SIGSYS, 87 + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 88 + if (res != 0) 89 + stub_syscall1(__NR_exit, 15); 90 + 91 + res = stub_syscall4(__NR_rt_sigaction, SIGALRM, 92 + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 93 + if (res != 0) 94 + stub_syscall1(__NR_exit, 16); 95 + 96 + res = stub_syscall4(__NR_rt_sigaction, SIGTRAP, 97 + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 98 + if (res != 0) 99 + stub_syscall1(__NR_exit, 17); 100 + 101 + res = stub_syscall4(__NR_rt_sigaction, SIGILL, 102 + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 103 + if (res != 0) 104 + stub_syscall1(__NR_exit, 18); 105 + 106 + res = stub_syscall4(__NR_rt_sigaction, SIGFPE, 107 + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 108 + if (res != 0) 109 + stub_syscall1(__NR_exit, 19); 110 + } 111 + 112 + /* 113 + * If in seccomp mode, install the SECCOMP filter and trigger a syscall. 114 + * Otherwise set PTRACE_TRACEME and do a SIGSTOP. 
115 + */ 116 + if (init_data.seccomp) { 117 + struct sock_filter filter[] = { 118 + #if __BITS_PER_LONG > 32 119 + /* [0] Load upper 32bit of instruction pointer from seccomp_data */ 120 + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 121 + (offsetof(struct seccomp_data, instruction_pointer) + 4)), 122 + 123 + /* [1] Jump forward 3 instructions if the upper address is not identical */ 124 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3), 125 + #endif 126 + /* [2] Load lower 32bit of instruction pointer from seccomp_data */ 127 + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 128 + (offsetof(struct seccomp_data, instruction_pointer))), 129 + 130 + /* [3] Mask out lower bits */ 131 + BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000), 132 + 133 + /* [4] Jump to [6] if the lower bits are on the expected page, else fall through to [5] */ 134 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0), 135 + 136 + /* [5] Trap call, allow */ 137 + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), 138 + 139 + /* [6,7] Check architecture */ 140 + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 141 + offsetof(struct seccomp_data, arch)), 142 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 143 + UM_SECCOMP_ARCH_NATIVE, 1, 0), 144 + 145 + /* [8] Kill (for architecture check) */ 146 + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), 147 + 148 + /* [9] Load syscall number */ 149 + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 150 + offsetof(struct seccomp_data, nr)), 151 + 152 + /* [10-14] Check against permitted syscalls */ 153 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, 154 + 5, 0), 155 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR, 156 + 4, 0), 157 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap, 158 + 3, 0), 159 + #ifdef __i386__ 160 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area, 161 + 2, 0), 162 + #else 163 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl, 164 + 2, 0), 165 + #endif 166 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn, 167 + 1, 0), 168 + 169 + /* [15] Not 
one of the permitted syscalls */ 170 + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), 171 + 172 + /* [16] Permitted call for the stub */ 173 + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), 174 + }; 175 + struct sock_fprog prog = { 176 + .len = sizeof(filter) / sizeof(filter[0]), 177 + .filter = filter, 178 + }; 179 + 180 + if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 181 + SECCOMP_FILTER_FLAG_TSYNC, 182 + (unsigned long)&prog) != 0) 183 + stub_syscall1(__NR_exit, 20); 184 + 185 + /* Fall through, the exit syscall will cause SIGSYS */ 186 + } else { 187 + stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); 188 + 189 + stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); 190 + } 191 + 192 + stub_syscall1(__NR_exit, 30); 82 193 83 194 __builtin_unreachable(); 84 195 }
+4 -1
arch/um/os-Linux/internal.h
··· 2 2 #ifndef __UM_OS_LINUX_INTERNAL_H 3 3 #define __UM_OS_LINUX_INTERNAL_H 4 4 5 + #include <mm_id.h> 6 + #include <stub-data.h> 7 + 5 8 /* 6 9 * elf_aux.c 7 10 */ ··· 19 16 * skas/process.c 20 17 */ 21 18 void wait_stub_done(int pid); 22 - 19 + void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys); 23 20 #endif /* __UM_OS_LINUX_INTERNAL_H */
+20 -15
arch/um/os-Linux/skas/mem.c
··· 80 80 int n, i; 81 81 int err, pid = mm_idp->pid; 82 82 83 - n = ptrace_setregs(pid, syscall_regs); 84 - if (n < 0) { 85 - printk(UM_KERN_ERR "Registers - \n"); 86 - for (i = 0; i < MAX_REG_NR; i++) 87 - printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); 88 - panic("%s : PTRACE_SETREGS failed, errno = %d\n", 89 - __func__, -n); 90 - } 91 - 92 83 /* Inform process how much we have filled in. */ 93 84 proc_data->syscall_data_len = mm_idp->syscall_data_len; 94 85 95 - err = ptrace(PTRACE_CONT, pid, 0, 0); 96 - if (err) 97 - panic("Failed to continue stub, pid = %d, errno = %d\n", pid, 98 - errno); 86 + if (using_seccomp) { 87 + proc_data->restart_wait = 1; 88 + wait_stub_done_seccomp(mm_idp, 0, 1); 89 + } else { 90 + n = ptrace_setregs(pid, syscall_regs); 91 + if (n < 0) { 92 + printk(UM_KERN_ERR "Registers -\n"); 93 + for (i = 0; i < MAX_REG_NR; i++) 94 + printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); 95 + panic("%s : PTRACE_SETREGS failed, errno = %d\n", 96 + __func__, -n); 97 + } 99 98 100 - wait_stub_done(pid); 99 + err = ptrace(PTRACE_CONT, pid, 0, 0); 100 + if (err) 101 + panic("Failed to continue stub, pid = %d, errno = %d\n", 102 + pid, errno); 103 + 104 + wait_stub_done(pid); 105 + } 101 106 102 107 /* 103 - * proc_data->err will be non-zero if there was an (unexpected) error. 108 + * proc_data->err will be negative if there was an (unexpected) error. 104 109 * In that case, syscall_data_len points to the last executed syscall, 105 110 * otherwise it will be zero (but we do not need to rely on that). 106 111 */
+275 -101
arch/um/os-Linux/skas/process.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* 3 + * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> 3 4 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) 4 5 * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 5 6 */ ··· 26 25 #include <registers.h> 27 26 #include <skas.h> 28 27 #include <sysdep/stub.h> 28 + #include <sysdep/mcontext.h> 29 + #include <linux/futex.h> 29 30 #include <linux/threads.h> 30 31 #include <timetravel.h> 32 + #include <asm-generic/rwonce.h> 31 33 #include "../internal.h" 32 34 33 35 int is_skas_winch(int pid, int fd, void *data) ··· 146 142 fatal_sigsegv(); 147 143 } 148 144 145 + void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys) 146 + { 147 + struct stub_data *data = (void *)mm_idp->stack; 148 + int ret; 149 + 150 + do { 151 + if (!running) { 152 + data->signal = 0; 153 + data->futex = FUTEX_IN_CHILD; 154 + CATCH_EINTR(syscall(__NR_futex, &data->futex, 155 + FUTEX_WAKE, 1, NULL, NULL, 0)); 156 + } 157 + 158 + do { 159 + /* 160 + * We need to check whether the child is still alive 161 + * before and after the FUTEX_WAIT call. Before, in 162 + * case it just died but we still updated data->futex 163 + * to FUTEX_IN_CHILD. And after, in case it died while 164 + * we were waiting (and SIGCHLD woke us up, see the 165 + * IRQ handler in mmu.c). 166 + * 167 + * Either way, if PID is negative, then we have no 168 + * choice but to kill the task. 
169 + */ 170 + if (__READ_ONCE(mm_idp->pid) < 0) 171 + goto out_kill; 172 + 173 + ret = syscall(__NR_futex, &data->futex, 174 + FUTEX_WAIT, FUTEX_IN_CHILD, 175 + NULL, NULL, 0); 176 + if (ret < 0 && errno != EINTR && errno != EAGAIN) { 177 + printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n", 178 + __func__, errno); 179 + goto out_kill; 180 + } 181 + } while (data->futex == FUTEX_IN_CHILD); 182 + 183 + if (__READ_ONCE(mm_idp->pid) < 0) 184 + goto out_kill; 185 + 186 + running = 0; 187 + 188 + /* We may receive a SIGALRM before SIGSYS, iterate again. */ 189 + } while (wait_sigsys && data->signal == SIGALRM); 190 + 191 + if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) { 192 + printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__); 193 + goto out_kill; 194 + } 195 + 196 + if (wait_sigsys && data->signal != SIGSYS) { 197 + printk(UM_KERN_ERR "%s : expected SIGSYS but got %d", 198 + __func__, data->signal); 199 + goto out_kill; 200 + } 201 + 202 + return; 203 + 204 + out_kill: 205 + printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n", 206 + __func__, mm_idp->pid, errno); 207 + /* This is not true inside start_userspace */ 208 + if (current_mm_id() == mm_idp) 209 + fatal_sigsegv(); 210 + } 211 + 149 212 extern unsigned long current_stub_stack(void); 150 213 151 214 static void get_skas_faultinfo(int pid, struct faultinfo *fi) ··· 256 185 int pipe_fds[2]; 257 186 unsigned long long offset; 258 187 struct stub_init_data init_data = { 188 + .seccomp = using_seccomp, 259 189 .stub_start = STUB_START, 260 - .segv_handler = STUB_CODE + 261 - (unsigned long) stub_segv_handler - 262 - (unsigned long) __syscall_stub_start, 263 190 }; 264 191 struct iomem_region *iomem; 265 192 int ret; 193 + 194 + if (using_seccomp) { 195 + init_data.signal_handler = STUB_CODE + 196 + (unsigned long) stub_signal_interrupt - 197 + (unsigned long) __syscall_stub_start; 198 + init_data.signal_restorer = STUB_CODE + 199 + (unsigned long) 
stub_signal_restorer - 200 + (unsigned long) __syscall_stub_start; 201 + } else { 202 + init_data.signal_handler = STUB_CODE + 203 + (unsigned long) stub_segv_handler - 204 + (unsigned long) __syscall_stub_start; 205 + init_data.signal_restorer = 0; 206 + } 266 207 267 208 init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start), 268 209 &offset); ··· 406 323 * when negative: an error number. 407 324 * FIXME: can PIDs become negative?! 408 325 */ 409 - int start_userspace(unsigned long stub_stack) 326 + int start_userspace(struct mm_id *mm_id) 410 327 { 328 + struct stub_data *proc_data = (void *)mm_id->stack; 411 329 void *stack; 412 330 unsigned long sp; 413 331 int pid, status, n, err; ··· 427 343 /* set stack pointer to the end of the stack page, so it can grow downwards */ 428 344 sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; 429 345 346 + if (using_seccomp) 347 + proc_data->futex = FUTEX_IN_CHILD; 348 + 430 349 /* clone into new userspace process */ 431 350 pid = clone(userspace_tramp, (void *) sp, 432 351 CLONE_VFORK | CLONE_VM | SIGCHLD, 433 - (void *)stub_stack); 352 + (void *)mm_id->stack); 434 353 if (pid < 0) { 435 354 err = -errno; 436 355 printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", ··· 441 354 return err; 442 355 } 443 356 444 - do { 445 - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); 446 - if (n < 0) { 357 + if (using_seccomp) { 358 + wait_stub_done_seccomp(mm_id, 1, 1); 359 + } else { 360 + do { 361 + CATCH_EINTR(n = waitpid(pid, &status, 362 + WUNTRACED | __WALL)); 363 + if (n < 0) { 364 + err = -errno; 365 + printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", 366 + __func__, errno); 367 + goto out_kill; 368 + } 369 + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); 370 + 371 + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { 372 + err = -EINVAL; 373 + printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", 374 + __func__, status); 375 + goto out_kill; 376 + } 377 + 378 + if 
(ptrace(PTRACE_SETOPTIONS, pid, NULL, 379 + (void *) PTRACE_O_TRACESYSGOOD) < 0) { 447 380 err = -errno; 448 - printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", 381 + printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", 449 382 __func__, errno); 450 383 goto out_kill; 451 384 } 452 - } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); 453 - 454 - if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { 455 - err = -EINVAL; 456 - printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", 457 - __func__, status); 458 - goto out_kill; 459 - } 460 - 461 - if (ptrace(PTRACE_SETOPTIONS, pid, NULL, 462 - (void *) PTRACE_O_TRACESYSGOOD) < 0) { 463 - err = -errno; 464 - printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", 465 - __func__, errno); 466 - goto out_kill; 467 385 } 468 386 469 387 if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { ··· 477 385 __func__, errno); 478 386 goto out_kill; 479 387 } 388 + 389 + mm_id->pid = pid; 480 390 481 391 return pid; 482 392 ··· 493 399 void userspace(struct uml_pt_regs *regs) 494 400 { 495 401 int err, status, op, pid = userspace_pid[0]; 496 - siginfo_t si; 402 + siginfo_t si_ptrace; 403 + siginfo_t *si; 404 + int sig; 497 405 498 406 /* Handle any immediate reschedules or signals */ 499 407 interrupt_end(); ··· 528 432 529 433 current_mm_sync(); 530 434 531 - /* Flush out any pending syscalls */ 532 - err = syscall_stub_flush(current_mm_id()); 533 - if (err) { 534 - if (err == -ENOMEM) 535 - report_enomem(); 435 + if (using_seccomp) { 436 + struct mm_id *mm_id = current_mm_id(); 437 + struct stub_data *proc_data = (void *) mm_id->stack; 438 + int ret; 536 439 537 - printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", 538 - __func__, -err); 539 - fatal_sigsegv(); 540 - } 440 + ret = set_stub_state(regs, proc_data, singlestepping()); 441 + if (ret) { 442 + printk(UM_KERN_ERR "%s - failed to set regs: %d", 443 + __func__, ret); 444 + fatal_sigsegv(); 445 + } 541 446 542 - /* 543 - * 
This can legitimately fail if the process loads a 544 - * bogus value into a segment register. It will 545 - * segfault and PTRACE_GETREGS will read that value 546 - * out of the process. However, PTRACE_SETREGS will 547 - * fail. In this case, there is nothing to do but 548 - * just kill the process. 549 - */ 550 - if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { 551 - printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", 552 - __func__, errno); 553 - fatal_sigsegv(); 554 - } 447 + /* Must have been reset by the syscall caller */ 448 + if (proc_data->restart_wait != 0) 449 + panic("Programming error: Flag to only run syscalls in child was not cleared!"); 555 450 556 - if (put_fp_registers(pid, regs->fp)) { 557 - printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", 558 - __func__, errno); 559 - fatal_sigsegv(); 560 - } 451 + /* Mark pending syscalls for flushing */ 452 + proc_data->syscall_data_len = mm_id->syscall_data_len; 453 + mm_id->syscall_data_len = 0; 561 454 562 - if (singlestepping()) 563 - op = PTRACE_SYSEMU_SINGLESTEP; 564 - else 565 - op = PTRACE_SYSEMU; 455 + proc_data->signal = 0; 456 + proc_data->futex = FUTEX_IN_CHILD; 457 + CATCH_EINTR(syscall(__NR_futex, &proc_data->futex, 458 + FUTEX_WAKE, 1, NULL, NULL, 0)); 459 + do { 460 + ret = syscall(__NR_futex, &proc_data->futex, 461 + FUTEX_WAIT, FUTEX_IN_CHILD, NULL, NULL, 0); 462 + } while ((ret == -1 && errno == EINTR) || 463 + proc_data->futex == FUTEX_IN_CHILD); 566 464 567 - if (ptrace(op, pid, 0, 0)) { 568 - printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", 569 - __func__, op, errno); 570 - fatal_sigsegv(); 571 - } 465 + sig = proc_data->signal; 572 466 573 - CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); 574 - if (err < 0) { 575 - printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", 576 - __func__, errno); 577 - fatal_sigsegv(); 578 - } 467 + if (sig == SIGTRAP && proc_data->err != 0) { 468 + printk(UM_KERN_ERR "%s - Error flushing stub 
syscalls", 469 + __func__); 470 + syscall_stub_dump_error(mm_id); 471 + fatal_sigsegv(); 472 + } 579 473 580 - regs->is_user = 1; 581 - if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { 582 - printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", 583 - __func__, errno); 584 - fatal_sigsegv(); 585 - } 474 + ret = get_stub_state(regs, proc_data, NULL); 475 + if (ret) { 476 + printk(UM_KERN_ERR "%s - failed to get regs: %d", 477 + __func__, ret); 478 + fatal_sigsegv(); 479 + } 586 480 587 - if (get_fp_registers(pid, regs->fp)) { 588 - printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", 589 - __func__, errno); 590 - fatal_sigsegv(); 481 + if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si)) 482 + panic("%s - Invalid siginfo offset from child", 483 + __func__); 484 + si = (void *)&proc_data->sigstack[proc_data->si_offset]; 485 + 486 + regs->is_user = 1; 487 + 488 + /* Fill in ORIG_RAX and extract fault information */ 489 + PT_SYSCALL_NR(regs->gp) = si->si_syscall; 490 + if (sig == SIGSEGV) { 491 + mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset]; 492 + 493 + GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext); 494 + } 495 + } else { 496 + /* Flush out any pending syscalls */ 497 + err = syscall_stub_flush(current_mm_id()); 498 + if (err) { 499 + if (err == -ENOMEM) 500 + report_enomem(); 501 + 502 + printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", 503 + __func__, -err); 504 + fatal_sigsegv(); 505 + } 506 + 507 + /* 508 + * This can legitimately fail if the process loads a 509 + * bogus value into a segment register. It will 510 + * segfault and PTRACE_GETREGS will read that value 511 + * out of the process. However, PTRACE_SETREGS will 512 + * fail. In this case, there is nothing to do but 513 + * just kill the process. 
514 + */ 515 + if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { 516 + printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", 517 + __func__, errno); 518 + fatal_sigsegv(); 519 + } 520 + 521 + if (put_fp_registers(pid, regs->fp)) { 522 + printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", 523 + __func__, errno); 524 + fatal_sigsegv(); 525 + } 526 + 527 + if (singlestepping()) 528 + op = PTRACE_SYSEMU_SINGLESTEP; 529 + else 530 + op = PTRACE_SYSEMU; 531 + 532 + if (ptrace(op, pid, 0, 0)) { 533 + printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", 534 + __func__, op, errno); 535 + fatal_sigsegv(); 536 + } 537 + 538 + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); 539 + if (err < 0) { 540 + printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", 541 + __func__, errno); 542 + fatal_sigsegv(); 543 + } 544 + 545 + regs->is_user = 1; 546 + if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { 547 + printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", 548 + __func__, errno); 549 + fatal_sigsegv(); 550 + } 551 + 552 + if (get_fp_registers(pid, regs->fp)) { 553 + printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", 554 + __func__, errno); 555 + fatal_sigsegv(); 556 + } 557 + 558 + if (WIFSTOPPED(status)) { 559 + sig = WSTOPSIG(status); 560 + 561 + /* 562 + * These signal handlers need the si argument 563 + * and SIGSEGV needs the faultinfo. 564 + * The SIGIO and SIGALARM handlers which constitute 565 + * the majority of invocations, do not use it. 
566 + */ 567 + switch (sig) { 568 + case SIGSEGV: 569 + get_skas_faultinfo(pid, 570 + &regs->faultinfo); 571 + fallthrough; 572 + case SIGTRAP: 573 + case SIGILL: 574 + case SIGBUS: 575 + case SIGFPE: 576 + case SIGWINCH: 577 + ptrace(PTRACE_GETSIGINFO, pid, 0, 578 + (struct siginfo *)&si_ptrace); 579 + si = &si_ptrace; 580 + break; 581 + default: 582 + si = NULL; 583 + break; 584 + } 585 + } else { 586 + sig = 0; 587 + } 591 588 } 592 589 593 590 UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ 594 591 595 - if (WIFSTOPPED(status)) { 596 - int sig = WSTOPSIG(status); 597 - 598 - /* These signal handlers need the si argument. 599 - * The SIGIO and SIGALARM handlers which constitute the 600 - * majority of invocations, do not use it. 601 - */ 592 + if (sig) { 602 593 switch (sig) { 603 594 case SIGSEGV: 604 - case SIGTRAP: 605 - case SIGILL: 606 - case SIGBUS: 607 - case SIGFPE: 608 - case SIGWINCH: 609 - ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); 610 - break; 611 - } 612 - 613 - switch (sig) { 614 - case SIGSEGV: 615 - get_skas_faultinfo(pid, &regs->faultinfo); 616 - 617 - if (PTRACE_FULL_FAULTINFO) 618 - (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, 595 + if (using_seccomp || PTRACE_FULL_FAULTINFO) 596 + (*sig_info[SIGSEGV])(SIGSEGV, 597 + (struct siginfo *)si, 619 598 regs, NULL); 620 599 else 621 600 segv(regs->faultinfo, 0, 1, NULL, NULL); 622 601 623 602 break; 603 + case SIGSYS: 604 + handle_syscall(regs); 605 + break; 624 606 case SIGTRAP + 0x80: 625 607 handle_trap(pid, regs); 626 608 break; 627 609 case SIGTRAP: 628 - relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL); 610 + relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL); 629 611 break; 630 612 case SIGALRM: 631 613 break; ··· 713 539 case SIGFPE: 714 540 case SIGWINCH: 715 541 block_signals_trace(); 716 - (*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL); 542 + (*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL); 717 543 unblock_signals_trace(); 
718 544 break; 719 545 default:
+2
arch/x86/um/shared/sysdep/kernel-offsets.h
··· 4 4 #include <linux/elf.h> 5 5 #include <linux/crypto.h> 6 6 #include <linux/kbuild.h> 7 + #include <linux/audit.h> 7 8 #include <asm/mman.h> 9 + #include <asm/seccomp.h> 8 10 9 11 /* workaround for a warning with -Wmissing-prototypes */ 10 12 void foo(void);
+19 -4
arch/x86/um/tls_32.c
··· 12 12 #include <skas.h> 13 13 #include <sysdep/tls.h> 14 14 #include <asm/desc.h> 15 + #include <stub-data.h> 15 16 16 17 /* 17 18 * If needed we can detect when it's uninitialized. ··· 22 21 static int host_supports_tls = -1; 23 22 int host_gdt_entry_tls_min; 24 23 25 - static int do_set_thread_area(struct user_desc *info) 24 + static int do_set_thread_area(struct task_struct* task, struct user_desc *info) 26 25 { 27 26 int ret; 28 27 u32 cpu; 29 28 29 + if (info->entry_number < host_gdt_entry_tls_min || 30 + info->entry_number >= host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES) 31 + return -EINVAL; 32 + 33 + if (using_seccomp) { 34 + int idx = info->entry_number - host_gdt_entry_tls_min; 35 + struct stub_data *data = (void *)task->mm->context.id.stack; 36 + 37 + data->arch_data.tls[idx] = *info; 38 + data->arch_data.sync |= BIT(idx); 39 + 40 + return 0; 41 + } 42 + 30 43 cpu = get_cpu(); 31 - ret = os_set_thread_area(info, userspace_pid[cpu]); 44 + ret = os_set_thread_area(info, task->mm->context.id.pid); 32 45 put_cpu(); 33 46 34 47 if (ret) ··· 112 97 if (!(flags & O_FORCE) && curr->flushed) 113 98 continue; 114 99 115 - ret = do_set_thread_area(&curr->tls); 100 + ret = do_set_thread_area(current, &curr->tls); 116 101 if (ret) 117 102 goto out; 118 103 ··· 290 275 return -EFAULT; 291 276 } 292 277 293 - ret = do_set_thread_area(&info); 278 + ret = do_set_thread_area(current, &info); 294 279 if (ret) 295 280 return ret; 296 281 return set_tls_entry(current, &info, idx, 1);