Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: pass FD for memory operations when needed

Instead of always sharing the FDs with the userspace process, only hand
over the FDs needed for mmap when required. The idea is that userspace
might be able to force the stub into executing an mmap syscall, however,
it will not be able to manipulate the control flow sufficiently to have
access to an FD that would allow mapping arbitrary memory.

Security-wise, we need to be sure that only the expected syscalls are
executed after the kernel sends FDs through the socket. This is
currently not the case, as userspace can trivially jump to the
rt_sigreturn syscall instruction to execute any syscall that the stub is
permitted to do. With this, it can trick the kernel into sending the FD,
which in turn allows userspace to freely map any physical memory.

As such, this is currently *not* secure. However, in principle the
approach should be fine with a more strict SECCOMP filter and a careful
review of the stub control flow (as userspace can prepare a stack). With
some care, it is likely possible to extend the security model to SMP if
desired.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Link: https://patch.msgid.link/20250602130052.545733-8-benjamin@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

authored by

Benjamin Berg and committed by
Johannes Berg
e92e2552 beddc9fb

+280 -60
+7
arch/um/include/shared/skas/mm_id.h
··· 6 6 #ifndef __MM_ID_H 7 7 #define __MM_ID_H 8 8 9 + #define STUB_MAX_FDS 4 10 + 9 11 struct mm_id { 10 12 int pid; 11 13 unsigned long stack; 12 14 int syscall_data_len; 15 + 16 + /* Only used with SECCOMP mode */ 17 + int sock; 18 + int syscall_fd_num; 19 + int syscall_fd_map[STUB_MAX_FDS]; 13 20 }; 14 21 15 22 void __switch_mm(struct mm_id *mm_idp);
+1
arch/um/include/shared/skas/stub-data.h
··· 12 12 #include <as-layout.h> 13 13 #include <sysdep/tls.h> 14 14 #include <sysdep/stub-data.h> 15 + #include <mm_id.h> 15 16 16 17 #define FUTEX_IN_CHILD 0 17 18 #define FUTEX_IN_KERN 1
+3
arch/um/kernel/skas/mmu.c
··· 78 78 mmu->id.pid = -1; 79 79 } 80 80 81 + if (using_seccomp && mmu->id.sock) 82 + os_close_file(mmu->id.sock); 83 + 81 84 free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); 82 85 83 86 guard(spinlock_irqsave)(&mm_list_lock);
+80 -7
arch/um/kernel/skas/stub.c
··· 6 6 #include <sysdep/stub.h> 7 7 8 8 #include <linux/futex.h> 9 + #include <sys/socket.h> 9 10 #include <errno.h> 10 11 11 - static __always_inline int syscall_handler(struct stub_data *d) 12 + /* 13 + * Known security issues 14 + * 15 + * Userspace can jump to this address to execute *any* syscall that is 16 + * permitted by the stub. As we will return afterwards, it can do 17 + * whatever it likes, including: 18 + * - Tricking the kernel into handing out the memory FD 19 + * - Using this memory FD to read/write all physical memory 20 + * - Running in parallel to the kernel processing a syscall 21 + * (possibly creating data races?) 22 + * - Blocking e.g. SIGALRM to avoid time based scheduling 23 + * 24 + * To avoid this, the permitted location for each syscall needs to be 25 + * checked for in the SECCOMP filter (which is reasonably simple). Also, 26 + * more care will need to go into considerations how the code might be 27 + * tricked by using a prepared stack (or even modifying the stack from 28 + * another thread in case SMP support is added). 29 + * 30 + * As for the SIGALRM, the best counter measure will be to check in the 31 + * kernel that the process is reporting back the SIGALRM in a timely 32 + * fashion. 
33 + */ 34 + static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS]) 12 35 { 36 + struct stub_data *d = get_stub_data(); 13 37 int i; 14 38 unsigned long res; 39 + int fd; 15 40 16 41 for (i = 0; i < d->syscall_data_len; i++) { 17 42 struct stub_syscall *sc = &d->syscall_data[i]; 18 43 19 44 switch (sc->syscall) { 20 45 case STUB_SYSCALL_MMAP: 46 + if (fd_map) 47 + fd = fd_map[sc->mem.fd]; 48 + else 49 + fd = sc->mem.fd; 50 + 21 51 res = stub_syscall6(STUB_MMAP_NR, 22 52 sc->mem.addr, sc->mem.length, 23 53 sc->mem.prot, 24 54 MAP_SHARED | MAP_FIXED, 25 - sc->mem.fd, sc->mem.offset); 55 + fd, sc->mem.offset); 26 56 if (res != sc->mem.addr) { 27 57 d->err = res; 28 58 d->syscall_data_len = i; ··· 84 54 void __section(".__syscall_stub") 85 55 stub_syscall_handler(void) 86 56 { 87 - struct stub_data *d = get_stub_data(); 88 - 89 - syscall_handler(d); 57 + syscall_handler(NULL); 90 58 91 59 trap_myself(); 92 60 } ··· 93 65 stub_signal_interrupt(int sig, siginfo_t *info, void *p) 94 66 { 95 67 struct stub_data *d = get_stub_data(); 68 + char rcv_data; 69 + union { 70 + char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)]; 71 + struct cmsghdr align; 72 + } ctrl = {}; 73 + struct iovec iov = { 74 + .iov_base = &rcv_data, 75 + .iov_len = 1, 76 + }; 77 + struct msghdr msghdr = { 78 + .msg_iov = &iov, 79 + .msg_iovlen = 1, 80 + .msg_control = &ctrl, 81 + .msg_controllen = sizeof(ctrl), 82 + }; 96 83 ucontext_t *uc = p; 84 + struct cmsghdr *fd_msg; 85 + int *fd_map; 86 + int num_fds; 97 87 long res; 98 88 99 89 d->signal = sig; ··· 124 78 res = stub_syscall3(__NR_futex, (unsigned long)&d->futex, 125 79 FUTEX_WAKE, 1); 126 80 } while (res == -EINTR); 81 + 127 82 do { 128 83 res = stub_syscall4(__NR_futex, (unsigned long)&d->futex, 129 84 FUTEX_WAIT, FUTEX_IN_KERN, 0); ··· 133 86 if (res < 0 && res != -EAGAIN) 134 87 stub_syscall1(__NR_exit_group, 1); 135 88 136 - /* Try running queued syscalls. 
*/ 137 - if (syscall_handler(d) < 0 || d->restart_wait) { 89 + if (d->syscall_data_len) { 90 + /* Read passed FDs (if any) */ 91 + do { 92 + res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0); 93 + } while (res == -EINTR); 94 + 95 + /* We should never have a receive error (other than -EAGAIN) */ 96 + if (res < 0 && res != -EAGAIN) 97 + stub_syscall1(__NR_exit_group, 1); 98 + 99 + /* Receive the FDs */ 100 + num_fds = 0; 101 + fd_msg = msghdr.msg_control; 102 + fd_map = (void *)&CMSG_DATA(fd_msg); 103 + if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr)) 104 + num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int); 105 + 106 + /* Try running queued syscalls. */ 107 + res = syscall_handler(fd_map); 108 + 109 + while (num_fds) 110 + stub_syscall2(__NR_close, fd_map[--num_fds], 0); 111 + } else { 112 + res = 0; 113 + } 114 + 115 + if (res < 0 || d->restart_wait) { 138 116 /* Report SIGSYS if we restart. */ 139 117 d->signal = SIGSYS; 140 118 d->restart_wait = 0; 119 + 141 120 goto restart_wait; 142 121 } 143 122
+28 -12
arch/um/kernel/skas/stub_exe.c
··· 1 1 #include <sys/ptrace.h> 2 2 #include <sys/prctl.h> 3 + #include <sys/fcntl.h> 3 4 #include <asm/unistd.h> 4 5 #include <sysdep/stub.h> 5 6 #include <stub-data.h> ··· 46 45 if (res != sizeof(init_data)) 47 46 stub_syscall1(__NR_exit, 10); 48 47 49 - stub_syscall1(__NR_close, 0); 48 + /* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */ 49 + if (!init_data.seccomp) 50 + stub_syscall1(__NR_close, 0); 51 + else 52 + stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK); 50 53 51 54 /* map stub code + data */ 52 55 res = stub_syscall6(STUB_MMAP_NR, ··· 68 63 if (res != init_data.stub_start + UM_KERN_PAGE_SIZE) 69 64 stub_syscall1(__NR_exit, 12); 70 65 66 + /* In SECCOMP mode, we only need the signalling FD from now on */ 67 + if (init_data.seccomp) { 68 + res = stub_syscall3(__NR_close_range, 1, ~0U, 0); 69 + if (res != 0) 70 + stub_syscall1(__NR_exit, 13); 71 + } 72 + 71 73 /* setup signal stack inside stub data */ 72 74 stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE; 73 75 stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0); ··· 89 77 res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, 90 78 (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 91 79 if (res != 0) 92 - stub_syscall1(__NR_exit, 13); 80 + stub_syscall1(__NR_exit, 14); 93 81 } else { 94 82 /* SECCOMP mode uses rt_sigreturn, need to mask all signals */ 95 83 sa.sa_mask = ~0ULL; ··· 97 85 res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, 98 86 (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 99 87 if (res != 0) 100 - stub_syscall1(__NR_exit, 14); 88 + stub_syscall1(__NR_exit, 15); 101 89 102 90 res = stub_syscall4(__NR_rt_sigaction, SIGSYS, 103 91 (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 104 92 if (res != 0) 105 - stub_syscall1(__NR_exit, 15); 93 + stub_syscall1(__NR_exit, 16); 106 94 107 95 res = stub_syscall4(__NR_rt_sigaction, SIGALRM, 108 96 (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 109 97 if (res != 0) 110 - stub_syscall1(__NR_exit, 16); 98 + stub_syscall1(__NR_exit, 17); 
111 99 112 100 res = stub_syscall4(__NR_rt_sigaction, SIGTRAP, 113 101 (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 114 102 if (res != 0) 115 - stub_syscall1(__NR_exit, 17); 103 + stub_syscall1(__NR_exit, 18); 116 104 117 105 res = stub_syscall4(__NR_rt_sigaction, SIGILL, 118 106 (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 119 107 if (res != 0) 120 - stub_syscall1(__NR_exit, 18); 108 + stub_syscall1(__NR_exit, 19); 121 109 122 110 res = stub_syscall4(__NR_rt_sigaction, SIGFPE, 123 111 (unsigned long)&sa, 0, sizeof(sa.sa_mask)); 124 112 if (res != 0) 125 - stub_syscall1(__NR_exit, 19); 113 + stub_syscall1(__NR_exit, 20); 126 114 } 127 115 128 116 /* ··· 165 153 BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 166 154 offsetof(struct seccomp_data, nr)), 167 155 168 - /* [10-14] Check against permitted syscalls */ 156 + /* [10-16] Check against permitted syscalls */ 169 157 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, 158 + 7, 0), 159 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_recvmsg, 160 + 6, 0), 161 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_close, 170 162 5, 0), 171 163 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR, 172 164 4, 0), ··· 186 170 BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn, 187 171 1, 0), 188 172 189 - /* [15] Not one of the permitted syscalls */ 173 + /* [17] Not one of the permitted syscalls */ 190 174 BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), 191 175 192 - /* [16] Permitted call for the stub */ 176 + /* [18] Permitted call for the stub */ 193 177 BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), 194 178 }; 195 179 struct sock_fprog prog = { ··· 200 184 if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 201 185 SECCOMP_FILTER_FLAG_TSYNC, 202 186 (unsigned long)&prog) != 0) 203 - stub_syscall1(__NR_exit, 20); 187 + stub_syscall1(__NR_exit, 21); 204 188 205 189 /* Fall through, the exit syscall will cause SIGSYS */ 206 190 } else {
+63 -3
arch/um/os-Linux/skas/mem.c
··· 43 43 44 44 print_hex_dump(UM_KERN_ERR, " syscall data: ", 0, 45 45 16, 4, sc, sizeof(*sc), 0); 46 + 47 + if (using_seccomp) { 48 + printk(UM_KERN_ERR "%s: FD map num: %d", __func__, 49 + mm_idp->syscall_fd_num); 50 + print_hex_dump(UM_KERN_ERR, 51 + " FD map: ", 0, 16, 52 + sizeof(mm_idp->syscall_fd_map[0]), 53 + mm_idp->syscall_fd_map, 54 + sizeof(mm_idp->syscall_fd_map), 0); 55 + } 46 56 } 47 57 48 58 static inline unsigned long *check_init_stack(struct mm_id * mm_idp, ··· 128 118 mm_idp->syscall_data_len = 0; 129 119 } 130 120 121 + if (using_seccomp) 122 + mm_idp->syscall_fd_num = 0; 123 + 131 124 return mm_idp->syscall_data_len; 132 125 } 133 126 ··· 193 180 return NULL; 194 181 } 195 182 183 + static int get_stub_fd(struct mm_id *mm_idp, int fd) 184 + { 185 + int i; 186 + 187 + /* Find an FD slot (or flush and use first) */ 188 + if (!using_seccomp) 189 + return fd; 190 + 191 + /* Already crashed, value does not matter */ 192 + if (mm_idp->syscall_data_len < 0) 193 + return 0; 194 + 195 + /* Find existing FD in map if we can allocate another syscall */ 196 + if (mm_idp->syscall_data_len < 197 + ARRAY_SIZE(((struct stub_data *)NULL)->syscall_data)) { 198 + for (i = 0; i < mm_idp->syscall_fd_num; i++) { 199 + if (mm_idp->syscall_fd_map[i] == fd) 200 + return i; 201 + } 202 + 203 + if (mm_idp->syscall_fd_num < STUB_MAX_FDS) { 204 + i = mm_idp->syscall_fd_num; 205 + mm_idp->syscall_fd_map[i] = fd; 206 + 207 + mm_idp->syscall_fd_num++; 208 + 209 + return i; 210 + } 211 + } 212 + 213 + /* FD map full or no syscall space available, continue after flush */ 214 + do_syscall_stub(mm_idp); 215 + mm_idp->syscall_fd_map[0] = fd; 216 + mm_idp->syscall_fd_num = 1; 217 + 218 + return 0; 219 + } 220 + 196 221 int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, 197 222 int phys_fd, unsigned long long offset) 198 223 { ··· 238 187 239 188 /* Compress with previous syscall if that is possible */ 240 189 sc = syscall_stub_get_previous(mm_idp, 
STUB_SYSCALL_MMAP, virt); 241 - if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd && 190 + if (sc && sc->mem.prot == prot && 242 191 sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) { 243 - sc->mem.length += len; 244 - return 0; 192 + int prev_fd = sc->mem.fd; 193 + 194 + if (using_seccomp) 195 + prev_fd = mm_idp->syscall_fd_map[sc->mem.fd]; 196 + 197 + if (phys_fd == prev_fd) { 198 + sc->mem.length += len; 199 + return 0; 200 + } 245 201 } 202 + 203 + phys_fd = get_stub_fd(mm_idp, phys_fd); 246 204 247 205 sc = syscall_stub_alloc(mm_idp); 248 206 sc->syscall = STUB_SYSCALL_MMAP;
+91 -35
arch/um/os-Linux/skas/process.c
··· 16 16 #include <sys/mman.h> 17 17 #include <sys/wait.h> 18 18 #include <sys/stat.h> 19 + #include <sys/socket.h> 19 20 #include <asm/unistd.h> 20 21 #include <as-layout.h> 21 22 #include <init.h> ··· 153 152 int ret; 154 153 155 154 do { 155 + const char byte = 0; 156 + struct iovec iov = { 157 + .iov_base = (void *)&byte, 158 + .iov_len = sizeof(byte), 159 + }; 160 + union { 161 + char data[CMSG_SPACE(sizeof(mm_idp->syscall_fd_map))]; 162 + struct cmsghdr align; 163 + } ctrl; 164 + struct msghdr msgh = { 165 + .msg_iov = &iov, 166 + .msg_iovlen = 1, 167 + }; 168 + 156 169 if (!running) { 170 + if (mm_idp->syscall_fd_num) { 171 + unsigned int fds_size = 172 + sizeof(int) * mm_idp->syscall_fd_num; 173 + struct cmsghdr *cmsg; 174 + 175 + msgh.msg_control = ctrl.data; 176 + msgh.msg_controllen = CMSG_SPACE(fds_size); 177 + cmsg = CMSG_FIRSTHDR(&msgh); 178 + cmsg->cmsg_level = SOL_SOCKET; 179 + cmsg->cmsg_type = SCM_RIGHTS; 180 + cmsg->cmsg_len = CMSG_LEN(fds_size); 181 + memcpy(CMSG_DATA(cmsg), mm_idp->syscall_fd_map, 182 + fds_size); 183 + 184 + CATCH_EINTR(syscall(__NR_sendmsg, mm_idp->sock, 185 + &msgh, 0)); 186 + } 187 + 157 188 data->signal = 0; 158 189 data->futex = FUTEX_IN_CHILD; 159 190 CATCH_EINTR(syscall(__NR_futex, &data->futex, ··· 279 246 280 247 static int stub_exe_fd; 281 248 249 + struct tramp_data { 250 + struct stub_data *stub_data; 251 + /* 0 is inherited, 1 is the kernel side */ 252 + int sockpair[2]; 253 + }; 254 + 282 255 #ifndef CLOSE_RANGE_CLOEXEC 283 256 #define CLOSE_RANGE_CLOEXEC (1U << 2) 284 257 #endif 285 258 286 - static int userspace_tramp(void *stack) 259 + static int userspace_tramp(void *data) 287 260 { 261 + struct tramp_data *tramp_data = data; 288 262 char *const argv[] = { "uml-userspace", NULL }; 289 - int pipe_fds[2]; 290 263 unsigned long long offset; 291 264 struct stub_init_data init_data = { 292 265 .seccomp = using_seccomp, ··· 319 280 &offset); 320 281 init_data.stub_code_offset = MMAP_OFFSET(offset); 321 282 322 - 
init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset); 283 + init_data.stub_data_fd = phys_mapping(uml_to_phys(tramp_data->stub_data), 284 + &offset); 323 285 init_data.stub_data_offset = MMAP_OFFSET(offset); 324 286 325 287 /* ··· 331 291 syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC); 332 292 333 293 fcntl(init_data.stub_data_fd, F_SETFD, 0); 334 - for (iomem = iomem_regions; iomem; iomem = iomem->next) 335 - fcntl(iomem->fd, F_SETFD, 0); 336 294 337 - /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */ 338 - if (pipe(pipe_fds)) 339 - exit(2); 295 + /* In SECCOMP mode, these FDs are passed when needed */ 296 + if (!using_seccomp) { 297 + for (iomem = iomem_regions; iomem; iomem = iomem->next) 298 + fcntl(iomem->fd, F_SETFD, 0); 299 + } 340 300 341 - if (dup2(pipe_fds[0], 0) < 0) 301 + /* dup2 signaling FD/socket to STDIN */ 302 + if (dup2(tramp_data->sockpair[0], 0) < 0) 342 303 exit(3); 343 - close(pipe_fds[0]); 304 + close(tramp_data->sockpair[0]); 344 305 345 306 /* Write init_data and close write side */ 346 - ret = write(pipe_fds[1], &init_data, sizeof(init_data)); 347 - close(pipe_fds[1]); 307 + ret = write(tramp_data->sockpair[1], &init_data, sizeof(init_data)); 308 + close(tramp_data->sockpair[1]); 348 309 349 310 if (ret != sizeof(init_data)) 350 311 exit(4); ··· 438 397 439 398 /** 440 399 * start_userspace() - prepare a new userspace process 441 - * @stub_stack: pointer to the stub stack. 400 + * @mm_id: The corresponding struct mm_id 442 401 * 443 402 * Setups a new temporary stack page that is used while userspace_tramp() runs 444 403 * Clones the kernel process into a new userspace process, with FDs only. 
··· 450 409 int start_userspace(struct mm_id *mm_id) 451 410 { 452 411 struct stub_data *proc_data = (void *)mm_id->stack; 412 + struct tramp_data tramp_data = { 413 + .stub_data = proc_data, 414 + }; 453 415 void *stack; 454 416 unsigned long sp; 455 - int pid, status, n, err; 417 + int status, n, err; 456 418 457 419 /* setup a temporary stack page */ 458 420 stack = mmap(NULL, UM_KERN_PAGE_SIZE, ··· 471 427 /* set stack pointer to the end of the stack page, so it can grow downwards */ 472 428 sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; 473 429 430 + /* socket pair for init data and SECCOMP FD passing (no CLOEXEC here) */ 431 + if (socketpair(AF_UNIX, SOCK_STREAM, 0, tramp_data.sockpair)) { 432 + err = -errno; 433 + printk(UM_KERN_ERR "%s : socketpair failed, errno = %d\n", 434 + __func__, errno); 435 + return err; 436 + } 437 + 474 438 if (using_seccomp) 475 439 proc_data->futex = FUTEX_IN_CHILD; 476 440 477 - /* clone into new userspace process */ 478 - pid = clone(userspace_tramp, (void *) sp, 441 + mm_id->pid = clone(userspace_tramp, (void *) sp, 479 442 CLONE_VFORK | CLONE_VM | SIGCHLD, 480 - (void *)mm_id->stack); 481 - if (pid < 0) { 443 + (void *)&tramp_data); 444 + if (mm_id->pid < 0) { 482 445 err = -errno; 483 446 printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", 484 447 __func__, errno); 485 - return err; 448 + goto out_close; 486 449 } 487 450 488 451 if (using_seccomp) { 489 452 wait_stub_done_seccomp(mm_id, 1, 1); 490 453 } else { 491 454 do { 492 - CATCH_EINTR(n = waitpid(pid, &status, 455 + CATCH_EINTR(n = waitpid(mm_id->pid, &status, 493 456 WUNTRACED | __WALL)); 494 457 if (n < 0) { 495 458 err = -errno; ··· 513 462 goto out_kill; 514 463 } 515 464 516 - if (ptrace(PTRACE_SETOPTIONS, pid, NULL, 465 + if (ptrace(PTRACE_SETOPTIONS, mm_id->pid, NULL, 517 466 (void *) PTRACE_O_TRACESYSGOOD) < 0) { 518 467 err = -errno; 519 468 printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", ··· 529 478 goto out_kill; 530 479 } 531 480 532 - 
mm_id->pid = pid; 481 + close(tramp_data.sockpair[0]); 482 + if (using_seccomp) 483 + mm_id->sock = tramp_data.sockpair[1]; 484 + else 485 + close(tramp_data.sockpair[1]); 533 486 534 - return pid; 487 + return 0; 535 488 536 - out_kill: 537 - os_kill_ptraced_process(pid, 1); 489 + out_kill: 490 + os_kill_ptraced_process(mm_id->pid, 1); 491 + out_close: 492 + close(tramp_data.sockpair[0]); 493 + close(tramp_data.sockpair[1]); 494 + 495 + mm_id->pid = -1; 496 + 538 497 return err; 539 498 } 540 499 ··· 607 546 608 547 /* Mark pending syscalls for flushing */ 609 548 proc_data->syscall_data_len = mm_id->syscall_data_len; 610 - mm_id->syscall_data_len = 0; 611 549 612 - proc_data->signal = 0; 613 - proc_data->futex = FUTEX_IN_CHILD; 614 - CATCH_EINTR(syscall(__NR_futex, &proc_data->futex, 615 - FUTEX_WAKE, 1, NULL, NULL, 0)); 616 - do { 617 - ret = syscall(__NR_futex, &proc_data->futex, 618 - FUTEX_WAIT, FUTEX_IN_CHILD, NULL, NULL, 0); 619 - } while ((ret == -1 && errno == EINTR) || 620 - proc_data->futex == FUTEX_IN_CHILD); 550 + wait_stub_done_seccomp(mm_id, 0, 0); 621 551 622 552 sig = proc_data->signal; 623 553 ··· 616 564 printk(UM_KERN_ERR "%s - Error flushing stub syscalls", 617 565 __func__); 618 566 syscall_stub_dump_error(mm_id); 567 + mm_id->syscall_data_len = proc_data->err; 619 568 fatal_sigsegv(); 620 569 } 570 + 571 + mm_id->syscall_data_len = 0; 572 + mm_id->syscall_fd_num = 0; 621 573 622 574 ret = get_stub_state(regs, proc_data, NULL); 623 575 if (ret) {
+7 -3
arch/um/os-Linux/start_up.c
··· 265 265 }; 266 266 struct sigaction sa; 267 267 268 + /* close_range is needed for the stub */ 269 + if (stub_syscall3(__NR_close_range, 1, ~0U, 0)) 270 + exit(1); 271 + 268 272 set_sigstack(seccomp_test_stub_data->sigstack, 269 273 sizeof(seccomp_test_stub_data->sigstack)); 270 274 ··· 276 272 sa.sa_sigaction = (void *) sigsys_handler; 277 273 sa.sa_restorer = NULL; 278 274 if (sigaction(SIGSYS, &sa, NULL) < 0) 279 - exit(1); 275 + exit(2); 280 276 281 277 prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 282 278 if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 283 279 SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0) 284 - exit(2); 280 + exit(3); 285 281 286 282 sleep(0); 287 283 288 284 /* Never reached. */ 289 - _exit(3); 285 + _exit(4); 290 286 } 291 287 292 288 static bool __init init_seccomp(void)