Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus-4.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml

Pull UML updates from Richard Weinberger:
"Besides random fixes/cleanups, this contains two bigger changes:

- seccomp support by Mickaël Salaün

- IRQ rework by Anton Ivanov"

* 'for-linus-4.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
um: Use race-free temporary file creation
um: Do not set unsecure permission for temporary file
um: Fix build error and kconfig for i386
um: Add seccomp support
um: Add full asm/syscall.h support
selftests/seccomp: Remove the need for HAVE_ARCH_TRACEHOOK
um: Fix ptrace GETREGS/SETREGS bugs
um: link with -lpthread
um: Update UBD to use pread/pwrite family of functions
um: Do not change hard IRQ flags in soft IRQ processing
um: Prevent IRQ handler reentrancy
uml: flush stdout before forking
uml: fix hostfs mknod()

+285 -63
+1 -1
Documentation/features/seccomp/seccomp-filter/arch-support.txt
··· 33 33 | sh: | TODO | 34 34 | sparc: | TODO | 35 35 | tile: | ok | 36 - | um: | TODO | 36 + | um: | ok | 37 37 | unicore32: | TODO | 38 38 | x86: | ok | 39 39 | xtensa: | TODO |
+1
arch/um/Kconfig.common
··· 2 2 bool 3 3 default y 4 4 select HAVE_ARCH_AUDITSYSCALL 5 + select HAVE_ARCH_SECCOMP_FILTER 5 6 select HAVE_UID16 6 7 select HAVE_FUTEX_CMPXCHG if FUTEX 7 8 select GENERIC_IRQ_SHOW
+16
arch/um/Kconfig.um
··· 104 104 int 105 105 default 3 if 3_LEVEL_PGTABLES 106 106 default 2 107 + 108 + config SECCOMP 109 + def_bool y 110 + prompt "Enable seccomp to safely compute untrusted bytecode" 111 + ---help--- 112 + This kernel feature is useful for number crunching applications 113 + that may need to compute untrusted bytecode during their 114 + execution. By using pipes or other transports made available to 115 + the process as file descriptors supporting the read/write 116 + syscalls, it's possible to isolate those applications in 117 + their own address space using seccomp. Once seccomp is 118 + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled 119 + and the task is only allowed to execute a few safe syscalls 120 + defined by each seccomp mode. 121 + 122 + If unsure, say Y.
+5 -22
arch/um/drivers/ubd_kern.c
··· 535 535 { 536 536 int err; 537 537 538 - err = os_seek_file(fd, offset); 539 - if (err < 0) 540 - return err; 541 - 542 - err = os_read_file(fd, buf, len); 538 + err = os_pread_file(fd, buf, len, offset); 543 539 if (err < 0) 544 540 return err; 545 541 ··· 1373 1377 if(req->cow_offset == -1) 1374 1378 return 0; 1375 1379 1376 - n = os_seek_file(req->fds[1], req->cow_offset); 1377 - if(n < 0){ 1378 - printk("do_io - bitmap lseek failed : err = %d\n", -n); 1379 - return 1; 1380 - } 1381 - 1382 - n = os_write_file(req->fds[1], &req->bitmap_words, 1383 - sizeof(req->bitmap_words)); 1380 + n = os_pwrite_file(req->fds[1], &req->bitmap_words, 1381 + sizeof(req->bitmap_words), req->cow_offset); 1384 1382 if(n != sizeof(req->bitmap_words)){ 1385 1383 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, 1386 1384 req->fds[1]); ··· 1389 1399 char *buf; 1390 1400 unsigned long len; 1391 1401 int n, nsectors, start, end, bit; 1392 - int err; 1393 1402 __u64 off; 1394 1403 1395 1404 if (req->op == UBD_FLUSH) { ··· 1417 1428 len = (end - start) * req->sectorsize; 1418 1429 buf = &req->buffer[start * req->sectorsize]; 1419 1430 1420 - err = os_seek_file(req->fds[bit], off); 1421 - if(err < 0){ 1422 - printk("do_io - lseek failed : err = %d\n", -err); 1423 - req->error = 1; 1424 - return; 1425 - } 1426 1431 if(req->op == UBD_READ){ 1427 1432 n = 0; 1428 1433 do { 1429 1434 buf = &buf[n]; 1430 1435 len -= n; 1431 - n = os_read_file(req->fds[bit], buf, len); 1436 + n = os_pread_file(req->fds[bit], buf, len, off); 1432 1437 if (n < 0) { 1433 1438 printk("do_io - read failed, err = %d " 1434 1439 "fd = %d\n", -n, req->fds[bit]); ··· 1432 1449 } while((n < len) && (n != 0)); 1433 1450 if (n < len) memset(&buf[n], 0, len - n); 1434 1451 } else { 1435 - n = os_write_file(req->fds[bit], buf, len); 1452 + n = os_pwrite_file(req->fds[bit], buf, len, off); 1436 1453 if(n != len){ 1437 1454 printk("do_io - write failed err = %d " 1438 1455 "fd = %d\n", -n, req->fds[bit]);
+23
arch/um/include/asm/hardirq.h
··· 1 + #ifndef __ASM_UM_HARDIRQ_H 2 + #define __ASM_UM_HARDIRQ_H 3 + 4 + #include <linux/cache.h> 5 + #include <linux/threads.h> 6 + 7 + typedef struct { 8 + unsigned int __softirq_pending; 9 + } ____cacheline_aligned irq_cpustat_t; 10 + 11 + #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ 12 + #include <linux/irq.h> 13 + 14 + #ifndef ack_bad_irq 15 + static inline void ack_bad_irq(unsigned int irq) 16 + { 17 + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); 18 + } 19 + #endif 20 + 21 + #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 22 + 23 + #endif /* __ASM_UM_HARDIRQ_H */
+138
arch/um/include/asm/syscall-generic.h
··· 1 + /* 2 + * Access to user system call parameters and results 3 + * 4 + * See asm-generic/syscall.h for function descriptions. 5 + * 6 + * Copyright (C) 2015 Mickaël Salaün <mic@digikod.net> 7 + * 8 + * This program is free software; you can redistribute it and/or modify 9 + * it under the terms of the GNU General Public License version 2 as 10 + * published by the Free Software Foundation. 11 + */ 12 + 13 + #ifndef __UM_SYSCALL_GENERIC_H 14 + #define __UM_SYSCALL_GENERIC_H 15 + 16 + #include <asm/ptrace.h> 17 + #include <linux/err.h> 18 + #include <linux/sched.h> 19 + #include <sysdep/ptrace.h> 20 + 21 + static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) 22 + { 23 + 24 + return PT_REGS_SYSCALL_NR(regs); 25 + } 26 + 27 + static inline void syscall_rollback(struct task_struct *task, 28 + struct pt_regs *regs) 29 + { 30 + /* do nothing */ 31 + } 32 + 33 + static inline long syscall_get_error(struct task_struct *task, 34 + struct pt_regs *regs) 35 + { 36 + const long error = regs_return_value(regs); 37 + 38 + return IS_ERR_VALUE(error) ? 
error : 0; 39 + } 40 + 41 + static inline long syscall_get_return_value(struct task_struct *task, 42 + struct pt_regs *regs) 43 + { 44 + return regs_return_value(regs); 45 + } 46 + 47 + static inline void syscall_set_return_value(struct task_struct *task, 48 + struct pt_regs *regs, 49 + int error, long val) 50 + { 51 + PT_REGS_SET_SYSCALL_RETURN(regs, (long) error ?: val); 52 + } 53 + 54 + static inline void syscall_get_arguments(struct task_struct *task, 55 + struct pt_regs *regs, 56 + unsigned int i, unsigned int n, 57 + unsigned long *args) 58 + { 59 + const struct uml_pt_regs *r = &regs->regs; 60 + 61 + switch (i) { 62 + case 0: 63 + if (!n--) 64 + break; 65 + *args++ = UPT_SYSCALL_ARG1(r); 66 + case 1: 67 + if (!n--) 68 + break; 69 + *args++ = UPT_SYSCALL_ARG2(r); 70 + case 2: 71 + if (!n--) 72 + break; 73 + *args++ = UPT_SYSCALL_ARG3(r); 74 + case 3: 75 + if (!n--) 76 + break; 77 + *args++ = UPT_SYSCALL_ARG4(r); 78 + case 4: 79 + if (!n--) 80 + break; 81 + *args++ = UPT_SYSCALL_ARG5(r); 82 + case 5: 83 + if (!n--) 84 + break; 85 + *args++ = UPT_SYSCALL_ARG6(r); 86 + case 6: 87 + if (!n--) 88 + break; 89 + default: 90 + BUG(); 91 + break; 92 + } 93 + } 94 + 95 + static inline void syscall_set_arguments(struct task_struct *task, 96 + struct pt_regs *regs, 97 + unsigned int i, unsigned int n, 98 + const unsigned long *args) 99 + { 100 + struct uml_pt_regs *r = &regs->regs; 101 + 102 + switch (i) { 103 + case 0: 104 + if (!n--) 105 + break; 106 + UPT_SYSCALL_ARG1(r) = *args++; 107 + case 1: 108 + if (!n--) 109 + break; 110 + UPT_SYSCALL_ARG2(r) = *args++; 111 + case 2: 112 + if (!n--) 113 + break; 114 + UPT_SYSCALL_ARG3(r) = *args++; 115 + case 3: 116 + if (!n--) 117 + break; 118 + UPT_SYSCALL_ARG4(r) = *args++; 119 + case 4: 120 + if (!n--) 121 + break; 122 + UPT_SYSCALL_ARG5(r) = *args++; 123 + case 5: 124 + if (!n--) 125 + break; 126 + UPT_SYSCALL_ARG6(r) = *args++; 127 + case 6: 128 + if (!n--) 129 + break; 130 + default: 131 + BUG(); 132 + break; 133 + } 134 
+ } 135 + 136 + /* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */ 137 + 138 + #endif /* __UM_SYSCALL_GENERIC_H */
+2
arch/um/include/asm/thread_info.h
··· 62 62 #define TIF_SYSCALL_AUDIT 6 63 63 #define TIF_RESTORE_SIGMASK 7 64 64 #define TIF_NOTIFY_RESUME 8 65 + #define TIF_SECCOMP 9 /* secure computing */ 65 66 66 67 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 67 68 #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) 68 69 #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) 69 70 #define _TIF_MEMDIE (1 << TIF_MEMDIE) 70 71 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) 72 + #define _TIF_SECCOMP (1 << TIF_SECCOMP) 71 73 72 74 #endif
+2 -1
arch/um/include/shared/os.h
··· 146 146 extern int os_write_file(int fd, const void *buf, int count); 147 147 extern int os_sync_file(int fd); 148 148 extern int os_file_size(const char *file, unsigned long long *size_out); 149 + extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset); 150 + extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset); 149 151 extern int os_file_modtime(const char *file, unsigned long *modtime); 150 152 extern int os_pipe(int *fd, int stream, int close_on_exec); 151 153 extern int os_set_fd_async(int fd); ··· 284 282 void *arg); 285 283 extern void halt_skas(void); 286 284 extern void reboot_skas(void); 287 - extern int get_syscall(struct uml_pt_regs *regs); 288 285 289 286 /* irq.c */ 290 287 extern int os_waiting_for_events(struct irq_fd *active_fds);
+19 -12
arch/um/kernel/skas/syscall.c
··· 5 5 6 6 #include <linux/kernel.h> 7 7 #include <linux/ptrace.h> 8 + #include <linux/seccomp.h> 8 9 #include <kern_util.h> 9 10 #include <sysdep/ptrace.h> 11 + #include <sysdep/ptrace_user.h> 10 12 #include <sysdep/syscalls.h> 11 - #include <os.h> 12 13 13 14 void handle_syscall(struct uml_pt_regs *r) 14 15 { 15 16 struct pt_regs *regs = container_of(r, struct pt_regs, regs); 16 - long result; 17 17 int syscall; 18 18 19 - if (syscall_trace_enter(regs)) { 20 - result = -ENOSYS; 19 + /* Initialize the syscall number and default return value. */ 20 + UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); 21 + PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS); 22 + 23 + /* Do the secure computing check first; failures should be fast. */ 24 + if (secure_computing() == -1) 25 + return; 26 + 27 + if (syscall_trace_enter(regs)) 21 28 goto out; 22 - } 23 29 24 - syscall = get_syscall(r); 30 + /* Update the syscall number after orig_ax has potentially been updated 31 + * with ptrace. 32 + */ 33 + UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); 34 + syscall = UPT_SYSCALL_NR(r); 25 35 26 - if ((syscall > __NR_syscall_max) || syscall < 0) 27 - result = -ENOSYS; 28 - else 29 - result = EXECUTE_SYSCALL(syscall, regs); 36 + if (syscall >= 0 && syscall <= __NR_syscall_max) 37 + PT_REGS_SET_SYSCALL_RETURN(regs, 38 + EXECUTE_SYSCALL(syscall, regs)); 30 39 31 40 out: 32 - PT_REGS_SET_SYSCALL_RETURN(regs, result); 33 - 34 41 syscall_trace_leave(regs); 35 42 }
+19
arch/um/os-Linux/file.c
··· 264 264 return n; 265 265 } 266 266 267 + int os_pread_file(int fd, void *buf, int len, unsigned long long offset) 268 + { 269 + int n = pread(fd, buf, len, offset); 270 + 271 + if (n < 0) 272 + return -errno; 273 + return n; 274 + } 275 + 267 276 int os_write_file(int fd, const void *buf, int len) 268 277 { 269 278 int n = write(fd, (void *) buf, len); ··· 290 281 return -errno; 291 282 return n; 292 283 } 284 + 285 + int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset) 286 + { 287 + int n = pwrite(fd, (void *) buf, len, offset); 288 + 289 + if (n < 0) 290 + return -errno; 291 + return n; 292 + } 293 + 293 294 294 295 int os_file_size(const char *file, unsigned long long *size_out) 295 296 {
+11 -6
arch/um/os-Linux/mem.c
··· 106 106 } 107 107 } 108 108 109 + #ifdef O_TMPFILE 110 + fd = open(tempdir, O_CLOEXEC | O_RDWR | O_EXCL | O_TMPFILE, 0700); 111 + /* 112 + * If the running system does not support O_TMPFILE flag then retry 113 + * without it. 114 + */ 115 + if (fd != -1 || (errno != EINVAL && errno != EISDIR && 116 + errno != EOPNOTSUPP)) 117 + return fd; 118 + #endif 119 + 109 120 tempname = malloc(strlen(tempdir) + strlen(template) + 1); 110 121 if (tempname == NULL) 111 122 return -1; ··· 152 141 fd = make_tempfile(TEMPNAME_TEMPLATE); 153 142 if (fd < 0) 154 143 exit(1); 155 - 156 - err = fchmod(fd, 0777); 157 - if (err < 0) { 158 - perror("fchmod"); 159 - exit(1); 160 - } 161 144 162 145 /* 163 146 * Seek to len - 1 because writing a character there will
+15 -1
arch/um/os-Linux/signal.c
··· 62 62 63 63 static int signals_enabled; 64 64 static unsigned int signals_pending; 65 + static unsigned int signals_active = 0; 65 66 66 67 void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) 67 68 { ··· 102 101 103 102 block_signals(); 104 103 104 + signals_active |= SIGALRM_MASK; 105 + 105 106 timer_real_alarm_handler(mc); 107 + 108 + signals_active &= ~SIGALRM_MASK; 109 + 106 110 set_signals(enabled); 107 111 } 108 112 ··· 292 286 if (save_pending & SIGIO_MASK) 293 287 sig_handler_common(SIGIO, NULL, NULL); 294 288 295 - if (save_pending & SIGALRM_MASK) 289 + /* Do not reenter the handler */ 290 + 291 + if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK))) 296 292 timer_real_alarm_handler(NULL); 293 + 294 + /* Rerun the loop only if there is still pending SIGIO and not in TIMER handler */ 295 + 296 + if (!(signals_pending & SIGIO_MASK) && (signals_active & SIGALRM_MASK)) 297 + return; 298 + 297 299 } 298 300 } 299 301
-7
arch/um/os-Linux/skas/process.c
··· 172 172 handle_syscall(regs); 173 173 } 174 174 175 - int get_syscall(struct uml_pt_regs *regs) 176 - { 177 - UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp); 178 - 179 - return UPT_SYSCALL_NR(regs); 180 - } 181 - 182 175 extern char __syscall_stub_start[]; 183 176 184 177 static int userspace_tramp(void *stack)
+2
arch/um/os-Linux/start_up.c
··· 94 94 { 95 95 int pid, n, status; 96 96 97 + fflush(stdout); 98 + 97 99 pid = fork(); 98 100 if (pid == 0) 99 101 ptrace_child();
+1 -1
arch/x86/um/Makefile
··· 17 17 ifeq ($(CONFIG_X86_32),y) 18 18 19 19 obj-y += checksum_32.o 20 - obj-$(CONFIG_BINFMT_ELF) += elfcore.o 20 + obj-$(CONFIG_ELF_CORE) += elfcore.o 21 21 22 22 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o 23 23 subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
+1
arch/x86/um/asm/syscall.h
··· 1 1 #ifndef __UM_ASM_SYSCALL_H 2 2 #define __UM_ASM_SYSCALL_H 3 3 4 + #include <asm/syscall-generic.h> 4 5 #include <uapi/linux/audit.h> 5 6 6 7 typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
+3 -5
arch/x86/um/ptrace_32.c
··· 68 68 [EFL] = HOST_EFLAGS, 69 69 [UESP] = HOST_SP, 70 70 [SS] = HOST_SS, 71 + [ORIG_EAX] = HOST_ORIG_AX, 71 72 }; 72 73 73 74 int putreg(struct task_struct *child, int regno, unsigned long value) ··· 84 83 case EAX: 85 84 case EIP: 86 85 case UESP: 86 + case ORIG_EAX: 87 87 break; 88 88 case FS: 89 89 if (value && (value & 3) != 3) ··· 109 107 case EFL: 110 108 value &= FLAG_MASK; 111 109 child->thread.regs.regs.gp[HOST_EFLAGS] |= value; 112 - return 0; 113 - case ORIG_EAX: 114 - child->thread.regs.regs.syscall = value; 115 110 return 0; 116 111 default : 117 112 panic("Bad register in putreg() : %d\n", regno); ··· 142 143 143 144 regno >>= 2; 144 145 switch (regno) { 145 - case ORIG_EAX: 146 - return child->thread.regs.regs.syscall; 147 146 case FS: 148 147 case GS: 149 148 case DS: ··· 160 163 case EDI: 161 164 case EBP: 162 165 case EFL: 166 + case ORIG_EAX: 163 167 break; 164 168 default: 165 169 panic("Bad register in getreg() : %d\n", regno);
+1 -3
fs/hostfs/hostfs_kern.c
··· 730 730 731 731 init_special_inode(inode, mode, dev); 732 732 err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); 733 - if (!err) 733 + if (err) 734 734 goto out_free; 735 735 736 736 err = read_name(inode, name); 737 737 __putname(name); 738 - if (err) 739 - goto out_put; 740 738 if (err) 741 739 goto out_put; 742 740
+24 -3
tools/testing/selftests/seccomp/seccomp_bpf.c
··· 1246 1246 # error "Do not know how to find your architecture's registers and syscalls" 1247 1247 #endif 1248 1248 1249 + /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for 1250 + * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). 1251 + */ 1252 + #if defined(__x86_64__) || defined(__i386__) 1253 + #define HAVE_GETREGS 1254 + #endif 1255 + 1249 1256 /* Architecture-specific syscall fetching routine. */ 1250 1257 int get_syscall(struct __test_metadata *_metadata, pid_t tracee) 1251 1258 { 1252 - struct iovec iov; 1253 1259 ARCH_REGS regs; 1260 + #ifdef HAVE_GETREGS 1261 + EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) { 1262 + TH_LOG("PTRACE_GETREGS failed"); 1263 + return -1; 1264 + } 1265 + #else 1266 + struct iovec iov; 1254 1267 1255 1268 iov.iov_base = &regs; 1256 1269 iov.iov_len = sizeof(regs); ··· 1271 1258 TH_LOG("PTRACE_GETREGSET failed"); 1272 1259 return -1; 1273 1260 } 1261 + #endif 1274 1262 1275 1263 return regs.SYSCALL_NUM; 1276 1264 } ··· 1280 1266 void change_syscall(struct __test_metadata *_metadata, 1281 1267 pid_t tracee, int syscall) 1282 1268 { 1283 - struct iovec iov; 1284 1269 int ret; 1285 1270 ARCH_REGS regs; 1286 - 1271 + #ifdef HAVE_GETREGS 1272 + ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs); 1273 + #else 1274 + struct iovec iov; 1287 1275 iov.iov_base = &regs; 1288 1276 iov.iov_len = sizeof(regs); 1289 1277 ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); 1278 + #endif 1290 1279 EXPECT_EQ(0, ret); 1291 1280 1292 1281 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ ··· 1329 1312 if (syscall == -1) 1330 1313 regs.SYSCALL_RET = 1; 1331 1314 1315 + #ifdef HAVE_GETREGS 1316 + ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs); 1317 + #else 1332 1318 iov.iov_base = &regs; 1333 1319 iov.iov_len = sizeof(regs); 1334 1320 ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); 1321 + #endif 1335 1322 EXPECT_EQ(0, ret); 1336 1323 } 1337 1324