Merge branch 'x86-syscall-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull syscall updates from Ingo Molnar:
"Improve the security of set_fs(): we now check the address limit on a
number of key platforms (x86, arm, arm64) before returning to
user-space - without adding overhead to the typical system call fast
path"

* 'x86-syscall-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
arm64/syscalls: Check address limit on user-mode return
arm/syscalls: Check address limit on user-mode return
x86/syscalls: Check address limit on user-mode return

+63 -11
+9 -6
arch/arm/include/asm/thread_info.h
···
139 #define TIF_NEED_RESCHED 1 /* rescheduling necessary */
140 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
141 #define TIF_UPROBE 3 /* breakpointed or singlestepping */
142 - #define TIF_SYSCALL_TRACE 4 /* syscall trace active */
143 - #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
144 - #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
145 - #define TIF_SECCOMP 7 /* seccomp syscall filtering active */
146
147 #define TIF_NOHZ 12 /* in adaptive nohz mode */
148 #define TIF_USING_IWMMXT 17
···
154 #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
155 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
156 #define _TIF_UPROBE (1 << TIF_UPROBE)
157 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
158 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
159 #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
···
168 /*
169 * Change these and you break ASM code in entry-common.S
170 */
171 - #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
172 - _TIF_NOTIFY_RESUME | _TIF_UPROBE)
173
174 #endif /* __KERNEL__ */
175 #endif /* __ASM_ARM_THREAD_INFO_H */
···
139 #define TIF_NEED_RESCHED 1 /* rescheduling necessary */
140 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
141 #define TIF_UPROBE 3 /* breakpointed or singlestepping */
142 + #define TIF_FSCHECK 4 /* Check FS is USER_DS on return */
143 + #define TIF_SYSCALL_TRACE 5 /* syscall trace active */
144 + #define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */
145 + #define TIF_SYSCALL_TRACEPOINT 7 /* syscall tracepoint instrumentation */
146 + #define TIF_SECCOMP 8 /* seccomp syscall filtering active */
147
148 #define TIF_NOHZ 12 /* in adaptive nohz mode */
149 #define TIF_USING_IWMMXT 17
···
153 #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
154 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
155 #define _TIF_UPROBE (1 << TIF_UPROBE)
156 + #define _TIF_FSCHECK (1 << TIF_FSCHECK)
157 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
158 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
159 #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
···
166 /*
167 * Change these and you break ASM code in entry-common.S
168 */
169 + #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
170 + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
171 + _TIF_FSCHECK)
172
173 #endif /* __KERNEL__ */
174 #endif /* __ASM_ARM_THREAD_INFO_H */
+2
arch/arm/include/asm/uaccess.h
···
70 {
71 current_thread_info()->addr_limit = fs;
72 modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
73 }
74
75 #define segment_eq(a, b) ((a) == (b))
···
70 {
71 current_thread_info()->addr_limit = fs;
72 modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
73 + /* On user-mode return, check fs is correct */
74 + set_thread_flag(TIF_FSCHECK);
75 }
76
77 #define segment_eq(a, b) ((a) == (b))
+7 -2
arch/arm/kernel/entry-common.S
···
41 UNWIND(.cantunwind )
42 disable_irq_notrace @ disable interrupts
43 ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
44 - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
45 bne fast_work_pending
46
47 /* perform architecture specific actions before user return */
···
69 str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
70 disable_irq_notrace @ disable interrupts
71 ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
72 - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
73 beq no_work_pending
74 UNWIND(.fnend )
75 ENDPROC(ret_fast_syscall)
76
77 /* Slower path - fall through to work_pending */
78 #endif
79
80 tst r1, #_TIF_SYSCALL_WORK
···
41 UNWIND(.cantunwind )
42 disable_irq_notrace @ disable interrupts
43 ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
44 + tst r1, #_TIF_SYSCALL_WORK
45 + bne fast_work_pending
46 + tst r1, #_TIF_WORK_MASK
47 bne fast_work_pending
48
49 /* perform architecture specific actions before user return */
···
67 str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
68 disable_irq_notrace @ disable interrupts
69 ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
70 + tst r1, #_TIF_SYSCALL_WORK
71 + bne fast_work_pending
72 + tst r1, #_TIF_WORK_MASK
73 beq no_work_pending
74 UNWIND(.fnend )
75 ENDPROC(ret_fast_syscall)
76
77 /* Slower path - fall through to work_pending */
78 + fast_work_pending:
79 #endif
80
81 tst r1, #_TIF_SYSCALL_WORK
+5
arch/arm/kernel/signal.c
···
14 #include <linux/uaccess.h>
15 #include <linux/tracehook.h>
16 #include <linux/uprobes.h>
17
18 #include <asm/elf.h>
19 #include <asm/cacheflush.h>
···
614 * Update the trace code with the current status.
615 */
616 trace_hardirqs_off();
617 do {
618 if (likely(thread_flags & _TIF_NEED_RESCHED)) {
619 schedule();
···
14 #include <linux/uaccess.h>
15 #include <linux/tracehook.h>
16 #include <linux/uprobes.h>
17 + #include <linux/syscalls.h>
18
19 #include <asm/elf.h>
20 #include <asm/cacheflush.h>
···
613 * Update the trace code with the current status.
614 */
615 trace_hardirqs_off();
616 +
617 + /* Check valid user FS if needed */
618 + addr_limit_user_check();
619 +
620 do {
621 if (likely(thread_flags & _TIF_NEED_RESCHED)) {
622 schedule();
+3 -1
arch/arm64/include/asm/thread_info.h
···
86 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
87 #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
88 #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
89 #define TIF_NOHZ 7
90 #define TIF_SYSCALL_TRACE 8
91 #define TIF_SYSCALL_AUDIT 9
···
108 #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
109 #define _TIF_SECCOMP (1 << TIF_SECCOMP)
110 #define _TIF_UPROBE (1 << TIF_UPROBE)
111 #define _TIF_32BIT (1 << TIF_32BIT)
112
113 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
114 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
115 - _TIF_UPROBE)
116
117 #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
118 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
···
86 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
87 #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
88 #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
89 + #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */
90 #define TIF_NOHZ 7
91 #define TIF_SYSCALL_TRACE 8
92 #define TIF_SYSCALL_AUDIT 9
···
107 #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
108 #define _TIF_SECCOMP (1 << TIF_SECCOMP)
109 #define _TIF_UPROBE (1 << TIF_UPROBE)
110 + #define _TIF_FSCHECK (1 << TIF_FSCHECK)
111 #define _TIF_32BIT (1 << TIF_32BIT)
112
113 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
114 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
115 + _TIF_UPROBE | _TIF_FSCHECK)
116
117 #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
118 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
+3
arch/arm64/include/asm/uaccess.h
···
45 {
46 current_thread_info()->addr_limit = fs;
47
48 /*
49 * Enable/disable UAO so that copy_to_user() etc can access
50 * kernel memory with the unprivileged instructions.
···
45 {
46 current_thread_info()->addr_limit = fs;
47
48 + /* On user-mode return, check fs is correct */
49 + set_thread_flag(TIF_FSCHECK);
50 +
51 /*
52 * Enable/disable UAO so that copy_to_user() etc can access
53 * kernel memory with the unprivileged instructions.
+5
arch/arm64/kernel/signal.c
···
29 #include <linux/string.h>
30 #include <linux/tracehook.h>
31 #include <linux/ratelimit.h>
32
33 #include <asm/debug-monitors.h>
34 #include <asm/elf.h>
···
750 * Update the trace code with the current status.
751 */
752 trace_hardirqs_off();
753 do {
754 if (thread_flags & _TIF_NEED_RESCHED) {
755 schedule();
···
29 #include <linux/string.h>
30 #include <linux/tracehook.h>
31 #include <linux/ratelimit.h>
32 + #include <linux/syscalls.h>
33
34 #include <asm/debug-monitors.h>
35 #include <asm/elf.h>
···
749 * Update the trace code with the current status.
750 */
751 trace_hardirqs_off();
752 +
753 + /* Check valid user FS if needed */
754 + addr_limit_user_check();
755 +
756 do {
757 if (thread_flags & _TIF_NEED_RESCHED) {
758 schedule();
+3
arch/x86/entry/common.c
···
23 #include <linux/user-return-notifier.h>
24 #include <linux/uprobes.h>
25 #include <linux/livepatch.h>
26
27 #include <asm/desc.h>
28 #include <asm/traps.h>
···
183 {
184 struct thread_info *ti = current_thread_info();
185 u32 cached_flags;
186
187 if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
188 local_irq_disable();
···
23 #include <linux/user-return-notifier.h>
24 #include <linux/uprobes.h>
25 #include <linux/livepatch.h>
26 + #include <linux/syscalls.h>
27
28 #include <asm/desc.h>
29 #include <asm/traps.h>
···
182 {
183 struct thread_info *ti = current_thread_info();
184 u32 cached_flags;
185 +
186 + addr_limit_user_check();
187
188 if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
189 local_irq_disable();
+4 -1
arch/x86/include/asm/thread_info.h
···
98 #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
99 #define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
100 #define TIF_X32 30 /* 32-bit native x86-64 binary */
101
102 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
103 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
···
123 #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
124 #define _TIF_ADDR32 (1 << TIF_ADDR32)
125 #define _TIF_X32 (1 << TIF_X32)
126
127 /*
128 * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
···
139 (_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
140 _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU | \
141 _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | \
142 - _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT)
143
144 /* flags to check in __switch_to() */
145 #define _TIF_WORK_CTXSW \
···
98 #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
99 #define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
100 #define TIF_X32 30 /* 32-bit native x86-64 binary */
101 + #define TIF_FSCHECK 31 /* Check FS is USER_DS on return */
102
103 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
104 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
···
122 #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
123 #define _TIF_ADDR32 (1 << TIF_ADDR32)
124 #define _TIF_X32 (1 << TIF_X32)
125 + #define _TIF_FSCHECK (1 << TIF_FSCHECK)
126
127 /*
128 * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
···
137 (_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
138 _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU | \
139 _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | \
140 + _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT | \
141 + _TIF_FSCHECK)
142
143 /* flags to check in __switch_to() */
144 #define _TIF_WORK_CTXSW \
+6 -1
arch/x86/include/asm/uaccess.h
···
26
27 #define get_ds() (KERNEL_DS)
28 #define get_fs() (current->thread.addr_limit)
29 - #define set_fs(x) (current->thread.addr_limit = (x))
30
31 #define segment_eq(a, b) ((a).seg == (b).seg)
32
···
26
27 #define get_ds() (KERNEL_DS)
28 #define get_fs() (current->thread.addr_limit)
29 + static inline void set_fs(mm_segment_t fs)
30 + {
31 + current->thread.addr_limit = fs;
32 + /* On user-mode return, check fs is correct */
33 + set_thread_flag(TIF_FSCHECK);
34 + }
35
36 #define segment_eq(a, b) ((a).seg == (b).seg)
37
+16
include/linux/syscalls.h
···
207 } \
208 static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
209
210 asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
211 qid_t id, void __user *addr);
212 asmlinkage long sys_time(time_t __user *tloc);
···
207 } \
208 static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
209
210 + #ifdef TIF_FSCHECK
211 + /*
212 + * Called before coming back to user-mode. Returning to user-mode with an
213 + * address limit different than USER_DS can allow to overwrite kernel memory.
214 + */
215 + static inline void addr_limit_user_check(void)
216 + {
217 +
218 + if (!test_thread_flag(TIF_FSCHECK))
219 + return;
220 +
221 + BUG_ON(!segment_eq(get_fs(), USER_DS));
222 + clear_thread_flag(TIF_FSCHECK);
223 + }
224 + #endif
225 +
226 asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
227 qid_t id, void __user *addr);
228 asmlinkage long sys_time(time_t __user *tloc);