Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'core/entry' into sched/core

Pull the entry update to avoid merge conflicts with the time slice
extension changes.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>

+161 -125
+148 -17
include/linux/entry-common.h
··· 2 2 #ifndef __LINUX_ENTRYCOMMON_H 3 3 #define __LINUX_ENTRYCOMMON_H 4 4 5 + #include <linux/audit.h> 5 6 #include <linux/irq-entry-common.h> 6 7 #include <linux/livepatch.h> 7 8 #include <linux/ptrace.h> ··· 46 45 SYSCALL_WORK_SYSCALL_EXIT_TRAP | \ 47 46 ARCH_SYSCALL_WORK_EXIT) 48 47 49 - long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work); 48 + /** 49 + * arch_ptrace_report_syscall_entry - Architecture specific ptrace_report_syscall_entry() wrapper 50 + * 51 + * Invoked from syscall_trace_enter() to wrap ptrace_report_syscall_entry(). 52 + * 53 + * This allows architecture specific ptrace_report_syscall_entry() 54 + * implementations. If not defined by the architecture this falls back 55 + * to ptrace_report_syscall_entry(). 56 + */ 57 + static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs); 58 + 59 + #ifndef arch_ptrace_report_syscall_entry 60 + static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs) 61 + { 62 + return ptrace_report_syscall_entry(regs); 63 + } 64 + #endif 65 + 66 + bool syscall_user_dispatch(struct pt_regs *regs); 67 + long trace_syscall_enter(struct pt_regs *regs, long syscall); 68 + void trace_syscall_exit(struct pt_regs *regs, long ret); 69 + 70 + static inline void syscall_enter_audit(struct pt_regs *regs, long syscall) 71 + { 72 + if (unlikely(audit_context())) { 73 + unsigned long args[6]; 74 + 75 + syscall_get_arguments(current, regs, args); 76 + audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]); 77 + } 78 + } 79 + 80 + static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work) 81 + { 82 + long syscall, ret = 0; 83 + 84 + /* 85 + * Handle Syscall User Dispatch. This must come first, since 86 + * the ABI here can be something that doesn't make sense for 87 + * other syscall_work features. 
88 + */ 89 + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { 90 + if (syscall_user_dispatch(regs)) 91 + return -1L; 92 + } 93 + 94 + /* 95 + * User space got a time slice extension granted and relinquishes 96 + * the CPU. The work stops the slice timer to avoid an extra round 97 + * through hrtimer_interrupt(). 98 + */ 99 + if (work & SYSCALL_WORK_SYSCALL_RSEQ_SLICE) 100 + rseq_syscall_enter_work(syscall_get_nr(current, regs)); 101 + 102 + /* Handle ptrace */ 103 + if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { 104 + ret = arch_ptrace_report_syscall_entry(regs); 105 + if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) 106 + return -1L; 107 + } 108 + 109 + /* Do seccomp after ptrace, to catch any tracer changes. */ 110 + if (work & SYSCALL_WORK_SECCOMP) { 111 + ret = __secure_computing(); 112 + if (ret == -1L) 113 + return ret; 114 + } 115 + 116 + /* Either of the above might have changed the syscall number */ 117 + syscall = syscall_get_nr(current, regs); 118 + 119 + if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) 120 + syscall = trace_syscall_enter(regs, syscall); 121 + 122 + syscall_enter_audit(regs, syscall); 123 + 124 + return ret ? : syscall; 125 + } 50 126 51 127 /** 52 128 * syscall_enter_from_user_mode_work - Check and handle work before invoking ··· 153 75 unsigned long work = READ_ONCE(current_thread_info()->syscall_work); 154 76 155 77 if (work & SYSCALL_WORK_ENTER) 156 - syscall = syscall_trace_enter(regs, syscall, work); 78 + syscall = syscall_trace_enter(regs, work); 157 79 158 80 return syscall; 159 81 } ··· 190 112 return ret; 191 113 } 192 114 115 + /* 116 + * If SYSCALL_EMU is set, then the only reason to report is when 117 + * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall 118 + * instruction has been already reported in syscall_enter_from_user_mode(). 
119 + */ 120 + static __always_inline bool report_single_step(unsigned long work) 121 + { 122 + if (work & SYSCALL_WORK_SYSCALL_EMU) 123 + return false; 124 + 125 + return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; 126 + } 127 + 128 + /** 129 + * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit() 130 + * 131 + * This allows architecture specific ptrace_report_syscall_exit() 132 + * implementations. If not defined by the architecture this falls back 133 + * to ptrace_report_syscall_exit(). 134 + */ 135 + static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs, 136 + int step); 137 + 138 + #ifndef arch_ptrace_report_syscall_exit 139 + static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs, 140 + int step) 141 + { 142 + ptrace_report_syscall_exit(regs, step); 143 + } 144 + #endif 145 + 193 146 /** 194 147 * syscall_exit_work - Handle work before returning to user mode 195 148 * @regs: Pointer to current pt_regs ···
131 + */ 132 + if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { 133 + if (unlikely(current->syscall_dispatch.on_dispatch)) { 134 + current->syscall_dispatch.on_dispatch = false; 135 + return; 136 + } 137 + } 138 + 139 + audit_syscall_exit(regs); 140 + 141 + if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) 142 + trace_syscall_exit(regs, syscall_get_return_value(current, regs)); 143 + 144 + step = report_single_step(work); 145 + if (step || work & SYSCALL_WORK_SYSCALL_TRACE) 146 + arch_ptrace_report_syscall_exit(regs, step); 147 + } 232 148 233 149 /** 234 - * syscall_exit_to_user_mode_work - Handle work before returning to user mode 150 + * syscall_exit_to_user_mode_work - Handle one time work before returning to user mode 235 151 * @regs: Pointer to currents pt_regs 236 152 * 237 - * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling 238 - * exit_to_user_mode() to perform the final transition to user mode. 153 + * Step 1 of syscall_exit_to_user_mode() with the same calling convention. 239 154 * 240 - * Calling convention is the same as for syscall_exit_to_user_mode() and it 241 - * returns with all work handled and interrupts disabled. The caller must 242 - * invoke exit_to_user_mode() before actually switching to user mode to 243 - * make the final state transitions. Interrupts must stay disabled between 244 - * return from this function and the invocation of exit_to_user_mode(). 155 + * The caller must invoke steps 2-3 of syscall_exit_to_user_mode() afterwards. 
245 156 */ 246 157 static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) 247 158 { ··· 284 155 */ 285 156 if (unlikely(work & SYSCALL_WORK_EXIT)) 286 157 syscall_exit_work(regs, work); 287 - local_irq_disable_exit_to_user(); 288 - syscall_exit_to_user_mode_prepare(regs); 289 158 } 290 159 291 160 /** 292 161 * syscall_exit_to_user_mode - Handle work before returning to user mode 293 162 * @regs: Pointer to currents pt_regs 294 163 * 295 - * Invoked with interrupts enabled and fully valid regs. Returns with all 164 + * Invoked with interrupts enabled and fully valid @regs. Returns with all 296 165 * work handled, interrupts disabled such that the caller can immediately 297 166 * switch to user mode. Called from architecture specific syscall and ret 298 167 * from fork code. ··· 303 176 * - ptrace (single stepping) 304 177 * 305 178 * 2) Preparatory work 179 + * - Disable interrupts 306 180 * - Exit to user mode loop (common TIF handling). Invokes 307 181 * arch_exit_to_user_mode_work() for architecture specific TIF work 308 182 * - Architecture specific one time work arch_exit_to_user_mode_prepare() ··· 312 184 * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the 313 185 * functionality in exit_to_user_mode(). 314 186 * 315 - * This is a combination of syscall_exit_to_user_mode_work() (1,2) and 316 - * exit_to_user_mode(). This function is preferred unless there is a 317 - * compelling architectural reason to use the separate functions. 187 + * This is a combination of syscall_exit_to_user_mode_work() (1), disabling 188 + * interrupts followed by syscall_exit_to_user_mode_prepare() (2) and 189 + * exit_to_user_mode() (3). This function is preferred unless there is a 190 + * compelling architectural reason to invoke the functions separately. 
318 191 */ 319 192 static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) 320 193 { 321 194 instrumentation_begin(); 322 195 syscall_exit_to_user_mode_work(regs); 196 + local_irq_disable_exit_to_user(); 197 + syscall_exit_to_user_mode_prepare(regs); 323 198 instrumentation_end(); 324 199 exit_to_user_mode(); 325 200 }
-7
kernel/entry/common.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _COMMON_H 3 - #define _COMMON_H 4 - 5 - bool syscall_user_dispatch(struct pt_regs *regs); 6 - 7 - #endif
+11 -99
kernel/entry/syscall-common.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - #include <linux/audit.h> 4 3 #include <linux/entry-common.h> 5 - #include "common.h" 6 4 7 5 #define CREATE_TRACE_POINTS 8 6 #include <trace/events/syscalls.h> 9 7 10 - static inline void syscall_enter_audit(struct pt_regs *regs, long syscall) 11 - { 12 - if (unlikely(audit_context())) { 13 - unsigned long args[6]; 8 + /* Out of line to prevent tracepoint code duplication */ 14 9 15 - syscall_get_arguments(current, regs, args); 16 - audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]); 17 - } 10 + long trace_syscall_enter(struct pt_regs *regs, long syscall) 11 + { 12 + trace_sys_enter(regs, syscall); 13 + /* 14 + * Probes or BPF hooks in the tracepoint may have changed the 15 + * system call number. Reread it. 16 + */ 17 + return syscall_get_nr(current, regs); 18 18 } 19 19 20 - long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work) 20 + void trace_syscall_exit(struct pt_regs *regs, long ret) 21 21 { 22 - long ret = 0; 23 - 24 - /* 25 - * Handle Syscall User Dispatch. This must comes first, since 26 - * the ABI here can be something that doesn't make sense for 27 - * other syscall_work features. 28 - */ 29 - if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { 30 - if (syscall_user_dispatch(regs)) 31 - return -1L; 32 - } 33 - 34 - /* 35 - * User space got a time slice extension granted and relinquishes 36 - * the CPU. The work stops the slice timer to avoid an extra round 37 - * through hrtimer_interrupt(). 38 - */ 39 - if (work & SYSCALL_WORK_SYSCALL_RSEQ_SLICE) 40 - rseq_syscall_enter_work(syscall); 41 - 42 - /* Handle ptrace */ 43 - if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { 44 - ret = ptrace_report_syscall_entry(regs); 45 - if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) 46 - return -1L; 47 - } 48 - 49 - /* Do seccomp after ptrace, to catch any tracer changes. 
*/ 50 - if (work & SYSCALL_WORK_SECCOMP) { 51 - ret = __secure_computing(); 52 - if (ret == -1L) 53 - return ret; 54 - } 55 - 56 - /* Either of the above might have changed the syscall number */ 57 - syscall = syscall_get_nr(current, regs); 58 - 59 - if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) { 60 - trace_sys_enter(regs, syscall); 61 - /* 62 - * Probes or BPF hooks in the tracepoint may have changed the 63 - * system call number as well. 64 - */ 65 - syscall = syscall_get_nr(current, regs); 66 - } 67 - 68 - syscall_enter_audit(regs, syscall); 69 - 70 - return ret ? : syscall; 71 - } 72 - 73 - /* 74 - * If SYSCALL_EMU is set, then the only reason to report is when 75 - * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall 76 - * instruction has been already reported in syscall_enter_from_user_mode(). 77 - */ 78 - static inline bool report_single_step(unsigned long work) 79 - { 80 - if (work & SYSCALL_WORK_SYSCALL_EMU) 81 - return false; 82 - 83 - return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; 84 - } 85 - 86 - void syscall_exit_work(struct pt_regs *regs, unsigned long work) 87 - { 88 - bool step; 89 - 90 - /* 91 - * If the syscall was rolled back due to syscall user dispatching, 92 - * then the tracers below are not invoked for the same reason as 93 - * the entry side was not invoked in syscall_trace_enter(): The ABI 94 - * of these syscalls is unknown. 95 - */ 96 - if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { 97 - if (unlikely(current->syscall_dispatch.on_dispatch)) { 98 - current->syscall_dispatch.on_dispatch = false; 99 - return; 100 - } 101 - } 102 - 103 - audit_syscall_exit(regs); 104 - 105 - if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT) 106 - trace_sys_exit(regs, syscall_get_return_value(current, regs)); 107 - 108 - step = report_single_step(work); 109 - if (step || work & SYSCALL_WORK_SYSCALL_TRACE) 110 - ptrace_report_syscall_exit(regs, step); 22 + trace_sys_exit(regs, ret); 111 23 }
+2 -2
kernel/entry/syscall_user_dispatch.c
··· 2 2 /* 3 3 * Copyright (C) 2020 Collabora Ltd. 4 4 */ 5 + 6 + #include <linux/entry-common.h> 5 7 #include <linux/sched.h> 6 8 #include <linux/prctl.h> 7 9 #include <linux/ptrace.h> ··· 16 14 #include <linux/sched/task_stack.h> 17 15 18 16 #include <asm/syscall.h> 19 - 20 - #include "common.h" 21 17 22 18 static void trigger_sigsys(struct pt_regs *regs) 23 19 {