Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'core-entry-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull core entry code updates from Thomas Gleixner:
"Updates for the generic and architecture entry code:

- Move LoongArch and RISC-V ret_from_fork() implementations to C code
so that syscall_exit_user_mode() can be inlined

- Split the RISC-V ret_from_fork() implementation into return to user
and return to kernel, which gives a measurable performance
improvement

- Inline syscall_exit_user_mode() which benefits all architectures by
avoiding a function call and letting the compiler do better
optimizations"

* tag 'core-entry-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
LoongArch: entry: Fix include order
entry: Inline syscall_exit_to_user_mode()
LoongArch: entry: Migrate ret_from_fork() to C
riscv: entry: Split ret_from_fork() into user and kernel
riscv: entry: Convert ret_from_fork() to C

+120 -80
+8
arch/loongarch/include/asm/asm-prototypes.h
··· 12 12 __int128_t __ashrti3(__int128_t a, int b); 13 13 __int128_t __lshrti3(__int128_t a, int b); 14 14 #endif 15 + 16 + asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev, 17 + struct pt_regs *regs); 18 + 19 + asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev, 20 + struct pt_regs *regs, 21 + int (*fn)(void *), 22 + void *fn_arg);
+10 -12
arch/loongarch/kernel/entry.S
··· 77 77 SYM_CODE_END(handle_syscall) 78 78 _ASM_NOKPROBE(handle_syscall) 79 79 80 - SYM_CODE_START(ret_from_fork) 80 + SYM_CODE_START(ret_from_fork_asm) 81 81 UNWIND_HINT_REGS 82 - bl schedule_tail # a0 = struct task_struct *prev 83 - move a0, sp 84 - bl syscall_exit_to_user_mode 82 + move a1, sp 83 + bl ret_from_fork 85 84 RESTORE_STATIC 86 85 RESTORE_SOME 87 86 RESTORE_SP_AND_RET 88 - SYM_CODE_END(ret_from_fork) 87 + SYM_CODE_END(ret_from_fork_asm) 89 88 90 - SYM_CODE_START(ret_from_kernel_thread) 89 + SYM_CODE_START(ret_from_kernel_thread_asm) 91 90 UNWIND_HINT_REGS 92 - bl schedule_tail # a0 = struct task_struct *prev 93 - move a0, s1 94 - jirl ra, s0, 0 95 - move a0, sp 96 - bl syscall_exit_to_user_mode 91 + move a1, sp 92 + move a2, s0 93 + move a3, s1 94 + bl ret_from_kernel_thread 97 95 RESTORE_STATIC 98 96 RESTORE_SOME 99 97 RESTORE_SP_AND_RET 100 - SYM_CODE_END(ret_from_kernel_thread) 98 + SYM_CODE_END(ret_from_kernel_thread_asm)
+27 -6
arch/loongarch/kernel/process.c
··· 13 13 #include <linux/cpu.h> 14 14 #include <linux/init.h> 15 15 #include <linux/kernel.h> 16 + #include <linux/entry-common.h> 16 17 #include <linux/errno.h> 17 18 #include <linux/sched.h> 18 19 #include <linux/sched/debug.h> ··· 35 34 #include <linux/nmi.h> 36 35 37 36 #include <asm/asm.h> 37 + #include <asm/asm-prototypes.h> 38 38 #include <asm/bootinfo.h> 39 39 #include <asm/cpu.h> 40 40 #include <asm/elf.h> ··· 49 47 #include <asm/pgtable.h> 50 48 #include <asm/processor.h> 51 49 #include <asm/reg.h> 50 + #include <asm/switch_to.h> 52 51 #include <asm/unwind.h> 53 52 #include <asm/vdso.h> 54 53 ··· 66 63 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; 67 64 EXPORT_SYMBOL(boot_option_idle_override); 68 65 69 - asmlinkage void ret_from_fork(void); 70 - asmlinkage void ret_from_kernel_thread(void); 66 + asmlinkage void restore_and_ret(void); 67 + asmlinkage void ret_from_fork_asm(void); 68 + asmlinkage void ret_from_kernel_thread_asm(void); 71 69 72 70 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) 73 71 { ··· 142 138 return 0; 143 139 } 144 140 141 + asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev, 142 + struct pt_regs *regs) 143 + { 144 + schedule_tail(prev); 145 + syscall_exit_to_user_mode(regs); 146 + } 147 + 148 + asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev, 149 + struct pt_regs *regs, 150 + int (*fn)(void *), 151 + void *fn_arg) 152 + { 153 + schedule_tail(prev); 154 + fn(fn_arg); 155 + syscall_exit_to_user_mode(regs); 156 + } 157 + 145 158 /* 146 159 * Copy architecture-specific thread state 147 160 */ ··· 186 165 p->thread.reg03 = childksp; 187 166 p->thread.reg23 = (unsigned long)args->fn; 188 167 p->thread.reg24 = (unsigned long)args->fn_arg; 189 - p->thread.reg01 = (unsigned long)ret_from_kernel_thread; 190 - p->thread.sched_ra = (unsigned long)ret_from_kernel_thread; 168 + p->thread.reg01 = (unsigned long)ret_from_kernel_thread_asm; 169 + p->thread.sched_ra = (unsigned long)ret_from_kernel_thread_asm; 191 170 memset(childregs, 0, sizeof(struct pt_regs)); 192 171 childregs->csr_euen = p->thread.csr_euen; 193 172 childregs->csr_crmd = p->thread.csr_crmd; ··· 203 182 childregs->regs[3] = usp; 204 183 205 184 p->thread.reg03 = (unsigned long) childregs; 206 - p->thread.reg01 = (unsigned long) ret_from_fork; 207 - p->thread.sched_ra = (unsigned long) ret_from_fork; 185 + p->thread.reg01 = (unsigned long) ret_from_fork_asm; 186 + p->thread.sched_ra = (unsigned long) ret_from_fork_asm; 208 187 209 188 /* 210 189 * New tasks lose permission to use the fpu. This accelerates context
+2
arch/riscv/include/asm/asm-prototypes.h
··· 52 52 DECLARE_DO_ERROR_INFO(do_trap_ecall_m); 53 53 DECLARE_DO_ERROR_INFO(do_trap_break); 54 54 55 + asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs); 56 + asmlinkage void ret_from_fork_user(struct pt_regs *regs); 55 57 asmlinkage void handle_bad_stack(struct pt_regs *regs); 56 58 asmlinkage void do_page_fault(struct pt_regs *regs); 57 59 asmlinkage void do_irq(struct pt_regs *regs);
+13 -9
arch/riscv/kernel/entry.S
··· 319 319 ASM_NOKPROBE(handle_kernel_stack_overflow) 320 320 #endif 321 321 322 - SYM_CODE_START(ret_from_fork) 322 + SYM_CODE_START(ret_from_fork_kernel_asm) 323 323 call schedule_tail 324 - beqz s0, 1f /* not from kernel thread */ 325 - /* Call fn(arg) */ 326 - move a0, s1 327 - jalr s0 328 - 1: 329 - move a0, sp /* pt_regs */ 330 - call syscall_exit_to_user_mode 324 + move a0, s1 /* fn_arg */ 325 + move a1, s0 /* fn */ 326 + move a2, sp /* pt_regs */ 327 + call ret_from_fork_kernel 331 328 j ret_from_exception 332 - SYM_CODE_END(ret_from_fork) 329 + SYM_CODE_END(ret_from_fork_kernel_asm) 330 + 331 + SYM_CODE_START(ret_from_fork_user_asm) 332 + call schedule_tail 333 + move a0, sp /* pt_regs */ 334 + call ret_from_fork_user 335 + j ret_from_exception 336 + SYM_CODE_END(ret_from_fork_user_asm) 333 337 334 338 #ifdef CONFIG_IRQ_STACKS 335 339 /*
+18 -3
arch/riscv/kernel/process.c
··· 17 17 #include <linux/ptrace.h> 18 18 #include <linux/uaccess.h> 19 19 #include <linux/personality.h> 20 + #include <linux/entry-common.h> 20 21 22 + #include <asm/asm-prototypes.h> 21 23 #include <asm/unistd.h> 22 24 #include <asm/processor.h> 23 25 #include <asm/csr.h> ··· 38 36 EXPORT_SYMBOL(__stack_chk_guard); 39 37 #endif 40 38 41 - extern asmlinkage void ret_from_fork(void); 39 + extern asmlinkage void ret_from_fork_kernel_asm(void); 40 + extern asmlinkage void ret_from_fork_user_asm(void); 42 41 43 42 void noinstr arch_cpu_idle(void) 44 43 { ··· 209 206 return 0; 210 207 } 211 208 209 + asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs) 210 + { 211 + fn(fn_arg); 212 + 213 + syscall_exit_to_user_mode(regs); 214 + } 215 + 216 + asmlinkage void ret_from_fork_user(struct pt_regs *regs) 217 + { 218 + syscall_exit_to_user_mode(regs); 219 + } 220 + 212 221 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) 213 222 { 214 223 unsigned long clone_flags = args->flags; ··· 243 228 244 229 p->thread.s[0] = (unsigned long)args->fn; 245 230 p->thread.s[1] = (unsigned long)args->fn_arg; 231 + p->thread.ra = (unsigned long)ret_from_fork_kernel_asm; 246 232 } else { 247 233 *childregs = *(current_pt_regs()); 248 234 /* Turn off status.VS */ ··· 253 237 if (clone_flags & CLONE_SETTLS) 254 238 childregs->tp = tls; 255 239 childregs->a0 = 0; /* Return value of fork() */ 256 - p->thread.s[0] = 0; 240 + p->thread.ra = (unsigned long)ret_from_fork_user_asm; 257 241 } 258 242 p->thread.riscv_v_flags = 0; 259 243 if (has_vector() || has_xtheadvector()) 260 244 riscv_v_thread_alloc(p); 261 - p->thread.ra = (unsigned long)ret_from_fork; 262 245 p->thread.sp = (unsigned long)childregs; /* kernel sp */ 263 246 return 0; 264 247 }
+41 -2
include/linux/entry-common.h
··· 14 14 #include <linux/kmsan.h> 15 15 16 16 #include <asm/entry-common.h> 17 + #include <asm/syscall.h> 17 18 18 19 /* 19 20 * Define dummy _TIF work flags if not defined by the architecture or for ··· 368 367 } 369 368 370 369 /** 370 + * syscall_exit_work - Handle work before returning to user mode 371 + * @regs: Pointer to current pt_regs 372 + * @work: Current thread syscall work 373 + * 374 + * Do one-time syscall specific work. 375 + */ 376 + void syscall_exit_work(struct pt_regs *regs, unsigned long work); 377 + 378 + /** 371 379 * syscall_exit_to_user_mode_work - Handle work before returning to user mode 372 380 * @regs: Pointer to currents pt_regs 373 381 * ··· 389 379 * make the final state transitions. Interrupts must stay disabled between 390 380 * return from this function and the invocation of exit_to_user_mode(). 391 381 */ 392 - void syscall_exit_to_user_mode_work(struct pt_regs *regs); 382 + static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) 383 + { 384 + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); 385 + unsigned long nr = syscall_get_nr(current, regs); 386 + 387 + CT_WARN_ON(ct_state() != CT_STATE_KERNEL); 388 + 389 + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 390 + if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) 391 + local_irq_enable(); 392 + } 393 + 394 + rseq_syscall(regs); 395 + 396 + /* 397 + * Do one-time syscall specific work. If these work items are 398 + * enabled, we want to run them exactly once per syscall exit with 399 + * interrupts enabled. 400 + */ 401 + if (unlikely(work & SYSCALL_WORK_EXIT)) 402 + syscall_exit_work(regs, work); 403 + local_irq_disable_exit_to_user(); 404 + exit_to_user_mode_prepare(regs); 405 + } 393 406 394 407 /** 395 408 * syscall_exit_to_user_mode - Handle work before returning to user mode ··· 443 410 * exit_to_user_mode(). This function is preferred unless there is a 444 411 * compelling architectural reason to use the separate functions. 445 412 */ 446 - void syscall_exit_to_user_mode(struct pt_regs *regs); 413 + static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) 414 + { 415 + instrumentation_begin(); 416 + syscall_exit_to_user_mode_work(regs); 417 + instrumentation_end(); 418 + exit_to_user_mode(); 419 + } 447 420 448 421 /** 449 422 * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
+1 -48
kernel/entry/common.c
··· 146 146 return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; 147 147 } 148 148 149 - static void syscall_exit_work(struct pt_regs *regs, unsigned long work) 149 + void syscall_exit_work(struct pt_regs *regs, unsigned long work) 150 150 { 151 151 bool step; 152 152 ··· 171 171 step = report_single_step(work); 172 172 if (step || work & SYSCALL_WORK_SYSCALL_TRACE) 173 173 ptrace_report_syscall_exit(regs, step); 174 - } 175 - 176 - /* 177 - * Syscall specific exit to user mode preparation. Runs with interrupts 178 - * enabled. 179 - */ 180 - static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs) 181 - { 182 - unsigned long work = READ_ONCE(current_thread_info()->syscall_work); 183 - unsigned long nr = syscall_get_nr(current, regs); 184 - 185 - CT_WARN_ON(ct_state() != CT_STATE_KERNEL); 186 - 187 - if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { 188 - if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) 189 - local_irq_enable(); 190 - } 191 - 192 - rseq_syscall(regs); 193 - 194 - /* 195 - * Do one-time syscall specific work. If these work items are 196 - * enabled, we want to run them exactly once per syscall exit with 197 - * interrupts enabled. 198 - */ 199 - if (unlikely(work & SYSCALL_WORK_EXIT)) 200 - syscall_exit_work(regs, work); 201 - } 202 - 203 - static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs) 204 - { 205 - syscall_exit_to_user_mode_prepare(regs); 206 - local_irq_disable_exit_to_user(); 207 - exit_to_user_mode_prepare(regs); 208 - } 209 - 210 - void syscall_exit_to_user_mode_work(struct pt_regs *regs) 211 - { 212 - __syscall_exit_to_user_mode_work(regs); 213 - } 214 - 215 - __visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs) 216 - { 217 - instrumentation_begin(); 218 - __syscall_exit_to_user_mode_work(regs); 219 - instrumentation_end(); 220 - exit_to_user_mode(); 221 174 } 222 175 223 176 noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)