/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _LINUX_RSEQ_H
#define _LINUX_RSEQ_H

#ifdef CONFIG_RSEQ
#include <linux/sched.h>

#include <uapi/linux/rseq.h>

void __rseq_handle_slowpath(struct pt_regs *regs);

/* Invoked from resume_user_mode_work() */
static inline void rseq_handle_slowpath(struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
		if (current->rseq.event.slowpath)
			__rseq_handle_slowpath(regs);
	} else {
		/* '&' is intentional to spare one conditional branch */
		if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
			__rseq_handle_slowpath(regs);
	}
}

void __rseq_signal_deliver(int sig, struct pt_regs *regs);

/*
 * Invoked from signal delivery to fixup based on the register context before
 * switching to the signal delivery context.
 */
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
		/* '&' is intentional to spare one conditional branch */
		if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
			__rseq_signal_deliver(ksig->sig, regs);
	} else {
		if (current->rseq.event.has_rseq)
			__rseq_signal_deliver(ksig->sig, regs);
	}
}

static inline void rseq_raise_notify_resume(struct task_struct *t)
{
	set_tsk_thread_flag(t, TIF_RSEQ);
}

/* Invoked from context switch to force evaluation on exit to user */
static __always_inline void rseq_sched_switch_event(struct task_struct *t)
{
	struct rseq_event *ev = &t->rseq.event;

	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
		/*
		 * Avoid a boat load of conditionals by using simple logic
		 * to determine whether NOTIFY_RESUME needs to be raised.
		 *
		 * It's required when the CPU or MM CID has changed or
		 * the entry was from user space.
		 */
		bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;

		if (raise) {
			ev->sched_switch = true;
			rseq_raise_notify_resume(t);
		}
	} else {
		if (ev->has_rseq) {
			t->rseq.event.sched_switch = true;
			rseq_raise_notify_resume(t);
		}
	}
}

/*
 * Invoked from __set_task_cpu() when a task migrates or from
 * mm_cid_schedin() when the CID changes to enforce an IDs update.
 *
 * This does not raise TIF_NOTIFY_RESUME as that happens in
 * rseq_sched_switch_event().
 */
static __always_inline void rseq_sched_set_ids_changed(struct task_struct *t)
{
	t->rseq.event.ids_changed = true;
}

/* Enforce a full update after RSEQ registration and when execve() failed */
static inline void rseq_force_update(void)
{
	if (current->rseq.event.has_rseq) {
		current->rseq.event.ids_changed = true;
		current->rseq.event.sched_switch = true;
		rseq_raise_notify_resume(current);
	}
}
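
/*
 * Illustrative aside (not part of the header above): the '&' and '|'
 * operators combine already evaluated flags without the short-circuit
 * semantics of '&&' and '||', which may cost one conditional branch per
 * operand. A hypothetical side-by-side comparison of the two forms, using
 * the same flags as rseq_sched_switch_event():
 */
#if 0	/* example only, never compiled */
/* Short-circuit form: potentially one branch per flag */
static inline bool example_needs_notify_branchy(const struct rseq_event *ev)
{
	return (ev->user_irq || ev->ids_changed) && ev->has_rseq;
}

/* Flag-combining form: the flags are reduced to one value tested once */
static inline bool example_needs_notify_combined(const struct rseq_event *ev)
{
	return (ev->user_irq | ev->ids_changed) & ev->has_rseq;
}
#endif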

/*
 * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
 * which clears TIF_NOTIFY_RESUME on architectures that don't use the
 * generic TIF bits and therefore can't provide a separate TIF_RSEQ flag.
 *
 * To avoid updating user space RSEQ in that case just to do it eventually
 * again before returning to user space, __rseq_handle_slowpath() does
 * nothing when invoked with NULL register state.
 *
 * After returning from guest mode, before exiting to userspace, hypervisors
 * must invoke this function to re-raise TIF_NOTIFY_RESUME if necessary.
 */
static inline void rseq_virt_userspace_exit(void)
{
	/*
	 * The generic optimization for deferring RSEQ updates until the next
	 * exit relies on having a dedicated TIF_RSEQ.
	 */
	if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
	    current->rseq.event.sched_switch)
		rseq_raise_notify_resume(current);
}

static inline void rseq_reset(struct task_struct *t)
{
	memset(&t->rseq, 0, sizeof(t->rseq));
	t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
}

static inline void rseq_execve(struct task_struct *t)
{
	rseq_reset(t);
}

/*
 * If parent process has a registered restartable sequences area, the
 * child inherits. Unregister rseq for a clone with CLONE_VM set.
 *
 * On fork, keep the IDs (CPU, MMCID) of the parent, which avoids a fault
 * on the COW page on exit to user space, when the child stays on the same
 * CPU as the parent. That's obviously not guaranteed, but in overcommit
 * scenarios it is more likely and optimizes for the fork/exec case without
 * taking the fault.
 */
static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
{
	if (clone_flags & CLONE_VM)
		rseq_reset(t);
	else
		t->rseq = current->rseq;
}

#else /* CONFIG_RSEQ */
static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
static inline void rseq_sched_set_ids_changed(struct task_struct *t) { }
static inline void rseq_force_update(void) { }
static inline void rseq_virt_userspace_exit(void) { }
static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
#endif /* !CONFIG_RSEQ */

#ifdef CONFIG_DEBUG_RSEQ
void rseq_syscall(struct pt_regs *regs);
#else /* CONFIG_DEBUG_RSEQ */
static inline void rseq_syscall(struct pt_regs *regs) { }
#endif /* !CONFIG_DEBUG_RSEQ */

#endif /* _LINUX_RSEQ_H */
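
For context, the comment above rseq_virt_userspace_exit() spells out a contract for hypervisors. The sketch below is hypothetical (every "example_" identifier is made up and is not a kernel symbol); it only illustrates where that call would sit in a vCPU run loop relative to guest entry and the final exit to user space.

struct example_vcpu;
int example_enter_guest(struct example_vcpu *vcpu);

static int example_vcpu_ioctl_run(struct example_vcpu *vcpu)
{
	int ret;

	do {
		/*
		 * Entering guest mode may run resume_user_mode_work() with
		 * a NULL pt_regs, which clears TIF_NOTIFY_RESUME without
		 * updating the user space RSEQ area.
		 */
		ret = example_enter_guest(vcpu);
	} while (ret > 0);

	/*
	 * Re-raise TIF_NOTIFY_RESUME if an RSEQ update is still pending, so
	 * it is handled on the way out to user space. This is a no-op on
	 * architectures with a dedicated TIF_RSEQ flag.
	 */
	rseq_virt_userspace_exit();
	return ret;
}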