/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _LINUX_RSEQ_H
#define _LINUX_RSEQ_H

#ifdef CONFIG_RSEQ
#include <linux/sched.h>

#include <uapi/linux/rseq.h>

void __rseq_handle_slowpath(struct pt_regs *regs);

/* Invoked from resume_user_mode_work() */
static inline void rseq_handle_slowpath(struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
		if (current->rseq.event.slowpath)
			__rseq_handle_slowpath(regs);
	} else {
		/* '&' is intentional to spare one conditional branch */
		if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
			__rseq_handle_slowpath(regs);
	}
}
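
/*
 * Illustrative sketch only (not part of this header): the expected caller
 * is resume_user_mode_work() on the exit-to-user path, roughly:
 *
 *	static inline void resume_user_mode_work(struct pt_regs *regs)
 *	{
 *		clear_thread_flag(TIF_NOTIFY_RESUME);
 *		...
 *		rseq_handle_slowpath(regs);
 *	}
 *
 * The exact shape of resume_user_mode_work() is an assumption here; see
 * include/linux/resume_user_mode.h for the real thing.
 */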

void __rseq_signal_deliver(int sig, struct pt_regs *regs);

/*
 * Invoked from signal delivery to fix up the critical section based on the
 * register context before switching to the signal delivery context.
 */
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
		/* '&' is intentional to spare one conditional branch */
		if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
			__rseq_signal_deliver(ksig->sig, regs);
	} else {
		if (current->rseq.event.has_rseq)
			__rseq_signal_deliver(ksig->sig, regs);
	}
}
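
/*
 * Illustrative sketch only: an architecture's signal delivery path would
 * call this right before setting up the signal frame. Hypothetical,
 * loosely modeled on handle_signal() in arch/x86/kernel/signal.c:
 *
 *	static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 *	{
 *		...
 *		rseq_signal_deliver(ksig, regs);
 *		... set up the signal frame and switch to the handler context ...
 *	}
 */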

static inline void rseq_raise_notify_resume(struct task_struct *t)
{
	set_tsk_thread_flag(t, TIF_RSEQ);
}

/* Invoked from context switch to force evaluation on exit to user space */
static __always_inline void rseq_sched_switch_event(struct task_struct *t)
{
	struct rseq_event *ev = &t->rseq.event;

	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
		/*
		 * Avoid a boatload of conditionals by using simple logic
		 * to determine whether NOTIFY_RESUME needs to be raised.
		 *
		 * It's required when the CPU or MM CID has changed or
		 * the entry was from user space.
		 */
		bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;

		if (raise) {
			ev->sched_switch = true;
			rseq_raise_notify_resume(t);
		}
	} else {
		if (ev->has_rseq) {
			ev->sched_switch = true;
			rseq_raise_notify_resume(t);
		}
	}
}
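
/*
 * Illustrative sketch only: the scheduler would invoke this for the
 * outgoing task when switching to a different one. Hypothetical, loosely
 * modeled on prepare_task_switch() in kernel/sched/core.c:
 *
 *	static inline void prepare_task_switch(struct rq *rq,
 *					       struct task_struct *prev,
 *					       struct task_struct *next)
 *	{
 *		...
 *		rseq_sched_switch_event(prev);
 *		...
 *	}
 */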

/*
 * Invoked from __set_task_cpu() when a task migrates, or from
 * mm_cid_schedin() when the CID changes, to enforce an update of the IDs.
 *
 * This does not raise TIF_NOTIFY_RESUME as that happens in
 * rseq_sched_switch_event().
 */
static __always_inline void rseq_sched_set_ids_changed(struct task_struct *t)
{
	t->rseq.event.ids_changed = true;
}
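
/*
 * Illustrative sketch only (hypothetical body): the migration side named
 * above would simply mark the IDs stale while moving the task, roughly:
 *
 *	static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 *	{
 *		...
 *		rseq_sched_set_ids_changed(p);
 *		...
 *	}
 */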

/* Enforce a full update after RSEQ registration and when execve() failed */
static inline void rseq_force_update(void)
{
	if (current->rseq.event.has_rseq) {
		current->rseq.event.ids_changed = true;
		current->rseq.event.sched_switch = true;
		rseq_raise_notify_resume(current);
	}
}
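
/*
 * Illustrative sketch only: sys_rseq() would invoke this after a
 * successful registration so the IDs are written out before the task
 * returns to user space. Hypothetical, details of the syscall body differ:
 *
 *	SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
 *			int, flags, u32, sig)
 *	{
 *		... validate and record the registered area ...
 *		rseq_force_update();
 *		return 0;
 *	}
 */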

/*
 * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
 * which clears TIF_NOTIFY_RESUME on architectures that don't use the
 * generic TIF bits and therefore can't provide a separate TIF_RSEQ flag.
 *
 * Updating user space RSEQ at that point would be pointless, as the update
 * would have to be redone before returning to user space anyway, so
 * __rseq_handle_slowpath() does nothing when invoked with a NULL register
 * state. That leaves the event pending while the TIF bit which would
 * handle it on exit to user space has already been cleared.
 *
 * After returning from guest mode, before exiting to user space,
 * hypervisors must invoke this function to re-raise TIF_NOTIFY_RESUME if
 * necessary.
 */
static inline void rseq_virt_userspace_exit(void)
{
	/*
	 * The generic optimization for deferring RSEQ updates until the next
	 * exit to user space relies on having a dedicated TIF_RSEQ.
	 */
	if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
	    current->rseq.event.sched_switch)
		rseq_raise_notify_resume(current);
}
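
/*
 * Illustrative sketch only (hypothetical call site): a hypervisor's vcpu
 * run path would call this after leaving guest mode and before returning
 * to user space, roughly:
 *
 *	ret = kvm_arch_vcpu_ioctl_run(vcpu);
 *	...
 *	rseq_virt_userspace_exit();
 *	return ret;
 */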

static inline void rseq_reset(struct task_struct *t)
{
	memset(&t->rseq, 0, sizeof(t->rseq));
	t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
}

static inline void rseq_execve(struct task_struct *t)
{
	rseq_reset(t);
}

/*
 * If the parent process has a registered restartable sequences area, the
 * child inherits it. Unregister rseq for a clone with CLONE_VM set.
 *
 * On fork, keep the IDs (CPU, MM CID) of the parent, which avoids a fault
 * on the COW page on exit to user space when the child stays on the same
 * CPU as the parent. That's obviously not guaranteed, but in overcommit
 * scenarios it is more likely and optimizes for the fork/exec case without
 * taking the fault.
 */
static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
{
	if (clone_flags & CLONE_VM)
		rseq_reset(t);
	else
		t->rseq = current->rseq;
}
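
/*
 * Illustrative sketch only: copy_process() in kernel/fork.c is the
 * expected caller, after the child's task_struct has been copied from the
 * parent, roughly:
 *
 *	static struct task_struct *copy_process(...)
 *	{
 *		...
 *		rseq_fork(p, clone_flags);
 *		...
 *	}
 */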

#else /* CONFIG_RSEQ */
static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
static inline void rseq_sched_set_ids_changed(struct task_struct *t) { }
static inline void rseq_force_update(void) { }
static inline void rseq_virt_userspace_exit(void) { }
static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
#endif /* !CONFIG_RSEQ */

#ifdef CONFIG_DEBUG_RSEQ
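/*
 * Debug helper, invoked from the syscall exit path on architectures which
 * support it: terminates the task if a system call was issued from inside
 * an rseq critical section.
 */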
void rseq_syscall(struct pt_regs *regs);
#else /* CONFIG_DEBUG_RSEQ */
static inline void rseq_syscall(struct pt_regs *regs) { }
#endif /* !CONFIG_DEBUG_RSEQ */

#endif /* _LINUX_RSEQ_H */