Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __LINUX_ENTRYCOMMON_H
3#define __LINUX_ENTRYCOMMON_H
4
5#include <linux/irq-entry-common.h>
6#include <linux/livepatch.h>
7#include <linux/ptrace.h>
8#include <linux/resume_user_mode.h>
9#include <linux/seccomp.h>
10#include <linux/sched.h>
11
12#include <asm/entry-common.h>
13#include <asm/syscall.h>
14
/*
 * Fall back to an empty mask when nothing included above defines
 * _TIF_UPROBE, so the name always expands to a valid constant.
 */
#if !defined(_TIF_UPROBE)
# define _TIF_UPROBE			(0)
#endif
18
/*
 * Architecture supplied SYSCALL_WORK flags which are handled in
 * syscall_enter_from_user_mode(). They are folded into SYSCALL_WORK_ENTER.
 * Defaults to no extra flags when the architecture does not define them.
 */
#if !defined(ARCH_SYSCALL_WORK_ENTER)
# define ARCH_SYSCALL_WORK_ENTER	(0)
#endif
25
/*
 * Architecture supplied SYSCALL_WORK flags which are handled in
 * syscall_exit_to_user_mode(). They are folded into SYSCALL_WORK_EXIT.
 * Defaults to no extra flags when the architecture does not define them.
 */
#if !defined(ARCH_SYSCALL_WORK_EXIT)
# define ARCH_SYSCALL_WORK_EXIT		(0)
#endif
32
/*
 * All syscall_work bits which are evaluated on syscall entry. When any of
 * them is set, syscall_enter_from_user_mode_work() hands the syscall over
 * to syscall_trace_enter().
 */
#define SYSCALL_WORK_ENTER \
	(ARCH_SYSCALL_WORK_ENTER | \
	 SYSCALL_WORK_SECCOMP | \
	 SYSCALL_WORK_SYSCALL_TRACEPOINT | \
	 SYSCALL_WORK_SYSCALL_TRACE | \
	 SYSCALL_WORK_SYSCALL_EMU | \
	 SYSCALL_WORK_SYSCALL_AUDIT | \
	 SYSCALL_WORK_SYSCALL_USER_DISPATCH)
40
/*
 * All syscall_work bits which are evaluated on syscall exit. When any of
 * them is set, syscall_exit_to_user_mode_work() invokes syscall_exit_work().
 */
#define SYSCALL_WORK_EXIT \
	(ARCH_SYSCALL_WORK_EXIT | \
	 SYSCALL_WORK_SYSCALL_TRACEPOINT | \
	 SYSCALL_WORK_SYSCALL_TRACE | \
	 SYSCALL_WORK_SYSCALL_AUDIT | \
	 SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
	 SYSCALL_WORK_SYSCALL_EXIT_TRAP)
47
/*
 * Out of line handler for pending syscall entry work. It is called by
 * syscall_enter_from_user_mode_work() with the sampled syscall_work value
 * when at least one SYSCALL_WORK_ENTER bit is set. The value it returns is
 * used as the syscall number from there on.
 */
long syscall_trace_enter(struct pt_regs *regs, long syscall,
			 unsigned long work);
49
50/**
51 * syscall_enter_from_user_mode_work - Check and handle work before invoking
52 * a syscall
53 * @regs: Pointer to currents pt_regs
54 * @syscall: The syscall number
55 *
56 * Invoked from architecture specific syscall entry code with interrupts
57 * enabled after invoking enter_from_user_mode(), enabling interrupts and
58 * extra architecture specific work.
59 *
60 * Returns: The original or a modified syscall number
61 *
62 * If the returned syscall number is -1 then the syscall should be
63 * skipped. In this case the caller may invoke syscall_set_error() or
64 * syscall_set_return_value() first. If neither of those are called and -1
65 * is returned, then the syscall will fail with ENOSYS.
66 *
67 * It handles the following work items:
68 *
69 * 1) syscall_work flag dependent invocations of
70 * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
71 * 2) Invocation of audit_syscall_entry()
72 */
73static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
74{
75 unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
76
77 if (work & SYSCALL_WORK_ENTER)
78 syscall = syscall_trace_enter(regs, syscall, work);
79
80 return syscall;
81}
82
83/**
84 * syscall_enter_from_user_mode - Establish state and check and handle work
85 * before invoking a syscall
86 * @regs: Pointer to currents pt_regs
87 * @syscall: The syscall number
88 *
89 * Invoked from architecture specific syscall entry code with interrupts
90 * disabled. The calling code has to be non-instrumentable. When the
91 * function returns all state is correct, interrupts are enabled and the
92 * subsequent functions can be instrumented.
93 *
94 * This is the combination of enter_from_user_mode() and
95 * syscall_enter_from_user_mode_work() to be used when there is no
96 * architecture specific work to be done between the two.
97 *
98 * Returns: The original or a modified syscall number. See
99 * syscall_enter_from_user_mode_work() for further explanation.
100 */
101static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
102{
103 long ret;
104
105 enter_from_user_mode(regs);
106
107 instrumentation_begin();
108 local_irq_enable();
109 ret = syscall_enter_from_user_mode_work(regs, syscall);
110 instrumentation_end();
111
112 return ret;
113}
114
/**
 * syscall_exit_work - Run the one-time syscall exit work
 * @regs:	Pointer to current's pt_regs
 * @work:	syscall_work value of the current thread
 *
 * Performs the syscall specific work which has to happen once per syscall
 * exit. syscall_exit_to_user_mode_work() calls it only when at least one
 * SYSCALL_WORK_EXIT bit is set in @work.
 */
void syscall_exit_work(struct pt_regs *regs, unsigned long work);
123
124/**
125 * syscall_exit_to_user_mode_work - Handle work before returning to user mode
126 * @regs: Pointer to currents pt_regs
127 *
128 * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
129 * exit_to_user_mode() to perform the final transition to user mode.
130 *
131 * Calling convention is the same as for syscall_exit_to_user_mode() and it
132 * returns with all work handled and interrupts disabled. The caller must
133 * invoke exit_to_user_mode() before actually switching to user mode to
134 * make the final state transitions. Interrupts must stay disabled between
135 * return from this function and the invocation of exit_to_user_mode().
136 */
137static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
138{
139 unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
140 unsigned long nr = syscall_get_nr(current, regs);
141
142 CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
143
144 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
145 if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
146 local_irq_enable();
147 }
148
149 rseq_debug_syscall_return(regs);
150
151 /*
152 * Do one-time syscall specific work. If these work items are
153 * enabled, we want to run them exactly once per syscall exit with
154 * interrupts enabled.
155 */
156 if (unlikely(work & SYSCALL_WORK_EXIT))
157 syscall_exit_work(regs, work);
158 local_irq_disable_exit_to_user();
159 syscall_exit_to_user_mode_prepare(regs);
160}
161
162/**
163 * syscall_exit_to_user_mode - Handle work before returning to user mode
164 * @regs: Pointer to currents pt_regs
165 *
166 * Invoked with interrupts enabled and fully valid regs. Returns with all
167 * work handled, interrupts disabled such that the caller can immediately
168 * switch to user mode. Called from architecture specific syscall and ret
169 * from fork code.
170 *
171 * The call order is:
172 * 1) One-time syscall exit work:
173 * - rseq syscall exit
174 * - audit
175 * - syscall tracing
176 * - ptrace (single stepping)
177 *
178 * 2) Preparatory work
179 * - Exit to user mode loop (common TIF handling). Invokes
180 * arch_exit_to_user_mode_work() for architecture specific TIF work
181 * - Architecture specific one time work arch_exit_to_user_mode_prepare()
182 * - Address limit and lockdep checks
183 *
184 * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
185 * functionality in exit_to_user_mode().
186 *
187 * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
188 * exit_to_user_mode(). This function is preferred unless there is a
189 * compelling architectural reason to use the separate functions.
190 */
191static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
192{
193 instrumentation_begin();
194 syscall_exit_to_user_mode_work(regs);
195 instrumentation_end();
196 exit_to_user_mode();
197}
198
199#endif