Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _LINUX_PTRACE_H
3#define _LINUX_PTRACE_H
4
5#include <linux/compiler.h> /* For unlikely. */
6#include <linux/sched.h> /* For struct task_struct. */
7#include <linux/sched/signal.h> /* For send_sig(), same_thread_group(), etc. */
8#include <linux/err.h> /* for IS_ERR_VALUE */
9#include <linux/bug.h> /* For BUG_ON. */
10#include <linux/pid_namespace.h> /* For task_active_pid_ns. */
11#include <uapi/linux/ptrace.h>
12#include <linux/seccomp.h>
13
14/* Add sp to seccomp_data, as seccomp is user API, we don't want to modify it */
15struct syscall_info {
16 __u64 sp;
17 struct seccomp_data data;
18};
19
/*
 * Defined outside this header.
 * NOTE(review): judging by the parameters, this transfers @len bytes between
 * @buf and @tsk's address space at @addr, with @gup_flags selecting the kind
 * of access -- confirm against the implementation.
 */
int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
		     void *buf, int len, unsigned int gup_flags);
22
23/*
24 * Ptrace flags
25 *
 * The ownership rules for task->ptrace, which holds the ptrace
 * flags, are simple. When a task is running it owns its task->ptrace
 * flags. When a task is stopped the ptracer owns task->ptrace.
29 */
30
/* Base state flags, listed in numeric order. */
#define PT_PTRACED		0x00000001
#define PT_DTRACE		0x00000002 /* delayed trace (used on m68k, i386) */
#define PT_SEIZED		0x00010000 /* SEIZE used, enable new behavior */

/* Event enables and PTRACE_O_* options are stored shifted up by this much. */
#define PT_OPT_FLAG_SHIFT	3

/* PT_TRACE_* event enable flags: one bit per PTRACE_EVENT_* number. */
#define PT_EVENT_FLAG(event)	(1 << (PT_OPT_FLAG_SHIFT + (event)))
#define PT_TRACESYSGOOD		PT_EVENT_FLAG(0)
#define PT_TRACE_FORK		PT_EVENT_FLAG(PTRACE_EVENT_FORK)
#define PT_TRACE_VFORK		PT_EVENT_FLAG(PTRACE_EVENT_VFORK)
#define PT_TRACE_CLONE		PT_EVENT_FLAG(PTRACE_EVENT_CLONE)
#define PT_TRACE_EXEC		PT_EVENT_FLAG(PTRACE_EVENT_EXEC)
#define PT_TRACE_VFORK_DONE	PT_EVENT_FLAG(PTRACE_EVENT_VFORK_DONE)
#define PT_TRACE_EXIT		PT_EVENT_FLAG(PTRACE_EVENT_EXIT)
#define PT_TRACE_SECCOMP	PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP)

/* PTRACE_O_* option bits, moved into the task->ptrace flag layout. */
#define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
#define PT_SUSPEND_SECCOMP	(PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
49
/*
 * Single-stepping state bits (used on ARM and PA-RISC).
 *
 * The masks are built from an unsigned 1: shifting a signed int 1 into
 * bit 31 is undefined behaviour in ISO C, and in practice produces a
 * negative mask that sign-extends when widened to a longer type.
 */
#define PT_SINGLESTEP_BIT	31
#define PT_SINGLESTEP		(1U << PT_SINGLESTEP_BIT)
#define PT_BLOCKSTEP_BIT	30
#define PT_BLOCKSTEP		(1U << PT_BLOCKSTEP_BIT)
55
56extern long arch_ptrace(struct task_struct *child, long request,
57 unsigned long addr, unsigned long data);
58extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
59extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
60extern void ptrace_disable(struct task_struct *);
61extern int ptrace_request(struct task_struct *child, long request,
62 unsigned long addr, unsigned long data);
63extern int ptrace_notify(int exit_code, unsigned long message);
64extern void __ptrace_link(struct task_struct *child,
65 struct task_struct *new_parent,
66 const struct cred *ptracer_cred);
67extern void __ptrace_unlink(struct task_struct *child);
68extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
/* Individual access-mode bits. */
#define PTRACE_MODE_READ	(1 << 0)
#define PTRACE_MODE_ATTACH	(1 << 1)
#define PTRACE_MODE_NOAUDIT	(1 << 2)
#define PTRACE_MODE_FSCREDS	(1 << 3)
#define PTRACE_MODE_REALCREDS	(1 << 4)

/* Shorthands pairing an access type (READ/ATTACH) with a credential set. */
#define PTRACE_MODE_READ_FSCREDS \
	(PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
#define PTRACE_MODE_READ_REALCREDS \
	(PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
#define PTRACE_MODE_ATTACH_FSCREDS \
	(PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
#define PTRACE_MODE_ATTACH_REALCREDS \
	(PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
80
81/**
82 * ptrace_may_access - check whether the caller is permitted to access
83 * a target task.
84 * @task: target task
85 * @mode: selects type of access and caller credentials
86 *
87 * Returns true on success, false on denial.
88 *
89 * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must
90 * be set in @mode to specify whether the access was requested through
91 * a filesystem syscall (should use effective capabilities and fsuid
92 * of the caller) or through an explicit syscall such as
93 * process_vm_writev or ptrace (and should use the real credentials).
94 */
bool ptrace_may_access(struct task_struct *task, unsigned int mode);
96
97static inline int ptrace_reparented(struct task_struct *child)
98{
99 return !same_thread_group(child->real_parent, child->parent);
100}
101
102static inline void ptrace_unlink(struct task_struct *child)
103{
104 if (unlikely(child->ptrace))
105 __ptrace_unlink(child);
106}
107
/*
 * Defined outside this header.
 * NOTE(review): presumably shared PEEKDATA/POKEDATA helpers for arch code --
 * confirm against the implementation.
 */
int generic_ptrace_peekdata(struct task_struct *tsk,
			    unsigned long addr, unsigned long data);
int generic_ptrace_pokedata(struct task_struct *tsk,
			    unsigned long addr, unsigned long data);
112
113/**
114 * ptrace_parent - return the task that is tracing the given task
115 * @task: task to consider
116 *
117 * Returns %NULL if no one is tracing @task, or the &struct task_struct
118 * pointer to its tracer.
119 *
 * Must be called under rcu_read_lock(). The pointer returned might be kept
121 * live only by RCU. During exec, this may be called with task_lock() held
122 * on @task, still held from when check_unsafe_exec() was called.
123 */
124static inline struct task_struct *ptrace_parent(struct task_struct *task)
125{
126 if (unlikely(task->ptrace))
127 return rcu_dereference(task->parent);
128 return NULL;
129}
130
131/**
132 * ptrace_event_enabled - test whether a ptrace event is enabled
133 * @task: ptracee of interest
134 * @event: %PTRACE_EVENT_* to test
135 *
136 * Test whether @event is enabled for ptracee @task.
137 *
138 * Returns %true if @event is enabled, %false otherwise.
139 */
140static inline bool ptrace_event_enabled(struct task_struct *task, int event)
141{
142 return task->ptrace & PT_EVENT_FLAG(event);
143}
144
145/**
146 * ptrace_event - possibly stop for a ptrace event notification
147 * @event: %PTRACE_EVENT_* value to report
148 * @message: value for %PTRACE_GETEVENTMSG to return
149 *
150 * Check whether @event is enabled and, if so, report @event and @message
151 * to the ptrace parent.
152 *
153 * Called without locks.
154 */
155static inline void ptrace_event(int event, unsigned long message)
156{
157 if (unlikely(ptrace_event_enabled(current, event))) {
158 ptrace_notify((event << 8) | SIGTRAP, message);
159 } else if (event == PTRACE_EVENT_EXEC) {
160 /* legacy EXEC report via SIGTRAP */
161 if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED)
162 send_sig(SIGTRAP, current, 0);
163 }
164}
165
166/**
167 * ptrace_event_pid - possibly stop for a ptrace event notification
168 * @event: %PTRACE_EVENT_* value to report
169 * @pid: process identifier for %PTRACE_GETEVENTMSG to return
170 *
171 * Check whether @event is enabled and, if so, report @event and @pid
172 * to the ptrace parent. @pid is reported as the pid_t seen from the
173 * ptrace parent's pid namespace.
174 *
175 * Called without locks.
176 */
177static inline void ptrace_event_pid(int event, struct pid *pid)
178{
179 /*
180 * FIXME: There's a potential race if a ptracer in a different pid
181 * namespace than parent attaches between computing message below and
182 * when we acquire tasklist_lock in ptrace_stop(). If this happens,
183 * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG.
184 */
185 unsigned long message = 0;
186 struct pid_namespace *ns;
187
188 rcu_read_lock();
189 ns = task_active_pid_ns(rcu_dereference(current->parent));
190 if (ns)
191 message = pid_nr_ns(pid, ns);
192 rcu_read_unlock();
193
194 ptrace_event(event, message);
195}
196
197/**
198 * ptrace_init_task - initialize ptrace state for a new child
199 * @child: new child task
200 * @ptrace: true if child should be ptrace'd by parent's tracer
201 *
202 * This is called immediately after adding @child to its parent's children
203 * list. @ptrace is false in the normal case, and true to ptrace @child.
204 *
205 * Called with current's siglock and write_lock_irq(&tasklist_lock) held.
206 */
207static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
208{
209 INIT_LIST_HEAD(&child->ptrace_entry);
210 INIT_LIST_HEAD(&child->ptraced);
211 child->jobctl = 0;
212 child->ptrace = 0;
213 child->parent = child->real_parent;
214
215 if (unlikely(ptrace) && current->ptrace) {
216 child->ptrace = current->ptrace;
217 __ptrace_link(child, current->parent, current->ptracer_cred);
218
219 if (child->ptrace & PT_SEIZED)
220 task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
221 else
222 sigaddset(&child->pending.signal, SIGSTOP);
223 }
224 else
225 child->ptracer_cred = NULL;
226}
227
228/**
229 * ptrace_release_task - final ptrace-related cleanup of a zombie being reaped
230 * @task: task in %EXIT_DEAD state
231 *
232 * Called with write_lock(&tasklist_lock) held.
233 */
234static inline void ptrace_release_task(struct task_struct *task)
235{
236 BUG_ON(!list_empty(&task->ptraced));
237 ptrace_unlink(task);
238 BUG_ON(!list_empty(&task->ptrace_entry));
239}
240
241#ifndef force_successful_syscall_return
242/*
243 * System call handlers that, upon successful completion, need to return a
244 * negative value should call force_successful_syscall_return() right before
245 * returning. On architectures where the syscall convention provides for a
246 * separate error flag (e.g., alpha, ia64, ppc{,64}, sparc{,64}, possibly
247 * others), this macro can be used to ensure that the error flag will not get
248 * set. On architectures which do not support a separate error flag, the macro
249 * is a no-op and the spurious error condition needs to be filtered out by some
250 * other means (e.g., in user-level, by passing an extra argument to the
251 * syscall handler, or something along those lines).
252 */
253#define force_successful_syscall_return() do { } while (0)
254#endif
255
256#ifndef is_syscall_success
257/*
258 * On most systems we can tell if a syscall is a success based on if the retval
259 * is an error value. On some systems like ia64 and powerpc they have different
260 * indicators of success/failure and must define their own.
261 */
262#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs))))
263#endif
264
265/*
266 * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
267 *
268 * These do-nothing inlines are used when the arch does not
269 * implement single-step. The kerneldoc comments are here
270 * to document the interface for all arch definitions.
271 */
272
273#ifndef arch_has_single_step
274/**
275 * arch_has_single_step - does this CPU support user-mode single-step?
276 *
277 * If this is defined, then there must be function declarations or
278 * inlines for user_enable_single_step() and user_disable_single_step().
279 * arch_has_single_step() should evaluate to nonzero iff the machine
280 * supports instruction single-step for user mode.
281 * It can be a constant or it can test a CPU feature bit.
282 */
283#define arch_has_single_step() (0)
284
285/**
286 * user_enable_single_step - single-step in user-mode task
287 * @task: either current or a task stopped in %TASK_TRACED
288 *
289 * This can only be called when arch_has_single_step() has returned nonzero.
290 * Set @task so that when it returns to user mode, it will trap after the
291 * next single instruction executes. If arch_has_block_step() is defined,
292 * this must clear the effects of user_enable_block_step() too.
293 */
static inline void user_enable_single_step(struct task_struct *task)
{
	/*
	 * Fallback used when arch_has_single_step() is the constant 0 above.
	 * This can never be called, so reaching it is a bug.
	 */
	BUG();
}
298
299/**
300 * user_disable_single_step - cancel user-mode single-step
301 * @task: either current or a task stopped in %TASK_TRACED
302 *
303 * Clear @task of the effects of user_enable_single_step() and
304 * user_enable_block_step(). This can be called whether or not either
305 * of those was ever called on @task, and even if arch_has_single_step()
306 * returned zero.
307 */
static inline void user_disable_single_step(struct task_struct *task)
{
	/* Fallback for arches without single-step: nothing to clear. */
}
311#else
312extern void user_enable_single_step(struct task_struct *);
313extern void user_disable_single_step(struct task_struct *);
314#endif /* arch_has_single_step */
315
316#ifndef arch_has_block_step
317/**
318 * arch_has_block_step - does this CPU support user-mode block-step?
319 *
320 * If this is defined, then there must be a function declaration or inline
321 * for user_enable_block_step(), and arch_has_single_step() must be defined
322 * too. arch_has_block_step() should evaluate to nonzero iff the machine
323 * supports step-until-branch for user mode. It can be a constant or it
324 * can test a CPU feature bit.
325 */
326#define arch_has_block_step() (0)
327
328/**
329 * user_enable_block_step - step until branch in user-mode task
330 * @task: either current or a task stopped in %TASK_TRACED
331 *
332 * This can only be called when arch_has_block_step() has returned nonzero,
333 * and will never be called when single-instruction stepping is being used.
334 * Set @task so that when it returns to user mode, it will trap after the
335 * next branch or trap taken.
336 */
static inline void user_enable_block_step(struct task_struct *task)
{
	/*
	 * Fallback used when arch_has_block_step() is the constant 0 above.
	 * This can never be called, so reaching it is a bug.
	 */
	BUG();
}
341#else
342extern void user_enable_block_step(struct task_struct *);
343#endif /* arch_has_block_step */
344
345#ifdef ARCH_HAS_USER_SINGLE_STEP_REPORT
346extern void user_single_step_report(struct pt_regs *regs);
347#else
348static inline void user_single_step_report(struct pt_regs *regs)
349{
350 kernel_siginfo_t info;
351 clear_siginfo(&info);
352 info.si_signo = SIGTRAP;
353 info.si_errno = 0;
354 info.si_code = SI_USER;
355 info.si_pid = 0;
356 info.si_uid = 0;
357 force_sig_info(&info);
358}
359#endif
360
361#ifndef arch_ptrace_stop_needed
362/**
363 * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called
364 *
365 * This is called with the siglock held, to decide whether or not it's
366 * necessary to release the siglock and call arch_ptrace_stop(). It can be
367 * defined to a constant if arch_ptrace_stop() is never required, or always
368 * is. On machines where this makes sense, it should be defined to a quick
369 * test to optimize out calling arch_ptrace_stop() when it would be
370 * superfluous. For example, if the thread has not been back to user mode
371 * since the last stop, the thread state might indicate that nothing needs
372 * to be done.
373 *
374 * This is guaranteed to be invoked once before a task stops for ptrace and
375 * may include arch-specific operations necessary prior to a ptrace stop.
376 */
377#define arch_ptrace_stop_needed() (0)
378#endif
379
380#ifndef arch_ptrace_stop
381/**
382 * arch_ptrace_stop - Do machine-specific work before stopping for ptrace
383 *
384 * This is called with no locks held when arch_ptrace_stop_needed() has
385 * just returned nonzero. It is allowed to block, e.g. for user memory
386 * access. The arch can have machine-specific work to be done before
387 * ptrace stops. On ia64, register backing store gets written back to user
388 * memory here. Since this can be costly (requires dropping the siglock),
389 * we only do it when the arch requires it for this particular stop, as
390 * indicated by arch_ptrace_stop_needed().
391 */
392#define arch_ptrace_stop() do { } while (0)
393#endif
394
395#ifndef current_pt_regs
396#define current_pt_regs() task_pt_regs(current)
397#endif
398
399/*
400 * unlike current_pt_regs(), this one is equal to task_pt_regs(current)
401 * on *all* architectures; the only reason to have a per-arch definition
402 * is optimisation.
403 */
404#ifndef signal_pt_regs
405#define signal_pt_regs() task_pt_regs(current)
406#endif
407
408#ifndef current_user_stack_pointer
409#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
410#endif
411
/* Both defined outside this header. */
int task_current_syscall(struct task_struct *target,
			 struct syscall_info *info);

void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);
415
416/*
417 * ptrace report for syscall entry and exit looks identical.
418 */
419static inline int ptrace_report_syscall(unsigned long message)
420{
421 int ptrace = current->ptrace;
422 int signr;
423
424 if (!(ptrace & PT_PTRACED))
425 return 0;
426
427 signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0),
428 message);
429
430 /*
431 * this isn't the same as continuing with a signal, but it will do
432 * for normal use. strace only continues with a signal if the
433 * stopping signal is not SIGTRAP. -brl
434 */
435 if (signr)
436 send_sig(signr, current, 1);
437
438 return fatal_signal_pending(current);
439}
440
441/**
442 * ptrace_report_syscall_entry - task is about to attempt a system call
443 * @regs: user register state of current task
444 *
445 * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or
446 * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just
447 * entered the kernel for a system call. Full user register state is
448 * available here. Changing the values in @regs can affect the system
449 * call number and arguments to be tried. It is safe to block here,
450 * preventing the system call from beginning.
451 *
452 * Returns zero normally, or nonzero if the calling arch code should abort
453 * the system call. That must prevent normal entry so no system call is
454 * made. If @task ever returns to user mode after this, its register state
455 * is unspecified, but should be something harmless like an %ENOSYS error
456 * return. It should preserve enough information so that syscall_rollback()
457 * can work (see asm-generic/syscall.h).
458 *
459 * Called without locks, just after entering kernel mode.
460 */
461static inline __must_check int ptrace_report_syscall_entry(
462 struct pt_regs *regs)
463{
464 return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY);
465}
466
467/**
468 * ptrace_report_syscall_exit - task has just finished a system call
469 * @regs: user register state of current task
470 * @step: nonzero if simulating single-step or block-step
471 *
472 * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when
473 * the current task has just finished an attempted system call. Full
474 * user register state is available here. It is safe to block here,
475 * preventing signals from being processed.
476 *
477 * If @step is nonzero, this report is also in lieu of the normal
478 * trap that would follow the system call instruction because
479 * user_enable_block_step() or user_enable_single_step() was used.
480 * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set.
481 *
482 * Called without locks, just before checking for pending signals.
483 */
484static inline void ptrace_report_syscall_exit(struct pt_regs *regs, int step)
485{
486 if (step)
487 user_single_step_report(regs);
488 else
489 ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT);
490}
491#endif