/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PTRACE_H
#define _LINUX_PTRACE_H

#include <linux/compiler.h>		/* For unlikely.  */
#include <linux/sched.h>		/* For struct task_struct.  */
#include <linux/sched/signal.h>		/* For send_sig(), same_thread_group(), etc. */
#include <linux/err.h>			/* for IS_ERR_VALUE */
#include <linux/bug.h>			/* For BUG_ON.  */
#include <linux/pid_namespace.h>	/* For task_active_pid_ns.  */
#include <uapi/linux/ptrace.h>
#include <linux/seccomp.h>

/* Add sp to seccomp_data, as seccomp is user API, we don't want to modify it */
struct syscall_info {
	__u64			sp;
	struct seccomp_data	data;
};

extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
			    void *buf, int len, unsigned int gup_flags);

/*
 * Ptrace flags
 *
 * The ownership rules for task->ptrace, which holds the ptrace
 * flags, are simple.  When a task is running it owns its own
 * task->ptrace flags.  When a task is stopped the ptracer owns
 * task->ptrace.
 */

#define PT_SEIZED	0x00010000	/* SEIZE used, enable new behavior */
#define PT_PTRACED	0x00000001
#define PT_DTRACE	0x00000002	/* delayed trace (used on m68k, i386) */

#define PT_OPT_FLAG_SHIFT	3
/* PT_TRACE_* event enable flags */
#define PT_EVENT_FLAG(event)	(1 << (PT_OPT_FLAG_SHIFT + (event)))
#define PT_TRACESYSGOOD		PT_EVENT_FLAG(0)
#define PT_TRACE_FORK		PT_EVENT_FLAG(PTRACE_EVENT_FORK)
#define PT_TRACE_VFORK		PT_EVENT_FLAG(PTRACE_EVENT_VFORK)
#define PT_TRACE_CLONE		PT_EVENT_FLAG(PTRACE_EVENT_CLONE)
#define PT_TRACE_EXEC		PT_EVENT_FLAG(PTRACE_EVENT_EXEC)
#define PT_TRACE_VFORK_DONE	PT_EVENT_FLAG(PTRACE_EVENT_VFORK_DONE)
#define PT_TRACE_EXIT		PT_EVENT_FLAG(PTRACE_EVENT_EXIT)
#define PT_TRACE_SECCOMP	PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP)

#define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
#define PT_SUSPEND_SECCOMP	(PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)

/* single stepping state bits (used on ARM and PA-RISC) */
#define PT_SINGLESTEP_BIT	31
#define PT_SINGLESTEP		(1<<PT_SINGLESTEP_BIT)
#define PT_BLOCKSTEP_BIT	30
#define PT_BLOCKSTEP		(1<<PT_BLOCKSTEP_BIT)

extern long arch_ptrace(struct task_struct *child, long request,
			unsigned long addr, unsigned long data);
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
extern void ptrace_disable(struct task_struct *);
extern int ptrace_request(struct task_struct *child, long request,
			  unsigned long addr, unsigned long data);
extern int ptrace_notify(int exit_code, unsigned long message);
extern void __ptrace_link(struct task_struct *child,
			  struct task_struct *new_parent,
			  const struct cred *ptracer_cred);
extern void __ptrace_unlink(struct task_struct *child);
extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
#define PTRACE_MODE_READ	0x01
#define PTRACE_MODE_ATTACH	0x02
#define PTRACE_MODE_NOAUDIT	0x04
#define PTRACE_MODE_FSCREDS	0x08
#define PTRACE_MODE_REALCREDS	0x10

/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
#define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
#define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
#define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)

/**
 * ptrace_may_access - check whether the caller is permitted to access
 * a target task.
 * @task: target task
 * @mode: selects type of access and caller credentials
 *
 * Returns true on success, false on denial.
 *
 * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must
 * be set in @mode to specify whether the access was requested through
 * a filesystem syscall (should use effective capabilities and fsuid
 * of the caller) or through an explicit syscall such as
 * process_vm_writev or ptrace (and should use the real credentials).
 */
extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
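
/*
 * Illustrative sketch (editor's note, not part of the kernel API defined
 * here): a handler reached through a filesystem interface such as a /proc
 * file would typically gate access with the fs-credential variant,
 *
 *	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
 *		return -EACCES;
 *
 * while an explicit syscall like process_vm_writev would instead pass
 * PTRACE_MODE_ATTACH_REALCREDS.  (Hypothetical call site; the real
 * callers live elsewhere in the tree.)
 */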

static inline int ptrace_reparented(struct task_struct *child)
{
	return !same_thread_group(child->real_parent, child->parent);
}

static inline void ptrace_unlink(struct task_struct *child)
{
	if (unlikely(child->ptrace))
		__ptrace_unlink(child);
}

int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data);
int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data);

/**
 * ptrace_parent - return the task that is tracing the given task
 * @task: task to consider
 *
 * Returns %NULL if no one is tracing @task, or the &struct task_struct
 * pointer to its tracer.
 *
 * Must be called under rcu_read_lock().  The pointer returned might be
 * kept live only by RCU.  During exec, this may be called with task_lock()
 * held on @task, still held from when check_unsafe_exec() was called.
 */
static inline struct task_struct *ptrace_parent(struct task_struct *task)
{
	if (unlikely(task->ptrace))
		return rcu_dereference(task->parent);
	return NULL;
}

/**
 * ptrace_event_enabled - test whether a ptrace event is enabled
 * @task: ptracee of interest
 * @event: %PTRACE_EVENT_* to test
 *
 * Test whether @event is enabled for ptracee @task.
 *
 * Returns %true if @event is enabled, %false otherwise.
 */
static inline bool ptrace_event_enabled(struct task_struct *task, int event)
{
	return task->ptrace & PT_EVENT_FLAG(event);
}

/**
 * ptrace_event - possibly stop for a ptrace event notification
 * @event:	%PTRACE_EVENT_* value to report
 * @message:	value for %PTRACE_GETEVENTMSG to return
 *
 * Check whether @event is enabled and, if so, report @event and @message
 * to the ptrace parent.
 *
 * Called without locks.
 */
static inline void ptrace_event(int event, unsigned long message)
{
	if (unlikely(ptrace_event_enabled(current, event))) {
		ptrace_notify((event << 8) | SIGTRAP, message);
	} else if (event == PTRACE_EVENT_EXEC) {
		/* legacy EXEC report via SIGTRAP */
		if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED)
			send_sig(SIGTRAP, current, 0);
	}
}
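
/*
 * Tracer-side view (userspace sketch, assumed for illustration): an
 * enabled event reported above appears to the tracer as a SIGTRAP stop
 * with the event number in the upper status bits:
 *
 *	waitpid(pid, &status, 0);
 *	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
 *		event = status >> 16;			// PTRACE_EVENT_* or 0
 *	ptrace(PTRACE_GETEVENTMSG, pid, 0, &msg);	// @message from above
 */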

/**
 * ptrace_event_pid - possibly stop for a ptrace event notification
 * @event:	%PTRACE_EVENT_* value to report
 * @pid:	process identifier for %PTRACE_GETEVENTMSG to return
 *
 * Check whether @event is enabled and, if so, report @event and @pid
 * to the ptrace parent.  @pid is reported as the pid_t seen from the
 * ptrace parent's pid namespace.
 *
 * Called without locks.
 */
static inline void ptrace_event_pid(int event, struct pid *pid)
{
	/*
	 * FIXME: There's a potential race if a ptracer in a different pid
	 * namespace than parent attaches between computing message below and
	 * when we acquire tasklist_lock in ptrace_stop().  If this happens,
	 * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG.
	 */
	unsigned long message = 0;
	struct pid_namespace *ns;

	rcu_read_lock();
	ns = task_active_pid_ns(rcu_dereference(current->parent));
	if (ns)
		message = pid_nr_ns(pid, ns);
	rcu_read_unlock();

	ptrace_event(event, message);
}

/**
 * ptrace_init_task - initialize ptrace state for a new child
 * @child:	new child task
 * @ptrace:	true if child should be ptrace'd by parent's tracer
 *
 * This is called immediately after adding @child to its parent's children
 * list.  @ptrace is false in the normal case, and true to ptrace @child.
 *
 * Called with current's siglock and write_lock_irq(&tasklist_lock) held.
 */
static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
{
	INIT_LIST_HEAD(&child->ptrace_entry);
	INIT_LIST_HEAD(&child->ptraced);
	child->jobctl = 0;
	child->ptrace = 0;
	child->parent = child->real_parent;

	if (unlikely(ptrace) && current->ptrace) {
		child->ptrace = current->ptrace;
		__ptrace_link(child, current->parent, current->ptracer_cred);

		if (child->ptrace & PT_SEIZED)
			task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
		else
			sigaddset(&child->pending.signal, SIGSTOP);
	} else
		child->ptracer_cred = NULL;
}

/**
 * ptrace_release_task - final ptrace-related cleanup of a zombie being reaped
 * @task:	task in %EXIT_DEAD state
 *
 * Called with write_lock(&tasklist_lock) held.
 */
static inline void ptrace_release_task(struct task_struct *task)
{
	BUG_ON(!list_empty(&task->ptraced));
	ptrace_unlink(task);
	BUG_ON(!list_empty(&task->ptrace_entry));
}

#ifndef force_successful_syscall_return
/*
 * System call handlers that, upon successful completion, need to return a
 * negative value should call force_successful_syscall_return() right before
 * returning.  On architectures where the syscall convention provides for a
 * separate error flag (e.g., alpha, ia64, ppc{,64}, sparc{,64}, possibly
 * others), this macro can be used to ensure that the error flag will not get
 * set.  On architectures which do not support a separate error flag, the
 * macro is a no-op and the spurious error condition needs to be filtered out
 * by some other means (e.g., in user-level, by passing an extra argument to
 * the syscall handler, or something along those lines).
 */
#define force_successful_syscall_return() do { } while (0)
#endif

#ifndef is_syscall_success
/*
 * On most systems we can tell whether a syscall is a success based on
 * whether the retval is an error value.  Some systems, like ia64 and
 * powerpc, have different indicators of success/failure and must define
 * their own.
 */
#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs))))
#endif
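
/*
 * Sketch of the generic convention assumed by is_syscall_success()
 * (illustrative): a return value in the last MAX_ERRNO (4095) "negative"
 * values means failure, so instrumentation can recover the errno with:
 *
 *	long ret = regs_return_value(regs);
 *	if (is_syscall_success(regs))
 *		use(ret);		// real result, e.g. an mmap address
 *	else
 *		handle_error(-ret);	// positive errno value
 *
 * (use() and handle_error() are hypothetical stand-ins.)
 */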

/*
 * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
 *
 * These do-nothing inlines are used when the arch does not
 * implement single-step.  The kerneldoc comments are here
 * to document the interface for all arch definitions.
 */

#ifndef arch_has_single_step
/**
 * arch_has_single_step - does this CPU support user-mode single-step?
 *
 * If this is defined, then there must be function declarations or
 * inlines for user_enable_single_step() and user_disable_single_step().
 * arch_has_single_step() should evaluate to nonzero iff the machine
 * supports instruction single-step for user mode.
 * It can be a constant or it can test a CPU feature bit.
 */
#define arch_has_single_step()		(0)

/**
 * user_enable_single_step - single-step in user-mode task
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * This can only be called when arch_has_single_step() has returned nonzero.
 * Set @task so that when it returns to user mode, it will trap after the
 * next single instruction executes.  If arch_has_block_step() is defined,
 * this must clear the effects of user_enable_block_step() too.
 */
static inline void user_enable_single_step(struct task_struct *task)
{
	BUG();			/* This can never be called.  */
}

/**
 * user_disable_single_step - cancel user-mode single-step
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * Clear @task of the effects of user_enable_single_step() and
 * user_enable_block_step().  This can be called whether or not either
 * of those was ever called on @task, and even if arch_has_single_step()
 * returned zero.
 */
static inline void user_disable_single_step(struct task_struct *task)
{
}
#else
extern void user_enable_single_step(struct task_struct *);
extern void user_disable_single_step(struct task_struct *);
#endif	/* arch_has_single_step */
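
/*
 * Arch-side sketch (assumed, simplified): an architecture with hardware
 * single-step support would override the stubs above in <asm/ptrace.h>,
 * along the lines of:
 *
 *	#define arch_has_single_step()	(1)
 *	extern void user_enable_single_step(struct task_struct *);
 *	extern void user_disable_single_step(struct task_struct *);
 *
 * with the enable routine arranging for a per-instruction trap on the
 * next return to user mode (x86, for example, sets the EFLAGS trap flag
 * in the saved user registers).
 */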

#ifndef arch_has_block_step
/**
 * arch_has_block_step - does this CPU support user-mode block-step?
 *
 * If this is defined, then there must be a function declaration or inline
 * for user_enable_block_step(), and arch_has_single_step() must be defined
 * too.  arch_has_block_step() should evaluate to nonzero iff the machine
 * supports step-until-branch for user mode.  It can be a constant or it
 * can test a CPU feature bit.
 */
#define arch_has_block_step()		(0)

/**
 * user_enable_block_step - step until branch in user-mode task
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * This can only be called when arch_has_block_step() has returned nonzero,
 * and will never be called when single-instruction stepping is being used.
 * Set @task so that when it returns to user mode, it will trap after the
 * next branch or trap taken.
 */
static inline void user_enable_block_step(struct task_struct *task)
{
	BUG();			/* This can never be called.  */
}
#else
extern void user_enable_block_step(struct task_struct *);
#endif	/* arch_has_block_step */

#ifdef ARCH_HAS_USER_SINGLE_STEP_REPORT
extern void user_single_step_report(struct pt_regs *regs);
#else
static inline void user_single_step_report(struct pt_regs *regs)
{
	kernel_siginfo_t info;
	clear_siginfo(&info);
	info.si_signo = SIGTRAP;
	info.si_errno = 0;
	info.si_code = SI_USER;
	info.si_pid = 0;
	info.si_uid = 0;
	force_sig_info(&info);
}
#endif

#ifndef arch_ptrace_stop_needed
/**
 * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called
 *
 * This is called with the siglock held, to decide whether or not it's
 * necessary to release the siglock and call arch_ptrace_stop().  It can be
 * defined to a constant if arch_ptrace_stop() is never required, or always
 * is.  On machines where this makes sense, it should be defined to a quick
 * test to optimize out calling arch_ptrace_stop() when it would be
 * superfluous.  For example, if the thread has not been back to user mode
 * since the last stop, the thread state might indicate that nothing needs
 * to be done.
 *
 * This is guaranteed to be invoked once before a task stops for ptrace and
 * may include arch-specific operations necessary prior to a ptrace stop.
 */
#define arch_ptrace_stop_needed()	(0)
#endif

#ifndef arch_ptrace_stop
/**
 * arch_ptrace_stop - Do machine-specific work before stopping for ptrace
 *
 * This is called with no locks held when arch_ptrace_stop_needed() has
 * just returned nonzero.  It is allowed to block, e.g. for user memory
 * access.  The arch can have machine-specific work to be done before
 * ptrace stops.  On ia64, register backing store gets written back to user
 * memory here.  Since this can be costly (requires dropping the siglock),
 * we only do it when the arch requires it for this particular stop, as
 * indicated by arch_ptrace_stop_needed().
 */
#define arch_ptrace_stop()		do { } while (0)
#endif
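
/*
 * How the two hooks above pair up in the stop path (simplified sketch of
 * the caller in kernel/signal.c, shown for illustration only):
 *
 *	if (arch_ptrace_stop_needed()) {
 *		spin_unlock_irq(&current->sighand->siglock);
 *		arch_ptrace_stop();
 *		spin_lock_irq(&current->sighand->siglock);
 *	}
 */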

#ifndef current_pt_regs
#define current_pt_regs() task_pt_regs(current)
#endif

/*
 * Unlike current_pt_regs(), this one is equal to task_pt_regs(current)
 * on *all* architectures; the only reason to have a per-arch definition
 * is optimisation.
 */
#ifndef signal_pt_regs
#define signal_pt_regs() task_pt_regs(current)
#endif

#ifndef current_user_stack_pointer
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif

extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);

extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);

/*
 * ptrace report for syscall entry and exit looks identical.
 */
static inline int ptrace_report_syscall(unsigned long message)
{
	int ptrace = current->ptrace;
	int signr;

	if (!(ptrace & PT_PTRACED))
		return 0;

	signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0),
			      message);

	/*
	 * this isn't the same as continuing with a signal, but it will do
	 * for normal use.  strace only continues with a signal if the
	 * stopping signal is not SIGTRAP.  -brl
	 */
	if (signr)
		send_sig(signr, current, 1);

	return fatal_signal_pending(current);
}

/**
 * ptrace_report_syscall_entry - task is about to attempt a system call
 * @regs:	user register state of current task
 *
 * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or
 * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just
 * entered the kernel for a system call.  Full user register state is
 * available here.  Changing the values in @regs can affect the system
 * call number and arguments to be tried.  It is safe to block here,
 * preventing the system call from beginning.
 *
 * Returns zero normally, or nonzero if the calling arch code should abort
 * the system call.  That must prevent normal entry so no system call is
 * made.  If the task ever returns to user mode after this, its register
 * state is unspecified, but should be something harmless like an %ENOSYS
 * error return.  It should preserve enough information so that
 * syscall_rollback() can work (see asm-generic/syscall.h).
 *
 * Called without locks, just after entering kernel mode.
 */
static inline __must_check int ptrace_report_syscall_entry(
	struct pt_regs *regs)
{
	return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY);
}

/**
 * ptrace_report_syscall_exit - task has just finished a system call
 * @regs:	user register state of current task
 * @step:	nonzero if simulating single-step or block-step
 *
 * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when
 * the current task has just finished an attempted system call.  Full
 * user register state is available here.  It is safe to block here,
 * preventing signals from being processed.
 *
 * If @step is nonzero, this report is also in lieu of the normal
 * trap that would follow the system call instruction because
 * user_enable_block_step() or user_enable_single_step() was used.
 * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set.
 *
 * Called without locks, just before checking for pending signals.
 */
static inline void ptrace_report_syscall_exit(struct pt_regs *regs, int step)
{
	if (step)
		user_single_step_report(regs);
	else
		ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT);
}
#endif /* _LINUX_PTRACE_H */
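
/*
 * Tracer-side endnote (userspace sketch, assumed for illustration): with
 * PTRACE_O_TRACESYSGOOD set, the syscall stops reported by
 * ptrace_report_syscall() are distinguishable from genuine SIGTRAPs:
 *
 *	waitpid(pid, &status, 0);
 *	if (WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) {
 *		// syscall-entry or syscall-exit stop; since Linux 5.3 the
 *		// two can be told apart with PTRACE_GETEVENTMSG, which
 *		// returns PTRACE_EVENTMSG_SYSCALL_ENTRY or _EXIT.
 *	}
 */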