Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ptrace_stop-cleanup-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull ptrace_stop cleanups from Eric Biederman:
"While looking at the ptrace problems with PREEMPT_RT and the problems
Peter Zijlstra was encountering with ptrace in his freezer rewrite I
identified some cleanups to ptrace_stop that make sense on their own
and make resolving the other problems much simpler.

The biggest issue is the habit of the ptrace code to change
task->__state from the tracer to suppress TASK_WAKEKILL from waking up
the tracee. No other code in the kernel does that and it is straight
forward to update signal_wake_up and friends to make that unnecessary.

Peter's task freezer sets frozen tasks to a new state TASK_FROZEN and
then it restores them by calling "wake_up_state(t, TASK_FROZEN)" relying
on the fact that all stopped states except the special stop states can
tolerate spurious wake up and recover their state.

The state of stopped and traced tasks is changed to be stored in
task->jobctl as well as in task->__state. This makes it possible for
the freezer to recover tasks in these special states, as well as
serving as a general cleanup. With a little more work in that
direction I believe TASK_STOPPED can learn to tolerate spurious wake
ups and become an ordinary stop state.

The TASK_TRACED state has to remain a special state as the registers
for a process are only reliably available when the process is stopped
in the scheduler. Fundamentally ptrace needs access to the saved
register values of a task.

There are a bunch of semi-random ptrace-related cleanups that were found
while looking at these issues.

One cleanup that deserves to be called out is from commit 57b6de08b5f6
("ptrace: Admit ptrace_stop can generate spuriuos SIGTRAPs"). This
makes a change that is technically user space visible, in the handling
of what happens to a tracee when a tracer dies unexpectedly. According
to our testing and our understanding of userspace nothing cares that
spurious SIGTRAPs can be generated in that case"

* tag 'ptrace_stop-cleanup-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
sched,signal,ptrace: Rework TASK_TRACED, TASK_STOPPED state
ptrace: Always take siglock in ptrace_resume
ptrace: Don't change __state
ptrace: Admit ptrace_stop can generate spuriuos SIGTRAPs
ptrace: Document that wait_task_inactive can't fail
ptrace: Reimplement PTRACE_KILL by always sending SIGKILL
signal: Use lockdep_assert_held instead of assert_spin_locked
ptrace: Remove arch_ptrace_attach
ptrace/xtensa: Replace PT_SINGLESTEP with TIF_SINGLESTEP
ptrace/um: Replace PT_DTRACE with TIF_SINGLESTEP
signal: Replace __group_send_sig_info with send_signal_locked
signal: Rename send_signal send_signal_locked

+140 -240
-4
arch/ia64/include/asm/ptrace.h
··· 139 139 #define arch_ptrace_stop_needed() \ 140 140 (!test_thread_flag(TIF_RESTORE_RSE)) 141 141 142 - extern void ptrace_attach_sync_user_rbs (struct task_struct *); 143 - #define arch_ptrace_attach(child) \ 144 - ptrace_attach_sync_user_rbs(child) 145 - 146 142 #define arch_has_single_step() (1) 147 143 #define arch_has_block_step() (1) 148 144
-57
arch/ia64/kernel/ptrace.c
··· 618 618 } 619 619 620 620 /* 621 - * After PTRACE_ATTACH, a thread's register backing store area in user 622 - * space is assumed to contain correct data whenever the thread is 623 - * stopped. arch_ptrace_stop takes care of this on tracing stops. 624 - * But if the child was already stopped for job control when we attach 625 - * to it, then it might not ever get into ptrace_stop by the time we 626 - * want to examine the user memory containing the RBS. 627 - */ 628 - void 629 - ptrace_attach_sync_user_rbs (struct task_struct *child) 630 - { 631 - int stopped = 0; 632 - struct unw_frame_info info; 633 - 634 - /* 635 - * If the child is in TASK_STOPPED, we need to change that to 636 - * TASK_TRACED momentarily while we operate on it. This ensures 637 - * that the child won't be woken up and return to user mode while 638 - * we are doing the sync. (It can only be woken up for SIGKILL.) 639 - */ 640 - 641 - read_lock(&tasklist_lock); 642 - if (child->sighand) { 643 - spin_lock_irq(&child->sighand->siglock); 644 - if (READ_ONCE(child->__state) == TASK_STOPPED && 645 - !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { 646 - set_notify_resume(child); 647 - 648 - WRITE_ONCE(child->__state, TASK_TRACED); 649 - stopped = 1; 650 - } 651 - spin_unlock_irq(&child->sighand->siglock); 652 - } 653 - read_unlock(&tasklist_lock); 654 - 655 - if (!stopped) 656 - return; 657 - 658 - unw_init_from_blocked_task(&info, child); 659 - do_sync_rbs(&info, ia64_sync_user_rbs); 660 - 661 - /* 662 - * Now move the child back into TASK_STOPPED if it should be in a 663 - * job control stop, so that SIGCONT can be used to wake it up. 
664 - */ 665 - read_lock(&tasklist_lock); 666 - if (child->sighand) { 667 - spin_lock_irq(&child->sighand->siglock); 668 - if (READ_ONCE(child->__state) == TASK_TRACED && 669 - (child->signal->flags & SIGNAL_STOP_STOPPED)) { 670 - WRITE_ONCE(child->__state, TASK_STOPPED); 671 - } 672 - spin_unlock_irq(&child->sighand->siglock); 673 - } 674 - read_unlock(&tasklist_lock); 675 - } 676 - 677 - /* 678 621 * Write f32-f127 back to task->thread.fph if it has been modified. 679 622 */ 680 623 inline void
+2
arch/um/include/asm/thread_info.h
··· 60 60 #define TIF_RESTORE_SIGMASK 7 61 61 #define TIF_NOTIFY_RESUME 8 62 62 #define TIF_SECCOMP 9 /* secure computing */ 63 + #define TIF_SINGLESTEP 10 /* single stepping userspace */ 63 64 64 65 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 65 66 #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) ··· 69 68 #define _TIF_MEMDIE (1 << TIF_MEMDIE) 70 69 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) 71 70 #define _TIF_SECCOMP (1 << TIF_SECCOMP) 71 + #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) 72 72 73 73 #endif
+1 -1
arch/um/kernel/exec.c
··· 43 43 { 44 44 PT_REGS_IP(regs) = eip; 45 45 PT_REGS_SP(regs) = esp; 46 - current->ptrace &= ~PT_DTRACE; 46 + clear_thread_flag(TIF_SINGLESTEP); 47 47 #ifdef SUBARCH_EXECVE1 48 48 SUBARCH_EXECVE1(regs->regs); 49 49 #endif
+1 -1
arch/um/kernel/process.c
··· 336 336 { 337 337 struct task_struct *task = t ? t : current; 338 338 339 - if (!(task->ptrace & PT_DTRACE)) 339 + if (!test_thread_flag(TIF_SINGLESTEP)) 340 340 return 0; 341 341 342 342 if (task->thread.singlestep_syscall)
+4 -4
arch/um/kernel/ptrace.c
··· 11 11 12 12 void user_enable_single_step(struct task_struct *child) 13 13 { 14 - child->ptrace |= PT_DTRACE; 14 + set_tsk_thread_flag(child, TIF_SINGLESTEP); 15 15 child->thread.singlestep_syscall = 0; 16 16 17 17 #ifdef SUBARCH_SET_SINGLESTEPPING ··· 21 21 22 22 void user_disable_single_step(struct task_struct *child) 23 23 { 24 - child->ptrace &= ~PT_DTRACE; 24 + clear_tsk_thread_flag(child, TIF_SINGLESTEP); 25 25 child->thread.singlestep_syscall = 0; 26 26 27 27 #ifdef SUBARCH_SET_SINGLESTEPPING ··· 120 120 } 121 121 122 122 /* 123 - * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and 123 + * XXX Check TIF_SINGLESTEP for singlestepping check and 124 124 * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check 125 125 */ 126 126 int syscall_trace_enter(struct pt_regs *regs) ··· 144 144 audit_syscall_exit(regs); 145 145 146 146 /* Fake a debug trap */ 147 - if (ptraced & PT_DTRACE) 147 + if (test_thread_flag(TIF_SINGLESTEP)) 148 148 send_sigtrap(&regs->regs, 0); 149 149 150 150 if (!test_thread_flag(TIF_SYSCALL_TRACE))
+2 -2
arch/um/kernel/signal.c
··· 53 53 unsigned long sp; 54 54 int err; 55 55 56 - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) 56 + if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED)) 57 57 singlestep = 1; 58 58 59 59 /* Did we come from a system call? */ ··· 128 128 * on the host. The tracing thread will check this flag and 129 129 * PTRACE_SYSCALL if necessary. 130 130 */ 131 - if (current->ptrace & PT_DTRACE) 131 + if (test_thread_flag(TIF_SINGLESTEP)) 132 132 current->thread.singlestep_syscall = 133 133 is_syscall(PT_REGS_IP(&current->thread.regs)); 134 134
+1 -2
arch/x86/kernel/step.c
··· 180 180 * 181 181 * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if 182 182 * task is current or it can't be running, otherwise we can race 183 - * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but 184 - * PTRACE_KILL is not safe. 183 + * with __switch_to_xtra(). We rely on ptrace_freeze_traced(). 185 184 */ 186 185 local_irq_disable(); 187 186 debugctl = get_debugctlmsr();
+2 -2
arch/xtensa/kernel/ptrace.c
··· 224 224 225 225 void user_enable_single_step(struct task_struct *child) 226 226 { 227 - child->ptrace |= PT_SINGLESTEP; 227 + set_tsk_thread_flag(child, TIF_SINGLESTEP); 228 228 } 229 229 230 230 void user_disable_single_step(struct task_struct *child) 231 231 { 232 - child->ptrace &= ~PT_SINGLESTEP; 232 + clear_tsk_thread_flag(child, TIF_SINGLESTEP); 233 233 } 234 234 235 235 /*
+2 -2
arch/xtensa/kernel/signal.c
··· 472 472 /* Set up the stack frame */ 473 473 ret = setup_frame(&ksig, sigmask_to_save(), regs); 474 474 signal_setup_done(ret, &ksig, 0); 475 - if (current->ptrace & PT_SINGLESTEP) 475 + if (test_thread_flag(TIF_SINGLESTEP)) 476 476 task_pt_regs(current)->icountlevel = 1; 477 477 478 478 return; ··· 498 498 /* If there's no signal to deliver, we just restore the saved mask. */ 499 499 restore_saved_sigmask(); 500 500 501 - if (current->ptrace & PT_SINGLESTEP) 501 + if (test_thread_flag(TIF_SINGLESTEP)) 502 502 task_pt_regs(current)->icountlevel = 1; 503 503 return; 504 504 }
+2 -2
drivers/tty/tty_jobctrl.c
··· 215 215 spin_unlock_irq(&p->sighand->siglock); 216 216 continue; 217 217 } 218 - __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); 219 - __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); 218 + send_signal_locked(SIGHUP, SEND_SIG_PRIV, p, PIDTYPE_TGID); 219 + send_signal_locked(SIGCONT, SEND_SIG_PRIV, p, PIDTYPE_TGID); 220 220 put_pid(p->signal->tty_old_pgrp); /* A noop */ 221 221 spin_lock(&tty->ctrl.lock); 222 222 tty_pgrp = get_pid(tty->ctrl.pgrp);
-7
include/linux/ptrace.h
··· 30 30 31 31 #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ 32 32 #define PT_PTRACED 0x00000001 33 - #define PT_DTRACE 0x00000002 /* delayed trace (used on um) */ 34 33 35 34 #define PT_OPT_FLAG_SHIFT 3 36 35 /* PT_TRACE_* event enable flags */ ··· 45 46 46 47 #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) 47 48 #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) 48 - 49 - /* single stepping state bits (used on ARM and PA-RISC) */ 50 - #define PT_SINGLESTEP_BIT 31 51 - #define PT_SINGLESTEP (1<<PT_SINGLESTEP_BIT) 52 - #define PT_BLOCKSTEP_BIT 30 53 - #define PT_BLOCKSTEP (1<<PT_BLOCKSTEP_BIT) 54 49 55 50 extern long arch_ptrace(struct task_struct *child, long request, 56 51 unsigned long addr, unsigned long data);
+4 -6
include/linux/sched.h
··· 103 103 /* Convenience macros for the sake of set_current_state: */ 104 104 #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) 105 105 #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) 106 - #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) 106 + #define TASK_TRACED __TASK_TRACED 107 107 108 108 #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) 109 109 ··· 118 118 119 119 #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) 120 120 121 - #define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0) 122 - 123 - #define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0) 124 - 125 - #define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) 121 + #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) 122 + #define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0) 123 + #define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0) 126 124 127 125 /* 128 126 * Special states are those that do not use the normal wait-loop pattern. See
+8
include/linux/sched/jobctl.h
··· 19 19 #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ 20 20 #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ 21 21 #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ 22 + #define JOBCTL_PTRACE_FROZEN_BIT 24 /* frozen for ptrace */ 23 + 24 + #define JOBCTL_STOPPED_BIT 26 /* do_signal_stop() */ 25 + #define JOBCTL_TRACED_BIT 27 /* ptrace_stop() */ 22 26 23 27 #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) 24 28 #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) ··· 32 28 #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) 33 29 #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) 34 30 #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) 31 + #define JOBCTL_PTRACE_FROZEN (1UL << JOBCTL_PTRACE_FROZEN_BIT) 32 + 33 + #define JOBCTL_STOPPED (1UL << JOBCTL_STOPPED_BIT) 34 + #define JOBCTL_TRACED (1UL << JOBCTL_TRACED_BIT) 35 35 36 36 #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) 37 37 #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
+16 -4
include/linux/sched/signal.h
··· 294 294 static inline void kernel_signal_stop(void) 295 295 { 296 296 spin_lock_irq(&current->sighand->siglock); 297 - if (current->jobctl & JOBCTL_STOP_DEQUEUED) 297 + if (current->jobctl & JOBCTL_STOP_DEQUEUED) { 298 + current->jobctl |= JOBCTL_STOPPED; 298 299 set_special_state(TASK_STOPPED); 300 + } 299 301 spin_unlock_irq(&current->sighand->siglock); 300 302 301 303 schedule(); ··· 446 444 447 445 extern void signal_wake_up_state(struct task_struct *t, unsigned int state); 448 446 449 - static inline void signal_wake_up(struct task_struct *t, bool resume) 447 + static inline void signal_wake_up(struct task_struct *t, bool fatal) 450 448 { 451 - signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); 449 + unsigned int state = 0; 450 + if (fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN)) { 451 + t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED); 452 + state = TASK_WAKEKILL | __TASK_TRACED; 453 + } 454 + signal_wake_up_state(t, state); 452 455 } 453 456 static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) 454 457 { 455 - signal_wake_up_state(t, resume ? __TASK_TRACED : 0); 458 + unsigned int state = 0; 459 + if (resume) { 460 + t->jobctl &= ~JOBCTL_TRACED; 461 + state = __TASK_TRACED; 462 + } 463 + signal_wake_up_state(t, state); 456 464 } 457 465 458 466 void task_join_group_stop(struct task_struct *task);
+2 -1
include/linux/signal.h
··· 282 282 struct task_struct *p, enum pid_type type); 283 283 extern int group_send_sig_info(int sig, struct kernel_siginfo *info, 284 284 struct task_struct *p, enum pid_type type); 285 - extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *); 285 + extern int send_signal_locked(int sig, struct kernel_siginfo *info, 286 + struct task_struct *p, enum pid_type type); 286 287 extern int sigprocmask(int, sigset_t *, sigset_t *); 287 288 extern void set_current_blocked(sigset_t *); 288 289 extern void __set_current_blocked(const sigset_t *);
+28 -59
kernel/ptrace.c
··· 185 185 return true; 186 186 } 187 187 188 - /* Ensure that nothing can wake it up, even SIGKILL */ 188 + /* 189 + * Ensure that nothing can wake it up, even SIGKILL 190 + * 191 + * A task is switched to this state while a ptrace operation is in progress; 192 + * such that the ptrace operation is uninterruptible. 193 + */ 189 194 static bool ptrace_freeze_traced(struct task_struct *task) 190 195 { 191 196 bool ret = false; ··· 202 197 spin_lock_irq(&task->sighand->siglock); 203 198 if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && 204 199 !__fatal_signal_pending(task)) { 205 - WRITE_ONCE(task->__state, __TASK_TRACED); 200 + task->jobctl |= JOBCTL_PTRACE_FROZEN; 206 201 ret = true; 207 202 } 208 203 spin_unlock_irq(&task->sighand->siglock); ··· 212 207 213 208 static void ptrace_unfreeze_traced(struct task_struct *task) 214 209 { 215 - if (READ_ONCE(task->__state) != __TASK_TRACED) 216 - return; 217 - 218 - WARN_ON(!task->ptrace || task->parent != current); 210 + unsigned long flags; 219 211 220 212 /* 221 - * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely. 222 - * Recheck state under the lock to close this race. 213 + * The child may be awake and may have cleared 214 + * JOBCTL_PTRACE_FROZEN (see ptrace_resume). The child will 215 + * not set JOBCTL_PTRACE_FROZEN or enter __TASK_TRACED anew. 
223 216 */ 224 - spin_lock_irq(&task->sighand->siglock); 225 - if (READ_ONCE(task->__state) == __TASK_TRACED) { 226 - if (__fatal_signal_pending(task)) 217 + if (lock_task_sighand(task, &flags)) { 218 + task->jobctl &= ~JOBCTL_PTRACE_FROZEN; 219 + if (__fatal_signal_pending(task)) { 220 + task->jobctl &= ~TASK_TRACED; 227 221 wake_up_state(task, __TASK_TRACED); 228 - else 229 - WRITE_ONCE(task->__state, TASK_TRACED); 222 + } 223 + unlock_task_sighand(task, &flags); 230 224 } 231 - spin_unlock_irq(&task->sighand->siglock); 232 225 } 233 226 234 227 /** ··· 259 256 */ 260 257 read_lock(&tasklist_lock); 261 258 if (child->ptrace && child->parent == current) { 262 - WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); 263 259 /* 264 260 * child->sighand can't be NULL, release_task() 265 261 * does ptrace_unlink() before __exit_signal(). ··· 268 266 } 269 267 read_unlock(&tasklist_lock); 270 268 271 - if (!ret && !ignore_state) { 272 - if (!wait_task_inactive(child, __TASK_TRACED)) { 273 - /* 274 - * This can only happen if may_ptrace_stop() fails and 275 - * ptrace_stop() changes ->state back to TASK_RUNNING, 276 - * so we should not worry about leaking __TASK_TRACED. 277 - */ 278 - WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); 279 - ret = -ESRCH; 280 - } 281 - } 269 + if (!ret && !ignore_state && 270 + WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED))) 271 + ret = -ESRCH; 282 272 283 273 return ret; 284 274 } ··· 469 475 * in and out of STOPPED are protected by siglock. 
470 476 */ 471 477 if (task_is_stopped(task) && 472 - task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) 478 + task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) { 479 + task->jobctl &= ~JOBCTL_STOPPED; 473 480 signal_wake_up_state(task, __TASK_STOPPED); 481 + } 474 482 475 483 spin_unlock(&task->sighand->siglock); 476 484 ··· 842 846 static int ptrace_resume(struct task_struct *child, long request, 843 847 unsigned long data) 844 848 { 845 - bool need_siglock; 846 - 847 849 if (!valid_signal(data)) 848 850 return -EIO; 849 851 ··· 877 883 * Note that we need siglock even if ->exit_code == data and/or this 878 884 * status was not reported yet, the new status must not be cleared by 879 885 * wait_task_stopped() after resume. 880 - * 881 - * If data == 0 we do not care if wait_task_stopped() reports the old 882 - * status and clears the code too; this can't race with the tracee, it 883 - * takes siglock after resume. 884 886 */ 885 - need_siglock = data && !thread_group_empty(current); 886 - if (need_siglock) 887 - spin_lock_irq(&child->sighand->siglock); 887 + spin_lock_irq(&child->sighand->siglock); 888 888 child->exit_code = data; 889 + child->jobctl &= ~JOBCTL_TRACED; 889 890 wake_up_state(child, __TASK_TRACED); 890 - if (need_siglock) 891 - spin_unlock_irq(&child->sighand->siglock); 891 + spin_unlock_irq(&child->sighand->siglock); 892 892 893 893 return 0; 894 894 } ··· 1218 1230 return ptrace_resume(child, request, data); 1219 1231 1220 1232 case PTRACE_KILL: 1221 - if (child->exit_state) /* already dead */ 1222 - return 0; 1223 - return ptrace_resume(child, request, SIGKILL); 1233 + send_sig_info(SIGKILL, SEND_SIG_NOINFO, child); 1234 + return 0; 1224 1235 1225 1236 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 1226 1237 case PTRACE_GETREGSET: ··· 1266 1279 return ret; 1267 1280 } 1268 1281 1269 - #ifndef arch_ptrace_attach 1270 - #define arch_ptrace_attach(child) do { } while (0) 1271 - #endif 1272 - 1273 1282 SYSCALL_DEFINE4(ptrace, 
long, request, long, pid, unsigned long, addr, 1274 1283 unsigned long, data) 1275 1284 { ··· 1274 1291 1275 1292 if (request == PTRACE_TRACEME) { 1276 1293 ret = ptrace_traceme(); 1277 - if (!ret) 1278 - arch_ptrace_attach(current); 1279 1294 goto out; 1280 1295 } 1281 1296 ··· 1285 1304 1286 1305 if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { 1287 1306 ret = ptrace_attach(child, request, addr, data); 1288 - /* 1289 - * Some architectures need to do book-keeping after 1290 - * a ptrace attach. 1291 - */ 1292 - if (!ret) 1293 - arch_ptrace_attach(child); 1294 1307 goto out_put_task_struct; 1295 1308 } 1296 1309 ··· 1424 1449 1425 1450 if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { 1426 1451 ret = ptrace_attach(child, request, addr, data); 1427 - /* 1428 - * Some architectures need to do book-keeping after 1429 - * a ptrace attach. 1430 - */ 1431 - if (!ret) 1432 - arch_ptrace_attach(child); 1433 1452 goto out_put_task_struct; 1434 1453 } 1435 1454
+1 -4
kernel/sched/core.c
··· 6353 6353 6354 6354 /* 6355 6355 * We must load prev->state once (task_struct::state is volatile), such 6356 - * that: 6357 - * 6358 - * - we form a control dependency vs deactivate_task() below. 6359 - * - ptrace_{,un}freeze_traced() can change ->state underneath us. 6356 + * that we form a control dependency vs deactivate_task() below. 6360 6357 */ 6361 6358 prev_state = READ_ONCE(prev->__state); 6362 6359 if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) {
+61 -79
kernel/signal.c
··· 762 762 */ 763 763 void signal_wake_up_state(struct task_struct *t, unsigned int state) 764 764 { 765 + lockdep_assert_held(&t->sighand->siglock); 766 + 765 767 set_tsk_thread_flag(t, TIF_SIGPENDING); 768 + 766 769 /* 767 770 * TASK_WAKEKILL also means wake it up in the stopped/traced/killable 768 771 * case. We don't check t->state here because there is a race with it ··· 887 884 static void ptrace_trap_notify(struct task_struct *t) 888 885 { 889 886 WARN_ON_ONCE(!(t->ptrace & PT_SEIZED)); 890 - assert_spin_locked(&t->sighand->siglock); 887 + lockdep_assert_held(&t->sighand->siglock); 891 888 892 889 task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); 893 890 ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); ··· 933 930 for_each_thread(p, t) { 934 931 flush_sigqueue_mask(&flush, &t->pending); 935 932 task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); 936 - if (likely(!(t->ptrace & PT_SEIZED))) 933 + if (likely(!(t->ptrace & PT_SEIZED))) { 934 + t->jobctl &= ~JOBCTL_STOPPED; 937 935 wake_up_state(t, __TASK_STOPPED); 938 - else 936 + } else 939 937 ptrace_trap_notify(t); 940 938 } 941 939 ··· 1075 1071 return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); 1076 1072 } 1077 1073 1078 - static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t, 1079 - enum pid_type type, bool force) 1074 + static int __send_signal_locked(int sig, struct kernel_siginfo *info, 1075 + struct task_struct *t, enum pid_type type, bool force) 1080 1076 { 1081 1077 struct sigpending *pending; 1082 1078 struct sigqueue *q; 1083 1079 int override_rlimit; 1084 1080 int ret = 0, result; 1085 1081 1086 - assert_spin_locked(&t->sighand->siglock); 1082 + lockdep_assert_held(&t->sighand->siglock); 1087 1083 1088 1084 result = TRACE_SIGNAL_IGNORED; 1089 1085 if (!prepare_signal(sig, t, force)) ··· 1216 1212 return ret; 1217 1213 } 1218 1214 1219 - static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t, 1220 - enum pid_type type) 1215 
+ int send_signal_locked(int sig, struct kernel_siginfo *info, 1216 + struct task_struct *t, enum pid_type type) 1221 1217 { 1222 1218 /* Should SIGKILL or SIGSTOP be received by a pid namespace init? */ 1223 1219 bool force = false; ··· 1249 1245 force = true; 1250 1246 } 1251 1247 } 1252 - return __send_signal(sig, info, t, type, force); 1248 + return __send_signal_locked(sig, info, t, type, force); 1253 1249 } 1254 1250 1255 1251 static void print_fatal_signal(int signr) ··· 1285 1281 1286 1282 __setup("print-fatal-signals=", setup_print_fatal_signals); 1287 1283 1288 - int 1289 - __group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p) 1290 - { 1291 - return send_signal(sig, info, p, PIDTYPE_TGID); 1292 - } 1293 - 1294 1284 int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, 1295 1285 enum pid_type type) 1296 1286 { ··· 1292 1294 int ret = -ESRCH; 1293 1295 1294 1296 if (lock_task_sighand(p, &flags)) { 1295 - ret = send_signal(sig, info, p, type); 1297 + ret = send_signal_locked(sig, info, p, type); 1296 1298 unlock_task_sighand(p, &flags); 1297 1299 } 1298 1300 ··· 1345 1347 if (action->sa.sa_handler == SIG_DFL && 1346 1348 (!t->ptrace || (handler == HANDLER_EXIT))) 1347 1349 t->signal->flags &= ~SIGNAL_UNKILLABLE; 1348 - ret = send_signal(sig, info, t, PIDTYPE_PID); 1350 + ret = send_signal_locked(sig, info, t, PIDTYPE_PID); 1349 1351 spin_unlock_irqrestore(&t->sighand->siglock, flags); 1350 1352 1351 1353 return ret; ··· 1565 1567 1566 1568 if (sig) { 1567 1569 if (lock_task_sighand(p, &flags)) { 1568 - ret = __send_signal(sig, &info, p, PIDTYPE_TGID, false); 1570 + ret = __send_signal_locked(sig, &info, p, PIDTYPE_TGID, false); 1569 1571 unlock_task_sighand(p, &flags); 1570 1572 } else 1571 1573 ret = -ESRCH; ··· 2112 2114 * parent's namespaces. 
2113 2115 */ 2114 2116 if (valid_signal(sig) && sig) 2115 - __send_signal(sig, &info, tsk->parent, PIDTYPE_TGID, false); 2117 + __send_signal_locked(sig, &info, tsk->parent, PIDTYPE_TGID, false); 2116 2118 __wake_up_parent(tsk, tsk->parent); 2117 2119 spin_unlock_irqrestore(&psig->siglock, flags); 2118 2120 ··· 2182 2184 spin_lock_irqsave(&sighand->siglock, flags); 2183 2185 if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && 2184 2186 !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) 2185 - __group_send_sig_info(SIGCHLD, &info, parent); 2187 + send_signal_locked(SIGCHLD, &info, parent, PIDTYPE_TGID); 2186 2188 /* 2187 2189 * Even if SIGCHLD is not generated, we must wake up wait4 calls. 2188 2190 */ ··· 2202 2204 * with. If the code did not stop because the tracer is gone, 2203 2205 * the stop signal remains unchanged unless clear_code. 2204 2206 */ 2205 - static int ptrace_stop(int exit_code, int why, int clear_code, 2206 - unsigned long message, kernel_siginfo_t *info) 2207 + static int ptrace_stop(int exit_code, int why, unsigned long message, 2208 + kernel_siginfo_t *info) 2207 2209 __releases(&current->sighand->siglock) 2208 2210 __acquires(&current->sighand->siglock) 2209 2211 { 2210 2212 bool gstop_done = false; 2211 - bool read_code = true; 2212 2213 2213 2214 if (arch_ptrace_stop_needed()) { 2214 2215 /* ··· 2224 2227 } 2225 2228 2226 2229 /* 2227 - * schedule() will not sleep if there is a pending signal that 2228 - * can awaken the task. 2230 + * After this point ptrace_signal_wake_up or signal_wake_up 2231 + * will clear TASK_TRACED if ptrace_unlink happens or a fatal 2232 + * signal comes in. Handle previous ptrace_unlinks and fatal 2233 + * signals here to prevent ptrace_stop sleeping in schedule. 
2229 2234 */ 2235 + if (!current->ptrace || __fatal_signal_pending(current)) 2236 + return exit_code; 2237 + 2230 2238 set_special_state(TASK_TRACED); 2239 + current->jobctl |= JOBCTL_TRACED; 2231 2240 2232 2241 /* 2233 2242 * We're committing to trapping. TRACED should be visible before ··· 2279 2276 2280 2277 spin_unlock_irq(&current->sighand->siglock); 2281 2278 read_lock(&tasklist_lock); 2282 - if (likely(current->ptrace)) { 2283 - /* 2284 - * Notify parents of the stop. 2285 - * 2286 - * While ptraced, there are two parents - the ptracer and 2287 - * the real_parent of the group_leader. The ptracer should 2288 - * know about every stop while the real parent is only 2289 - * interested in the completion of group stop. The states 2290 - * for the two don't interact with each other. Notify 2291 - * separately unless they're gonna be duplicates. 2292 - */ 2279 + /* 2280 + * Notify parents of the stop. 2281 + * 2282 + * While ptraced, there are two parents - the ptracer and 2283 + * the real_parent of the group_leader. The ptracer should 2284 + * know about every stop while the real parent is only 2285 + * interested in the completion of group stop. The states 2286 + * for the two don't interact with each other. Notify 2287 + * separately unless they're gonna be duplicates. 2288 + */ 2289 + if (current->ptrace) 2293 2290 do_notify_parent_cldstop(current, true, why); 2294 - if (gstop_done && ptrace_reparented(current)) 2295 - do_notify_parent_cldstop(current, false, why); 2291 + if (gstop_done && (!current->ptrace || ptrace_reparented(current))) 2292 + do_notify_parent_cldstop(current, false, why); 2296 2293 2297 - /* 2298 - * Don't want to allow preemption here, because 2299 - * sys_ptrace() needs this task to be inactive. 2300 - * 2301 - * XXX: implement read_unlock_no_resched(). 
2302 - */ 2303 - preempt_disable(); 2304 - read_unlock(&tasklist_lock); 2305 - cgroup_enter_frozen(); 2306 - preempt_enable_no_resched(); 2307 - freezable_schedule(); 2308 - cgroup_leave_frozen(true); 2309 - } else { 2310 - /* 2311 - * By the time we got the lock, our tracer went away. 2312 - * Don't drop the lock yet, another tracer may come. 2313 - * 2314 - * If @gstop_done, the ptracer went away between group stop 2315 - * completion and here. During detach, it would have set 2316 - * JOBCTL_STOP_PENDING on us and we'll re-enter 2317 - * TASK_STOPPED in do_signal_stop() on return, so notifying 2318 - * the real parent of the group stop completion is enough. 2319 - */ 2320 - if (gstop_done) 2321 - do_notify_parent_cldstop(current, false, why); 2322 - 2323 - /* tasklist protects us from ptrace_freeze_traced() */ 2324 - __set_current_state(TASK_RUNNING); 2325 - read_code = false; 2326 - if (clear_code) 2327 - exit_code = 0; 2328 - read_unlock(&tasklist_lock); 2329 - } 2294 + /* 2295 + * Don't want to allow preemption here, because 2296 + * sys_ptrace() needs this task to be inactive. 2297 + * 2298 + * XXX: implement read_unlock_no_resched(). 2299 + */ 2300 + preempt_disable(); 2301 + read_unlock(&tasklist_lock); 2302 + cgroup_enter_frozen(); 2303 + preempt_enable_no_resched(); 2304 + freezable_schedule(); 2305 + cgroup_leave_frozen(true); 2330 2306 2331 2307 /* 2332 2308 * We are back. Now reacquire the siglock before touching ··· 2313 2331 * any signal-sending on another CPU that wants to examine it. 
2314 2332 */ 2315 2333 spin_lock_irq(&current->sighand->siglock); 2316 - if (read_code) 2317 - exit_code = current->exit_code; 2334 + exit_code = current->exit_code; 2318 2335 current->last_siginfo = NULL; 2319 2336 current->ptrace_message = 0; 2320 2337 current->exit_code = 0; 2321 2338 2322 2339 /* LISTENING can be set only during STOP traps, clear it */ 2323 - current->jobctl &= ~JOBCTL_LISTENING; 2340 + current->jobctl &= ~(JOBCTL_LISTENING | JOBCTL_PTRACE_FROZEN); 2324 2341 2325 2342 /* 2326 2343 * Queued signals ignored us while we were stopped for tracing. ··· 2341 2360 info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); 2342 2361 2343 2362 /* Let the debugger run. */ 2344 - return ptrace_stop(exit_code, why, 1, message, &info); 2363 + return ptrace_stop(exit_code, why, message, &info); 2345 2364 } 2346 2365 2347 2366 int ptrace_notify(int exit_code, unsigned long message) ··· 2452 2471 if (task_participate_group_stop(current)) 2453 2472 notify = CLD_STOPPED; 2454 2473 2474 + current->jobctl |= JOBCTL_STOPPED; 2455 2475 set_special_state(TASK_STOPPED); 2456 2476 spin_unlock_irq(&current->sighand->siglock); 2457 2477 ··· 2514 2532 CLD_STOPPED, 0); 2515 2533 } else { 2516 2534 WARN_ON_ONCE(!signr); 2517 - ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL); 2535 + ptrace_stop(signr, CLD_STOPPED, 0, NULL); 2518 2536 } 2519 2537 } 2520 2538 ··· 2567 2585 * comment in dequeue_signal(). 2568 2586 */ 2569 2587 current->jobctl |= JOBCTL_STOP_DEQUEUED; 2570 - signr = ptrace_stop(signr, CLD_TRAPPED, 0, 0, info); 2588 + signr = ptrace_stop(signr, CLD_TRAPPED, 0, info); 2571 2589 2572 2590 /* We're back. Did the debugger cancel the sig? */ 2573 2591 if (signr == 0) ··· 2594 2612 /* If the (new) signal is now blocked, requeue it. 
*/ 2595 2613 if (sigismember(&current->blocked, signr) || 2596 2614 fatal_signal_pending(current)) { 2597 - send_signal(signr, info, current, type); 2615 + send_signal_locked(signr, info, current, type); 2598 2616 signr = 0; 2599 2617 } 2600 2618 ··· 4789 4807 "the deadlock.\n"); 4790 4808 return; 4791 4809 } 4792 - ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); 4810 + ret = send_signal_locked(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); 4793 4811 spin_unlock(&t->sighand->siglock); 4794 4812 if (ret) 4795 4813 kdb_printf("Fail to deliver Signal %d to process %d.\n",
+3 -3
kernel/time/posix-cpu-timers.c
··· 870 870 { 871 871 if (tsk->dl.dl_overrun) { 872 872 tsk->dl.dl_overrun = 0; 873 - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); 873 + send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); 874 874 } 875 875 } 876 876 ··· 884 884 rt ? "RT" : "CPU", hard ? "hard" : "soft", 885 885 current->comm, task_pid_nr(current)); 886 886 } 887 - __group_send_sig_info(signo, SEND_SIG_PRIV, current); 887 + send_signal_locked(signo, SEND_SIG_PRIV, current, PIDTYPE_TGID); 888 888 return true; 889 889 } 890 890 ··· 958 958 trace_itimer_expire(signo == SIGPROF ? 959 959 ITIMER_PROF : ITIMER_VIRTUAL, 960 960 task_tgid(tsk), cur_time); 961 - __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); 961 + send_signal_locked(signo, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); 962 962 } 963 963 964 964 if (it->expires && it->expires < *expires)