Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched,signal,ptrace: Rework TASK_TRACED, TASK_STOPPED state

Currently ptrace_stop() / do_signal_stop() rely on the special states
TASK_TRACED and TASK_STOPPED resp. to keep unique state. That is, this
state exists only in task->__state and nowhere else.

There are two spots of bother with this:

- PREEMPT_RT has task->saved_state which complicates matters,
meaning task_is_{traced,stopped}() needs to check an additional
variable.

- An alternative freezer implementation that itself relies on a
special TASK state would lose TASK_TRACED/TASK_STOPPED and will
result in misbehaviour.

As such, add additional state to task->jobctl to track this state
outside of task->__state.

NOTE: this doesn't actually fix anything yet, just adds extra state.

--EWB
* didn't add an unnecessary newline in signal.h
* Update t->jobctl in signal_wake_up and ptrace_signal_wake_up
instead of in signal_wake_up_state. This prevents the clearing
of TASK_STOPPED and TASK_TRACED from getting lost.
* Added warnings if JOBCTL_STOPPED or JOBCTL_TRACED are not cleared

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220421150654.757693825@infradead.org
Tested-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Link: https://lkml.kernel.org/r/20220505182645.497868-12-ebiederm@xmission.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>

authored by

Peter Zijlstra and committed by
Eric W. Biederman
31cae1ea 5b4197cb

+45 -14
+3 -5
include/linux/sched.h
··· 118 118 119 119 #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) 120 120 121 - #define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0) 122 - 123 - #define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0) 124 - 125 - #define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) 121 + #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) 122 + #define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0) 123 + #define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0) 126 124 127 125 /* 128 126 * Special states are those that do not use the normal wait-loop pattern. See
+6
include/linux/sched/jobctl.h
··· 21 21 #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ 22 22 #define JOBCTL_PTRACE_FROZEN_BIT 24 /* frozen for ptrace */ 23 23 24 + #define JOBCTL_STOPPED_BIT 26 /* do_signal_stop() */ 25 + #define JOBCTL_TRACED_BIT 27 /* ptrace_stop() */ 26 + 24 27 #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) 25 28 #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) 26 29 #define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) ··· 33 30 #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) 34 31 #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) 35 32 #define JOBCTL_PTRACE_FROZEN (1UL << JOBCTL_PTRACE_FROZEN_BIT) 33 + 34 + #define JOBCTL_STOPPED (1UL << JOBCTL_STOPPED_BIT) 35 + #define JOBCTL_TRACED (1UL << JOBCTL_TRACED_BIT) 36 36 37 37 #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) 38 38 #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
+15 -4
include/linux/sched/signal.h
··· 294 294 static inline void kernel_signal_stop(void) 295 295 { 296 296 spin_lock_irq(&current->sighand->siglock); 297 - if (current->jobctl & JOBCTL_STOP_DEQUEUED) 297 + if (current->jobctl & JOBCTL_STOP_DEQUEUED) { 298 + current->jobctl |= JOBCTL_STOPPED; 298 299 set_special_state(TASK_STOPPED); 300 + } 299 301 spin_unlock_irq(&current->sighand->siglock); 300 302 301 303 schedule(); ··· 439 437 440 438 static inline void signal_wake_up(struct task_struct *t, bool fatal) 441 439 { 442 - fatal = fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN); 443 - signal_wake_up_state(t, fatal ? TASK_WAKEKILL | __TASK_TRACED : 0); 440 + unsigned int state = 0; 441 + if (fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN)) { 442 + t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED); 443 + state = TASK_WAKEKILL | __TASK_TRACED; 444 + } 445 + signal_wake_up_state(t, state); 444 446 } 445 447 static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) 446 448 { 447 - signal_wake_up_state(t, resume ? __TASK_TRACED : 0); 449 + unsigned int state = 0; 450 + if (resume) { 451 + t->jobctl &= ~JOBCTL_TRACED; 452 + state = __TASK_TRACED; 453 + } 454 + signal_wake_up_state(t, state); 448 455 } 449 456 450 457 void task_join_group_stop(struct task_struct *task);
+13 -3
kernel/ptrace.c
··· 185 185 return true; 186 186 } 187 187 188 - /* Ensure that nothing can wake it up, even SIGKILL */ 188 + /* 189 + * Ensure that nothing can wake it up, even SIGKILL 190 + * 191 + * A task is switched to this state while a ptrace operation is in progress; 192 + * such that the ptrace operation is uninterruptible. 193 + */ 189 194 static bool ptrace_freeze_traced(struct task_struct *task) 190 195 { 191 196 bool ret = false; ··· 221 216 */ 222 217 if (lock_task_sighand(task, &flags)) { 223 218 task->jobctl &= ~JOBCTL_PTRACE_FROZEN; 224 - if (__fatal_signal_pending(task)) 219 + if (__fatal_signal_pending(task)) { 220 + task->jobctl &= ~JOBCTL_TRACED; 225 221 wake_up_state(task, __TASK_TRACED); 222 + } 226 223 unlock_task_sighand(task, &flags); 227 224 } 228 225 } ··· 469 462 * in and out of STOPPED are protected by siglock. 470 463 */ 471 464 if (task_is_stopped(task) && 472 - task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) 465 + task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) { 466 + task->jobctl &= ~JOBCTL_STOPPED; 473 467 signal_wake_up_state(task, __TASK_STOPPED); 468 + } 474 469 475 470 spin_unlock(&task->sighand->siglock); 476 471 ··· 884 875 */ 885 876 spin_lock_irq(&child->sighand->siglock); 886 877 child->exit_code = data; 878 + child->jobctl &= ~JOBCTL_TRACED; 887 879 wake_up_state(child, __TASK_TRACED); 888 880 spin_unlock_irq(&child->sighand->siglock); 889 881
+8 -2
kernel/signal.c
··· 762 762 */ 763 763 void signal_wake_up_state(struct task_struct *t, unsigned int state) 764 764 { 765 + lockdep_assert_held(&t->sighand->siglock); 766 + 765 767 set_tsk_thread_flag(t, TIF_SIGPENDING); 768 + 766 769 /* 767 770 * TASK_WAKEKILL also means wake it up in the stopped/traced/killable 768 771 * case. We don't check t->state here because there is a race with it ··· 933 930 for_each_thread(p, t) { 934 931 flush_sigqueue_mask(&flush, &t->pending); 935 932 task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); 936 - if (likely(!(t->ptrace & PT_SEIZED))) 933 + if (likely(!(t->ptrace & PT_SEIZED))) { 934 + t->jobctl &= ~JOBCTL_STOPPED; 937 935 wake_up_state(t, __TASK_STOPPED); 938 - else 936 + } else 939 937 ptrace_trap_notify(t); 940 938 } 941 939 ··· 2222 2218 return exit_code; 2223 2219 2224 2220 set_special_state(TASK_TRACED); 2221 + current->jobctl |= JOBCTL_TRACED; 2225 2222 2226 2223 /* 2227 2224 * We're committing to trapping. TRACED should be visible before ··· 2441 2436 if (task_participate_group_stop(current)) 2442 2437 notify = CLD_STOPPED; 2443 2438 2439 + current->jobctl |= JOBCTL_STOPPED; 2444 2440 set_special_state(TASK_STOPPED); 2445 2441 spin_unlock_irq(&current->sighand->siglock); 2446 2442