Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

signal: requeuing undeliverable signals

Kyle Huey recently reported[1] that rr gets confused if SIGKILL prevents
ptrace_signal from delivering a signal, as the kernel sets up a signal
frame for a signal that rr did not have a chance to observe with ptrace.

In looking into it I found a couple of bugs and a quality of
implementation issue.

- The test for signal_group_exit should be inside the for loop in get_signal.
- Signals should be requeued on the same queue they were dequeued from.
- When a fatal signal is pending ptrace_signal should not return another
signal for delivery.

Kyle Huey has verified[2] an earlier version of this change.

I have reworked things one more time to completely fix the issues
raised, and to keep the code maintainable long term.

I have smoke-tested this code and, combined with a careful review, I
expect it to work fine. Kyle, if you can double-check that my last
round of changes still works for rr, I would appreciate it.

Eric W. Biederman (3):
signal: In get_signal test for signal_group_exit every time through the loop
signal: Requeue signals in the appropriate queue
signal: Requeue ptrace signals

fs/signalfd.c | 5 +++--
include/linux/sched/signal.h | 7 ++++---
kernel/signal.c | 44 ++++++++++++++++++++++++++------------------
3 files changed, 33 insertions(+), 23 deletions(-)

[1] https://lkml.kernel.org/r/20211101034147.6203-1-khuey@kylehuey.com
[2] https://lkml.kernel.org/r/CAP045ApAX725ZfujaK-jJNkfCo5s+oVFpBvNfPJk+DKY8K7d=Q@mail.gmail.com
Tested-by: Kyle Huey <khuey@kylehuey.com>
Link: https://lkml.kernel.org/r/87bl2kekig.fsf_-_@email.froward.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

+33 -23
+3 -2
fs/signalfd.c
··· 165 165 static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info, 166 166 int nonblock) 167 167 { 168 + enum pid_type type; 168 169 ssize_t ret; 169 170 DECLARE_WAITQUEUE(wait, current); 170 171 171 172 spin_lock_irq(&current->sighand->siglock); 172 - ret = dequeue_signal(current, &ctx->sigmask, info); 173 + ret = dequeue_signal(current, &ctx->sigmask, info, &type); 173 174 switch (ret) { 174 175 case 0: 175 176 if (!nonblock) ··· 185 184 add_wait_queue(&current->sighand->signalfd_wqh, &wait); 186 185 for (;;) { 187 186 set_current_state(TASK_INTERRUPTIBLE); 188 - ret = dequeue_signal(current, &ctx->sigmask, info); 187 + ret = dequeue_signal(current, &ctx->sigmask, info, &type); 189 188 if (ret != 0) 190 189 break; 191 190 if (signal_pending(current)) {
+4 -3
include/linux/sched/signal.h
··· 286 286 extern void flush_signals(struct task_struct *); 287 287 extern void ignore_signals(struct task_struct *); 288 288 extern void flush_signal_handlers(struct task_struct *, int force_default); 289 - extern int dequeue_signal(struct task_struct *task, 290 - sigset_t *mask, kernel_siginfo_t *info); 289 + extern int dequeue_signal(struct task_struct *task, sigset_t *mask, 290 + kernel_siginfo_t *info, enum pid_type *type); 291 291 292 292 static inline int kernel_dequeue_signal(void) 293 293 { 294 294 struct task_struct *task = current; 295 295 kernel_siginfo_t __info; 296 + enum pid_type __type; 296 297 int ret; 297 298 298 299 spin_lock_irq(&task->sighand->siglock); 299 - ret = dequeue_signal(task, &task->blocked, &__info); 300 + ret = dequeue_signal(task, &task->blocked, &__info, &__type); 300 301 spin_unlock_irq(&task->sighand->siglock); 301 302 302 303 return ret;
+26 -18
kernel/signal.c
··· 626 626 * 627 627 * All callers have to hold the siglock. 628 628 */ 629 - int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *info) 629 + int dequeue_signal(struct task_struct *tsk, sigset_t *mask, 630 + kernel_siginfo_t *info, enum pid_type *type) 630 631 { 631 632 bool resched_timer = false; 632 633 int signr; ··· 635 634 /* We only dequeue private signals from ourselves, we don't let 636 635 * signalfd steal them 637 636 */ 637 + *type = PIDTYPE_PID; 638 638 signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); 639 639 if (!signr) { 640 + *type = PIDTYPE_TGID; 640 641 signr = __dequeue_signal(&tsk->signal->shared_pending, 641 642 mask, info, &resched_timer); 642 643 #ifdef CONFIG_POSIX_TIMERS ··· 2525 2522 freezable_schedule(); 2526 2523 } 2527 2524 2528 - static int ptrace_signal(int signr, kernel_siginfo_t *info) 2525 + static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) 2529 2526 { 2530 2527 /* 2531 2528 * We do not check sig_kernel_stop(signr) but set this marker ··· 2565 2562 } 2566 2563 2567 2564 /* If the (new) signal is now blocked, requeue it. */ 2568 - if (sigismember(&current->blocked, signr)) { 2569 - send_signal(signr, info, current, PIDTYPE_PID); 2565 + if (sigismember(&current->blocked, signr) || 2566 + fatal_signal_pending(current)) { 2567 + send_signal(signr, info, current, type); 2570 2568 signr = 0; 2571 2569 } 2572 2570 ··· 2666 2662 goto relock; 2667 2663 } 2668 2664 2669 - /* Has this task already been marked for death? */ 2670 - if (signal_group_exit(signal)) { 2671 - ksig->info.si_signo = signr = SIGKILL; 2672 - sigdelset(&current->pending.signal, SIGKILL); 2673 - trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, 2674 - &sighand->action[SIGKILL - 1]); 2675 - recalc_sigpending(); 2676 - goto fatal; 2677 - } 2678 - 2679 2665 for (;;) { 2680 2666 struct k_sigaction *ka; 2667 + enum pid_type type; 2668 + 2669 + /* Has this task already been marked for death? */ 2670 + if (signal_group_exit(signal)) { 2671 + ksig->info.si_signo = signr = SIGKILL; 2672 + sigdelset(&current->pending.signal, SIGKILL); 2673 + trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, 2674 + &sighand->action[SIGKILL - 1]); 2675 + recalc_sigpending(); 2676 + goto fatal; 2677 + } 2681 2678 2682 2679 if (unlikely(current->jobctl & JOBCTL_STOP_PENDING) && 2683 2680 do_signal_stop(0)) ··· 2711 2706 * so that the instruction pointer in the signal stack 2712 2707 * frame points to the faulting instruction. 2713 2708 */ 2709 + type = PIDTYPE_PID; 2714 2710 signr = dequeue_synchronous_signal(&ksig->info); 2715 2711 if (!signr) 2716 - signr = dequeue_signal(current, &current->blocked, &ksig->info); 2712 + signr = dequeue_signal(current, &current->blocked, 2713 + &ksig->info, &type); 2717 2714 2718 2715 if (!signr) 2719 2716 break; /* will return 0 */ 2720 2717 2721 2718 if (unlikely(current->ptrace) && (signr != SIGKILL) && 2722 2719 !(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) { 2723 - signr = ptrace_signal(signr, &ksig->info); 2720 + signr = ptrace_signal(signr, &ksig->info, type); 2724 2721 if (!signr) 2725 2722 continue; 2726 2723 } ··· 3547 3540 ktime_t *to = NULL, timeout = KTIME_MAX; 3548 3541 struct task_struct *tsk = current; 3549 3542 sigset_t mask = *which; 3543 + enum pid_type type; 3550 3544 int sig, ret = 0; 3551 3545 3552 3546 if (ts) { ··· 3564 3556 signotset(&mask); 3565 3557 3566 3558 spin_lock_irq(&tsk->sighand->siglock); 3567 - sig = dequeue_signal(tsk, &mask, info); 3559 + sig = dequeue_signal(tsk, &mask, info, &type); 3568 3560 if (!sig && timeout) { 3569 3561 /* 3570 3562 * None ready, temporarily unblock those we're interested ··· 3583 3575 spin_lock_irq(&tsk->sighand->siglock); 3584 3576 __set_task_blocked(tsk, &tsk->real_blocked); 3585 3577 sigemptyset(&tsk->real_blocked); 3586 - sig = dequeue_signal(tsk, &mask, info); 3578 + sig = dequeue_signal(tsk, &mask, info, &type); 3587 3579 } 3588 3580 
spin_unlock_irq(&tsk->sighand->siglock); 3589 3581