signalfd simplification

This simplifies the signalfd code by not keeping it attached to the
sighand for its entire lifetime.

In this way, the signalfd remains attached to the sighand only during
poll(2) (and select and epoll) and read(2). This also allows removing
all the custom "tsk == current" checks in kernel/signal.c, since
dequeue_signal() will only be called by "current".

I think this is also what Ben was suggesting some time ago.

The external effect of this is that a thread can extract only its own
private signals and the group ones. I think this is acceptable
behaviour, in that those are the signals the thread would be able to
fetch without signalfd.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by Davide Libenzi and committed by Linus Torvalds b8fceee1 9db619e6

+39 -217
-3
fs/exec.c
··· 50 #include <linux/tsacct_kern.h> 51 #include <linux/cn_proc.h> 52 #include <linux/audit.h> 53 - #include <linux/signalfd.h> 54 55 #include <asm/uaccess.h> 56 #include <asm/mmu_context.h> ··· 783 * and we can just re-use it all. 784 */ 785 if (atomic_read(&oldsighand->count) <= 1) { 786 - signalfd_detach(tsk); 787 exit_itimers(sig); 788 return 0; 789 } ··· 921 sig->flags = 0; 922 923 no_thread_group: 924 - signalfd_detach(tsk); 925 exit_itimers(sig); 926 if (leader) 927 release_task(leader);
··· 50 #include <linux/tsacct_kern.h> 51 #include <linux/cn_proc.h> 52 #include <linux/audit.h> 53 54 #include <asm/uaccess.h> 55 #include <asm/mmu_context.h> ··· 784 * and we can just re-use it all. 785 */ 786 if (atomic_read(&oldsighand->count) <= 1) { 787 exit_itimers(sig); 788 return 0; 789 } ··· 923 sig->flags = 0; 924 925 no_thread_group: 926 exit_itimers(sig); 927 if (leader) 928 release_task(leader);
+29 -161
fs/signalfd.c
··· 11 * Now using anonymous inode source. 12 * Thanks to Oleg Nesterov for useful code review and suggestions. 13 * More comments and suggestions from Arnd Bergmann. 14 - * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br> 15 * Retrieve multiple signals with one read() call 16 */ 17 18 #include <linux/file.h> ··· 29 #include <linux/signalfd.h> 30 31 struct signalfd_ctx { 32 - struct list_head lnk; 33 - wait_queue_head_t wqh; 34 sigset_t sigmask; 35 - struct task_struct *tsk; 36 }; 37 - 38 - struct signalfd_lockctx { 39 - struct task_struct *tsk; 40 - unsigned long flags; 41 - }; 42 - 43 - /* 44 - * Tries to acquire the sighand lock. We do not increment the sighand 45 - * use count, and we do not even pin the task struct, so we need to 46 - * do it inside an RCU read lock, and we must be prepared for the 47 - * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand 48 - * being detached. We return 0 if the sighand has been detached, or 49 - * 1 if we were able to pin the sighand lock. 50 - */ 51 - static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk) 52 - { 53 - struct sighand_struct *sighand = NULL; 54 - 55 - rcu_read_lock(); 56 - lk->tsk = rcu_dereference(ctx->tsk); 57 - if (likely(lk->tsk != NULL)) 58 - sighand = lock_task_sighand(lk->tsk, &lk->flags); 59 - rcu_read_unlock(); 60 - 61 - if (!sighand) 62 - return 0; 63 - 64 - if (!ctx->tsk) { 65 - unlock_task_sighand(lk->tsk, &lk->flags); 66 - return 0; 67 - } 68 - 69 - if (lk->tsk->tgid == current->tgid) 70 - lk->tsk = current; 71 - 72 - return 1; 73 - } 74 - 75 - static void signalfd_unlock(struct signalfd_lockctx *lk) 76 - { 77 - unlock_task_sighand(lk->tsk, &lk->flags); 78 - } 79 - 80 - /* 81 - * This must be called with the sighand lock held. 
82 - */ 83 - void signalfd_deliver(struct task_struct *tsk, int sig) 84 - { 85 - struct sighand_struct *sighand = tsk->sighand; 86 - struct signalfd_ctx *ctx, *tmp; 87 - 88 - BUG_ON(!sig); 89 - list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) { 90 - /* 91 - * We use a negative signal value as a way to broadcast that the 92 - * sighand has been orphaned, so that we can notify all the 93 - * listeners about this. Remember the ctx->sigmask is inverted, 94 - * so if the user is interested in a signal, that corresponding 95 - * bit will be zero. 96 - */ 97 - if (sig < 0) { 98 - if (ctx->tsk == tsk) { 99 - ctx->tsk = NULL; 100 - list_del_init(&ctx->lnk); 101 - wake_up(&ctx->wqh); 102 - } 103 - } else { 104 - if (!sigismember(&ctx->sigmask, sig)) 105 - wake_up(&ctx->wqh); 106 - } 107 - } 108 - } 109 - 110 - static void signalfd_cleanup(struct signalfd_ctx *ctx) 111 - { 112 - struct signalfd_lockctx lk; 113 - 114 - /* 115 - * This is tricky. If the sighand is gone, we do not need to remove 116 - * context from the list, the list itself won't be there anymore. 117 - */ 118 - if (signalfd_lock(ctx, &lk)) { 119 - list_del(&ctx->lnk); 120 - signalfd_unlock(&lk); 121 - } 122 - kfree(ctx); 123 - } 124 125 static int signalfd_release(struct inode *inode, struct file *file) 126 { 127 - signalfd_cleanup(file->private_data); 128 return 0; 129 } 130 ··· 42 { 43 struct signalfd_ctx *ctx = file->private_data; 44 unsigned int events = 0; 45 - struct signalfd_lockctx lk; 46 47 - poll_wait(file, &ctx->wqh, wait); 48 49 - /* 50 - * Let the caller get a POLLIN in this case, ala socket recv() when 51 - * the peer disconnects. 
52 - */ 53 - if (signalfd_lock(ctx, &lk)) { 54 - if ((lk.tsk == current && 55 - next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) || 56 - next_signal(&lk.tsk->signal->shared_pending, 57 - &ctx->sigmask) > 0) 58 - events |= POLLIN; 59 - signalfd_unlock(&lk); 60 - } else 61 events |= POLLIN; 62 63 return events; 64 } ··· 123 int nonblock) 124 { 125 ssize_t ret; 126 - struct signalfd_lockctx lk; 127 DECLARE_WAITQUEUE(wait, current); 128 129 - if (!signalfd_lock(ctx, &lk)) 130 - return 0; 131 - 132 - ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); 133 switch (ret) { 134 case 0: 135 if (!nonblock) 136 break; 137 ret = -EAGAIN; 138 default: 139 - signalfd_unlock(&lk); 140 return ret; 141 } 142 143 - add_wait_queue(&ctx->wqh, &wait); 144 for (;;) { 145 set_current_state(TASK_INTERRUPTIBLE); 146 - ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); 147 - signalfd_unlock(&lk); 148 if (ret != 0) 149 break; 150 if (signal_pending(current)) { 151 ret = -ERESTARTSYS; 152 break; 153 } 154 schedule(); 155 - ret = signalfd_lock(ctx, &lk); 156 - if (unlikely(!ret)) { 157 - /* 158 - * Let the caller read zero byte, ala socket 159 - * recv() when the peer disconnect. This test 160 - * must be done before doing a dequeue_signal(), 161 - * because if the sighand has been orphaned, 162 - * the dequeue_signal() call is going to crash 163 - * because ->sighand will be long gone. 164 - */ 165 - break; 166 - } 167 } 168 169 - remove_wait_queue(&ctx->wqh, &wait); 170 __set_current_state(TASK_RUNNING); 171 172 return ret; 173 } 174 175 /* 176 - * Returns either the size of a "struct signalfd_siginfo", or zero if the 177 - * sighand we are attached to, has been orphaned. The "count" parameter 178 - * must be at least the size of a "struct signalfd_siginfo". 
179 */ 180 static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, 181 loff_t *ppos) ··· 178 return -EINVAL; 179 180 siginfo = (struct signalfd_siginfo __user *) buf; 181 - 182 do { 183 ret = signalfd_dequeue(ctx, &info, nonblock); 184 if (unlikely(ret <= 0)) ··· 190 nonblock = 1; 191 } while (--count); 192 193 - return total ? total : ret; 194 } 195 196 static const struct file_operations signalfd_fops = { ··· 199 .read = signalfd_read, 200 }; 201 202 - /* 203 - * Create a file descriptor that is associated with our signal 204 - * state. We can pass it around to others if we want to, but 205 - * it will always be _our_ signal state. 206 - */ 207 asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 208 { 209 int error; 210 sigset_t sigmask; 211 struct signalfd_ctx *ctx; 212 - struct sighand_struct *sighand; 213 struct file *file; 214 struct inode *inode; 215 - struct signalfd_lockctx lk; 216 217 if (sizemask != sizeof(sigset_t) || 218 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) ··· 218 if (!ctx) 219 return -ENOMEM; 220 221 - init_waitqueue_head(&ctx->wqh); 222 ctx->sigmask = sigmask; 223 - ctx->tsk = current->group_leader; 224 - 225 - sighand = current->sighand; 226 - /* 227 - * Add this fd to the list of signal listeners. 228 - */ 229 - spin_lock_irq(&sighand->siglock); 230 - list_add_tail(&ctx->lnk, &sighand->signalfd_list); 231 - spin_unlock_irq(&sighand->siglock); 232 233 /* 234 * When we call this, the initialization must be complete, since ··· 237 fput(file); 238 return -EINVAL; 239 } 240 - /* 241 - * We need to be prepared of the fact that the sighand this fd 242 - * is attached to, has been detched. In that case signalfd_lock() 243 - * will return 0, and we'll just skip setting the new mask. 
244 - */ 245 - if (signalfd_lock(ctx, &lk)) { 246 - ctx->sigmask = sigmask; 247 - signalfd_unlock(&lk); 248 - } 249 - wake_up(&ctx->wqh); 250 fput(file); 251 } 252 253 return ufd; 254 255 err_fdalloc: 256 - signalfd_cleanup(ctx); 257 return error; 258 } 259
··· 11 * Now using anonymous inode source. 12 * Thanks to Oleg Nesterov for useful code review and suggestions. 13 * More comments and suggestions from Arnd Bergmann. 14 + * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br> 15 * Retrieve multiple signals with one read() call 16 + * Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org> 17 + * Attach to the sighand only during read() and poll(). 18 */ 19 20 #include <linux/file.h> ··· 27 #include <linux/signalfd.h> 28 29 struct signalfd_ctx { 30 sigset_t sigmask; 31 }; 32 33 static int signalfd_release(struct inode *inode, struct file *file) 34 { 35 + kfree(file->private_data); 36 return 0; 37 } 38 ··· 130 { 131 struct signalfd_ctx *ctx = file->private_data; 132 unsigned int events = 0; 133 134 + poll_wait(file, &current->sighand->signalfd_wqh, wait); 135 136 + spin_lock_irq(&current->sighand->siglock); 137 + if (next_signal(&current->pending, &ctx->sigmask) || 138 + next_signal(&current->signal->shared_pending, 139 + &ctx->sigmask)) 140 events |= POLLIN; 141 + spin_unlock_irq(&current->sighand->siglock); 142 143 return events; 144 } ··· 219 int nonblock) 220 { 221 ssize_t ret; 222 DECLARE_WAITQUEUE(wait, current); 223 224 + spin_lock_irq(&current->sighand->siglock); 225 + ret = dequeue_signal(current, &ctx->sigmask, info); 226 switch (ret) { 227 case 0: 228 if (!nonblock) 229 break; 230 ret = -EAGAIN; 231 default: 232 + spin_unlock_irq(&current->sighand->siglock); 233 return ret; 234 } 235 236 + add_wait_queue(&current->sighand->signalfd_wqh, &wait); 237 for (;;) { 238 set_current_state(TASK_INTERRUPTIBLE); 239 + ret = dequeue_signal(current, &ctx->sigmask, info); 240 if (ret != 0) 241 break; 242 if (signal_pending(current)) { 243 ret = -ERESTARTSYS; 244 break; 245 } 246 + spin_unlock_irq(&current->sighand->siglock); 247 schedule(); 248 + spin_lock_irq(&current->sighand->siglock); 249 } 250 + spin_unlock_irq(&current->sighand->siglock); 251 252 + remove_wait_queue(&current->sighand->signalfd_wqh, &wait); 
253 __set_current_state(TASK_RUNNING); 254 255 return ret; 256 } 257 258 /* 259 + * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative 260 + * error code. The "count" parameter must be at least the size of a 261 + * "struct signalfd_siginfo". 262 */ 263 static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, 264 loff_t *ppos) ··· 287 return -EINVAL; 288 289 siginfo = (struct signalfd_siginfo __user *) buf; 290 do { 291 ret = signalfd_dequeue(ctx, &info, nonblock); 292 if (unlikely(ret <= 0)) ··· 300 nonblock = 1; 301 } while (--count); 302 303 + return total ? total: ret; 304 } 305 306 static const struct file_operations signalfd_fops = { ··· 309 .read = signalfd_read, 310 }; 311 312 asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 313 { 314 int error; 315 sigset_t sigmask; 316 struct signalfd_ctx *ctx; 317 struct file *file; 318 struct inode *inode; 319 320 if (sizemask != sizeof(sigset_t) || 321 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) ··· 335 if (!ctx) 336 return -ENOMEM; 337 338 ctx->sigmask = sigmask; 339 340 /* 341 * When we call this, the initialization must be complete, since ··· 364 fput(file); 365 return -EINVAL; 366 } 367 + spin_lock_irq(&current->sighand->siglock); 368 + ctx->sigmask = sigmask; 369 + spin_unlock_irq(&current->sighand->siglock); 370 + 371 + wake_up(&current->sighand->signalfd_wqh); 372 fput(file); 373 } 374 375 return ufd; 376 377 err_fdalloc: 378 + kfree(ctx); 379 return error; 380 } 381
+1 -1
include/linux/init_task.h
··· 86 .count = ATOMIC_INIT(1), \ 87 .action = { { { .sa_handler = NULL, } }, }, \ 88 .siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \ 89 - .signalfd_list = LIST_HEAD_INIT(sighand.signalfd_list), \ 90 } 91 92 extern struct group_info init_groups;
··· 86 .count = ATOMIC_INIT(1), \ 87 .action = { { { .sa_handler = NULL, } }, }, \ 88 .siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \ 89 + .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(sighand.signalfd_wqh), \ 90 } 91 92 extern struct group_info init_groups;
+1 -1
include/linux/sched.h
··· 438 atomic_t count; 439 struct k_sigaction action[_NSIG]; 440 spinlock_t siglock; 441 - struct list_head signalfd_list; 442 }; 443 444 struct pacct_struct {
··· 438 atomic_t count; 439 struct k_sigaction action[_NSIG]; 440 spinlock_t siglock; 441 + wait_queue_head_t signalfd_wqh; 442 }; 443 444 struct pacct_struct {
+4 -36
include/linux/signalfd.h
··· 45 #ifdef CONFIG_SIGNALFD 46 47 /* 48 - * Deliver the signal to listening signalfd. This must be called 49 - * with the sighand lock held. Same are the following that end up 50 - * calling signalfd_deliver(). 51 - */ 52 - void signalfd_deliver(struct task_struct *tsk, int sig); 53 - 54 - /* 55 - * No need to fall inside signalfd_deliver() if no signal listeners 56 - * are available. 57 */ 58 static inline void signalfd_notify(struct task_struct *tsk, int sig) 59 { 60 - if (unlikely(!list_empty(&tsk->sighand->signalfd_list))) 61 - signalfd_deliver(tsk, sig); 62 - } 63 - 64 - /* 65 - * The signal -1 is used to notify the signalfd that the sighand 66 - * is on its way to be detached. 67 - */ 68 - static inline void signalfd_detach_locked(struct task_struct *tsk) 69 - { 70 - if (unlikely(!list_empty(&tsk->sighand->signalfd_list))) 71 - signalfd_deliver(tsk, -1); 72 - } 73 - 74 - static inline void signalfd_detach(struct task_struct *tsk) 75 - { 76 - struct sighand_struct *sighand = tsk->sighand; 77 - 78 - if (unlikely(!list_empty(&sighand->signalfd_list))) { 79 - spin_lock_irq(&sighand->siglock); 80 - signalfd_deliver(tsk, -1); 81 - spin_unlock_irq(&sighand->siglock); 82 - } 83 } 84 85 #else /* CONFIG_SIGNALFD */ 86 87 - #define signalfd_deliver(t, s) do { } while (0) 88 - #define signalfd_notify(t, s) do { } while (0) 89 - #define signalfd_detach_locked(t) do { } while (0) 90 - #define signalfd_detach(t) do { } while (0) 91 92 #endif /* CONFIG_SIGNALFD */ 93
··· 45 #ifdef CONFIG_SIGNALFD 46 47 /* 48 + * Deliver the signal to listening signalfd. 49 */ 50 static inline void signalfd_notify(struct task_struct *tsk, int sig) 51 { 52 + if (unlikely(waitqueue_active(&tsk->sighand->signalfd_wqh))) 53 + wake_up(&tsk->sighand->signalfd_wqh); 54 } 55 56 #else /* CONFIG_SIGNALFD */ 57 58 + static inline void signalfd_notify(struct task_struct *tsk, int sig) { } 59 60 #endif /* CONFIG_SIGNALFD */ 61
-9
kernel/exit.c
··· 24 #include <linux/pid_namespace.h> 25 #include <linux/ptrace.h> 26 #include <linux/profile.h> 27 - #include <linux/signalfd.h> 28 #include <linux/mount.h> 29 #include <linux/proc_fs.h> 30 #include <linux/kthread.h> ··· 84 rcu_read_lock(); 85 sighand = rcu_dereference(tsk->sighand); 86 spin_lock(&sighand->siglock); 87 - 88 - /* 89 - * Notify that this sighand has been detached. This must 90 - * be called with the tsk->sighand lock held. Also, this 91 - * access tsk->sighand internally, so it must be called 92 - * before tsk->sighand is reset. 93 - */ 94 - signalfd_detach_locked(tsk); 95 96 posix_cpu_timers_exit(tsk); 97 if (atomic_dec_and_test(&sig->count))
··· 24 #include <linux/pid_namespace.h> 25 #include <linux/ptrace.h> 26 #include <linux/profile.h> 27 #include <linux/mount.h> 28 #include <linux/proc_fs.h> 29 #include <linux/kthread.h> ··· 85 rcu_read_lock(); 86 sighand = rcu_dereference(tsk->sighand); 87 spin_lock(&sighand->siglock); 88 89 posix_cpu_timers_exit(tsk); 90 if (atomic_dec_and_test(&sig->count))
+1 -1
kernel/fork.c
··· 1438 struct sighand_struct *sighand = data; 1439 1440 spin_lock_init(&sighand->siglock); 1441 - INIT_LIST_HEAD(&sighand->signalfd_list); 1442 } 1443 1444 void __init proc_caches_init(void)
··· 1438 struct sighand_struct *sighand = data; 1439 1440 spin_lock_init(&sighand->siglock); 1441 + init_waitqueue_head(&sighand->signalfd_wqh); 1442 } 1443 1444 void __init proc_caches_init(void)
+3 -5
kernel/signal.c
··· 378 /* We only dequeue private signals from ourselves, we don't let 379 * signalfd steal them 380 */ 381 - if (likely(tsk == current)) 382 - signr = __dequeue_signal(&tsk->pending, mask, info); 383 if (!signr) { 384 signr = __dequeue_signal(&tsk->signal->shared_pending, 385 mask, info); ··· 406 } 407 } 408 } 409 - if (likely(tsk == current)) 410 - recalc_sigpending(); 411 if (signr && unlikely(sig_kernel_stop(signr))) { 412 /* 413 * Set a marker that we have dequeued a stop signal. Our ··· 423 if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) 424 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; 425 } 426 - if (signr && likely(tsk == current) && 427 ((info->si_code & __SI_MASK) == __SI_TIMER) && 428 info->si_sys_private){ 429 /*
··· 378 /* We only dequeue private signals from ourselves, we don't let 379 * signalfd steal them 380 */ 381 + signr = __dequeue_signal(&tsk->pending, mask, info); 382 if (!signr) { 383 signr = __dequeue_signal(&tsk->signal->shared_pending, 384 mask, info); ··· 407 } 408 } 409 } 410 + recalc_sigpending(); 411 if (signr && unlikely(sig_kernel_stop(signr))) { 412 /* 413 * Set a marker that we have dequeued a stop signal. Our ··· 425 if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) 426 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; 427 } 428 + if (signr && 429 ((info->si_code & __SI_MASK) == __SI_TIMER) && 430 info->si_sys_private){ 431 /*