Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

posix-cpu-timers: Store a reference to a pid not a task

POSIX CPU timers do not handle the death of a process well.

This is most clearly seen when a multi-threaded process calls exec from a
thread that is not the leader of the thread group. The posix cpu timer code
continues to pin the old thread group leader and is unable to find the
siglock from there.

This results in posix_cpu_timer_del() being unable to delete a timer and
posix_cpu_timer_set() being unable to set a timer. Further, to compensate
for the problems in posix_cpu_timer_del() on a multi-threaded exec, all
timers that point at the multi-threaded task are stopped.

The code for the timers fundamentally needs to check if the target
process/thread is alive. This needs an extra level of indirection. This
level of indirection is already available in struct pid.

So replace cpu.task with cpu.pid to get the needed extra layer of
indirection.

In addition to handling things more cleanly, this reduces the amount of
memory a timer can pin when a process exits and is then reaped, from
a task_struct to the vastly smaller struct pid.

Fixes: e0a70217107e ("posix-cpu-timers: workaround to suppress the problems with mt exec")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/87wo86tz6d.fsf@x220.int.ebiederm.org

Authored by Eric W. Biederman and committed by Thomas Gleixner
55e8c8eb beb41d9c

+56 -19
+1 -1
include/linux/posix-timers.h
··· 69 69 struct cpu_timer { 70 70 struct timerqueue_node node; 71 71 struct timerqueue_head *head; 72 - struct task_struct *task; 72 + struct pid *pid; 73 73 struct list_head elist; 74 74 int firing; 75 75 };
+55 -18
kernel/time/posix-cpu-timers.c
··· 118 118 return __get_task_for_clock(clock, false, false) ? 0 : -EINVAL; 119 119 } 120 120 121 + static inline enum pid_type cpu_timer_pid_type(struct k_itimer *timer) 122 + { 123 + return CPUCLOCK_PERTHREAD(timer->it_clock) ? PIDTYPE_PID : PIDTYPE_TGID; 124 + } 125 + 126 + static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer) 127 + { 128 + return pid_task(timer->it.cpu.pid, cpu_timer_pid_type(timer)); 129 + } 130 + 121 131 /* 122 132 * Update expiry time from increment, and increase overrun count, 123 133 * given the current clock sample. ··· 401 391 402 392 new_timer->kclock = &clock_posix_cpu; 403 393 timerqueue_init(&new_timer->it.cpu.node); 404 - new_timer->it.cpu.task = p; 394 + new_timer->it.cpu.pid = get_task_pid(p, cpu_timer_pid_type(new_timer)); 395 + /* 396 + * get_task_for_clock() took a reference on @p. Drop it as the timer 397 + * holds a reference on the pid of @p. 398 + */ 399 + put_task_struct(p); 405 400 return 0; 406 401 } 407 402 ··· 419 404 static int posix_cpu_timer_del(struct k_itimer *timer) 420 405 { 421 406 struct cpu_timer *ctmr = &timer->it.cpu; 422 - struct task_struct *p = ctmr->task; 423 407 struct sighand_struct *sighand; 408 + struct task_struct *p; 424 409 unsigned long flags; 425 410 int ret = 0; 426 411 427 - if (WARN_ON_ONCE(!p)) 428 - return -EINVAL; 412 + rcu_read_lock(); 413 + p = cpu_timer_task_rcu(timer); 414 + if (!p) 415 + goto out; 429 416 430 417 /* 431 418 * Protect against sighand release/switch in exit/exec and process/ ··· 449 432 unlock_task_sighand(p, &flags); 450 433 } 451 434 435 + out: 436 + rcu_read_unlock(); 452 437 if (!ret) 453 - put_task_struct(p); 438 + put_pid(ctmr->pid); 454 439 455 440 return ret; 456 441 } ··· 580 561 clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); 581 562 u64 old_expires, new_expires, old_incr, val; 582 563 struct cpu_timer *ctmr = &timer->it.cpu; 583 - struct task_struct *p = ctmr->task; 584 564 struct sighand_struct *sighand; 565 + struct task_struct 
*p; 585 566 unsigned long flags; 586 567 int ret = 0; 587 568 588 - if (WARN_ON_ONCE(!p)) 589 - return -EINVAL; 569 + rcu_read_lock(); 570 + p = cpu_timer_task_rcu(timer); 571 + if (!p) { 572 + /* 573 + * If p has just been reaped, we can no 574 + * longer get any information about it at all. 575 + */ 576 + rcu_read_unlock(); 577 + return -ESRCH; 578 + } 590 579 591 580 /* 592 581 * Use the to_ktime conversion because that clamps the maximum ··· 611 584 * If p has just been reaped, we can no 612 585 * longer get any information about it at all. 613 586 */ 614 - if (unlikely(sighand == NULL)) 587 + if (unlikely(sighand == NULL)) { 588 + rcu_read_unlock(); 615 589 return -ESRCH; 590 + } 616 591 617 592 /* 618 593 * Disarm any old timer after extracting its expiry time. ··· 719 690 720 691 ret = 0; 721 692 out: 693 + rcu_read_unlock(); 722 694 if (old) 723 695 old->it_interval = ns_to_timespec64(old_incr); 724 696 ··· 731 701 clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); 732 702 struct cpu_timer *ctmr = &timer->it.cpu; 733 703 u64 now, expires = cpu_timer_getexpires(ctmr); 734 - struct task_struct *p = ctmr->task; 704 + struct task_struct *p; 735 705 736 - if (WARN_ON_ONCE(!p)) 737 - return; 706 + rcu_read_lock(); 707 + p = cpu_timer_task_rcu(timer); 708 + if (!p) 709 + goto out; 738 710 739 711 /* 740 712 * Easy part: convert the reload time. ··· 744 712 itp->it_interval = ktime_to_timespec64(timer->it_interval); 745 713 746 714 if (!expires) 747 - return; 715 + goto out; 748 716 749 717 /* 750 718 * Sample the clock to take the difference with the expiry time. 
··· 764 732 itp->it_value.tv_nsec = 1; 765 733 itp->it_value.tv_sec = 0; 766 734 } 735 + out: 736 + rcu_read_unlock(); 767 737 } 768 738 769 739 #define MAX_COLLECTED 20 ··· 986 952 static void posix_cpu_timer_rearm(struct k_itimer *timer) 987 953 { 988 954 clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); 989 - struct cpu_timer *ctmr = &timer->it.cpu; 990 - struct task_struct *p = ctmr->task; 955 + struct task_struct *p; 991 956 struct sighand_struct *sighand; 992 957 unsigned long flags; 993 958 u64 now; 994 959 995 - if (WARN_ON_ONCE(!p)) 996 - return; 960 + rcu_read_lock(); 961 + p = cpu_timer_task_rcu(timer); 962 + if (!p) 963 + goto out; 997 964 998 965 /* 999 966 * Fetch the current sample and update the timer's expiry time. ··· 1009 974 /* Protect timer list r/w in arm_timer() */ 1010 975 sighand = lock_task_sighand(p, &flags); 1011 976 if (unlikely(sighand == NULL)) 1012 - return; 977 + goto out; 1013 978 1014 979 /* 1015 980 * Now re-arm for the new expiry time. 1016 981 */ 1017 982 arm_timer(timer, p); 1018 983 unlock_task_sighand(p, &flags); 984 + out: 985 + rcu_read_unlock(); 1019 986 } 1020 987 1021 988 /**