[PATCH] pi-futex: robust-futex exit

Fix robust PI-futexes to be properly unlocked on unexpected exit.

For this to work the kernel has to know whether a futex is a PI or a
non-PI one, because the semantics are different. Since the space in
relevant glibc data structures is extremely scarce, the best solution is
to encode the 'PI' information in bit 0 of the robust list pointer.
Existing (non-PI) glibc robust futexes have this bit always zero, so the
ABI is kept. New glibc with PI-robust-futexes will set this bit.
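As an illustration of this encoding (a sketch only, not part of the patch; the helper names are hypothetical), user space tags each robust-list entry pointer in bit 0 and the pointer is masked off again before use, roughly like this:

#include <linux/futex.h>
#include <stdint.h>

#define ROBUST_LIST_PI_BIT	1UL	/* bit 0 of the entry pointer */

/* Tag an entry pointer before linking it into the robust list. */
static inline struct robust_list *robust_entry_encode(struct robust_list *e,
						      int is_pi)
{
	return (struct robust_list *)
		((uintptr_t)e | (is_pi ? ROBUST_LIST_PI_BIT : 0));
}

/* Recover the real pointer and the PI flag, as the kernel does on exit. */
static inline struct robust_list *robust_entry_decode(struct robust_list *e,
						      int *is_pi)
{
	*is_pi = (int)((uintptr_t)e & ROBUST_LIST_PI_BIT);
	return (struct robust_list *)((uintptr_t)e & ~ROBUST_LIST_PI_BIT);
}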

Further fixes from Thomas Gleixner <tglx@linutronix.de>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Ingo Molnar, committed by Linus Torvalds (e3f2ddea 627371d7)

3 files changed, 89 insertions(+), 39 deletions(-)
include/linux/futex.h  (+2 -1)

···
 long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
	       u32 __user *uaddr2, u32 val2, u32 val3);

-extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr);
+extern int
+handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);

 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
kernel/futex.c  (+62 -29)

···
 	}

 	/*
-	 * We are the first waiter - try to look up the real owner and
-	 * attach the new pi_state to it:
+	 * We are the first waiter - try to look up the real owner and attach
+	 * the new pi_state to it, but bail out when the owner died bit is set
+	 * and TID = 0:
 	 */
 	pid = uval & FUTEX_TID_MASK;
+	if (!pid && (uval & FUTEX_OWNER_DIED))
+		return -ESRCH;
 	p = futex_find_get_task(pid);
 	if (!p)
 		return -ESRCH;
···
 	 * kept enabled while there is PI state around. We must also
 	 * preserve the owner died bit.)
 	 */
-	newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		newval = FUTEX_WAITERS | new_owner->pid;

-	inc_preempt_count();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-	dec_preempt_count();
-
-	if (curval == -EFAULT)
-		return -EFAULT;
-	if (curval != uval)
-		return -EINVAL;
+		inc_preempt_count();
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+		dec_preempt_count();
+		if (curval == -EFAULT)
+			return -EFAULT;
+		if (curval != uval)
+			return -EINVAL;
+	}

 	spin_lock_irq(&pi_state->owner->pi_lock);
 	WARN_ON(list_empty(&pi_state->list));
···
 	 * again. If it succeeds then we can return without waking
 	 * anyone else up:
 	 */
-	inc_preempt_count();
-	uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-	dec_preempt_count();
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		inc_preempt_count();
+		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+		dec_preempt_count();
+	}

 	if (unlikely(uval == -EFAULT))
 		goto pi_faulted;
···
 	/*
 	 * No waiters - kernel unlocks the futex:
 	 */
-	ret = unlock_futex_pi(uaddr, uval);
-	if (ret == -EFAULT)
-		goto pi_faulted;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		ret = unlock_futex_pi(uaddr, uval);
+		if (ret == -EFAULT)
+			goto pi_faulted;
+	}

 out_unlock:
 	spin_unlock(&hb->lock);
···
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
-	u32 uval, nval;
+	u32 uval, nval, mval;

 retry:
 	if (get_user(uval, uaddr))
···
 		 * thread-death.) The rest of the cleanup is done in
 		 * userspace.
 		 */
-		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
-						     uval | FUTEX_OWNER_DIED);
+		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
 		if (nval == -EFAULT)
 			return -1;

 		if (nval != uval)
 			goto retry;

-		if (uval & FUTEX_WAITERS)
-			futex_wake(uaddr, 1);
+		/*
+		 * Wake robust non-PI futexes here. The wakeup of
+		 * PI futexes happens in exit_pi_state():
+		 */
+		if (!pi) {
+			if (uval & FUTEX_WAITERS)
+				futex_wake(uaddr, 1);
+		}
 	}
+	return 0;
+}
+
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+				     struct robust_list __user **head, int *pi)
+{
+	unsigned long uentry;
+
+	if (get_user(uentry, (unsigned long *)head))
+		return -EFAULT;
+
+	*entry = (void *)(uentry & ~1UL);
+	*pi = uentry & 1;
+
 	return 0;
 }
···
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
 	unsigned long futex_offset;

 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(entry, &head->list.next))
+	if (fetch_robust_entry(&entry, &head->list.next, &pi))
 		return;
 	/*
 	 * Fetch the relative futex offset:
···
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(pending, &head->list_op_pending))
+	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
 		return;
+
 	if (pending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pip);

 	while (entry != &head->list) {
 		/*
···
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-						curr))
+						curr, pi))
 				return;
 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(entry, &entry->next))
+		if (fetch_robust_entry(&entry, &entry->next, &pi))
 			return;
 		/*
 		 * Avoid excessively long or circular lists:
kernel/futex_compat.c  (+25 -9)

···

 #include <asm/uaccess.h>

+
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int
+fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+		   compat_uptr_t *head, int *pi)
+{
+	if (get_user(*uentry, head))
+		return -EFAULT;
+
+	*entry = compat_ptr((*uentry) & ~1);
+	*pi = (unsigned int)(*uentry) & 1;
+
+	return 0;
+}
+
 /*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
···
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
 	struct robust_list __user *entry, *pending;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi;
 	compat_uptr_t uentry, upending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
 	compat_long_t futex_offset;

 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(uentry, &head->list.next))
+	if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
 		return;
-	entry = compat_ptr(uentry);
 	/*
 	 * Fetch the relative futex offset:
 	 */
···
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(upending, &head->list_op_pending))
+	if (fetch_robust_entry(&upending, &pending,
+			       &head->list_op_pending, &pi))
 		return;
-	pending = compat_ptr(upending);
 	if (upending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pi);

 	while (compat_ptr(uentry) != &head->list) {
 		/*
···
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-						curr))
+						curr, pi))
 				return;

 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(uentry, (compat_uptr_t *)&entry->next))
+		if (fetch_robust_entry(&uentry, &entry,
+				       (compat_uptr_t *)&entry->next, &pi))
 			return;
-		entry = compat_ptr(uentry);
 		/*
 		 * Avoid excessively long or circular lists:
 		 */
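For context, a minimal user-space sketch (not part of this patch, error handling omitted) of registering the robust list head that the exit_robust_list() path above walks; glibc does the equivalent during pthread initialization:

#include <linux/futex.h>
#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>

/* One list head per thread; the locking code links entries into .list. */
static struct robust_list_head robust_head = {
	.list		 = { &robust_head.list },	/* empty (circular) list */
	.futex_offset	 = 0,		/* lock word sits at offset 0 of an entry */
	.list_op_pending = NULL,
};

static int register_robust_list(void)
{
	/* From now on the kernel walks robust_head when this thread exits. */
	return syscall(SYS_set_robust_list, &robust_head, sizeof(robust_head));
}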