Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

futex: runtime enable pi and robust functionality

Not all architectures implement futex_atomic_cmpxchg_inatomic(). The default
implementation returns -ENOSYS, which is currently not handled inside of the
futex guts.

Futex PI calls and robust list exits with a held futex result in an endless
loop in the futex code on architectures which have no support.

Fixing up every place where futex_atomic_cmpxchg_inatomic() is called would
add a fair amount of extra if/else constructs to the already complex code. It
is also not possible to disable the robust feature before user space tries to
register robust lists.

Compile time disabling is not a good idea either, as there are already
architectures with runtime detection of futex_atomic_cmpxchg_inatomic support.

Detect the functionality at runtime instead by calling
cmpxchg_futex_value_locked() with a NULL pointer from the futex initialization
code. This is guaranteed to fail, but the call of
futex_atomic_cmpxchg_inatomic() happens with pagefaults disabled.

On architectures which use the asm-generic implementation or have runtime
CPU feature detection, a -ENOSYS return value disables the PI/robust features.

On architectures with a working implementation the call returns -EFAULT and
the PI/robust features are enabled.

The relevant syscalls return -ENOSYS and the robust list exit code is blocked,
when the detection fails.

Fixes http://lkml.org/lkml/2008/2/11/149
Originally reported by: Lennert Buytenhek

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Lennert Buytenhek <buytenh@wantstofly.org>
Cc: Riku Voipio <riku.voipio@movial.fi>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Thomas Gleixner and committed by Linus Torvalds
a0c1e907 3e4ab747

+44 -4
+1
include/linux/futex.h
··· 167 167 #ifdef CONFIG_FUTEX 168 168 extern void exit_robust_list(struct task_struct *curr); 169 169 extern void exit_pi_state_list(struct task_struct *curr); 170 + extern int futex_cmpxchg_enabled; 170 171 #else 171 172 static inline void exit_robust_list(struct task_struct *curr) 172 173 {
+34 -4
kernel/futex.c
··· 60 60 61 61 #include "rtmutex_common.h" 62 62 63 + int __read_mostly futex_cmpxchg_enabled; 64 + 63 65 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) 64 66 65 67 /* ··· 471 469 struct futex_hash_bucket *hb; 472 470 union futex_key key; 473 471 472 + if (!futex_cmpxchg_enabled) 473 + return; 474 474 /* 475 475 * We are a ZOMBIE and nobody can enqueue itself on 476 476 * pi_state_list anymore, but we have to be careful ··· 1874 1870 sys_set_robust_list(struct robust_list_head __user *head, 1875 1871 size_t len) 1876 1872 { 1873 + if (!futex_cmpxchg_enabled) 1874 + return -ENOSYS; 1877 1875 /* 1878 1876 * The kernel knows only one size for now: 1879 1877 */ ··· 1899 1893 { 1900 1894 struct robust_list_head __user *head; 1901 1895 unsigned long ret; 1896 + 1897 + if (!futex_cmpxchg_enabled) 1898 + return -ENOSYS; 1902 1899 1903 1900 if (!pid) 1904 1901 head = current->robust_list; ··· 2006 1997 unsigned long futex_offset; 2007 1998 int rc; 2008 1999 2000 + if (!futex_cmpxchg_enabled) 2001 + return; 2002 + 2009 2003 /* 2010 2004 * Fetch the list head (which was registered earlier, via 2011 2005 * sys_set_robust_list()): ··· 2063 2051 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, 2064 2052 u32 __user *uaddr2, u32 val2, u32 val3) 2065 2053 { 2066 - int ret; 2054 + int ret = -ENOSYS; 2067 2055 int cmd = op & FUTEX_CMD_MASK; 2068 2056 struct rw_semaphore *fshared = NULL; 2069 2057 ··· 2095 2083 ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); 2096 2084 break; 2097 2085 case FUTEX_LOCK_PI: 2098 - ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); 2086 + if (futex_cmpxchg_enabled) 2087 + ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); 2099 2088 break; 2100 2089 case FUTEX_UNLOCK_PI: 2101 - ret = futex_unlock_pi(uaddr, fshared); 2090 + if (futex_cmpxchg_enabled) 2091 + ret = futex_unlock_pi(uaddr, fshared); 2102 2092 break; 2103 2093 case FUTEX_TRYLOCK_PI: 2104 - ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); 2094 + if (futex_cmpxchg_enabled) 2095 + ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); 2105 2096 break; 2106 2097 default: 2107 2098 ret = -ENOSYS; ··· 2160 2145 2161 2146 static int __init init(void) 2162 2147 { 2148 + u32 curval; 2163 2149 int i; 2150 + 2151 + /* 2152 + * This will fail and we want it. Some arch implementations do 2153 + * runtime detection of the futex_atomic_cmpxchg_inatomic() 2154 + * functionality. We want to know that before we call in any 2155 + * of the complex code paths. Also we want to prevent 2156 + * registration of robust lists in that case. NULL is 2157 + * guaranteed to fault and we get -EFAULT on functional 2158 + * implementation, the non functional ones will return 2159 + * -ENOSYS. 2160 + */ 2161 + curval = cmpxchg_futex_value_locked(NULL, 0, 0); 2162 + if (curval == -EFAULT) 2163 + futex_cmpxchg_enabled = 1; 2164 2164 2165 2165 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { 2166 2166 plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
+9
kernel/futex_compat.c
··· 54 54 compat_long_t futex_offset; 55 55 int rc; 56 56 57 + if (!futex_cmpxchg_enabled) 58 + return; 59 + 57 60 /* 58 61 * Fetch the list head (which was registered earlier, via 59 62 * sys_set_robust_list()): ··· 118 115 compat_sys_set_robust_list(struct compat_robust_list_head __user *head, 119 116 compat_size_t len) 120 117 { 118 + if (!futex_cmpxchg_enabled) 119 + return -ENOSYS; 120 + 121 121 if (unlikely(len != sizeof(*head))) 122 122 return -EINVAL; 123 123 ··· 135 129 { 136 130 struct compat_robust_list_head __user *head; 137 131 unsigned long ret; 132 + 133 + if (!futex_cmpxchg_enabled) 134 + return -ENOSYS; 138 135 139 136 if (!pid) 140 137 head = current->compat_robust_list;