Merge branch 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking fixes from Thomas Gleixner:
"A small series of fixes which all address possible missed wakeups:

- Document and fix the wakeup ordering of wake_q

- Add the missing barrier in rcuwait_wake_up(), which was documented
in the comment but missing in the code

- Fix the possible missed wakeups in the rwsem and futex code"

* 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
locking/rwsem: Fix (possible) missed wakeup
futex: Fix (possible) missed wakeup
sched/wake_q: Fix wakeup ordering for wake_q
sched/wake_q: Document wake_q_add()
sched/wait: Fix rcuwait_wake_up() ordering

Changed files
+39 -12
+5 -1
include/linux/sched/wake_q.h
@@ -24,9 +24,13 @@
  * called near the end of a function. Otherwise, the list can be
  * re-initialized for later re-use by wake_q_init().
  *
- * Note that this can cause spurious wakeups. schedule() callers
+ * NOTE that this can cause spurious wakeups. schedule() callers
  * must ensure the call is done inside a loop, confirming that the
  * wakeup condition has in fact occurred.
+ *
+ * NOTE that there is no guarantee the wakeup will happen any later than the
+ * wake_q_add() location. Therefore task must be ready to be woken at the
+ * location of the wake_q_add().
  */
 
 #include <linux/sched.h>
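To make the new NOTE concrete, here is a minimal sketch of the usage pattern
it requires; it is not taken from the patch, and the 'done' flag and 'waiter'
task pointer are hypothetical. The waker publishes the condition before
calling wake_q_add(), since the wakeup may be delivered at that very point,
and the sleeper treats every wakeup as potentially spurious and re-checks the
condition in a loop:

/* Sketch only: 'done' and 'waiter' are illustrative, not from the patch. */
static int done;
static struct task_struct *waiter;

void waker(void)
{
        DEFINE_WAKE_Q(wake_q);

        smp_store_release(&done, 1);    /* condition must already hold...    */
        wake_q_add(&wake_q, waiter);    /* ...the task may be woken here...  */
        wake_up_q(&wake_q);             /* ...or here, once locks are gone   */
}

void sleeper(void)
{
        for (;;) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (smp_load_acquire(&done))
                        break;
                schedule();             /* may return spuriously; loop       */
        }
        __set_current_state(TASK_RUNNING);
}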
+1 -1
kernel/exit.c
@@ -307,7 +307,7 @@
 	 *    MB (A)	      MB (B)
 	 *    [L] cond	      [L] tsk
 	 */
-	smp_rmb(); /* (B) */
+	smp_mb(); /* (B) */
 
 	/*
 	 * Avoid using task_rcu_dereference() magic as long as we are careful,
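The diagram in the surrounding comment already shows why a read barrier
cannot be correct here: on the wake side the barrier sits between a store
([S] cond) and a load ([L] tsk), and smp_rmb() orders only loads against
loads. A compressed sketch of the pairing, with illustrative identifiers
rather than the exact rcuwait code:

/* Waiter side (rcuwait_wait_event(), simplified): */
rcu_assign_pointer(w->task, current);   /* [S] tsk  */
smp_mb();                               /* (A)      */
if (!READ_ONCE(cond))                   /* [L] cond */
        schedule();

/* Waker side (rcuwait_wake_up(), simplified): */
WRITE_ONCE(cond, 1);                    /* [S] cond                        */
smp_mb();                               /* (B): must order the cond store
                                         * before the tsk load, which a
                                         * load-load smp_rmb() cannot do   */
rcu_read_lock();
tsk = rcu_dereference(w->task);         /* [L] tsk  */
if (tsk)
        wake_up_process(tsk);
rcu_read_unlock();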
+8 -5
kernel/futex.c
@@ -1452,11 +1452,7 @@
 	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
 		return;
 
-	/*
-	 * Queue the task for later wakeup for after we've released
-	 * the hb->lock. wake_q_add() grabs reference to p.
-	 */
-	wake_q_add(wake_q, p);
+	get_task_struct(p);
 	__unqueue_futex(q);
 	/*
 	 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
@@ -1466,6 +1462,13 @@
 	 * plist_del in __unqueue_futex().
 	 */
 	smp_store_release(&q->lock_ptr, NULL);
+
+	/*
+	 * Queue the task for later wakeup for after we've released
+	 * the hb->lock. wake_q_add() grabs reference to p.
+	 */
+	wake_q_add(wake_q, p);
+	put_task_struct(p);
 }
 
 /*
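Why the reordering matters: wake_q_add() is a no-op when the task is already
queued by a concurrent waker, and that waker's wake_up_q() may then deliver
the only wakeup before we have cleared q->lock_ptr. A timeline sketch of the
lost wakeup, reconstructed for illustration rather than taken from the patch:

/*
 *   mark_wake_futex()                   futex waiter
 *   ----------------                    ------------
 *   wake_q_add(wake_q, p)
 *     cmpxchg() fails: p is already
 *     on another wake_q, whose owner
 *     runs wake_up_q() and wakes p
 *                                       observes q->lock_ptr != NULL,
 *                                       concludes the wakeup was
 *                                       spurious, sleeps again
 *   smp_store_release(&q->lock_ptr, NULL)
 *     too late: no further wakeup is
 *     pending, the waiter sleeps forever
 */

Moving wake_q_add() after the release store closes that window; the
get_task_struct()/put_task_struct() pair keeps p alive across it, since the
waiting task may go away as soon as lock_ptr reads NULL.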
+9 -2
kernel/locking/rwsem-xadd.c
@@ -198,15 +198,22 @@
 		woken++;
 		tsk = waiter->task;
 
-		wake_q_add(wake_q, tsk);
+		get_task_struct(tsk);
 		list_del(&waiter->list);
 		/*
-		 * Ensure that the last operation is setting the reader
+		 * Ensure calling get_task_struct() before setting the reader
 		 * waiter to nil such that rwsem_down_read_failed() cannot
 		 * race with do_exit() by always holding a reference count
 		 * to the task to wakeup.
 		 */
 		smp_store_release(&waiter->task, NULL);
+		/*
+		 * Ensure issuing the wakeup (either by us or someone else)
+		 * after setting the reader waiter to nil.
+		 */
+		wake_q_add(wake_q, tsk);
+		/* wake_q_add() already take the task ref */
+		put_task_struct(tsk);
 	}
 
 	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
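The same two orderings are enforced here, and they pair with the waiter side.
A simplified, illustrative sketch of the sleep loop in
rwsem_down_read_failed() that this code wakes (the in-tree loop reads
waiter.task directly; the acquire load below just makes the pairing explicit):

/* Waiter: sleeps until the wake side NULLs waiter.task. */
while (true) {
        set_current_state(TASK_UNINTERRUPTIBLE);
        if (!smp_load_acquire(&waiter.task))    /* pairs with the
                                                 * smp_store_release() above */
                break;
        schedule();
}
__set_current_state(TASK_RUNNING);

If wake_q_add() ran before the NULL store and lost its cmpxchg() to a
concurrent waker, the early wakeup could arrive while waiter.task was still
set; the loop above would go back to sleep with no wakeup left pending. Hence
get_task_struct() before the store, so do_exit() cannot free the task, and
wake_q_add() after it.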
+16 -3
kernel/sched/core.c
@@ -396,6 +396,18 @@
 #endif
 #endif
 
+/**
+ * wake_q_add() - queue a wakeup for 'later' waking.
+ * @head: the wake_q_head to add @task to
+ * @task: the task to queue for 'later' wakeup
+ *
+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the
+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
+ * instantly.
+ *
+ * This function must be used as-if it were wake_up_process(); IOW the task
+ * must be ready to be woken at this location.
+ */
 void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 {
 	struct wake_q_node *node = &task->wake_q;
@@ -405,10 +417,11 @@
 	 * its already queued (either by us or someone else) and will get the
 	 * wakeup due to that.
 	 *
-	 * This cmpxchg() executes a full barrier, which pairs with the full
-	 * barrier executed by the wakeup in wake_up_q().
+	 * In order to ensure that a pending wakeup will observe our pending
+	 * state, even in the failed case, an explicit smp_mb() must be used.
 	 */
-	if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
+	smp_mb__before_atomic();
+	if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
 		return;
 
 	get_task_struct(task);
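The subtle case is the failed cmpxchg(): wake_q_add() then relies on whoever
already queued the task to deliver the wakeup, so the caller's prior stores
(the condition the woken task will re-check) must be visible to that wakeup.
A failed cmpxchg() by itself guarantees no ordering, hence the explicit
barrier before it. A minimal sketch of the pairing, with a hypothetical
'cond' flag:

/* CPU0: wake_q_add() caller */
WRITE_ONCE(cond, 1);                    /* store the woken task will check  */
smp_mb__before_atomic();                /* ordered even if the cmpxchg fails */
if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
        return;                         /* lost: CPU1 queued the task and
                                         * owns the wakeup                  */

/* CPU1: eventually runs wake_up_q() for the task it queued earlier */
wake_up_process(task);                  /* executes a full barrier when it
                                         * wakes the task, pairing with the
                                         * smp_mb__before_atomic() above    */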