Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

locking/barriers: Replace smp_cond_acquire() with smp_cond_load_acquire()

This new form allows using hardware-assisted waiting.

Some hardware (ARM64 and x86) allows monitoring an address for changes,
so by providing a pointer we can use this to replace the cpu_relax()
with hardware-optimized methods in the future.

Requested-by: Will Deacon <will.deacon@arm.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Peter Zijlstra and committed by
Ingo Molnar
1f03e8d2 245050c2

+31 -18
+19 -6
include/linux/compiler.h
··· 305 305 }) 306 306 307 307 /** 308 - * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering 308 + * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering 309 + * @ptr: pointer to the variable to wait on 309 310 * @cond: boolean expression to wait for 310 311 * 311 312 * Equivalent to using smp_load_acquire() on the condition variable but employs 312 313 * the control dependency of the wait to reduce the barrier on many platforms. 313 314 * 315 + * Due to C lacking lambda expressions we load the value of *ptr into a 316 + * pre-named variable @VAL to be used in @cond. 317 + * 314 318 * The control dependency provides a LOAD->STORE order, the additional RMB 315 319 * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order, 316 320 * aka. ACQUIRE. 317 321 */ 318 - #define smp_cond_acquire(cond) do { \ 319 - while (!(cond)) \ 320 - cpu_relax(); \ 321 - smp_rmb(); /* ctrl + rmb := acquire */ \ 322 - } while (0) 322 + #ifndef smp_cond_load_acquire 323 + #define smp_cond_load_acquire(ptr, cond_expr) ({ \ 324 + typeof(ptr) __PTR = (ptr); \ 325 + typeof(*ptr) VAL; \ 326 + for (;;) { \ 327 + VAL = READ_ONCE(*__PTR); \ 328 + if (cond_expr) \ 329 + break; \ 330 + cpu_relax(); \ 331 + } \ 332 + smp_rmb(); /* ctrl + rmb := acquire */ \ 333 + VAL; \ 334 + }) 335 + #endif 323 336 324 337 #endif /* __KERNEL__ */ 325 338
+6 -6
kernel/locking/qspinlock.c
··· 475 475 * sequentiality; this is because not all clear_pending_set_locked() 476 476 * implementations imply full barriers. 477 477 */ 478 - smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK)); 478 + smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK)); 479 479 480 480 /* 481 481 * take ownership and clear the pending bit. ··· 562 562 * 563 563 * The PV pv_wait_head_or_lock function, if active, will acquire 564 564 * the lock and return a non-zero value. So we have to skip the 565 - * smp_cond_acquire() call. As the next PV queue head hasn't been 565 + * smp_cond_load_acquire() call. As the next PV queue head hasn't been 566 566 * designated yet, there is no way for the locked value to become 567 567 * _Q_SLOW_VAL. So both the set_locked() and the 568 568 * atomic_cmpxchg_relaxed() calls will be safe. ··· 573 573 if ((val = pv_wait_head_or_lock(lock, node))) 574 574 goto locked; 575 575 576 - smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)); 576 + val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK)); 577 577 578 578 locked: 579 579 /* ··· 593 593 break; 594 594 } 595 595 /* 596 - * The smp_cond_acquire() call above has provided the necessary 597 - * acquire semantics required for locking. At most two 598 - * iterations of this loop may be ran. 596 + * The smp_cond_load_acquire() call above has provided the 597 + * necessary acquire semantics required for locking. At most 598 + * two iterations of this loop may be ran. 599 599 */ 600 600 old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL); 601 601 if (old == val)
+4 -4
kernel/sched/core.c
··· 1935 1935 * chain to provide order. Instead we do: 1936 1936 * 1937 1937 * 1) smp_store_release(X->on_cpu, 0) 1938 - * 2) smp_cond_acquire(!X->on_cpu) 1938 + * 2) smp_cond_load_acquire(!X->on_cpu) 1939 1939 * 1940 1940 * Example: 1941 1941 * ··· 1946 1946 * sched-out X 1947 1947 * smp_store_release(X->on_cpu, 0); 1948 1948 * 1949 - * smp_cond_acquire(!X->on_cpu); 1949 + * smp_cond_load_acquire(&X->on_cpu, !VAL); 1950 1950 * X->state = WAKING 1951 1951 * set_task_cpu(X,2) 1952 1952 * ··· 1972 1972 * This means that any means of doing remote wakeups must order the CPU doing 1973 1973 * the wakeup against the CPU the task is going to end up running on. This, 1974 1974 * however, is already required for the regular Program-Order guarantee above, 1975 - * since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire). 1975 + * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). 1976 1976 * 1977 1977 */ 1978 1978 ··· 2045 2045 * This ensures that tasks getting woken will be fully ordered against 2046 2046 * their previous state and preserve Program Order. 2047 2047 */ 2048 - smp_cond_acquire(!p->on_cpu); 2048 + smp_cond_load_acquire(&p->on_cpu, !VAL); 2049 2049 2050 2050 p->sched_contributes_to_load = !!task_contributes_to_load(p); 2051 2051 p->state = TASK_WAKING;
+1 -1
kernel/sched/sched.h
··· 1113 1113 * In particular, the load of prev->state in finish_task_switch() must 1114 1114 * happen before this. 1115 1115 * 1116 - * Pairs with the smp_cond_acquire() in try_to_wake_up(). 1116 + * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). 1117 1117 */ 1118 1118 smp_store_release(&prev->on_cpu, 0); 1119 1119 #endif
+1 -1
kernel/smp.c
··· 107 107 */ 108 108 static __always_inline void csd_lock_wait(struct call_single_data *csd) 109 109 { 110 - smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK)); 110 + smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); 111 111 } 112 112 113 113 static __always_inline void csd_lock(struct call_single_data *csd)