Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

locking/spinlock, arch: Update and fix spin_unlock_wait() implementations

This patch updates/fixes all spin_unlock_wait() implementations.

The update is in the semantics: where it previously provided only a
control dependency, we now upgrade to a full load-acquire to match the
store-release from the spin_unlock() we waited on. This ensures that
when spin_unlock_wait() returns, we are guaranteed to observe the full
critical section we waited on.

This fixes a number of spin_unlock_wait() users that (not
unreasonably) rely on this.

I also fixed a number of ticket lock versions to only wait on the
current lock holder, instead of for a full unlock, as this is
sufficient.

Furthermore, again for the ticket locks, I added an smp_rmb() in between
the initial ticket load and the spin loop testing the current value,
because I could not convince myself the address dependency is
sufficient, especially if the loads are of different sizes.

I'm more than happy to remove this smp_rmb() again if people are
certain the address dependency does indeed work as expected.

Note: PPC32 will be fixed independently

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: chris@zankel.net
Cc: cmetcalf@mellanox.com
Cc: davem@davemloft.net
Cc: dhowells@redhat.com
Cc: james.hogan@imgtec.com
Cc: jejb@parisc-linux.org
Cc: linux@armlinux.org.uk
Cc: mpe@ellerman.id.au
Cc: ralf@linux-mips.org
Cc: realmz6@gmail.com
Cc: rkuo@codeaurora.org
Cc: rth@twiddle.net
Cc: schwidefsky@de.ibm.com
Cc: tony.luck@intel.com
Cc: vgupta@synopsys.com
Cc: ysato@users.sourceforge.jp
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Peter Zijlstra and committed by Ingo Molnar.
726328d9 b464d127

+145 -32
+7 -2
arch/alpha/include/asm/spinlock.h
··· 3 3 4 4 #include <linux/kernel.h> 5 5 #include <asm/current.h> 6 + #include <asm/barrier.h> 7 + #include <asm/processor.h> 6 8 7 9 /* 8 10 * Simple spin lock operations. There are two variants, one clears IRQ's ··· 15 13 16 14 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 17 15 #define arch_spin_is_locked(x) ((x)->lock != 0) 18 - #define arch_spin_unlock_wait(x) \ 19 - do { cpu_relax(); } while ((x)->lock) 16 + 17 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 18 + { 19 + smp_cond_load_acquire(&lock->lock, !VAL); 20 + } 20 21 21 22 static inline int arch_spin_value_unlocked(arch_spinlock_t lock) 22 23 {
+5 -2
arch/arc/include/asm/spinlock.h
··· 15 15 16 16 #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) 17 17 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 18 - #define arch_spin_unlock_wait(x) \ 19 - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) 18 + 19 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 20 + { 21 + smp_cond_load_acquire(&lock->slock, !VAL); 22 + } 20 23 21 24 #ifdef CONFIG_ARC_HAS_LLSC 22 25
+17 -2
arch/arm/include/asm/spinlock.h
··· 6 6 #endif 7 7 8 8 #include <linux/prefetch.h> 9 + #include <asm/barrier.h> 10 + #include <asm/processor.h> 9 11 10 12 /* 11 13 * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K ··· 52 50 * memory. 53 51 */ 54 52 55 - #define arch_spin_unlock_wait(lock) \ 56 - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) 53 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 54 + { 55 + u16 owner = READ_ONCE(lock->tickets.owner); 56 + 57 + for (;;) { 58 + arch_spinlock_t tmp = READ_ONCE(*lock); 59 + 60 + if (tmp.tickets.owner == tmp.tickets.next || 61 + tmp.tickets.owner != owner) 62 + break; 63 + 64 + wfe(); 65 + } 66 + smp_acquire__after_ctrl_dep(); 67 + } 57 68 58 69 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 59 70
+3 -2
arch/blackfin/include/asm/spinlock.h
··· 12 12 #else 13 13 14 14 #include <linux/atomic.h> 15 + #include <asm/processor.h> 16 + #include <asm/barrier.h> 15 17 16 18 asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr); 17 19 asmlinkage void __raw_spin_lock_asm(volatile int *ptr); ··· 50 48 51 49 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 52 50 { 53 - while (arch_spin_is_locked(lock)) 54 - cpu_relax(); 51 + smp_cond_load_acquire(&lock->lock, !VAL); 55 52 } 56 53 57 54 static inline int arch_read_can_lock(arch_rwlock_t *rw)
+8 -2
arch/hexagon/include/asm/spinlock.h
··· 23 23 #define _ASM_SPINLOCK_H 24 24 25 25 #include <asm/irqflags.h> 26 + #include <asm/barrier.h> 27 + #include <asm/processor.h> 26 28 27 29 /* 28 30 * This file is pulled in for SMP builds. ··· 178 176 * SMP spinlocks are intended to allow only a single CPU at the lock 179 177 */ 180 178 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 181 - #define arch_spin_unlock_wait(lock) \ 182 - do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) 179 + 180 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 181 + { 182 + smp_cond_load_acquire(&lock->lock, !VAL); 183 + } 184 + 183 185 #define arch_spin_is_locked(x) ((x)->lock != 0) 184 186 185 187 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+4
arch/ia64/include/asm/spinlock.h
··· 15 15 16 16 #include <linux/atomic.h> 17 17 #include <asm/intrinsics.h> 18 + #include <asm/barrier.h> 19 + #include <asm/processor.h> 18 20 19 21 #define arch_spin_lock_init(x) ((x)->lock = 0) 20 22 ··· 88 86 return; 89 87 cpu_relax(); 90 88 } 89 + 90 + smp_acquire__after_ctrl_dep(); 91 91 } 92 92 93 93 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
+7 -2
arch/m32r/include/asm/spinlock.h
··· 13 13 #include <linux/atomic.h> 14 14 #include <asm/dcache_clear.h> 15 15 #include <asm/page.h> 16 + #include <asm/barrier.h> 17 + #include <asm/processor.h> 16 18 17 19 /* 18 20 * Your basic SMP spinlocks, allowing only a single CPU anywhere ··· 29 27 30 28 #define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) 31 29 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 32 - #define arch_spin_unlock_wait(x) \ 33 - do { cpu_relax(); } while (arch_spin_is_locked(x)) 30 + 31 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 32 + { 33 + smp_cond_load_acquire(&lock->slock, VAL > 0); 34 + } 34 35 35 36 /** 36 37 * arch_spin_trylock - Try spin lock and return a result
+12 -2
arch/metag/include/asm/spinlock.h
··· 1 1 #ifndef __ASM_SPINLOCK_H 2 2 #define __ASM_SPINLOCK_H 3 3 4 + #include <asm/barrier.h> 5 + #include <asm/processor.h> 6 + 4 7 #ifdef CONFIG_METAG_ATOMICITY_LOCK1 5 8 #include <asm/spinlock_lock1.h> 6 9 #else 7 10 #include <asm/spinlock_lnkget.h> 8 11 #endif 9 12 10 - #define arch_spin_unlock_wait(lock) \ 11 - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) 13 + /* 14 + * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1 15 + * locked. 16 + */ 17 + 18 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 19 + { 20 + smp_cond_load_acquire(&lock->lock, !VAL); 21 + } 12 22 13 23 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 14 24
+17 -2
arch/mips/include/asm/spinlock.h
··· 12 12 #include <linux/compiler.h> 13 13 14 14 #include <asm/barrier.h> 15 + #include <asm/processor.h> 15 16 #include <asm/compiler.h> 16 17 #include <asm/war.h> 17 18 ··· 49 48 } 50 49 51 50 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 52 - #define arch_spin_unlock_wait(x) \ 53 - while (arch_spin_is_locked(x)) { cpu_relax(); } 51 + 52 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 53 + { 54 + u16 owner = READ_ONCE(lock->h.serving_now); 55 + smp_rmb(); 56 + for (;;) { 57 + arch_spinlock_t tmp = READ_ONCE(*lock); 58 + 59 + if (tmp.h.serving_now == tmp.h.ticket || 60 + tmp.h.serving_now != owner) 61 + break; 62 + 63 + cpu_relax(); 64 + } 65 + smp_acquire__after_ctrl_dep(); 66 + } 54 67 55 68 static inline int arch_spin_is_contended(arch_spinlock_t *lock) 56 69 {
+7 -1
arch/mn10300/include/asm/spinlock.h
··· 12 12 #define _ASM_SPINLOCK_H 13 13 14 14 #include <linux/atomic.h> 15 + #include <asm/barrier.h> 16 + #include <asm/processor.h> 15 17 #include <asm/rwlock.h> 16 18 #include <asm/page.h> 17 19 ··· 25 23 */ 26 24 27 25 #define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0) 28 - #define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x)) 26 + 27 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 28 + { 29 + smp_cond_load_acquire(&lock->slock, !VAL); 30 + } 29 31 30 32 static inline void arch_spin_unlock(arch_spinlock_t *lock) 31 33 {
+7 -2
arch/parisc/include/asm/spinlock.h
··· 13 13 } 14 14 15 15 #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0) 16 - #define arch_spin_unlock_wait(x) \ 17 - do { cpu_relax(); } while (arch_spin_is_locked(x)) 16 + 17 + static inline void arch_spin_unlock_wait(arch_spinlock_t *x) 18 + { 19 + volatile unsigned int *a = __ldcw_align(x); 20 + 21 + smp_cond_load_acquire(a, VAL); 22 + } 18 23 19 24 static inline void arch_spin_lock_flags(arch_spinlock_t *x, 20 25 unsigned long flags)
+3
arch/s390/include/asm/spinlock.h
··· 10 10 #define __ASM_SPINLOCK_H 11 11 12 12 #include <linux/smp.h> 13 + #include <asm/barrier.h> 14 + #include <asm/processor.h> 13 15 14 16 #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) 15 17 ··· 99 97 { 100 98 while (arch_spin_is_locked(lock)) 101 99 arch_spin_relax(lock); 100 + smp_acquire__after_ctrl_dep(); 102 101 } 103 102 104 103 /*
+8 -2
arch/sh/include/asm/spinlock.h
··· 19 19 #error "Need movli.l/movco.l for spinlocks" 20 20 #endif 21 21 22 + #include <asm/barrier.h> 23 + #include <asm/processor.h> 24 + 22 25 /* 23 26 * Your basic SMP spinlocks, allowing only a single CPU anywhere 24 27 */ 25 28 26 29 #define arch_spin_is_locked(x) ((x)->lock <= 0) 27 30 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 28 - #define arch_spin_unlock_wait(x) \ 29 - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) 31 + 32 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 33 + { 34 + smp_cond_load_acquire(&lock->lock, VAL > 0); 35 + } 30 36 31 37 /* 32 38 * Simple spin lock operations. There are two variants, one clears IRQ's
+5 -2
arch/sparc/include/asm/spinlock_32.h
··· 9 9 #ifndef __ASSEMBLY__ 10 10 11 11 #include <asm/psr.h> 12 + #include <asm/barrier.h> 12 13 #include <asm/processor.h> /* for cpu_relax */ 13 14 14 15 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) 15 16 16 - #define arch_spin_unlock_wait(lock) \ 17 - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) 17 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 18 + { 19 + smp_cond_load_acquire(&lock->lock, !VAL); 20 + } 18 21 19 22 static inline void arch_spin_lock(arch_spinlock_t *lock) 20 23 {
+7 -3
arch/sparc/include/asm/spinlock_64.h
··· 8 8 9 9 #ifndef __ASSEMBLY__ 10 10 11 + #include <asm/processor.h> 12 + #include <asm/barrier.h> 13 + 11 14 /* To get debugging spinlocks which detect and catch 12 15 * deadlock situations, set CONFIG_DEBUG_SPINLOCK 13 16 * and rebuild your kernel. ··· 26 23 27 24 #define arch_spin_is_locked(lp) ((lp)->lock != 0) 28 25 29 - #define arch_spin_unlock_wait(lp) \ 30 - do { rmb(); \ 31 - } while((lp)->lock) 26 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 27 + { 28 + smp_cond_load_acquire(&lock->lock, !VAL); 29 + } 32 30 33 31 static inline void arch_spin_lock(arch_spinlock_t *lock) 34 32 {
+6
arch/tile/lib/spinlock_32.c
··· 76 76 do { 77 77 delay_backoff(iterations++); 78 78 } while (READ_ONCE(lock->current_ticket) == curr); 79 + 80 + /* 81 + * The TILE architecture doesn't do read speculation; therefore 82 + * a control dependency guarantees a LOAD->{LOAD,STORE} order. 83 + */ 84 + barrier(); 79 85 } 80 86 EXPORT_SYMBOL(arch_spin_unlock_wait); 81 87
+6
arch/tile/lib/spinlock_64.c
··· 76 76 do { 77 77 delay_backoff(iterations++); 78 78 } while (arch_spin_current(READ_ONCE(lock->lock)) == curr); 79 + 80 + /* 81 + * The TILE architecture doesn't do read speculation; therefore 82 + * a control dependency guarantees a LOAD->{LOAD,STORE} order. 83 + */ 84 + barrier(); 79 85 } 80 86 EXPORT_SYMBOL(arch_spin_unlock_wait); 81 87
+8 -2
arch/xtensa/include/asm/spinlock.h
··· 11 11 #ifndef _XTENSA_SPINLOCK_H 12 12 #define _XTENSA_SPINLOCK_H 13 13 14 + #include <asm/barrier.h> 15 + #include <asm/processor.h> 16 + 14 17 /* 15 18 * spinlock 16 19 * ··· 32 29 */ 33 30 34 31 #define arch_spin_is_locked(x) ((x)->slock != 0) 35 - #define arch_spin_unlock_wait(lock) \ 36 - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) 32 + 33 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 34 + { 35 + smp_cond_load_acquire(&lock->slock, !VAL); 36 + } 37 37 38 38 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 39 39
+1 -1
include/asm-generic/barrier.h
··· 194 194 }) 195 195 #endif 196 196 197 - #endif 197 + #endif /* CONFIG_SMP */ 198 198 199 199 /* Barriers for virtual machine guests when talking to an SMP host */ 200 200 #define virt_mb() __smp_mb()
+7 -3
include/linux/spinlock_up.h
··· 6 6 #endif 7 7 8 8 #include <asm/processor.h> /* for cpu_relax() */ 9 + #include <asm/barrier.h> 9 10 10 11 /* 11 12 * include/linux/spinlock_up.h - UP-debug version of spinlocks. ··· 25 24 26 25 #ifdef CONFIG_DEBUG_SPINLOCK 27 26 #define arch_spin_is_locked(x) ((x)->slock == 0) 27 + 28 + static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) 29 + { 30 + smp_cond_load_acquire(&lock->slock, VAL); 31 + } 28 32 29 33 static inline void arch_spin_lock(arch_spinlock_t *lock) 30 34 { ··· 73 67 74 68 #else /* DEBUG_SPINLOCK */ 75 69 #define arch_spin_is_locked(lock) ((void)(lock), 0) 70 + #define arch_spin_unlock_wait(lock) do { barrier(); (void)(lock); } while (0) 76 71 /* for sched/core.c and kernel_lock.c: */ 77 72 # define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0) 78 73 # define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0) ··· 85 78 86 79 #define arch_read_can_lock(lock) (((void)(lock), 1)) 87 80 #define arch_write_can_lock(lock) (((void)(lock), 1)) 88 - 89 - #define arch_spin_unlock_wait(lock) \ 90 - do { cpu_relax(); } while (arch_spin_is_locked(lock)) 91 81 92 82 #endif /* __LINUX_SPINLOCK_UP_H */