Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARC: LLOCK/SCOND based spin_lock

Current spin_lock uses EXchange instruction to implement the atomic test
and set of lock location (reads orig value and ST 1). This however forces
the cacheline into exclusive state (because of the ST) and concurrent
loops in multiple cores will bounce the line around between cores.

Instead, use LLOCK/SCOND to implement the atomic test and set, which is
better as the cache line stays in shared state while the lock is spinning on LLOCK.

The real motivation of this change however is to make way for future
changes in atomics to implement delayed retry (with backoff).
An initial experiment with delayed retry in atomics combined with the
original EX-based spinlock was a total disaster (it broke even LMBench), as
struct sock has a cache line shared by an atomic_t and a spinlock. The
tight spinning on the lock caused the atomic retry to keep backing off
such that it would never finish.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

+69 -7
+69 -7
arch/arc/include/asm/spinlock.h
··· 18 18 #define arch_spin_unlock_wait(x) \ 19 19 do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) 20 20 21 + #ifdef CONFIG_ARC_HAS_LLSC 22 + 21 23 static inline void arch_spin_lock(arch_spinlock_t *lock) 22 24 { 23 - unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; 25 + unsigned int val; 26 + 27 + smp_mb(); 28 + 29 + __asm__ __volatile__( 30 + "1: llock %[val], [%[slock]] \n" 31 + " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ 32 + " scond %[LOCKED], [%[slock]] \n" /* acquire */ 33 + " bnz 1b \n" 34 + " \n" 35 + : [val] "=&r" (val) 36 + : [slock] "r" (&(lock->slock)), 37 + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) 38 + : "memory", "cc"); 39 + 40 + smp_mb(); 41 + } 42 + 43 + /* 1 - lock taken successfully */ 44 + static inline int arch_spin_trylock(arch_spinlock_t *lock) 45 + { 46 + unsigned int val, got_it = 0; 47 + 48 + smp_mb(); 49 + 50 + __asm__ __volatile__( 51 + "1: llock %[val], [%[slock]] \n" 52 + " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ 53 + " scond %[LOCKED], [%[slock]] \n" /* acquire */ 54 + " bnz 1b \n" 55 + " mov %[got_it], 1 \n" 56 + "4: \n" 57 + " \n" 58 + : [val] "=&r" (val), 59 + [got_it] "+&r" (got_it) 60 + : [slock] "r" (&(lock->slock)), 61 + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) 62 + : "memory", "cc"); 63 + 64 + smp_mb(); 65 + 66 + return got_it; 67 + } 68 + 69 + static inline void arch_spin_unlock(arch_spinlock_t *lock) 70 + { 71 + smp_mb(); 72 + 73 + lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; 74 + 75 + smp_mb(); 76 + } 77 + 78 + #else /* !CONFIG_ARC_HAS_LLSC */ 79 + 80 + static inline void arch_spin_lock(arch_spinlock_t *lock) 81 + { 82 + unsigned int val = __ARCH_SPIN_LOCK_LOCKED__; 24 83 25 84 /* 26 85 * This smp_mb() is technically superfluous, we only need the one ··· 92 33 __asm__ __volatile__( 93 34 "1: ex %0, [%1] \n" 94 35 " breq %0, %2, 1b \n" 95 - : "+&r" (tmp) 36 + : "+&r" (val) 96 37 : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) 97 38 : "memory"); 98 39 ··· 107 48 smp_mb(); 108 
49 } 109 50 51 + /* 1 - lock taken successfully */ 110 52 static inline int arch_spin_trylock(arch_spinlock_t *lock) 111 53 { 112 - unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; 54 + unsigned int val = __ARCH_SPIN_LOCK_LOCKED__; 113 55 114 56 smp_mb(); 115 57 116 58 __asm__ __volatile__( 117 59 "1: ex %0, [%1] \n" 118 - : "+r" (tmp) 60 + : "+r" (val) 119 61 : "r"(&(lock->slock)) 120 62 : "memory"); 121 63 122 64 smp_mb(); 123 65 124 - return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); 66 + return (val == __ARCH_SPIN_LOCK_UNLOCKED__); 125 67 } 126 68 127 69 static inline void arch_spin_unlock(arch_spinlock_t *lock) 128 70 { 129 - unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; 71 + unsigned int val = __ARCH_SPIN_LOCK_UNLOCKED__; 130 72 131 73 /* 132 74 * RELEASE barrier: given the instructions avail on ARCv2, full barrier ··· 137 77 138 78 __asm__ __volatile__( 139 79 " ex %0, [%1] \n" 140 - : "+r" (tmp) 80 + : "+r" (val) 141 81 : "r"(&(lock->slock)) 142 82 : "memory"); 143 83 ··· 147 87 */ 148 88 smp_mb(); 149 89 } 90 + 91 + #endif 150 92 151 93 /* 152 94 * Read-write spinlocks, allowing multiple readers but only one writer.