Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: locks: prefetch the destination word for write prior to strex

The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch prefixes our {spin,read,write}_[try]lock implementations with
pldw instructions (on CPUs that support them) to try to grab the line
in exclusive state from the start. arch_rwlock_t is changed to avoid
using a volatile member, since this generates compiler warnings when
falling back on the __builtin_prefetch intrinsic, which expects a const
void * argument.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>

+11 -4
+10 -3
arch/arm/include/asm/spinlock.h
··· 5 5 #error SMP not supported on pre-ARMv6 CPUs 6 6 #endif 7 7 8 - #include <asm/processor.h> 8 + #include <linux/prefetch.h> 9 9 10 10 /* 11 11 * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K ··· 70 70 u32 newval; 71 71 arch_spinlock_t lockval; 72 72 73 + prefetchw(&lock->slock); 73 74 __asm__ __volatile__( 74 75 "1: ldrex %0, [%3]\n" 75 76 " add %1, %0, %4\n" ··· 94 93 unsigned long contended, res; 95 94 u32 slock; 96 95 96 + prefetchw(&lock->slock); 97 97 do { 98 98 __asm__ __volatile__( 99 99 " ldrex %0, [%3]\n" ··· 147 145 { 148 146 unsigned long tmp; 149 147 148 + prefetchw(&rw->lock); 150 149 __asm__ __volatile__( 151 150 "1: ldrex %0, [%1]\n" 152 151 " teq %0, #0\n" ··· 166 163 { 167 164 unsigned long contended, res; 168 165 166 + prefetchw(&rw->lock); 169 167 do { 170 168 __asm__ __volatile__( 171 169 " ldrex %0, [%2]\n" ··· 200 196 } 201 197 202 198 /* write_can_lock - would write_trylock() succeed? */ 203 - #define arch_write_can_lock(x) ((x)->lock == 0) 199 + #define arch_write_can_lock(x) (ACCESS_ONCE((x)->lock) == 0) 204 200 205 201 /* 206 202 * Read locks are a bit more hairy: ··· 218 214 { 219 215 unsigned long tmp, tmp2; 220 216 217 + prefetchw(&rw->lock); 221 218 __asm__ __volatile__( 222 219 "1: ldrex %0, [%2]\n" 223 220 " adds %0, %0, #1\n" ··· 239 234 240 235 smp_mb(); 241 236 237 + prefetchw(&rw->lock); 242 238 __asm__ __volatile__( 243 239 "1: ldrex %0, [%2]\n" 244 240 " sub %0, %0, #1\n" ··· 258 252 { 259 253 unsigned long contended, res; 260 254 255 + prefetchw(&rw->lock); 261 256 do { 262 257 __asm__ __volatile__( 263 258 " ldrex %0, [%2]\n" ··· 280 273 } 281 274 282 275 /* read_can_lock - would read_trylock() succeed? */ 283 - #define arch_read_can_lock(x) ((x)->lock < 0x80000000) 276 + #define arch_read_can_lock(x) (ACCESS_ONCE((x)->lock) < 0x80000000) 284 277 285 278 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) 286 279 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+1 -1
arch/arm/include/asm/spinlock_types.h
··· 25 25 #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } 26 26 27 27 typedef struct { 28 - volatile unsigned int lock; 28 + u32 lock; 29 29 } arch_rwlock_t; 30 30 31 31 #define __ARCH_RW_LOCK_UNLOCKED { 0 }