Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: Fix write lock scalability 64-bit issue

With the write lock path simply subtracting RW_LOCK_BIAS there
is, on large systems, the theoretical possibility of overflowing
the 32-bit value that was used so far (namely if 128 or more
CPUs manage to do the subtraction, but don't get to do the
inverse addition in the failure path quickly enough).

A first measure is to modify RW_LOCK_BIAS itself - with the new
value chosen, it is good for up to 2048 CPUs each allowed to
nest over 2048 times on the read path without causing an issue.
Quite possibly it would even be sufficient to adjust the bias a
little further, assuming that allowing for significantly less
nesting would suffice.

However, as the original value chosen allowed for even more
nesting levels, to support more than 2048 CPUs (possible
currently only for 64-bit kernels) the lock itself gets widened
to 64 bits.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/4E258E0D020000780004E3F0@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Authored by Jan Beulich and committed by Ingo Molnar
a750036f a7386694

+73 -28
+2
arch/x86/include/asm/asm.h
··· 3 3 4 4 #ifdef __ASSEMBLY__ 5 5 # define __ASM_FORM(x) x 6 + # define __ASM_FORM_COMMA(x) x, 6 7 # define __ASM_EX_SEC .section __ex_table, "a" 7 8 #else 8 9 # define __ASM_FORM(x) " " #x " " 10 + # define __ASM_FORM_COMMA(x) " " #x "," 9 11 # define __ASM_EX_SEC " .section __ex_table,\"a\"\n" 10 12 #endif 11 13
+42 -1
arch/x86/include/asm/rwlock.h
··· 1 1 #ifndef _ASM_X86_RWLOCK_H 2 2 #define _ASM_X86_RWLOCK_H 3 3 4 - #define RW_LOCK_BIAS 0x01000000 4 + #include <asm/asm.h> 5 + 6 + #if CONFIG_NR_CPUS <= 2048 7 + 8 + #ifndef __ASSEMBLY__ 9 + typedef union { 10 + s32 lock; 11 + s32 write; 12 + } arch_rwlock_t; 13 + #endif 14 + 15 + #define RW_LOCK_BIAS 0x00100000 16 + #define READ_LOCK_SIZE(insn) __ASM_FORM(insn##l) 17 + #define READ_LOCK_ATOMIC(n) atomic_##n 18 + #define WRITE_LOCK_ADD(n) __ASM_FORM_COMMA(addl n) 19 + #define WRITE_LOCK_SUB(n) __ASM_FORM_COMMA(subl n) 20 + #define WRITE_LOCK_CMP RW_LOCK_BIAS 21 + 22 + #else /* CONFIG_NR_CPUS > 2048 */ 23 + 24 + #include <linux/const.h> 25 + 26 + #ifndef __ASSEMBLY__ 27 + typedef union { 28 + s64 lock; 29 + struct { 30 + u32 read; 31 + s32 write; 32 + }; 33 + } arch_rwlock_t; 34 + #endif 35 + 36 + #define RW_LOCK_BIAS (_AC(1,L) << 32) 37 + #define READ_LOCK_SIZE(insn) __ASM_FORM(insn##q) 38 + #define READ_LOCK_ATOMIC(n) atomic64_##n 39 + #define WRITE_LOCK_ADD(n) __ASM_FORM(incl) 40 + #define WRITE_LOCK_SUB(n) __ASM_FORM(decl) 41 + #define WRITE_LOCK_CMP 1 42 + 43 + #endif /* CONFIG_NR_CPUS */ 44 + 45 + #define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } 5 46 6 47 /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */ 7 48
+22 -15
arch/x86/include/asm/spinlock.h
··· 2 2 #define _ASM_X86_SPINLOCK_H 3 3 4 4 #include <asm/atomic.h> 5 - #include <asm/rwlock.h> 6 5 #include <asm/page.h> 7 6 #include <asm/processor.h> 8 7 #include <linux/compiler.h> ··· 233 234 */ 234 235 static inline int arch_read_can_lock(arch_rwlock_t *lock) 235 236 { 236 - return (int)(lock)->lock > 0; 237 + return lock->lock > 0; 237 238 } 238 239 239 240 /** ··· 242 243 */ 243 244 static inline int arch_write_can_lock(arch_rwlock_t *lock) 244 245 { 245 - return (lock)->lock == RW_LOCK_BIAS; 246 + return lock->write == WRITE_LOCK_CMP; 246 247 } 247 248 248 249 static inline void arch_read_lock(arch_rwlock_t *rw) 249 250 { 250 - asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" 251 + asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t" 251 252 "jns 1f\n" 252 253 "call __read_lock_failed\n\t" 253 254 "1:\n" ··· 256 257 257 258 static inline void arch_write_lock(arch_rwlock_t *rw) 258 259 { 259 - asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" 260 + asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t" 260 261 "jz 1f\n" 261 262 "call __write_lock_failed\n\t" 262 263 "1:\n" 263 - ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); 264 + ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS) 265 + : "memory"); 264 266 } 265 267 266 268 static inline int arch_read_trylock(arch_rwlock_t *lock) 267 269 { 268 - atomic_t *count = (atomic_t *)lock; 270 + READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock; 269 271 270 - if (atomic_dec_return(count) >= 0) 272 + if (READ_LOCK_ATOMIC(dec_return)(count) >= 0) 271 273 return 1; 272 - atomic_inc(count); 274 + READ_LOCK_ATOMIC(inc)(count); 273 275 return 0; 274 276 } 275 277 276 278 static inline int arch_write_trylock(arch_rwlock_t *lock) 277 279 { 278 - atomic_t *count = (atomic_t *)lock; 280 + atomic_t *count = (atomic_t *)&lock->write; 279 281 280 - if (atomic_sub_and_test(RW_LOCK_BIAS, count)) 282 + if (atomic_sub_and_test(WRITE_LOCK_CMP, count)) 281 283 return 1; 282 - atomic_add(RW_LOCK_BIAS, count); 284 + atomic_add(WRITE_LOCK_CMP, count); 283 285 return 0; 284 286 } 285 287 286 288 static inline void arch_read_unlock(arch_rwlock_t *rw) 287 289 { 288 - asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); 290 + asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0" 291 + :"+m" (rw->lock) : : "memory"); 289 292 } 290 293 291 294 static inline void arch_write_unlock(arch_rwlock_t *rw) 292 295 { 293 - asm volatile(LOCK_PREFIX "addl %1, %0" 294 - : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); 296 + asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0" 297 + : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory"); 295 298 } 296 299 297 300 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) 298 301 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) 302 + 303 + #undef READ_LOCK_SIZE 304 + #undef READ_LOCK_ATOMIC 305 + #undef WRITE_LOCK_ADD 306 + #undef WRITE_LOCK_SUB 307 + #undef WRITE_LOCK_CMP 299 308 300 309 #define arch_spin_relax(lock) cpu_relax() 301 310 #define arch_read_relax(lock) cpu_relax()
+1 -5
arch/x86/include/asm/spinlock_types.h
··· 11 11 12 12 #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } 13 13 14 - typedef struct { 15 - unsigned int lock; 16 - } arch_rwlock_t; 17 - 18 - #define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } 14 + #include <asm/rwlock.h> 19 15 20 16 #endif /* _ASM_X86_SPINLOCK_TYPES_H */
+6 -6
arch/x86/lib/rwlock.S
··· 15 15 CFI_STARTPROC 16 16 FRAME 17 17 0: LOCK_PREFIX 18 - addl $RW_LOCK_BIAS, (%__lock_ptr) 18 + WRITE_LOCK_ADD($RW_LOCK_BIAS) (%__lock_ptr) 19 19 1: rep; nop 20 - cmpl $RW_LOCK_BIAS, (%__lock_ptr) 20 + cmpl $WRITE_LOCK_CMP, (%__lock_ptr) 21 21 jne 1b 22 22 LOCK_PREFIX 23 - subl $RW_LOCK_BIAS, (%__lock_ptr) 23 + WRITE_LOCK_SUB($RW_LOCK_BIAS) (%__lock_ptr) 24 24 jnz 0b 25 25 ENDFRAME 26 26 ret ··· 31 31 CFI_STARTPROC 32 32 FRAME 33 33 0: LOCK_PREFIX 34 - incl (%__lock_ptr) 34 + READ_LOCK_SIZE(inc) (%__lock_ptr) 35 35 1: rep; nop 36 - cmpl $1, (%__lock_ptr) 36 + READ_LOCK_SIZE(cmp) $1, (%__lock_ptr) 37 37 js 1b 38 38 LOCK_PREFIX 39 - decl (%__lock_ptr) 39 + READ_LOCK_SIZE(dec) (%__lock_ptr) 40 40 js 0b 41 41 ENDFRAME 42 42 ret
-1
arch/x86/lib/thunk_64.S
··· 8 8 #include <linux/linkage.h> 9 9 #include <asm/dwarf2.h> 10 10 #include <asm/calling.h> 11 - #include <asm/rwlock.h> 12 11 13 12 /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ 14 13 .macro THUNK name, func, put_ret_addr_in_rdi=0