Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc: Fix atomic_xxx_return barrier semantics

The Documentation/memory-barriers.txt document requires that atomic
operations that return a value act as a memory barrier both before
and after the actual atomic operation.

Our current implementation doesn't guarantee this. More specifically,
while a load following the isync cannot be issued before stwcx. has
completed, that completion doesn't architecturally mean that the
result of stwcx. is visible to other processors (or any previous stores
for that matter) (typically, the other processors' L1 caches can still
hold the old value).

This has caused an actual crash in RCU torture testing on Power 7.

This fixes it by changing those atomic ops to use new macros instead
of RELEASE/ACQUIRE barriers, called ATOMIC_ENTRY and ATOMIC_EXIT barriers,
which are then defined as lwsync and sync respectively.

I haven't had a chance to measure the performance impact (or rather,
what I measured with kernel compiles is in the noise; I have yet to
find a more precise benchmark).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

+40 -35
+24 -24
arch/powerpc/include/asm/atomic.h
··· 49 49 int t; 50 50 51 51 __asm__ __volatile__( 52 - PPC_RELEASE_BARRIER 52 + PPC_ATOMIC_ENTRY_BARRIER 53 53 "1: lwarx %0,0,%2 # atomic_add_return\n\ 54 54 add %0,%1,%0\n" 55 55 PPC405_ERR77(0,%2) 56 56 " stwcx. %0,0,%2 \n\ 57 57 bne- 1b" 58 - PPC_ACQUIRE_BARRIER 58 + PPC_ATOMIC_EXIT_BARRIER 59 59 : "=&r" (t) 60 60 : "r" (a), "r" (&v->counter) 61 61 : "cc", "memory"); ··· 85 85 int t; 86 86 87 87 __asm__ __volatile__( 88 - PPC_RELEASE_BARRIER 88 + PPC_ATOMIC_ENTRY_BARRIER 89 89 "1: lwarx %0,0,%2 # atomic_sub_return\n\ 90 90 subf %0,%1,%0\n" 91 91 PPC405_ERR77(0,%2) 92 92 " stwcx. %0,0,%2 \n\ 93 93 bne- 1b" 94 - PPC_ACQUIRE_BARRIER 94 + PPC_ATOMIC_EXIT_BARRIER 95 95 : "=&r" (t) 96 96 : "r" (a), "r" (&v->counter) 97 97 : "cc", "memory"); ··· 119 119 int t; 120 120 121 121 __asm__ __volatile__( 122 - PPC_RELEASE_BARRIER 122 + PPC_ATOMIC_ENTRY_BARRIER 123 123 "1: lwarx %0,0,%1 # atomic_inc_return\n\ 124 124 addic %0,%0,1\n" 125 125 PPC405_ERR77(0,%1) 126 126 " stwcx. %0,0,%1 \n\ 127 127 bne- 1b" 128 - PPC_ACQUIRE_BARRIER 128 + PPC_ATOMIC_EXIT_BARRIER 129 129 : "=&r" (t) 130 130 : "r" (&v->counter) 131 131 : "cc", "xer", "memory"); ··· 163 163 int t; 164 164 165 165 __asm__ __volatile__( 166 - PPC_RELEASE_BARRIER 166 + PPC_ATOMIC_ENTRY_BARRIER 167 167 "1: lwarx %0,0,%1 # atomic_dec_return\n\ 168 168 addic %0,%0,-1\n" 169 169 PPC405_ERR77(0,%1) 170 170 " stwcx. %0,0,%1\n\ 171 171 bne- 1b" 172 - PPC_ACQUIRE_BARRIER 172 + PPC_ATOMIC_EXIT_BARRIER 173 173 : "=&r" (t) 174 174 : "r" (&v->counter) 175 175 : "cc", "xer", "memory"); ··· 194 194 int t; 195 195 196 196 __asm__ __volatile__ ( 197 - PPC_RELEASE_BARRIER 197 + PPC_ATOMIC_ENTRY_BARRIER 198 198 "1: lwarx %0,0,%1 # __atomic_add_unless\n\ 199 199 cmpw 0,%0,%3 \n\ 200 200 beq- 2f \n\ ··· 202 202 PPC405_ERR77(0,%2) 203 203 " stwcx. 
%0,0,%1 \n\ 204 204 bne- 1b \n" 205 - PPC_ACQUIRE_BARRIER 205 + PPC_ATOMIC_EXIT_BARRIER 206 206 " subf %0,%2,%0 \n\ 207 207 2:" 208 208 : "=&r" (t) ··· 226 226 int t; 227 227 228 228 __asm__ __volatile__( 229 - PPC_RELEASE_BARRIER 229 + PPC_ATOMIC_ENTRY_BARRIER 230 230 "1: lwarx %0,0,%1 # atomic_dec_if_positive\n\ 231 231 cmpwi %0,1\n\ 232 232 addi %0,%0,-1\n\ ··· 234 234 PPC405_ERR77(0,%1) 235 235 " stwcx. %0,0,%1\n\ 236 236 bne- 1b" 237 - PPC_ACQUIRE_BARRIER 237 + PPC_ATOMIC_EXIT_BARRIER 238 238 "\n\ 239 239 2:" : "=&b" (t) 240 240 : "r" (&v->counter) ··· 285 285 long t; 286 286 287 287 __asm__ __volatile__( 288 - PPC_RELEASE_BARRIER 288 + PPC_ATOMIC_ENTRY_BARRIER 289 289 "1: ldarx %0,0,%2 # atomic64_add_return\n\ 290 290 add %0,%1,%0\n\ 291 291 stdcx. %0,0,%2 \n\ 292 292 bne- 1b" 293 - PPC_ACQUIRE_BARRIER 293 + PPC_ATOMIC_EXIT_BARRIER 294 294 : "=&r" (t) 295 295 : "r" (a), "r" (&v->counter) 296 296 : "cc", "memory"); ··· 319 319 long t; 320 320 321 321 __asm__ __volatile__( 322 - PPC_RELEASE_BARRIER 322 + PPC_ATOMIC_ENTRY_BARRIER 323 323 "1: ldarx %0,0,%2 # atomic64_sub_return\n\ 324 324 subf %0,%1,%0\n\ 325 325 stdcx. %0,0,%2 \n\ 326 326 bne- 1b" 327 - PPC_ACQUIRE_BARRIER 327 + PPC_ATOMIC_EXIT_BARRIER 328 328 : "=&r" (t) 329 329 : "r" (a), "r" (&v->counter) 330 330 : "cc", "memory"); ··· 351 351 long t; 352 352 353 353 __asm__ __volatile__( 354 - PPC_RELEASE_BARRIER 354 + PPC_ATOMIC_ENTRY_BARRIER 355 355 "1: ldarx %0,0,%1 # atomic64_inc_return\n\ 356 356 addic %0,%0,1\n\ 357 357 stdcx. %0,0,%1 \n\ 358 358 bne- 1b" 359 - PPC_ACQUIRE_BARRIER 359 + PPC_ATOMIC_EXIT_BARRIER 360 360 : "=&r" (t) 361 361 : "r" (&v->counter) 362 362 : "cc", "xer", "memory"); ··· 393 393 long t; 394 394 395 395 __asm__ __volatile__( 396 - PPC_RELEASE_BARRIER 396 + PPC_ATOMIC_ENTRY_BARRIER 397 397 "1: ldarx %0,0,%1 # atomic64_dec_return\n\ 398 398 addic %0,%0,-1\n\ 399 399 stdcx. 
%0,0,%1\n\ 400 400 bne- 1b" 401 - PPC_ACQUIRE_BARRIER 401 + PPC_ATOMIC_EXIT_BARRIER 402 402 : "=&r" (t) 403 403 : "r" (&v->counter) 404 404 : "cc", "xer", "memory"); ··· 418 418 long t; 419 419 420 420 __asm__ __volatile__( 421 - PPC_RELEASE_BARRIER 421 + PPC_ATOMIC_ENTRY_BARRIER 422 422 "1: ldarx %0,0,%1 # atomic64_dec_if_positive\n\ 423 423 addic. %0,%0,-1\n\ 424 424 blt- 2f\n\ 425 425 stdcx. %0,0,%1\n\ 426 426 bne- 1b" 427 - PPC_ACQUIRE_BARRIER 427 + PPC_ATOMIC_EXIT_BARRIER 428 428 "\n\ 429 429 2:" : "=&r" (t) 430 430 : "r" (&v->counter) ··· 450 450 long t; 451 451 452 452 __asm__ __volatile__ ( 453 - PPC_RELEASE_BARRIER 453 + PPC_ATOMIC_ENTRY_BARRIER 454 454 "1: ldarx %0,0,%1 # __atomic_add_unless\n\ 455 455 cmpd 0,%0,%3 \n\ 456 456 beq- 2f \n\ 457 457 add %0,%2,%0 \n" 458 458 " stdcx. %0,0,%1 \n\ 459 459 bne- 1b \n" 460 - PPC_ACQUIRE_BARRIER 460 + PPC_ATOMIC_EXIT_BARRIER 461 461 " subf %0,%2,%0 \n\ 462 462 2:" 463 463 : "=&r" (t)
+6 -6
arch/powerpc/include/asm/bitops.h
··· 124 124 return (old & mask); \ 125 125 } 126 126 127 - DEFINE_TESTOP(test_and_set_bits, or, PPC_RELEASE_BARRIER, 128 - PPC_ACQUIRE_BARRIER, 0) 127 + DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER, 128 + PPC_ATOMIC_EXIT_BARRIER, 0) 129 129 DEFINE_TESTOP(test_and_set_bits_lock, or, "", 130 130 PPC_ACQUIRE_BARRIER, 1) 131 - DEFINE_TESTOP(test_and_clear_bits, andc, PPC_RELEASE_BARRIER, 132 - PPC_ACQUIRE_BARRIER, 0) 133 - DEFINE_TESTOP(test_and_change_bits, xor, PPC_RELEASE_BARRIER, 134 - PPC_ACQUIRE_BARRIER, 0) 131 + DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER, 132 + PPC_ATOMIC_EXIT_BARRIER, 0) 133 + DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER, 134 + PPC_ATOMIC_EXIT_BARRIER, 0) 135 135 136 136 static __inline__ int test_and_set_bit(unsigned long nr, 137 137 volatile unsigned long *addr)
+4 -3
arch/powerpc/include/asm/futex.h
··· 11 11 12 12 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ 13 13 __asm__ __volatile ( \ 14 - PPC_RELEASE_BARRIER \ 14 + PPC_ATOMIC_ENTRY_BARRIER \ 15 15 "1: lwarx %0,0,%2\n" \ 16 16 insn \ 17 17 PPC405_ERR77(0, %2) \ 18 18 "2: stwcx. %1,0,%2\n" \ 19 19 "bne- 1b\n" \ 20 + PPC_ATOMIC_EXIT_BARRIER \ 20 21 "li %1,0\n" \ 21 22 "3: .section .fixup,\"ax\"\n" \ 22 23 "4: li %1,%3\n" \ ··· 93 92 return -EFAULT; 94 93 95 94 __asm__ __volatile__ ( 96 - PPC_RELEASE_BARRIER 95 + PPC_ATOMIC_ENTRY_BARRIER 97 96 "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ 98 97 cmpw 0,%1,%4\n\ 99 98 bne- 3f\n" 100 99 PPC405_ERR77(0,%3) 101 100 "2: stwcx. %5,0,%3\n\ 102 101 bne- 1b\n" 103 - PPC_ACQUIRE_BARRIER 102 + PPC_ATOMIC_EXIT_BARRIER 104 103 "3: .section .fixup,\"ax\"\n\ 105 104 4: li %0,%6\n\ 106 105 b 3b\n\
+6 -2
arch/powerpc/include/asm/synch.h
··· 42 42 START_LWSYNC_SECTION(97); \ 43 43 isync; \ 44 44 MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup); 45 - #define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER) 46 - #define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n" 45 + #define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER) 46 + #define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n" 47 + #define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n" 48 + #define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n" 47 49 #else 48 50 #define PPC_ACQUIRE_BARRIER 49 51 #define PPC_RELEASE_BARRIER 52 + #define PPC_ATOMIC_ENTRY_BARRIER 53 + #define PPC_ATOMIC_EXIT_BARRIER 50 54 #endif 51 55 52 56 #endif /* __KERNEL__ */