[SPARC64]: Avoid membar instructions in delay slots.

In particular, avoid membar instructions in the delay
slot of a jmpl instruction.

UltraSPARC-I, II, IIi, and IIe have a bug, documented in
the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51.

The long and short of it is that if the IMU misses on a
branch or jmpl, and a store-buffer-synchronizing membar sits
in the delay slot, the chip can stop fetching instructions.

If interrupts or some other trap source are enabled, the chip
will eventually unwedge itself, but performance will suffer.

We already had a workaround for this bug in a few spots, but
it's better to have the entire tree sanitized for this rule.
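
The rule applied throughout is mechanical: hoist the membar above the
branch whose delay slot it occupied, and fill the vacated slot with a
nop.  Shown here with the trampoline.S hunk below reflowed for
readability:

	/* Before: store-buffer-synchronizing membar in the delay slot. */
	1:	ldstub	[%g2 + %lo(prom_entry_lock)], %g1
		brnz,pn	%g1, 1b
		 membar	#StoreLoad | #StoreStore

	/* After: barrier hoisted above the branch, nop in the slot. */
	1:	ldstub	[%g2 + %lo(prom_entry_lock)], %g1
		membar	#StoreLoad | #StoreStore
		brnz,pn	%g1, 1b
		 nop

The C inline-asm users (semaphore.c, debuglocks.c, init.c, rwsem.h,
spinlock.h) get the same reordering inside their asm strings, with an
explicit " nop\n" taking over the delay slot.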

Signed-off-by: David S. Miller <davem@davemloft.net>

+172 -111
+4 -2
arch/sparc64/kernel/entry.S
··· 271 fmuld %f0, %f2, %f26 272 faddd %f0, %f2, %f28 273 fmuld %f0, %f2, %f30 274 b,pt %xcc, fpdis_exit 275 - membar #Sync 276 2: andcc %g5, FPRS_DU, %g0 277 bne,pt %icc, 3f 278 fzero %f32 ··· 302 fmuld %f32, %f34, %f58 303 faddd %f32, %f34, %f60 304 fmuld %f32, %f34, %f62 305 ba,pt %xcc, fpdis_exit 306 - membar #Sync 307 3: mov SECONDARY_CONTEXT, %g3 308 add %g6, TI_FPREGS, %g1 309 ldxa [%g3] ASI_DMMU, %g5
··· 271 fmuld %f0, %f2, %f26 272 faddd %f0, %f2, %f28 273 fmuld %f0, %f2, %f30 274 + membar #Sync 275 b,pt %xcc, fpdis_exit 276 + nop 277 2: andcc %g5, FPRS_DU, %g0 278 bne,pt %icc, 3f 279 fzero %f32 ··· 301 fmuld %f32, %f34, %f58 302 faddd %f32, %f34, %f60 303 fmuld %f32, %f34, %f62 304 + membar #Sync 305 ba,pt %xcc, fpdis_exit 306 + nop 307 3: mov SECONDARY_CONTEXT, %g3 308 add %g6, TI_FPREGS, %g1 309 ldxa [%g3] ASI_DMMU, %g5
+8 -4
arch/sparc64/kernel/semaphore.c
··· 32 " add %1, %4, %1\n" 33 " cas [%3], %0, %1\n" 34 " cmp %0, %1\n" 35 " bne,pn %%icc, 1b\n" 36 - " membar #StoreLoad | #StoreStore\n" 37 : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) 38 : "r" (&sem->count), "r" (incr), "m" (sem->count) 39 : "cc"); ··· 72 " cmp %%g1, %%g7\n" 73 " bne,pn %%icc, 1b\n" 74 " addcc %%g7, 1, %%g0\n" 75 " ble,pn %%icc, 3f\n" 76 - " membar #StoreLoad | #StoreStore\n" 77 "2:\n" 78 " .subsection 2\n" 79 "3: mov %0, %%g1\n" ··· 130 " cmp %%g1, %%g7\n" 131 " bne,pn %%icc, 1b\n" 132 " cmp %%g7, 1\n" 133 " bl,pn %%icc, 3f\n" 134 - " membar #StoreLoad | #StoreStore\n" 135 "2:\n" 136 " .subsection 2\n" 137 "3: mov %0, %%g1\n" ··· 236 " cmp %%g1, %%g7\n" 237 " bne,pn %%icc, 1b\n" 238 " cmp %%g7, 1\n" 239 " bl,pn %%icc, 3f\n" 240 - " membar #StoreLoad | #StoreStore\n" 241 "2:\n" 242 " .subsection 2\n" 243 "3: mov %2, %%g1\n"
··· 32 " add %1, %4, %1\n" 33 " cas [%3], %0, %1\n" 34 " cmp %0, %1\n" 35 + " membar #StoreLoad | #StoreStore\n" 36 " bne,pn %%icc, 1b\n" 37 + " nop\n" 38 : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) 39 : "r" (&sem->count), "r" (incr), "m" (sem->count) 40 : "cc"); ··· 71 " cmp %%g1, %%g7\n" 72 " bne,pn %%icc, 1b\n" 73 " addcc %%g7, 1, %%g0\n" 74 + " membar #StoreLoad | #StoreStore\n" 75 " ble,pn %%icc, 3f\n" 76 + " nop\n" 77 "2:\n" 78 " .subsection 2\n" 79 "3: mov %0, %%g1\n" ··· 128 " cmp %%g1, %%g7\n" 129 " bne,pn %%icc, 1b\n" 130 " cmp %%g7, 1\n" 131 + " membar #StoreLoad | #StoreStore\n" 132 " bl,pn %%icc, 3f\n" 133 + " nop\n" 134 "2:\n" 135 " .subsection 2\n" 136 "3: mov %0, %%g1\n" ··· 233 " cmp %%g1, %%g7\n" 234 " bne,pn %%icc, 1b\n" 235 " cmp %%g7, 1\n" 236 + " membar #StoreLoad | #StoreStore\n" 237 " bl,pn %%icc, 3f\n" 238 + " nop\n" 239 "2:\n" 240 " .subsection 2\n" 241 "3: mov %2, %%g1\n"
+2 -1
arch/sparc64/kernel/trampoline.S
··· 98 99 sethi %hi(prom_entry_lock), %g2 100 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 101 brnz,pn %g1, 1b 102 - membar #StoreLoad | #StoreStore 103 104 sethi %hi(p1275buf), %g2 105 or %g2, %lo(p1275buf), %g2
··· 98 99 sethi %hi(prom_entry_lock), %g2 100 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 101 + membar #StoreLoad | #StoreStore 102 brnz,pn %g1, 1b 103 + nop 104 105 sethi %hi(p1275buf), %g2 106 or %g2, %lo(p1275buf), %g2
+53 -50
arch/sparc64/lib/U1memcpy.S
··· 87 #define LOOP_CHUNK3(src, dest, len, branch_dest) \ 88 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) 89 90 #define STORE_SYNC(dest, fsrc) \ 91 EX_ST(STORE_BLK(%fsrc, %dest)); \ 92 - add %dest, 0x40, %dest; 93 94 #define STORE_JUMP(dest, fsrc, target) \ 95 EX_ST(STORE_BLK(%fsrc, %dest)); \ 96 add %dest, 0x40, %dest; \ 97 - ba,pt %xcc, target; 98 99 #define FINISH_VISCHUNK(dest, f0, f1, left) \ 100 subcc %left, 8, %left;\ ··· 242 ba,pt %xcc, 1b+4 243 faligndata %f0, %f2, %f48 244 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 245 - STORE_SYNC(o0, f48) membar #Sync 246 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 247 - STORE_JUMP(o0, f48, 40f) membar #Sync 248 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 249 - STORE_SYNC(o0, f48) membar #Sync 250 FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 251 - STORE_JUMP(o0, f48, 48f) membar #Sync 252 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 253 - STORE_SYNC(o0, f48) membar #Sync 254 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 255 - STORE_JUMP(o0, f48, 56f) membar #Sync 256 257 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 258 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 263 ba,pt %xcc, 1b+4 264 faligndata %f2, %f4, %f48 265 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 266 - STORE_SYNC(o0, f48) membar #Sync 267 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 268 - STORE_JUMP(o0, f48, 41f) membar #Sync 269 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 270 - STORE_SYNC(o0, f48) membar #Sync 271 FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 272 - STORE_JUMP(o0, f48, 49f) membar #Sync 273 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 274 - STORE_SYNC(o0, f48) membar #Sync 275 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 276 - STORE_JUMP(o0, f48, 57f) membar #Sync 277 278 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 279 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 284 ba,pt %xcc, 1b+4 285 faligndata %f4, %f6, %f48 286 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 287 - STORE_SYNC(o0, f48) membar #Sync 288 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 289 - STORE_JUMP(o0, f48, 42f) membar #Sync 290 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 291 - STORE_SYNC(o0, f48) membar #Sync 292 FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 293 - STORE_JUMP(o0, f48, 50f) membar #Sync 294 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 295 - STORE_SYNC(o0, f48) membar #Sync 296 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 297 - STORE_JUMP(o0, f48, 58f) membar #Sync 298 299 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 300 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 305 ba,pt %xcc, 1b+4 306 faligndata %f6, %f8, %f48 307 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 308 - STORE_SYNC(o0, f48) membar #Sync 309 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 310 - STORE_JUMP(o0, f48, 43f) membar #Sync 311 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 312 - STORE_SYNC(o0, f48) membar #Sync 313 FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 314 - STORE_JUMP(o0, f48, 51f) membar #Sync 315 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 316 - STORE_SYNC(o0, f48) membar #Sync 317 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 318 - STORE_JUMP(o0, f48, 59f) membar #Sync 319 320 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 321 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 326 ba,pt %xcc, 1b+4 327 faligndata %f8, %f10, %f48 328 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 329 - STORE_SYNC(o0, f48) membar #Sync 330 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 331 - STORE_JUMP(o0, f48, 44f) membar #Sync 332 
2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 333 - STORE_SYNC(o0, f48) membar #Sync 334 FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 335 - STORE_JUMP(o0, f48, 52f) membar #Sync 336 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 337 - STORE_SYNC(o0, f48) membar #Sync 338 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 339 - STORE_JUMP(o0, f48, 60f) membar #Sync 340 341 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 342 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 347 ba,pt %xcc, 1b+4 348 faligndata %f10, %f12, %f48 349 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 350 - STORE_SYNC(o0, f48) membar #Sync 351 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 352 - STORE_JUMP(o0, f48, 45f) membar #Sync 353 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 354 - STORE_SYNC(o0, f48) membar #Sync 355 FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 356 - STORE_JUMP(o0, f48, 53f) membar #Sync 357 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 358 - STORE_SYNC(o0, f48) membar #Sync 359 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 360 - STORE_JUMP(o0, f48, 61f) membar #Sync 361 362 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 363 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 368 ba,pt %xcc, 1b+4 369 faligndata %f12, %f14, %f48 370 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 371 - STORE_SYNC(o0, f48) membar #Sync 372 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 373 - STORE_JUMP(o0, f48, 46f) membar #Sync 374 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 375 - STORE_SYNC(o0, f48) membar #Sync 376 FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 377 - STORE_JUMP(o0, f48, 54f) membar #Sync 378 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 379 - STORE_SYNC(o0, f48) membar #Sync 380 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 381 - STORE_JUMP(o0, f48, 62f) membar #Sync 382 383 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 384 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 389 ba,pt %xcc, 1b+4 390 faligndata %f14, %f16, %f48 391 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 392 - STORE_SYNC(o0, f48) membar #Sync 393 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 394 - STORE_JUMP(o0, f48, 47f) membar #Sync 395 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 396 - STORE_SYNC(o0, f48) membar #Sync 397 FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 398 - STORE_JUMP(o0, f48, 55f) membar #Sync 399 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 400 - STORE_SYNC(o0, f48) membar #Sync 401 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 402 - STORE_JUMP(o0, f48, 63f) membar #Sync 403 404 40: FINISH_VISCHUNK(o0, f0, f2, g3) 405 41: FINISH_VISCHUNK(o0, f2, f4, g3)
··· 87 #define LOOP_CHUNK3(src, dest, len, branch_dest) \ 88 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) 89 90 + #define DO_SYNC membar #Sync; 91 #define STORE_SYNC(dest, fsrc) \ 92 EX_ST(STORE_BLK(%fsrc, %dest)); \ 93 + add %dest, 0x40, %dest; \ 94 + DO_SYNC 95 96 #define STORE_JUMP(dest, fsrc, target) \ 97 EX_ST(STORE_BLK(%fsrc, %dest)); \ 98 add %dest, 0x40, %dest; \ 99 + ba,pt %xcc, target; \ 100 + nop; 101 102 #define FINISH_VISCHUNK(dest, f0, f1, left) \ 103 subcc %left, 8, %left;\ ··· 239 ba,pt %xcc, 1b+4 240 faligndata %f0, %f2, %f48 241 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 242 + STORE_SYNC(o0, f48) 243 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 244 + STORE_JUMP(o0, f48, 40f) 245 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 246 + STORE_SYNC(o0, f48) 247 FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 248 + STORE_JUMP(o0, f48, 48f) 249 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 250 + STORE_SYNC(o0, f48) 251 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 252 + STORE_JUMP(o0, f48, 56f) 253 254 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 255 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 260 ba,pt %xcc, 1b+4 261 faligndata %f2, %f4, %f48 262 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 263 + STORE_SYNC(o0, f48) 264 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 265 + STORE_JUMP(o0, f48, 41f) 266 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 267 + STORE_SYNC(o0, f48) 268 FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 269 + STORE_JUMP(o0, f48, 49f) 270 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 271 + STORE_SYNC(o0, f48) 272 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 273 + STORE_JUMP(o0, f48, 57f) 274 275 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 276 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 281 ba,pt %xcc, 1b+4 282 faligndata %f4, %f6, %f48 283 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 284 + STORE_SYNC(o0, f48) 285 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 286 + STORE_JUMP(o0, f48, 42f) 287 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 288 + STORE_SYNC(o0, f48) 289 FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 290 + STORE_JUMP(o0, f48, 50f) 291 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 292 + STORE_SYNC(o0, f48) 293 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 294 + STORE_JUMP(o0, f48, 58f) 295 296 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 297 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 302 ba,pt %xcc, 1b+4 303 faligndata %f6, %f8, %f48 304 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 305 + STORE_SYNC(o0, f48) 306 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 307 + STORE_JUMP(o0, f48, 43f) 308 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 309 + STORE_SYNC(o0, f48) 310 FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 311 + STORE_JUMP(o0, f48, 51f) 312 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 313 + STORE_SYNC(o0, f48) 314 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 315 + STORE_JUMP(o0, f48, 59f) 316 317 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 318 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 323 ba,pt %xcc, 1b+4 324 faligndata %f8, %f10, %f48 325 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 326 + STORE_SYNC(o0, f48) 327 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 328 + STORE_JUMP(o0, f48, 44f) 329 2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 330 + STORE_SYNC(o0, f48) 331 FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 332 + STORE_JUMP(o0, f48, 52f) 333 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 334 + STORE_SYNC(o0, f48) 335 
FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 336 + STORE_JUMP(o0, f48, 60f) 337 338 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 339 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 344 ba,pt %xcc, 1b+4 345 faligndata %f10, %f12, %f48 346 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 347 + STORE_SYNC(o0, f48) 348 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 349 + STORE_JUMP(o0, f48, 45f) 350 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 351 + STORE_SYNC(o0, f48) 352 FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 353 + STORE_JUMP(o0, f48, 53f) 354 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 355 + STORE_SYNC(o0, f48) 356 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 357 + STORE_JUMP(o0, f48, 61f) 358 359 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 360 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 365 ba,pt %xcc, 1b+4 366 faligndata %f12, %f14, %f48 367 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 368 + STORE_SYNC(o0, f48) 369 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 370 + STORE_JUMP(o0, f48, 46f) 371 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 372 + STORE_SYNC(o0, f48) 373 FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 374 + STORE_JUMP(o0, f48, 54f) 375 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 376 + STORE_SYNC(o0, f48) 377 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 378 + STORE_JUMP(o0, f48, 62f) 379 380 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 381 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) ··· 386 ba,pt %xcc, 1b+4 387 faligndata %f14, %f16, %f48 388 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 389 + STORE_SYNC(o0, f48) 390 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 391 + STORE_JUMP(o0, f48, 47f) 392 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 393 + STORE_SYNC(o0, f48) 394 FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 395 + STORE_JUMP(o0, f48, 55f) 396 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 397 + STORE_SYNC(o0, f48) 398 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 399 + STORE_JUMP(o0, f48, 63f) 400 401 40: FINISH_VISCHUNK(o0, f0, f2, g3) 402 41: FINISH_VISCHUNK(o0, f2, f4, g3)
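
The U1memcpy.S change lives mostly in the store macros: the trailing
"membar #Sync" that every call site used to append is folded into
STORE_SYNC via a new DO_SYNC helper and dropped from the STORE_JUMP
path, whose ba,pt now carries a plain nop in its delay slot.  Reflowed
from the condensed hunk above:

	/* Before: call sites wrote "STORE_SYNC(o0, f48) membar #Sync" and
	 * "STORE_JUMP(o0, f48, 40f) membar #Sync", so the STORE_JUMP
	 * barrier landed in the delay slot of the ba,pt. */
	#define STORE_SYNC(dest, fsrc) \
		EX_ST(STORE_BLK(%fsrc, %dest)); \
		add	%dest, 0x40, %dest;

	#define STORE_JUMP(dest, fsrc, target) \
		EX_ST(STORE_BLK(%fsrc, %dest)); \
		add	%dest, 0x40, %dest; \
		ba,pt	%xcc, target;

	/* After: barrier folded into STORE_SYNC, nop in STORE_JUMP's slot. */
	#define DO_SYNC	membar	#Sync;
	#define STORE_SYNC(dest, fsrc) \
		EX_ST(STORE_BLK(%fsrc, %dest)); \
		add	%dest, 0x40, %dest; \
		DO_SYNC

	#define STORE_JUMP(dest, fsrc, target) \
		EX_ST(STORE_BLK(%fsrc, %dest)); \
		add	%dest, 0x40, %dest; \
		ba,pt	%xcc, target; \
		nop;
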
+13 -2
arch/sparc64/lib/VISsave.S
··· 72 73 stda %f48, [%g3 + %g1] ASI_BLK_P 74 5: membar #Sync 75 - jmpl %g7 + %g0, %g0 76 nop 77 78 6: ldub [%g3 + TI_FPSAVED], %o5 ··· 91 stda %f32, [%g2 + %g1] ASI_BLK_P 92 stda %f48, [%g3 + %g1] ASI_BLK_P 93 membar #Sync 94 - jmpl %g7 + %g0, %g0 95 96 nop 97 98 .align 32 ··· 133 stda %f0, [%g2 + %g1] ASI_BLK_P 134 stda %f16, [%g3 + %g1] ASI_BLK_P 135 membar #Sync 136 4: and %o5, FPRS_DU, %o5 137 jmpl %g7 + %g0, %g0 138 wr %o5, FPRS_FEF, %fprs
··· 72 73 stda %f48, [%g3 + %g1] ASI_BLK_P 74 5: membar #Sync 75 + ba,pt %xcc, 80f 76 + nop 77 + 78 + .align 32 79 + 80: jmpl %g7 + %g0, %g0 80 nop 81 82 6: ldub [%g3 + TI_FPSAVED], %o5 ··· 87 stda %f32, [%g2 + %g1] ASI_BLK_P 88 stda %f48, [%g3 + %g1] ASI_BLK_P 89 membar #Sync 90 + ba,pt %xcc, 80f 91 + nop 92 93 + .align 32 94 + 80: jmpl %g7 + %g0, %g0 95 nop 96 97 .align 32 ··· 126 stda %f0, [%g2 + %g1] ASI_BLK_P 127 stda %f16, [%g3 + %g1] ASI_BLK_P 128 membar #Sync 129 + ba,pt %xcc, 4f 130 + nop 131 + 132 + .align 32 133 4: and %o5, FPRS_DU, %o5 134 jmpl %g7 + %g0, %g0 135 wr %o5, FPRS_FEF, %fprs
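
VISsave.S is restructured rather than simply reordered: each
"membar #Sync; jmpl %g7 + %g0, %g0" return sequence now reaches the
jmpl through a ba,pt to a 32-byte-aligned stub.  Reflowed from the
hunk above; the rationale comment is an interpretation, the patch
itself only makes the structural change:

	/* Before: the delay slot already held a nop, but the membar sat
	 * directly in front of the register-indirect jump. */
	5:	membar		#Sync
		jmpl		%g7 + %g0, %g0
		 nop

	/* After: the jmpl lives in its own aligned block, presumably so
	 * the membar and the jmpl no longer share a fetch group. */
	5:	membar		#Sync
		ba,pt		%xcc, 80f
		 nop

		.align		32
	80:	jmpl		%g7 + %g0, %g0
		 nop
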
+26 -16
arch/sparc64/lib/atomic.S
··· 7 #include <linux/config.h> 8 #include <asm/asi.h> 9 10 - /* On SMP we need to use memory barriers to ensure 11 - * correct memory operation ordering, nop these out 12 - * for uniprocessor. 13 - */ 14 - #ifdef CONFIG_SMP 15 - #define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad 16 - #define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore 17 - #else 18 - #define ATOMIC_PRE_BARRIER nop 19 - #define ATOMIC_POST_BARRIER nop 20 - #endif 21 - 22 .text 23 24 /* Two versions of the atomic routines, one that ··· 40 nop 41 .size atomic_sub, .-atomic_sub 42 43 .globl atomic_add_ret 44 .type atomic_add_ret,#function 45 atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ ··· 68 cmp %g1, %g7 69 bne,pn %icc, 1b 70 add %g7, %o0, %g7 71 ATOMIC_POST_BARRIER 72 retl 73 - sra %g7, 0, %o0 74 .size atomic_add_ret, .-atomic_add_ret 75 76 .globl atomic_sub_ret ··· 84 cmp %g1, %g7 85 bne,pn %icc, 1b 86 sub %g7, %o0, %g7 87 ATOMIC_POST_BARRIER 88 retl 89 - sra %g7, 0, %o0 90 .size atomic_sub_ret, .-atomic_sub_ret 91 92 .globl atomic64_add ··· 126 cmp %g1, %g7 127 bne,pn %xcc, 1b 128 add %g7, %o0, %g7 129 ATOMIC_POST_BARRIER 130 retl 131 - mov %g7, %o0 132 .size atomic64_add_ret, .-atomic64_add_ret 133 134 .globl atomic64_sub_ret ··· 142 cmp %g1, %g7 143 bne,pn %xcc, 1b 144 sub %g7, %o0, %g7 145 ATOMIC_POST_BARRIER 146 retl 147 - mov %g7, %o0 148 .size atomic64_sub_ret, .-atomic64_sub_ret
··· 7 #include <linux/config.h> 8 #include <asm/asi.h> 9 10 .text 11 12 /* Two versions of the atomic routines, one that ··· 52 nop 53 .size atomic_sub, .-atomic_sub 54 55 + /* On SMP we need to use memory barriers to ensure 56 + * correct memory operation ordering, nop these out 57 + * for uniprocessor. 58 + */ 59 + #ifdef CONFIG_SMP 60 + 61 + #define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad; 62 + #define ATOMIC_POST_BARRIER \ 63 + ba,pt %xcc, 80b; \ 64 + membar #StoreLoad | #StoreStore 65 + 66 + 80: retl 67 + nop 68 + #else 69 + #define ATOMIC_PRE_BARRIER 70 + #define ATOMIC_POST_BARRIER 71 + #endif 72 + 73 .globl atomic_add_ret 74 .type atomic_add_ret,#function 75 atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ ··· 62 cmp %g1, %g7 63 bne,pn %icc, 1b 64 add %g7, %o0, %g7 65 + sra %g7, 0, %o0 66 ATOMIC_POST_BARRIER 67 retl 68 + nop 69 .size atomic_add_ret, .-atomic_add_ret 70 71 .globl atomic_sub_ret ··· 77 cmp %g1, %g7 78 bne,pn %icc, 1b 79 sub %g7, %o0, %g7 80 + sra %g7, 0, %o0 81 ATOMIC_POST_BARRIER 82 retl 83 + nop 84 .size atomic_sub_ret, .-atomic_sub_ret 85 86 .globl atomic64_add ··· 118 cmp %g1, %g7 119 bne,pn %xcc, 1b 120 add %g7, %o0, %g7 121 + mov %g7, %o0 122 ATOMIC_POST_BARRIER 123 retl 124 + nop 125 .size atomic64_add_ret, .-atomic64_add_ret 126 127 .globl atomic64_sub_ret ··· 133 cmp %g1, %g7 134 bne,pn %xcc, 1b 135 sub %g7, %o0, %g7 136 + mov %g7, %o0 137 ATOMIC_POST_BARRIER 138 retl 139 + nop 140 .size atomic64_sub_ret, .-atomic64_sub_ret
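
In atomic.S (and bitops.S below, which uses the same construction) the
barrier was not in a delay slot but sat right next to the retl.  The
fix computes the return value before the barrier instead of in the
retl delay slot and, on SMP, turns the post-barrier into a branch to a
shared return stub, so the retl ends up well away from the membar with
a plain nop in its slot.  Condensed from the hunk above:

	#ifdef CONFIG_SMP
	#define ATOMIC_PRE_BARRIER	membar	#StoreLoad | #LoadLoad;
	#define ATOMIC_POST_BARRIER \
		ba,pt	%xcc, 80b; \
		membar	#StoreLoad | #StoreStore

	80:	retl
		 nop
	#else
	#define ATOMIC_PRE_BARRIER
	#define ATOMIC_POST_BARRIER
	#endif

	/* atomic_add_ret tail: result first, then barrier, then return. */
		cmp	%g1, %g7
		bne,pn	%icc, 1b
		 add	%g7, %o0, %g7
		sra	%g7, 0, %o0
		ATOMIC_POST_BARRIER
		retl
		 nop

On uniprocessor builds both barrier macros expand to nothing and each
routine simply falls through to its own retl/nop.
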
+21 -12
arch/sparc64/lib/bitops.S
··· 7 #include <linux/config.h> 8 #include <asm/asi.h> 9 10 /* On SMP we need to use memory barriers to ensure 11 * correct memory operation ordering, nop these out 12 * for uniprocessor. 13 */ 14 #ifdef CONFIG_SMP 15 #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad 16 - #define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore 17 - #else 18 - #define BITOP_PRE_BARRIER nop 19 - #define BITOP_POST_BARRIER nop 20 - #endif 21 22 - .text 23 24 .globl test_and_set_bit 25 .type test_and_set_bit,#function ··· 43 cmp %g7, %g1 44 bne,pn %xcc, 1b 45 and %g7, %o2, %g2 46 - BITOP_POST_BARRIER 47 clr %o0 48 retl 49 - movrne %g2, 1, %o0 50 .size test_and_set_bit, .-test_and_set_bit 51 52 .globl test_and_clear_bit ··· 66 cmp %g7, %g1 67 bne,pn %xcc, 1b 68 and %g7, %o2, %g2 69 - BITOP_POST_BARRIER 70 clr %o0 71 retl 72 - movrne %g2, 1, %o0 73 .size test_and_clear_bit, .-test_and_clear_bit 74 75 .globl test_and_change_bit ··· 89 cmp %g7, %g1 90 bne,pn %xcc, 1b 91 and %g7, %o2, %g2 92 - BITOP_POST_BARRIER 93 clr %o0 94 retl 95 - movrne %g2, 1, %o0 96 .size test_and_change_bit, .-test_and_change_bit 97 98 .globl set_bit
··· 7 #include <linux/config.h> 8 #include <asm/asi.h> 9 10 + .text 11 + 12 /* On SMP we need to use memory barriers to ensure 13 * correct memory operation ordering, nop these out 14 * for uniprocessor. 15 */ 16 + 17 #ifdef CONFIG_SMP 18 #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad 19 + #define BITOP_POST_BARRIER \ 20 + ba,pt %xcc, 80b; \ 21 + membar #StoreLoad | #StoreStore 22 23 + 80: retl 24 + nop 25 + #else 26 + #define BITOP_PRE_BARRIER 27 + #define BITOP_POST_BARRIER 28 + #endif 29 30 .globl test_and_set_bit 31 .type test_and_set_bit,#function ··· 37 cmp %g7, %g1 38 bne,pn %xcc, 1b 39 and %g7, %o2, %g2 40 clr %o0 41 + movrne %g2, 1, %o0 42 + BITOP_POST_BARRIER 43 retl 44 + nop 45 .size test_and_set_bit, .-test_and_set_bit 46 47 .globl test_and_clear_bit ··· 59 cmp %g7, %g1 60 bne,pn %xcc, 1b 61 and %g7, %o2, %g2 62 clr %o0 63 + movrne %g2, 1, %o0 64 + BITOP_POST_BARRIER 65 retl 66 + nop 67 .size test_and_clear_bit, .-test_and_clear_bit 68 69 .globl test_and_change_bit ··· 81 cmp %g7, %g1 82 bne,pn %xcc, 1b 83 and %g7, %o2, %g2 84 clr %o0 85 + movrne %g2, 1, %o0 86 + BITOP_POST_BARRIER 87 retl 88 + nop 89 .size test_and_change_bit, .-test_and_change_bit 90 91 .globl set_bit
+4 -2
arch/sparc64/lib/debuglocks.c
··· 252 " andn %%g1, %%g3, %%g7\n" 253 " casx [%0], %%g1, %%g7\n" 254 " cmp %%g1, %%g7\n" 255 " bne,pn %%xcc, 1b\n" 256 - " membar #StoreLoad | #StoreStore" 257 : /* no outputs */ 258 : "r" (&(rw->lock)) 259 : "g3", "g1", "g7", "cc", "memory"); ··· 352 " andn %%g1, %%g3, %%g7\n" 353 " casx [%0], %%g1, %%g7\n" 354 " cmp %%g1, %%g7\n" 355 " bne,pn %%xcc, 1b\n" 356 - " membar #StoreLoad | #StoreStore" 357 : /* no outputs */ 358 : "r" (&(rw->lock)) 359 : "g3", "g1", "g7", "cc", "memory");
··· 252 " andn %%g1, %%g3, %%g7\n" 253 " casx [%0], %%g1, %%g7\n" 254 " cmp %%g1, %%g7\n" 255 + " membar #StoreLoad | #StoreStore\n" 256 " bne,pn %%xcc, 1b\n" 257 + " nop" 258 : /* no outputs */ 259 : "r" (&(rw->lock)) 260 : "g3", "g1", "g7", "cc", "memory"); ··· 351 " andn %%g1, %%g3, %%g7\n" 352 " casx [%0], %%g1, %%g7\n" 353 " cmp %%g1, %%g7\n" 354 + " membar #StoreLoad | #StoreStore\n" 355 " bne,pn %%xcc, 1b\n" 356 + " nop" 357 : /* no outputs */ 358 : "r" (&(rw->lock)) 359 : "g3", "g1", "g7", "cc", "memory");
+4 -2
arch/sparc64/lib/dec_and_lock.S
··· 48 #endif 49 to_zero: 50 ldstub [%o1], %g3 51 brnz,pn %g3, spin_on_lock 52 - membar #StoreLoad | #StoreStore 53 loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ 54 cmp %g2, %g7 55 ··· 72 nop 73 spin_on_lock: 74 ldub [%o1], %g3 75 brnz,pt %g3, spin_on_lock 76 - membar #LoadLoad 77 ba,pt %xcc, to_zero 78 nop 79 nop
··· 48 #endif 49 to_zero: 50 ldstub [%o1], %g3 51 + membar #StoreLoad | #StoreStore 52 brnz,pn %g3, spin_on_lock 53 + nop 54 loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ 55 cmp %g2, %g7 56 ··· 71 nop 72 spin_on_lock: 73 ldub [%o1], %g3 74 + membar #LoadLoad 75 brnz,pt %g3, spin_on_lock 76 + nop 77 ba,pt %xcc, to_zero 78 nop 79 nop
+10 -5
arch/sparc64/lib/rwsem.S
··· 17 bne,pn %icc, 1b 18 add %g7, 1, %g7 19 cmp %g7, 0 20 bl,pn %icc, 3f 21 - membar #StoreLoad | #StoreStore 22 2: 23 retl 24 nop ··· 58 cmp %g3, %g7 59 bne,pn %icc, 1b 60 cmp %g7, 0 61 bne,pn %icc, 3f 62 - membar #StoreLoad | #StoreStore 63 2: retl 64 nop 65 3: ··· 99 cmp %g1, %g7 100 bne,pn %icc, 1b 101 cmp %g7, 0 102 bl,pn %icc, 3f 103 - membar #StoreLoad | #StoreStore 104 2: retl 105 nop 106 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 ··· 129 bne,pn %icc, 1b 130 sub %g7, %g1, %g7 131 cmp %g7, 0 132 bl,pn %icc, 3f 133 - membar #StoreLoad | #StoreStore 134 2: 135 retl 136 nop ··· 155 bne,pn %icc, 1b 156 sub %g7, %g1, %g7 157 cmp %g7, 0 158 bl,pn %icc, 3f 159 - membar #StoreLoad | #StoreStore 160 2: 161 retl 162 nop
··· 17 bne,pn %icc, 1b 18 add %g7, 1, %g7 19 cmp %g7, 0 20 + membar #StoreLoad | #StoreStore 21 bl,pn %icc, 3f 22 + nop 23 2: 24 retl 25 nop ··· 57 cmp %g3, %g7 58 bne,pn %icc, 1b 59 cmp %g7, 0 60 + membar #StoreLoad | #StoreStore 61 bne,pn %icc, 3f 62 + nop 63 2: retl 64 nop 65 3: ··· 97 cmp %g1, %g7 98 bne,pn %icc, 1b 99 cmp %g7, 0 100 + membar #StoreLoad | #StoreStore 101 bl,pn %icc, 3f 102 + nop 103 2: retl 104 nop 105 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 ··· 126 bne,pn %icc, 1b 127 sub %g7, %g1, %g7 128 cmp %g7, 0 129 + membar #StoreLoad | #StoreStore 130 bl,pn %icc, 3f 131 + nop 132 2: 133 retl 134 nop ··· 151 bne,pn %icc, 1b 152 sub %g7, %g1, %g7 153 cmp %g7, 0 154 + membar #StoreLoad | #StoreStore 155 bl,pn %icc, 3f 156 + nop 157 2: 158 retl 159 nop
+4 -2
arch/sparc64/mm/init.c
··· 136 "or %%g1, %0, %%g1\n\t" 137 "casx [%2], %%g7, %%g1\n\t" 138 "cmp %%g7, %%g1\n\t" 139 "bne,pn %%xcc, 1b\n\t" 140 - " membar #StoreLoad | #StoreStore" 141 : /* no outputs */ 142 : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) 143 : "g1", "g7"); ··· 158 " andn %%g7, %1, %%g1\n\t" 159 "casx [%2], %%g7, %%g1\n\t" 160 "cmp %%g7, %%g1\n\t" 161 "bne,pn %%xcc, 1b\n\t" 162 - " membar #StoreLoad | #StoreStore\n" 163 "2:" 164 : /* no outputs */ 165 : "r" (cpu), "r" (mask), "r" (&page->flags),
··· 136 "or %%g1, %0, %%g1\n\t" 137 "casx [%2], %%g7, %%g1\n\t" 138 "cmp %%g7, %%g1\n\t" 139 + "membar #StoreLoad | #StoreStore\n\t" 140 "bne,pn %%xcc, 1b\n\t" 141 + " nop" 142 : /* no outputs */ 143 : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) 144 : "g1", "g7"); ··· 157 " andn %%g7, %1, %%g1\n\t" 158 "casx [%2], %%g7, %%g1\n\t" 159 "cmp %%g7, %%g1\n\t" 160 + "membar #StoreLoad | #StoreStore\n\t" 161 "bne,pn %%xcc, 1b\n\t" 162 + " nop\n" 163 "2:" 164 : /* no outputs */ 165 : "r" (cpu), "r" (mask), "r" (&page->flags),
+2 -1
arch/sparc64/mm/ultra.S
··· 266 andn %o3, 1, %o3 267 stxa %g0, [%o3] ASI_IMMU_DEMAP 268 2: stxa %g0, [%o3] ASI_DMMU_DEMAP 269 brnz,pt %o1, 1b 270 - membar #Sync 271 stxa %g2, [%o4] ASI_DMMU 272 flush %g6 273 wrpr %g0, 0, %tl
··· 266 andn %o3, 1, %o3 267 stxa %g0, [%o3] ASI_IMMU_DEMAP 268 2: stxa %g0, [%o3] ASI_DMMU_DEMAP 269 + membar #Sync 270 brnz,pt %o1, 1b 271 + nop 272 stxa %g2, [%o4] ASI_DMMU 273 flush %g6 274 wrpr %g0, 0, %tl
+2 -1
include/asm-sparc64/rwsem.h
··· 55 "add %%g1, %1, %%g7\n\t" 56 "cas [%2], %%g1, %%g7\n\t" 57 "cmp %%g1, %%g7\n\t" 58 "bne,pn %%icc, 1b\n\t" 59 - " membar #StoreLoad | #StoreStore\n\t" 60 "mov %%g7, %0\n\t" 61 : "=&r" (tmp) 62 : "0" (tmp), "r" (sem)
··· 55 "add %%g1, %1, %%g7\n\t" 56 "cas [%2], %%g1, %%g7\n\t" 57 "cmp %%g1, %%g7\n\t" 58 + "membar #StoreLoad | #StoreStore\n\t" 59 "bne,pn %%icc, 1b\n\t" 60 + " nop\n\t" 61 "mov %%g7, %0\n\t" 62 : "=&r" (tmp) 63 : "0" (tmp), "r" (sem)
+19 -10
include/asm-sparc64/spinlock.h
··· 52 53 __asm__ __volatile__( 54 "1: ldstub [%1], %0\n" 55 " brnz,pn %0, 2f\n" 56 - " membar #StoreLoad | #StoreStore\n" 57 " .subsection 2\n" 58 "2: ldub [%1], %0\n" 59 " brnz,pt %0, 2b\n" 60 - " membar #LoadLoad\n" 61 " ba,a,pt %%xcc, 1b\n" 62 " .previous" 63 : "=&r" (tmp) ··· 97 98 __asm__ __volatile__( 99 "1: ldstub [%2], %0\n" 100 - " brnz,pn %0, 2f\n" 101 " membar #StoreLoad | #StoreStore\n" 102 " .subsection 2\n" 103 "2: rdpr %%pil, %1\n" 104 " wrpr %3, %%pil\n" 105 "3: ldub [%2], %0\n" 106 - " brnz,pt %0, 3b\n" 107 " membar #LoadLoad\n" 108 " ba,pt %%xcc, 1b\n" 109 - " wrpr %1, %%pil\n" 110 " .previous" 111 : "=&r" (tmp1), "=&r" (tmp2) 112 : "r"(lock), "r"(flags) ··· 166 "4: add %0, 1, %1\n" 167 " cas [%2], %0, %1\n" 168 " cmp %0, %1\n" 169 " bne,pn %%icc, 1b\n" 170 - " membar #StoreLoad | #StoreStore\n" 171 " .subsection 2\n" 172 "2: ldsw [%2], %0\n" 173 " brlz,pt %0, 2b\n" 174 - " membar #LoadLoad\n" 175 " ba,a,pt %%xcc, 4b\n" 176 " .previous" 177 : "=&r" (tmp1), "=&r" (tmp2) ··· 210 "4: or %0, %3, %1\n" 211 " cas [%2], %0, %1\n" 212 " cmp %0, %1\n" 213 " bne,pn %%icc, 1b\n" 214 - " membar #StoreLoad | #StoreStore\n" 215 " .subsection 2\n" 216 "2: lduw [%2], %0\n" 217 " brnz,pt %0, 2b\n" 218 - " membar #LoadLoad\n" 219 " ba,a,pt %%xcc, 4b\n" 220 " .previous" 221 : "=&r" (tmp1), "=&r" (tmp2) ··· 248 " or %0, %4, %1\n" 249 " cas [%3], %0, %1\n" 250 " cmp %0, %1\n" 251 " bne,pn %%icc, 1b\n" 252 - " membar #StoreLoad | #StoreStore\n" 253 " mov 1, %2\n" 254 "2:" 255 : "=&r" (tmp1), "=&r" (tmp2), "=&r" (result)
··· 52 53 __asm__ __volatile__( 54 "1: ldstub [%1], %0\n" 55 + " membar #StoreLoad | #StoreStore\n" 56 " brnz,pn %0, 2f\n" 57 + " nop\n" 58 " .subsection 2\n" 59 "2: ldub [%1], %0\n" 60 + " membar #LoadLoad\n" 61 " brnz,pt %0, 2b\n" 62 + " nop\n" 63 " ba,a,pt %%xcc, 1b\n" 64 " .previous" 65 : "=&r" (tmp) ··· 95 96 __asm__ __volatile__( 97 "1: ldstub [%2], %0\n" 98 " membar #StoreLoad | #StoreStore\n" 99 + " brnz,pn %0, 2f\n" 100 + " nop\n" 101 " .subsection 2\n" 102 "2: rdpr %%pil, %1\n" 103 " wrpr %3, %%pil\n" 104 "3: ldub [%2], %0\n" 105 " membar #LoadLoad\n" 106 + " brnz,pt %0, 3b\n" 107 + " nop\n" 108 " ba,pt %%xcc, 1b\n" 109 + " wrpr %1, %%pil\n" 110 " .previous" 111 : "=&r" (tmp1), "=&r" (tmp2) 112 : "r"(lock), "r"(flags) ··· 162 "4: add %0, 1, %1\n" 163 " cas [%2], %0, %1\n" 164 " cmp %0, %1\n" 165 + " membar #StoreLoad | #StoreStore\n" 166 " bne,pn %%icc, 1b\n" 167 + " nop\n" 168 " .subsection 2\n" 169 "2: ldsw [%2], %0\n" 170 + " membar #LoadLoad\n" 171 " brlz,pt %0, 2b\n" 172 + " nop\n" 173 " ba,a,pt %%xcc, 4b\n" 174 " .previous" 175 : "=&r" (tmp1), "=&r" (tmp2) ··· 204 "4: or %0, %3, %1\n" 205 " cas [%2], %0, %1\n" 206 " cmp %0, %1\n" 207 + " membar #StoreLoad | #StoreStore\n" 208 " bne,pn %%icc, 1b\n" 209 + " nop\n" 210 " .subsection 2\n" 211 "2: lduw [%2], %0\n" 212 + " membar #LoadLoad\n" 213 " brnz,pt %0, 2b\n" 214 + " nop\n" 215 " ba,a,pt %%xcc, 4b\n" 216 " .previous" 217 : "=&r" (tmp1), "=&r" (tmp2) ··· 240 " or %0, %4, %1\n" 241 " cas [%3], %0, %1\n" 242 " cmp %0, %1\n" 243 + " membar #StoreLoad | #StoreStore\n" 244 " bne,pn %%icc, 1b\n" 245 + " nop\n" 246 " mov 1, %2\n" 247 "2:" 248 : "=&r" (tmp1), "=&r" (tmp2), "=&r" (result)
-1
include/asm-sparc64/spitfire.h
··· 111 "membar #Sync" 112 : /* No outputs */ 113 : "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG)); 114 - __asm__ __volatile__ ("membar #Sync" : : : "memory"); 115 } 116 117 /* The instruction cache lines are flushed with this, but note that
··· 111 "membar #Sync" 112 : /* No outputs */ 113 : "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG)); 114 } 115 116 /* The instruction cache lines are flushed with this, but note that