Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

arm64: atomics: prefetch the destination word for write prior to stxr

The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch makes use of prfm to prefetch cachelines for write prior to
ldxr/stxr loops when using the ll/sc atomic routines.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
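
To make the pattern concrete before the per-file diffs, here is a minimal standalone sketch (hypothetical function name, not the kernel's ATOMIC_OP macro) of the loop shape this patch produces:

/* Sketch of an LL/SC atomic add with the write-stream prefetch.
 * PRFM PSTL1STRM requests the line at L1 in a writable (unique)
 * state before LDXR takes its exclusive reservation, making the
 * STXR less likely to fail and force another trip round the loop. */
static inline void atomic_add_sketch(int i, int *v)
{
	unsigned long tmp;
	int result;

	asm volatile("// atomic_add_sketch\n"
	"	prfm	pstl1strm, %2\n"	/* prefetch for store, streaming */
	"1:	ldxr	%w0, %2\n"		/* load-exclusive */
	"	add	%w0, %w0, %w3\n"
	"	stxr	%w1, %w0, %2\n"		/* %w1 == 0 on success */
	"	cbnz	%w1, 1b\n"		/* reservation lost: retry */
	: "=&r" (result), "=&r" (tmp), "+Q" (*v)
	: "Ir" (i));
}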

4 files changed, 21 insertions(+)

arch/arm64/include/asm/atomic_ll_sc.h (+9)
···
 	int result;							\
 									\
 	asm volatile("// atomic_" #op "\n"				\
+"	prfm	pstl1strm, %2\n"					\
 "1:	ldxr	%w0, %2\n"						\
 "	" #asm_op "	%w0, %w0, %w3\n"				\
 "	stxr	%w1, %w0, %2\n"						\
···
 	int result;							\
 									\
 	asm volatile("// atomic_" #op "_return\n"			\
+"	prfm	pstl1strm, %2\n"					\
 "1:	ldxr	%w0, %2\n"						\
 "	" #asm_op "	%w0, %w0, %w3\n"				\
 "	stlxr	%w1, %w0, %2\n"						\
···
 	int oldval;

 	asm volatile("// atomic_cmpxchg\n"
+"	prfm	pstl1strm, %2\n"
 "1:	ldxr	%w1, %2\n"
 "	eor	%w0, %w1, %w3\n"
 "	cbnz	%w0, 2f\n"
···
 	unsigned long tmp;						\
 									\
 	asm volatile("// atomic64_" #op "\n"				\
+"	prfm	pstl1strm, %2\n"					\
 "1:	ldxr	%0, %2\n"						\
 "	" #asm_op "	%0, %0, %3\n"					\
 "	stxr	%w1, %0, %2\n"						\
···
 	unsigned long tmp;						\
 									\
 	asm volatile("// atomic64_" #op "_return\n"			\
+"	prfm	pstl1strm, %2\n"					\
 "1:	ldxr	%0, %2\n"						\
 "	" #asm_op "	%0, %0, %3\n"					\
 "	stlxr	%w1, %0, %2\n"						\
···
 	unsigned long res;

 	asm volatile("// atomic64_cmpxchg\n"
+"	prfm	pstl1strm, %2\n"
 "1:	ldxr	%1, %2\n"
 "	eor	%0, %1, %3\n"
 "	cbnz	%w0, 2f\n"
···
 	unsigned long tmp;

 	asm volatile("// atomic64_dec_if_positive\n"
+"	prfm	pstl1strm, %2\n"
 "1:	ldxr	%0, %2\n"
 "	subs	%0, %0, #1\n"
 "	b.mi	2f\n"
···
 	unsigned long tmp, oldval;					\
 									\
 	asm volatile(							\
+	"	prfm	pstl1strm, %2\n"				\
 	"1:	ldxr" #sz "\t%" #w "[oldval], %[v]\n"			\
 	"	eor	%" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"	\
 	"	cbnz	%" #w "[tmp], 2f\n"				\
···
 	unsigned long tmp, ret;						\
 									\
 	asm volatile("// __cmpxchg_double" #name "\n"			\
+	"	prfm	pstl1strm, %2\n"				\
 	"1:	ldxp	%0, %1, %2\n"					\
 	"	eor	%0, %0, %3\n"					\
 	"	eor	%1, %1, %4\n"					\
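
The cmpxchg-style loops differ from the unconditional ops in that they can exit without storing. A hedged standalone rendering (hypothetical name; ordering/barriers omitted for brevity) of the patched loop shows that the prefetch is issued once, up front, even on the path where the comparison fails and no store happens:

/* Relaxed cmpxchg sketch modeled on the atomic_cmpxchg hunk above. */
static inline int cmpxchg_relaxed_sketch(int *ptr, int old, int new)
{
	unsigned long tmp;
	int oldval;

	asm volatile("// cmpxchg_relaxed_sketch\n"
	"	prfm	pstl1strm, %2\n"	/* issued before we know whether we will store */
	"1:	ldxr	%w1, %2\n"		/* oldval = *ptr */
	"	eor	%w0, %w1, %w3\n"	/* tmp = oldval ^ old */
	"	cbnz	%w0, 2f\n"		/* mismatch: skip the store */
	"	stxr	%w0, %w4, %2\n"		/* try *ptr = new */
	"	cbnz	%w0, 1b\n"		/* exclusivity lost: retry */
	"2:"
	: "=&r" (tmp), "=&r" (oldval), "+Q" (*ptr)
	: "Ir" (old), "r" (new)
	: "cc");

	return oldval;
}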
arch/arm64/include/asm/cmpxchg.h (+8)
···
 	case 1:
 		asm volatile(ARM64_LSE_ATOMIC_INSN(
 		/* LL/SC */
+		"	prfm	pstl1strm, %2\n"
 		"1:	ldxrb	%w0, %2\n"
 		"	stlxrb	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		"	dmb	ish",
 		/* LSE atomics */
+		"	nop\n"
 		"	nop\n"
 		"	swpalb	%w3, %w0, %2\n"
 		"	nop\n"
···
 	case 2:
 		asm volatile(ARM64_LSE_ATOMIC_INSN(
 		/* LL/SC */
+		"	prfm	pstl1strm, %2\n"
 		"1:	ldxrh	%w0, %2\n"
 		"	stlxrh	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		"	dmb	ish",
 		/* LSE atomics */
+		"	nop\n"
 		"	nop\n"
 		"	swpalh	%w3, %w0, %2\n"
 		"	nop\n"
···
 	case 4:
 		asm volatile(ARM64_LSE_ATOMIC_INSN(
 		/* LL/SC */
+		"	prfm	pstl1strm, %2\n"
 		"1:	ldxr	%w0, %2\n"
 		"	stlxr	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		"	dmb	ish",
 		/* LSE atomics */
+		"	nop\n"
 		"	nop\n"
 		"	swpal	%w3, %w0, %2\n"
 		"	nop\n"
···
 	case 8:
 		asm volatile(ARM64_LSE_ATOMIC_INSN(
 		/* LL/SC */
+		"	prfm	pstl1strm, %2\n"
 		"1:	ldxr	%0, %2\n"
 		"	stlxr	%w1, %3, %2\n"
 		"	cbnz	%w1, 1b\n"
 		"	dmb	ish",
 		/* LSE atomics */
+		"	nop\n"
 		"	nop\n"
 		"	swpal	%3, %0, %2\n"
 		"	nop\n"
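
The extra nop on each LSE side is not incidental: the two arguments of ARM64_LSE_ATOMIC_INSN are alternative instruction sequences patched over one another at boot, and the arm64 alternatives framework requires them to assemble to the same size. Assumed shape of the macro (see arch/arm64/include/asm/lse.h):

/* ALTERNATIVE() replaces the LL/SC sequence with the LSE one when
 * ARM64_HAS_LSE_ATOMICS is detected, so both must occupy the same
 * number of 4-byte slots. The prfm added to the LL/SC side costs
 * one slot, paid back with one more nop on the LSE side; an LSE
 * atomic such as swpalb takes no exclusive reservation and gains
 * nothing from the prefetch. */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)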
arch/arm64/include/asm/futex.h (+2)
··· 30 30 asm volatile( \ 31 31 ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ 32 32 CONFIG_ARM64_PAN) \ 33 + " prfm pstl1strm, %2\n" \ 33 34 "1: ldxr %w1, %2\n" \ 34 35 insn "\n" \ 35 36 "2: stlxr %w3, %w0, %2\n" \ ··· 121 120 return -EFAULT; 122 121 123 122 asm volatile("// futex_atomic_cmpxchg_inatomic\n" 123 + " prfm pstl1strm, %2\n" 124 124 "1: ldxr %w1, %2\n" 125 125 " sub %w3, %w1, %w4\n" 126 126 " cbnz %w3, 3f\n"
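
Worth noting for the futex paths, which operate on user-space addresses: the added prfm needs no exception-table entry, because prefetch instructions are hints and cannot raise a synchronous abort. On a bad user pointer the prefetch simply does nothing, and the existing fixup on the subsequent ldxr still applies.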
arch/arm64/lib/bitops.S (+2)
···
 	eor	w0, w0, w3		// Clear low bits
 	mov	x2, #1
 	add	x1, x1, x0, lsr #3	// Get word offset
+alt_lse	"	prfm	pstl1strm, [x1]",	"nop"
 	lsl	x3, x2, x3		// Create mask

 alt_lse	"1:	ldxr	x2, [x1]",		"\lse	x3, [x1]"
···
 	eor	w0, w0, w3		// Clear low bits
 	mov	x2, #1
 	add	x1, x1, x0, lsr #3	// Get word offset
+alt_lse	"	prfm	pstl1strm, [x1]",	"nop"
 	lsl	x4, x2, x3		// Create mask

 alt_lse	"1:	ldxr	x2, [x1]",		"\lse	x4, x2, [x1]"
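
For the assembly bit ops, a hedged C rendering (hypothetical helper, not the bitops.S macros) of the patched LL/SC path:

/* The prefetch targets the word holding the bit, the same address
 * the ldxr/stxr pair uses. On an LSE system the alt_lse alternative
 * is a single atomic such as stset, which takes no reservation and
 * needs no prefetch, hence the "nop" in the LSE slot. */
static inline void set_bit_sketch(unsigned long mask, unsigned long *word)
{
	unsigned long old, status;

	asm volatile(
	"	prfm	pstl1strm, %2\n"	/* pull the line in a writable state */
	"1:	ldxr	%0, %2\n"
	"	orr	%0, %0, %3\n"		/* set the requested bits */
	"	stxr	%w1, %0, %2\n"
	"	cbnz	%w1, 1b\n"		/* reservation lost: retry */
	: "=&r" (old), "=&r" (status), "+Q" (*word)
	: "r" (mask));
}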