
powerpc/64s: Implement local_t using irq soft masking

local_t is used for atomic modification of per-CPU data, protecting it
against re-entrant modification from interrupts.

local_t read-modify-write atomic operations are currently implemented
with hardware atomics (larx/stcx), which are quite slow. This patch
implements them by masking all types of interrupts that may do local_t
operations ("standard" and perf interrupts).

Rusty's benchmark (https://lkml.org/lkml/2008/12/16/450) gives the
following timings for the local_t test, in nanoseconds per iteration:

                larx/stcx   irq+pmu disable
_inc                   38                10
_add                   38                10
_read                   4                 4
_add_return            38                10
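
The numbers are per-iteration averages over a tight loop of each
operation. A rough, hypothetical reconstruction of such a measurement
(not Rusty's actual test module; the counter and iteration count are
made up) might look like:

#include <linux/math64.h>
#include <linux/sched/clock.h>
#include <asm/local.h>

#define TEST_ITERS	10000000UL	/* made-up iteration count */

static local_t test_counter;

/* Returns the average cost of local_inc() in ns per iteration. */
static u64 time_local_inc(void)
{
	unsigned long i;
	u64 start, end;

	start = sched_clock();
	for (i = 0; i < TEST_ITERS; i++)
		local_inc(&test_counter);
	end = sched_clock();

	return div_u64(end - start, TEST_ITERS);
}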

There are still some interrupt types (system reset, machine check, and
watchdog) that cannot safely use local_t operations, because they are
not masked.

An alternative approach was proposed: use a CR bit to mark a critical
section, test it in the interrupt return path, and branch to a fixup
handler (similar to exception fixups) that restarts the operation. The
problems with this were the complexity of the fixup handler and the
latency of the slow path.

https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-November/123024.html
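
For comparison, a very rough sketch of that rejected scheme; the helper
names here are invented for illustration and the "restart" would really
be done by the interrupt return path rewinding the interrupted PC back
to the load, not by any C code in the sequence itself:

void mark_local_critical(void);		/* hypothetical: set the CR bit   */
void clear_local_critical(void);	/* hypothetical: clear the CR bit */

static inline void local_add_crbit(long a, long *v)
{
	long t;

	mark_local_critical();	/* CR bit: "inside a local_t op"       */
	t = *v;			/* an interrupt taken in this window   */
	t += a;			/* returns through a fixup handler     */
	*v = t;			/* that restarts the op from the load  */
	clear_local_critical();
}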

Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Authored by Madhavan Srinivasan, committed by Michael Ellerman
6cd74d2b 3b7e3020

arch/powerpc/include/asm/local.h | +141
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -2,6 +2,147 @@
 #ifndef _ARCH_POWERPC_LOCAL_H
 #define _ARCH_POWERPC_LOCAL_H
 
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#include <linux/percpu.h>
+#include <linux/atomic.h>
+#include <linux/irqflags.h>
+
+#include <asm/hw_irq.h>
+
+typedef struct
+{
+	long v;
+} local_t;
+
+#define LOCAL_INIT(i)	{ (i) }
+
+static __inline__ long local_read(local_t *l)
+{
+	return READ_ONCE(l->v);
+}
+
+static __inline__ void local_set(local_t *l, long i)
+{
+	WRITE_ONCE(l->v, i);
+}
+
+#define LOCAL_OP(op, c_op)						\
+static __inline__ void local_##op(long i, local_t *l)			\
+{									\
+	unsigned long flags;						\
+									\
+	powerpc_local_irq_pmu_save(flags);				\
+	l->v c_op i;							\
+	powerpc_local_irq_pmu_restore(flags);				\
+}
+
+#define LOCAL_OP_RETURN(op, c_op)					\
+static __inline__ long local_##op##_return(long a, local_t *l)		\
+{									\
+	long t;								\
+	unsigned long flags;						\
+									\
+	powerpc_local_irq_pmu_save(flags);				\
+	t = (l->v c_op a);						\
+	powerpc_local_irq_pmu_restore(flags);				\
+									\
+	return t;							\
+}
+
+#define LOCAL_OPS(op, c_op)						\
+	LOCAL_OP(op, c_op)						\
+	LOCAL_OP_RETURN(op, c_op)
+
+LOCAL_OPS(add, +=)
+LOCAL_OPS(sub, -=)
+
+#define local_add_negative(a, l)	(local_add_return((a), (l)) < 0)
+#define local_inc_return(l)		local_add_return(1LL, l)
+#define local_inc(l)			local_inc_return(l)
+
+/*
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_inc_and_test(l)		(local_inc_return(l) == 0)
+
+#define local_dec_return(l)		local_sub_return(1LL, l)
+#define local_dec(l)			local_dec_return(l)
+#define local_sub_and_test(a, l)	(local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l)		(local_dec_return((l)) == 0)
+
+static __inline__ long local_cmpxchg(local_t *l, long o, long n)
+{
+	long t;
+	unsigned long flags;
+
+	powerpc_local_irq_pmu_save(flags);
+	t = l->v;
+	if (t == o)
+		l->v = n;
+	powerpc_local_irq_pmu_restore(flags);
+
+	return t;
+}
+
+static __inline__ long local_xchg(local_t *l, long n)
+{
+	long t;
+	unsigned long flags;
+
+	powerpc_local_irq_pmu_save(flags);
+	t = l->v;
+	l->v = n;
+	powerpc_local_irq_pmu_restore(flags);
+
+	return t;
+}
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+static __inline__ int local_add_unless(local_t *l, long a, long u)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	powerpc_local_irq_pmu_save(flags);
+	if (l->v != u) {
+		l->v += a;
+		ret = 1;
+	}
+	powerpc_local_irq_pmu_restore(flags);
+
+	return ret;
+}
+
+#define local_inc_not_zero(l)		local_add_unless((l), 1, 0)
+
+/* Use these for per-cpu local_t variables: on some archs they are
+ * much more efficient than these naive implementations.  Note they take
+ * a variable, not an address.
+ */
+
+#define __local_inc(l)		((l)->v++)
+#define __local_dec(l)		((l)->v--)
+#define __local_add(i,l)	((l)->v+=(i))
+#define __local_sub(i,l)	((l)->v-=(i))
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+
 #include <asm-generic/local.h>
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
 
 #endif /* _ARCH_POWERPC_LOCAL_H */
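
With the header in place, the typical users are per-CPU statistics that
are bumped from both process context and maskable interrupt context. A
small illustrative example (the counter and function names here are made
up, not from the patch):

#include <linux/percpu.h>
#include <asm/local.h>

/* Illustrative per-CPU counter. */
static DEFINE_PER_CPU(local_t, dropped_events);

/*
 * Safe from process context and from maskable interrupt handlers: on
 * 64-bit Book3S the update is now a soft-masked load/modify/store rather
 * than a larx/stcx loop. Not safe from system reset, machine check or
 * watchdog context, since those are not masked.
 */
static void note_dropped_event(void)
{
	local_t *c = get_cpu_ptr(&dropped_events);

	local_inc(c);
	put_cpu_ptr(&dropped_events);
}

static long dropped_events_on(int cpu)
{
	return local_read(per_cpu_ptr(&dropped_events, cpu));
}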