Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

pcpcntr: add group allocation/free

Allocations and frees are globally serialized on the pcpu lock (and the
CPU hotplug lock if enabled, which is the case on Debian).

At least one frequent consumer allocates 4 back-to-back counters (and
frees them in the same manner), exacerbating the problem.

While this does not fully remedy scalability issues, it is a step
towards that goal and provides immediate relief.

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Reviewed-by: Dennis Zhou <dennis@kernel.org>
Reviewed-by: Vegard Nossum <vegard.nossum@oracle.com>
Link: https://lore.kernel.org/r/20230823050609.2228718-2-mjguzik@gmail.com
[Dennis: reflowed a few lines]
Signed-off-by: Dennis Zhou <dennis@kernel.org>

Authored by Mateusz Guzik, committed by Dennis Zhou
c439d5e8 f7d77dfc

+76 -25
+34 -7
include/linux/percpu_counter.h
@@ -30,17 +30,28 @@
 
 extern int percpu_counter_batch;
 
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
-			  struct lock_class_key *key);
+int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
+			       gfp_t gfp, u32 nr_counters,
+			       struct lock_class_key *key);
 
-#define percpu_counter_init(fbc, value, gfp)				\
+#define percpu_counter_init_many(fbc, value, gfp, nr_counters)		\
 	({								\
 		static struct lock_class_key __key;			\
 									\
-		__percpu_counter_init(fbc, value, gfp, &__key);		\
+		__percpu_counter_init_many(fbc, value, gfp, nr_counters,\
+					   &__key);			\
 	})
 
-void percpu_counter_destroy(struct percpu_counter *fbc);
+
+#define percpu_counter_init(fbc, value, gfp)				\
+	percpu_counter_init_many(fbc, value, gfp, 1)
+
+void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters);
+static inline void percpu_counter_destroy(struct percpu_counter *fbc)
+{
+	percpu_counter_destroy_many(fbc, 1);
+}
+
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
 			      s32 batch);
@@ -116,11 +127,27 @@
 	s64 count;
 };
 
+static inline int percpu_counter_init_many(struct percpu_counter *fbc,
+					   s64 amount, gfp_t gfp,
+					   u32 nr_counters)
+{
+	u32 i;
+
+	for (i = 0; i < nr_counters; i++)
+		fbc[i].count = amount;
+
+	return 0;
+}
+
 static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount,
 				      gfp_t gfp)
 {
-	fbc->count = amount;
-	return 0;
+	return percpu_counter_init_many(fbc, amount, gfp, 1);
+}
+
+static inline void percpu_counter_destroy_many(struct percpu_counter *fbc,
+					       u32 nr_counters)
+{
 }
 
 static inline void percpu_counter_destroy(struct percpu_counter *fbc)
+42 -18
lib/percpu_counter.c
@@ -151,48 +151,72 @@
 }
 EXPORT_SYMBOL(__percpu_counter_sum);
 
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
-			  struct lock_class_key *key)
+int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
+			       gfp_t gfp, u32 nr_counters,
+			       struct lock_class_key *key)
 {
 	unsigned long flags __maybe_unused;
+	size_t counter_size;
+	s32 __percpu *counters;
+	u32 i;
 
-	raw_spin_lock_init(&fbc->lock);
-	lockdep_set_class(&fbc->lock, key);
-	fbc->count = amount;
-	fbc->counters = alloc_percpu_gfp(s32, gfp);
-	if (!fbc->counters)
+	counter_size = ALIGN(sizeof(*counters), __alignof__(*counters));
+	counters = __alloc_percpu_gfp(nr_counters * counter_size,
+				      __alignof__(*counters), gfp);
+	if (!counters) {
+		fbc[0].counters = NULL;
 		return -ENOMEM;
+	}
 
-	debug_percpu_counter_activate(fbc);
+	for (i = 0; i < nr_counters; i++) {
+		raw_spin_lock_init(&fbc[i].lock);
+		lockdep_set_class(&fbc[i].lock, key);
+#ifdef CONFIG_HOTPLUG_CPU
+		INIT_LIST_HEAD(&fbc[i].list);
+#endif
+		fbc[i].count = amount;
+		fbc[i].counters = (void *)counters + (i * counter_size);
+
+		debug_percpu_counter_activate(&fbc[i]);
+	}
 
 #ifdef CONFIG_HOTPLUG_CPU
-	INIT_LIST_HEAD(&fbc->list);
 	spin_lock_irqsave(&percpu_counters_lock, flags);
-	list_add(&fbc->list, &percpu_counters);
+	for (i = 0; i < nr_counters; i++)
+		list_add(&fbc[i].list, &percpu_counters);
 	spin_unlock_irqrestore(&percpu_counters_lock, flags);
 #endif
 	return 0;
 }
-EXPORT_SYMBOL(__percpu_counter_init);
+EXPORT_SYMBOL(__percpu_counter_init_many);
 
-void percpu_counter_destroy(struct percpu_counter *fbc)
+void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters)
 {
 	unsigned long flags __maybe_unused;
+	u32 i;
 
-	if (!fbc->counters)
+	if (WARN_ON_ONCE(!fbc))
 		return;
 
-	debug_percpu_counter_deactivate(fbc);
+	if (!fbc[0].counters)
+		return;
+
+	for (i = 0; i < nr_counters; i++)
+		debug_percpu_counter_deactivate(&fbc[i]);
 
 #ifdef CONFIG_HOTPLUG_CPU
 	spin_lock_irqsave(&percpu_counters_lock, flags);
-	list_del(&fbc->list);
+	for (i = 0; i < nr_counters; i++)
+		list_del(&fbc[i].list);
 	spin_unlock_irqrestore(&percpu_counters_lock, flags);
 #endif
-	free_percpu(fbc->counters);
-	fbc->counters = NULL;
+
+	free_percpu(fbc[0].counters);
+
+	for (i = 0; i < nr_counters; i++)
+		fbc[i].counters = NULL;
 }
-EXPORT_SYMBOL(percpu_counter_destroy);
+EXPORT_SYMBOL(percpu_counter_destroy_many);
 
 int percpu_counter_batch __read_mostly = 32;
 EXPORT_SYMBOL(percpu_counter_batch);