Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf annotation: Switch lock from a mutex to a sharded_mutex

Remove the "struct mutex lock" member from struct annotation, which is
allocated per symbol. This saves roughly 40 bytes per symbol
allocation. Use a sharded mutex instead, with the number of shards set
to the number of CPUs. Assuming good hashing of the annotation (done
based on the pointer), contention requires more threads than CPUs,
which is not currently the case in any perf command. Were contention to
become an issue, it would be straightforward to increase the number of
shards in the mutex.

On my Debian/glibc-based machine, this reduces the size of struct
annotation from 136 bytes to 96 bytes, a reduction of nearly 30%.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andres Freund <andres@anarazel.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Yuan Can <yuancan@huawei.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Link: https://lore.kernel.org/r/20230615040715.2064350-2-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

authored by

Ian Rogers and committed by
Namhyung Kim
2e9f9d4a 0650b2b2

+78 -25
+7 -7
tools/perf/builtin-top.c
··· 137 137 } 138 138 139 139 notes = symbol__annotation(sym); 140 - mutex_lock(&notes->lock); 140 + annotation__lock(notes); 141 141 142 142 if (!symbol__hists(sym, top->evlist->core.nr_entries)) { 143 - mutex_unlock(&notes->lock); 143 + annotation__unlock(notes); 144 144 pr_err("Not enough memory for annotating '%s' symbol!\n", 145 145 sym->name); 146 146 sleep(1); ··· 156 156 pr_err("Couldn't annotate %s: %s\n", sym->name, msg); 157 157 } 158 158 159 - mutex_unlock(&notes->lock); 159 + annotation__unlock(notes); 160 160 return err; 161 161 } 162 162 ··· 211 211 212 212 notes = symbol__annotation(sym); 213 213 214 - if (!mutex_trylock(&notes->lock)) 214 + if (!annotation__trylock(notes)) 215 215 return; 216 216 217 217 err = hist_entry__inc_addr_samples(he, sample, evsel, ip); 218 218 219 - mutex_unlock(&notes->lock); 219 + annotation__unlock(notes); 220 220 221 221 if (unlikely(err)) { 222 222 /* ··· 253 253 symbol = he->ms.sym; 254 254 notes = symbol__annotation(symbol); 255 255 256 - mutex_lock(&notes->lock); 256 + annotation__lock(notes); 257 257 258 258 symbol__calc_percent(symbol, evsel); 259 259 ··· 274 274 if (more != 0) 275 275 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 276 276 out_unlock: 277 - mutex_unlock(&notes->lock); 277 + annotation__unlock(notes); 278 278 } 279 279 280 280 static void perf_top__resort_hists(struct perf_top *t)
+5 -5
tools/perf/ui/browsers/annotate.c
··· 314 314 315 315 browser->entries = RB_ROOT; 316 316 317 - mutex_lock(&notes->lock); 317 + annotation__lock(notes); 318 318 319 319 symbol__calc_percent(sym, evsel); 320 320 ··· 343 343 } 344 344 disasm_rb_tree__insert(browser, &pos->al); 345 345 } 346 - mutex_unlock(&notes->lock); 346 + annotation__unlock(notes); 347 347 348 348 browser->curr_hot = rb_last(&browser->entries); 349 349 } ··· 470 470 } 471 471 472 472 notes = symbol__annotation(dl->ops.target.sym); 473 - mutex_lock(&notes->lock); 473 + annotation__lock(notes); 474 474 475 475 if (!symbol__hists(dl->ops.target.sym, evsel->evlist->core.nr_entries)) { 476 - mutex_unlock(&notes->lock); 476 + annotation__unlock(notes); 477 477 ui__warning("Not enough memory for annotating '%s' symbol!\n", 478 478 dl->ops.target.sym->name); 479 479 return true; ··· 482 482 target_ms.maps = ms->maps; 483 483 target_ms.map = ms->map; 484 484 target_ms.sym = dl->ops.target.sym; 485 - mutex_unlock(&notes->lock); 485 + annotation__unlock(notes); 486 486 symbol__tui_annotate(&target_ms, evsel, hbt, browser->opts); 487 487 sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type); 488 488 ui_browser__show_title(&browser->b, title);
+58 -10
tools/perf/util/annotate.c
··· 32 32 #include "block-range.h" 33 33 #include "string2.h" 34 34 #include "util/event.h" 35 + #include "util/sharded_mutex.h" 35 36 #include "arch/common.h" 36 37 #include "namespaces.h" 37 38 #include <regex.h> ··· 857 856 { 858 857 struct annotation *notes = symbol__annotation(sym); 859 858 860 - mutex_lock(&notes->lock); 859 + annotation__lock(notes); 861 860 if (notes->src != NULL) { 862 861 memset(notes->src->histograms, 0, 863 862 notes->src->nr_histograms * notes->src->sizeof_sym_hist); ··· 865 864 memset(notes->src->cycles_hist, 0, 866 865 symbol__size(sym) * sizeof(struct cyc_hist)); 867 866 } 868 - mutex_unlock(&notes->lock); 867 + annotation__unlock(notes); 869 868 } 870 869 871 870 static int __symbol__account_cycles(struct cyc_hist *ch, ··· 1122 1121 notes->hit_insn = 0; 1123 1122 notes->cover_insn = 0; 1124 1123 1125 - mutex_lock(&notes->lock); 1124 + annotation__lock(notes); 1126 1125 for (offset = size - 1; offset >= 0; --offset) { 1127 1126 struct cyc_hist *ch; 1128 1127 ··· 1141 1140 notes->have_cycles = true; 1142 1141 } 1143 1142 } 1144 - mutex_unlock(&notes->lock); 1143 + annotation__unlock(notes); 1145 1144 } 1146 1145 1147 1146 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, ··· 1292 1291 return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); 1293 1292 } 1294 1293 1295 - void annotation__init(struct annotation *notes) 1296 - { 1297 - mutex_init(&notes->lock); 1298 - } 1299 - 1300 1294 void annotation__exit(struct annotation *notes) 1301 1295 { 1302 1296 annotated_source__delete(notes->src); 1303 - mutex_destroy(&notes->lock); 1304 1297 } 1298 + 1299 + static struct sharded_mutex *sharded_mutex; 1300 + 1301 + static void annotation__init_sharded_mutex(void) 1302 + { 1303 + /* As many mutexes as there are CPUs. 
*/ 1304 + sharded_mutex = sharded_mutex__new(cpu__max_present_cpu().cpu); 1305 + } 1306 + 1307 + static size_t annotation__hash(const struct annotation *notes) 1308 + { 1309 + return (size_t)notes; 1310 + } 1311 + 1312 + static struct mutex *annotation__get_mutex(const struct annotation *notes) 1313 + { 1314 + static pthread_once_t once = PTHREAD_ONCE_INIT; 1315 + 1316 + pthread_once(&once, annotation__init_sharded_mutex); 1317 + if (!sharded_mutex) 1318 + return NULL; 1319 + 1320 + return sharded_mutex__get_mutex(sharded_mutex, annotation__hash(notes)); 1321 + } 1322 + 1323 + void annotation__lock(struct annotation *notes) 1324 + NO_THREAD_SAFETY_ANALYSIS 1325 + { 1326 + struct mutex *mutex = annotation__get_mutex(notes); 1327 + 1328 + if (mutex) 1329 + mutex_lock(mutex); 1330 + } 1331 + 1332 + void annotation__unlock(struct annotation *notes) 1333 + NO_THREAD_SAFETY_ANALYSIS 1334 + { 1335 + struct mutex *mutex = annotation__get_mutex(notes); 1336 + 1337 + if (mutex) 1338 + mutex_unlock(mutex); 1339 + } 1340 + 1341 + bool annotation__trylock(struct annotation *notes) 1342 + { 1343 + struct mutex *mutex = annotation__get_mutex(notes); 1344 + 1345 + if (!mutex) 1346 + return false; 1347 + 1348 + return mutex_trylock(mutex); 1349 + } 1350 + 1305 1351 1306 1352 static void annotation_line__add(struct annotation_line *al, struct list_head *head) 1307 1353 {
+8 -3
tools/perf/util/annotate.h
··· 271 271 struct sym_hist *histograms; 272 272 }; 273 273 274 - struct annotation { 275 - struct mutex lock; 274 + struct LOCKABLE annotation { 276 275 u64 max_coverage; 277 276 u64 start; 278 277 u64 hit_cycles; ··· 297 298 struct annotated_source *src; 298 299 }; 299 300 300 - void annotation__init(struct annotation *notes); 301 + static inline void annotation__init(struct annotation *notes __maybe_unused) 302 + { 303 + } 301 304 void annotation__exit(struct annotation *notes); 305 + 306 + void annotation__lock(struct annotation *notes) EXCLUSIVE_LOCK_FUNCTION(*notes); 307 + void annotation__unlock(struct annotation *notes) UNLOCK_FUNCTION(*notes); 308 + bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(true, *notes); 302 309 303 310 static inline int annotation__cycles_width(struct annotation *notes) 304 311 {