Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf trace: Ignore thread hashing in summary

Commit 91e467bc568f ("perf machine: Use hashtable for machine
threads") made the iteration of thread tids unordered. The perf trace
--summary output sorts and prints each hash bucket, rather than all
threads globally. Change this behavior by turning all threads into a
list, sorting the list by number of trace events and then by tids, and
finally printing the list. This also allows the rbtree in threads not
to be accessed outside of machine.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-3-irogers@google.com

authored by

Ian Rogers and committed by
Namhyung Kim
f178ffdf 2f1e20fe

+23 -23
+23 -18
tools/perf/builtin-trace.c
··· 74 74 #include <linux/err.h> 75 75 #include <linux/filter.h> 76 76 #include <linux/kernel.h> 77 + #include <linux/list_sort.h> 77 78 #include <linux/random.h> 78 79 #include <linux/stringify.h> 79 80 #include <linux/time64.h> ··· 4313 4312 return ttrace ? ttrace->nr_events : 0; 4314 4313 } 4315 4314 4316 - DEFINE_RESORT_RB(threads, 4317 - (thread__nr_events(thread__priv(a->thread)) < 4318 - thread__nr_events(thread__priv(b->thread))), 4319 - struct thread *thread; 4320 - ) 4315 + static int trace_nr_events_cmp(void *priv __maybe_unused, 4316 + const struct list_head *la, 4317 + const struct list_head *lb) 4321 4318 { 4322 - entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread; 4319 + struct thread_list *a = list_entry(la, struct thread_list, list); 4320 + struct thread_list *b = list_entry(lb, struct thread_list, list); 4321 + unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread)); 4322 + unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread)); 4323 + 4324 + if (a_nr_events != b_nr_events) 4325 + return a_nr_events < b_nr_events ? -1 : 1; 4326 + 4327 + /* Identical number of threads, place smaller tids first. */ 4328 + return thread__tid(a->thread) < thread__tid(b->thread) 4329 + ? -1 4330 + : (thread__tid(a->thread) > thread__tid(b->thread) ? 
1 : 0); 4323 4331 } 4324 4332 4325 4333 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 4326 4334 { 4327 4335 size_t printed = trace__fprintf_threads_header(fp); 4328 - struct rb_node *nd; 4329 - int i; 4336 + LIST_HEAD(threads); 4330 4337 4331 - for (i = 0; i < THREADS__TABLE_SIZE; i++) { 4332 - DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i); 4338 + if (machine__thread_list(trace->host, &threads) == 0) { 4339 + struct thread_list *pos; 4333 4340 4334 - if (threads == NULL) { 4335 - fprintf(fp, "%s", "Error sorting output by nr_events!\n"); 4336 - return 0; 4337 - } 4341 + list_sort(NULL, &threads, trace_nr_events_cmp); 4338 4342 4339 - resort_rb__for_each_entry(nd, threads) 4340 - printed += trace__fprintf_thread(fp, threads_entry->thread, trace); 4341 - 4342 - resort_rb__delete(threads); 4343 + list_for_each_entry(pos, &threads, list) 4344 + printed += trace__fprintf_thread(fp, pos->thread, trace); 4343 4345 } 4346 + thread_list__delete(&threads); 4344 4347 return printed; 4345 4348 } 4346 4349
-5
tools/perf/util/rb_resort.h
··· 143 143 DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \ 144 144 __ilist->rblist.nr_entries) 145 145 146 - /* For 'struct machine->threads' */ 147 - #define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ 148 - DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \ 149 - __machine->threads[hash_bucket].nr) 150 - 151 146 #endif /* _PERF_RESORT_RB_H_ */