Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf trace: Add --max-summary option

The --max-summary option limits the number of output lines for syscall
summary stats. The limit applies separately to each entry, such as a
thread or a cgroup. For the total summary, it simply prints up to the
given number of lines.

For example,

$ sudo perf trace -as --max-summary 3 sleep 0.1

ThreadPoolServi (1011651), 114 events, 14.8%

syscall calls errors total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- ------ -------- --------- --------- --------- ------
epoll_wait 38 0 95.589 0.000 2.515 11.153 28.98%
futex 9 0 0.040 0.002 0.004 0.014 28.63%
read 10 0 0.037 0.003 0.004 0.005 4.67%

sleep (1050529), 250 events, 32.4%

syscall calls errors total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- ------ -------- --------- --------- --------- ------
clock_nanosleep 1 0 100.156 100.156 100.156 100.156 0.00%
execve 4 3 1.020 0.005 0.255 0.989 95.93%
openat 36 17 0.416 0.003 0.012 0.029 10.58%

...

And this is the per-cgroup summary using BPF:

$ sudo perf trace -as --max-summary 3 --summary-mode=cgroup --bpf-summary sleep 0.1

cgroup /user.slice/user-657345.slice/user@657345.service/session.slice/org.gnome.Shell@x11.service, 12 events

syscall calls errors total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- ------ -------- --------- --------- --------- ------
recvmsg 8 7 0.016 0.001 0.002 0.006 39.73%
ppoll 1 0 0.014 0.014 0.014 0.014 0.00%
write 2 0 0.010 0.002 0.005 0.008 61.02%

cgroup /user.slice/user-657345.slice/session-4.scope, 73 events

syscall calls errors total min avg max stddev
(msec) (msec) (msec) (msec) (%)
--------------- -------- ------ -------- --------- --------- --------- ------
epoll_wait 8 0 13.461 0.010 1.683 12.235 89.66%
ioctl 20 0 0.204 0.001 0.010 0.113 54.01%
writev 11 0 0.164 0.004 0.015 0.042 20.34%

Reviewed-by: Howard Chu <howardchu95@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Namhyung Kim and committed by
Arnaldo Carvalho de Melo
ece3c775 d120cb34

+38 -20
+4
tools/perf/Documentation/perf-trace.txt
··· 249 249 works well with -s/--summary option where no argument information is 250 250 required. 251 251 252 + --max-summary=N:: 253 + Maximum number of lines in the summary mode. Note that this applies to 254 + each entry (thread or cgroup). 255 + 252 256 253 257 PAGEFAULTS 254 258 ----------
+9 -1
tools/perf/builtin-trace.c
··· 196 196 unsigned int max_stack; 197 197 unsigned int min_stack; 198 198 enum trace_summary_mode summary_mode; 199 + int max_summary; 199 200 int raw_augmented_syscalls_args_size; 200 201 bool raw_augmented_syscalls; 201 202 bool fd_path_disabled; ··· 4600 4599 if (!err) { 4601 4600 if (trace->summary) { 4602 4601 if (trace->summary_bpf) 4603 - trace_print_bpf_summary(trace->output); 4602 + trace_print_bpf_summary(trace->output, trace->max_summary); 4604 4603 else if (trace->summary_mode == SUMMARY__BY_TOTAL) 4605 4604 trace__fprintf_total_summary(trace, trace->output); 4606 4605 else ··· 4823 4822 struct hashmap *syscall_stats) 4824 4823 { 4825 4824 size_t printed = 0; 4825 + int lines = 0; 4826 4826 struct syscall *sc; 4827 4827 struct syscall_entry *entries; 4828 4828 ··· 4868 4866 fprintf(fp, "\t\t\t\t%s: %d\n", perf_env__arch_strerrno(trace->host->env, e + 1), stats->errnos[e]); 4869 4867 } 4870 4868 } 4869 + lines++; 4871 4870 } 4871 + 4872 + if (trace->max_summary && trace->max_summary <= lines) 4873 + break; 4872 4874 } 4873 4875 4874 4876 free(entries); ··· 5449 5443 OPT_BOOLEAN(0, "force-btf", &trace.force_btf, "Prefer btf_dump general pretty printer" 5450 5444 "to customized ones"), 5451 5445 OPT_BOOLEAN(0, "bpf-summary", &trace.summary_bpf, "Summary syscall stats in BPF"), 5446 + OPT_INTEGER(0, "max-summary", &trace.max_summary, 5447 + "Max number of entries in the summary."), 5452 5448 OPTS_EVSWITCH(&trace.evswitch), 5453 5449 OPT_END() 5454 5450 };
+23 -17
tools/perf/util/bpf-trace-summary.c
··· 138 138 return key1 == key2; 139 139 } 140 140 141 - static int print_common_stats(struct syscall_data *data, FILE *fp) 141 + static int print_common_stats(struct syscall_data *data, int max_summary, FILE *fp) 142 142 { 143 143 int printed = 0; 144 144 145 - for (int i = 0; i < data->nr_nodes; i++) { 145 + if (max_summary == 0 || max_summary > data->nr_nodes) 146 + max_summary = data->nr_nodes; 147 + 148 + for (int i = 0; i < max_summary; i++) { 146 149 struct syscall_node *node = &data->nodes[i]; 147 150 struct syscall_stats *stat = &node->stats; 148 151 double total = (double)(stat->total_time) / NSEC_PER_MSEC; ··· 203 200 return 0; 204 201 } 205 202 206 - static int print_thread_stat(struct syscall_data *data, FILE *fp) 203 + static int print_thread_stat(struct syscall_data *data, int max_summary, FILE *fp) 207 204 { 208 205 int printed = 0; 209 206 ··· 216 213 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); 217 214 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); 218 215 219 - printed += print_common_stats(data, fp); 216 + printed += print_common_stats(data, max_summary, fp); 220 217 printed += fprintf(fp, "\n\n"); 221 218 222 219 return printed; 223 220 } 224 221 225 - static int print_thread_stats(struct syscall_data **data, int nr_data, FILE *fp) 222 + static int print_thread_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp) 226 223 { 227 224 int printed = 0; 228 225 229 226 for (int i = 0; i < nr_data; i++) 230 - printed += print_thread_stat(data[i], fp); 227 + printed += print_thread_stat(data[i], max_summary, fp); 231 228 232 229 return printed; 233 230 } ··· 280 277 return 0; 281 278 } 282 279 283 - static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp) 280 + static int print_total_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp) 284 281 { 285 282 int printed = 0; 286 283 int nr_events = 0; ··· 294 291 
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); 295 292 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); 296 293 297 - for (int i = 0; i < nr_data; i++) 298 - printed += print_common_stats(data[i], fp); 294 + if (max_summary == 0 || max_summary > nr_data) 295 + max_summary = nr_data; 296 + 297 + for (int i = 0; i < max_summary; i++) 298 + printed += print_common_stats(data[i], max_summary, fp); 299 299 300 300 printed += fprintf(fp, "\n\n"); 301 301 return printed; ··· 339 333 return 0; 340 334 } 341 335 342 - static int print_cgroup_stat(struct syscall_data *data, FILE *fp) 336 + static int print_cgroup_stat(struct syscall_data *data, int max_summary, FILE *fp) 343 337 { 344 338 int printed = 0; 345 339 struct cgroup *cgrp = __cgroup__find(&cgroups, data->key); ··· 357 351 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); 358 352 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); 359 353 360 - printed += print_common_stats(data, fp); 354 + printed += print_common_stats(data, max_summary, fp); 361 355 printed += fprintf(fp, "\n\n"); 362 356 363 357 return printed; 364 358 } 365 359 366 - static int print_cgroup_stats(struct syscall_data **data, int nr_data, FILE *fp) 360 + static int print_cgroup_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp) 367 361 { 368 362 int printed = 0; 369 363 370 364 for (int i = 0; i < nr_data; i++) 371 - printed += print_cgroup_stat(data[i], fp); 365 + printed += print_cgroup_stat(data[i], max_summary, fp); 372 366 373 367 return printed; 374 368 } 375 369 376 - int trace_print_bpf_summary(FILE *fp) 370 + int trace_print_bpf_summary(FILE *fp, int max_summary) 377 371 { 378 372 struct bpf_map *map = skel->maps.syscall_stats_map; 379 373 struct syscall_key *prev_key, key; ··· 426 420 427 421 switch (skel->rodata->aggr_mode) { 428 422 case SYSCALL_AGGR_THREAD: 429 - 
printed += print_thread_stats(data, nr_data, fp); 423 + printed += print_thread_stats(data, nr_data, max_summary, fp); 430 424 break; 431 425 case SYSCALL_AGGR_CPU: 432 - printed += print_total_stats(data, nr_data, fp); 426 + printed += print_total_stats(data, nr_data, max_summary, fp); 433 427 break; 434 428 case SYSCALL_AGGR_CGROUP: 435 - printed += print_cgroup_stats(data, nr_data, fp); 429 + printed += print_cgroup_stats(data, nr_data, max_summary, fp); 436 430 break; 437 431 default: 438 432 break;
+2 -2
tools/perf/util/trace.h
··· 16 16 int trace_prepare_bpf_summary(enum trace_summary_mode mode); 17 17 void trace_start_bpf_summary(void); 18 18 void trace_end_bpf_summary(void); 19 - int trace_print_bpf_summary(FILE *fp); 19 + int trace_print_bpf_summary(FILE *fp, int max_summary); 20 20 void trace_cleanup_bpf_summary(void); 21 21 22 22 #else /* !HAVE_BPF_SKEL */ ··· 27 27 } 28 28 static inline void trace_start_bpf_summary(void) {} 29 29 static inline void trace_end_bpf_summary(void) {} 30 - static inline int trace_print_bpf_summary(FILE *fp __maybe_unused) 30 + static inline int trace_print_bpf_summary(FILE *fp __maybe_unused, int max_summary __maybe_unused) 31 31 { 32 32 return 0; 33 33 }