Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf stat: Add user_time and system_time events

It bothered me that during benchmarking using 'perf stat' (to collect
for example CPU cache events) I could not simultaneously retrieve the
times spend in user or kernel mode in a machine readable format.

When running 'perf stat' the output for humans contains the times
reported by rusage and wait4.

$ perf stat -e cache-misses:u -- true

Performance counter stats for 'true':

4,206 cache-misses:u

0.001113619 seconds time elapsed

0.001175000 seconds user
0.000000000 seconds sys

But 'perf stat's machine-readable format does not provide this information.

$ perf stat -x, -e cache-misses:u -- true
4282,,cache-misses:u,492859,100.00,,

I found no way to retrieve this information using the available events
while using machine-readable output.

This patch adds two new tool internal events 'user_time' and
'system_time', similarly to the already present 'duration_time' event.

Both events use the already collected rusage information obtained by
wait4 and tracked in the global ru_stats.

Examples presenting cache-misses and rusage information in both human
and machine-readable form:

$ perf stat -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time .

Performance counter stats for 'grep -q -r duration_time .':

67,422,542 ns duration_time:u
50,517,000 ns user_time:u
16,839,000 ns system_time:u
30,937 cache-misses:u

0.067422542 seconds time elapsed

0.050517000 seconds user
0.016839000 seconds sys

$ perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time .
72134524,ns,duration_time:u,72134524,100.00,,
65225000,ns,user_time:u,65225000,100.00,,
6865000,ns,system_time:u,6865000,100.00,,
38705,,cache-misses:u,71189328,100.00,,

Signed-off-by: Florian Fischer <florian.fischer@muhq.space>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20220420102354.468173-3-florian.fischer@muhq.space
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Florian Fischer and committed by
Arnaldo Carvalho de Melo
b03b89b3 c735b0a5

+37 -9
+28 -8
tools/perf/builtin-stat.c
··· 342 342 static int read_single_counter(struct evsel *counter, int cpu_map_idx, 343 343 int thread, struct timespec *rs) 344 344 { 345 - if (counter->tool_event == PERF_TOOL_DURATION_TIME) { 346 - u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; 347 - struct perf_counts_values *count = 348 - perf_counts(counter->counts, cpu_map_idx, thread); 349 - count->ena = count->run = val; 350 - count->val = val; 351 - return 0; 345 + switch(counter->tool_event) { 346 + case PERF_TOOL_DURATION_TIME: { 347 + u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; 348 + struct perf_counts_values *count = 349 + perf_counts(counter->counts, cpu_map_idx, thread); 350 + count->ena = count->run = val; 351 + count->val = val; 352 + return 0; 353 + } 354 + case PERF_TOOL_USER_TIME: 355 + case PERF_TOOL_SYSTEM_TIME: { 356 + u64 val; 357 + struct perf_counts_values *count = 358 + perf_counts(counter->counts, cpu_map_idx, thread); 359 + if (counter->tool_event == PERF_TOOL_USER_TIME) 360 + val = ru_stats.ru_utime_usec_stat.mean; 361 + else 362 + val = ru_stats.ru_stime_usec_stat.mean; 363 + count->ena = count->run = val; 364 + count->val = val; 365 + return 0; 366 + } 367 + default: 368 + case PERF_TOOL_NONE: 369 + return evsel__read_counter(counter, cpu_map_idx, thread); 370 + case PERF_TOOL_MAX: 371 + /* This should never be reached */ 372 + return 0; 352 373 } 353 - return evsel__read_counter(counter, cpu_map_idx, thread); 354 374 } 355 375 356 376 /*
+4
tools/perf/util/evsel.h
··· 30 30 enum perf_tool_event { 31 31 PERF_TOOL_NONE = 0, 32 32 PERF_TOOL_DURATION_TIME = 1, 33 + PERF_TOOL_USER_TIME = 2, 34 + PERF_TOOL_SYSTEM_TIME = 3, 35 + 36 + PERF_TOOL_MAX, 33 37 }; 34 38 35 39 /** struct evsel - event selector
+3 -1
tools/perf/util/parse-events.c
··· 402 402 if (!evsel) 403 403 return -ENOMEM; 404 404 evsel->tool_event = tool_event; 405 - if (tool_event == PERF_TOOL_DURATION_TIME) { 405 + if (tool_event == PERF_TOOL_DURATION_TIME 406 + || tool_event == PERF_TOOL_USER_TIME 407 + || tool_event == PERF_TOOL_SYSTEM_TIME) { 406 408 free((char *)evsel->unit); 407 409 evsel->unit = strdup("ns"); 408 410 }
+2
tools/perf/util/parse-events.l
··· 353 353 emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } 354 354 dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } 355 355 duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } 356 + user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); } 357 + system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); } 356 358 bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } 357 359 cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } 358 360