Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf stat: Fix uncore aggregation number

Follow up:
lore.kernel.org/CAP-5=fVDF4-qYL1Lm7efgiHk7X=_nw_nEFMBZFMcsnOOJgX4Kg@mail.gmail.com/

This patch accumulates the aggregation number when evsels are merged, so
that it reflects the number of aggregated uncore counters. It also renames
the column from `cpus` to `ctrs`, and the JSON key from `aggregate-number`
to `counters`.

Tested on a 2-socket machine with SNC3, uncore_imc_[0-11] and
cpumask="0,120"
Before:
perf stat -e clockticks -I 1000 --per-socket
# time socket cpus counts unit events
1.001085024 S0 1 9615386315 clockticks
1.001085024 S1 1 9614287448 clockticks
perf stat -e clockticks -I 1000 --per-node
# time node cpus counts unit events
1.001029867 N0 1 3205726984 clockticks
1.001029867 N1 1 3205444421 clockticks
1.001029867 N2 1 3205234018 clockticks
1.001029867 N3 1 3205224660 clockticks
1.001029867 N4 1 3205207213 clockticks
1.001029867 N5 1 3205528246 clockticks
After:
perf stat -e clockticks -I 1000 --per-socket
# time socket ctrs counts unit events
1.001026071 S0 12 9619677996 clockticks
1.001026071 S1 12 9618612614 clockticks
perf stat -e clockticks -I 1000 --per-node
# time node ctrs counts unit events
1.001027449 N0 4 3207251859 clockticks
1.001027449 N1 4 3207315930 clockticks
1.001027449 N2 4 3206981828 clockticks
1.001027449 N3 4 3206566126 clockticks
1.001027449 N4 4 3206032609 clockticks
1.001027449 N5 4 3205651355 clockticks

Tested with JSON output linter:
perf test "perf stat JSON output linter"
94: perf stat JSON output linter : Ok

Suggested-by: Ian Rogers <irogers@google.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Chun-Tse Shao <ctshao@google.com>
Link: https://lore.kernel.org/r/20250627201818.479421-1-ctshao@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

authored by

Chun-Tse Shao and committed by
Namhyung Kim
aa497357 ef0f7c23

+24 -22
+4 -2
tools/perf/Documentation/perf-stat.txt
··· 640 640 With -j, perf stat is able to print out a JSON format output 641 641 that can be used for parsing. 642 642 643 - - timestamp : optional usec time stamp in fractions of second (with -I) 643 + - interval : optional timestamp in fractions of second (with -I) 644 644 - optional aggregate options: 645 645 - core : core identifier (with --per-core) 646 646 - die : die identifier (with --per-die) 647 647 - socket : socket identifier (with --per-socket) 648 648 - node : node identifier (with --per-node) 649 649 - thread : thread identifier (with --per-thread) 650 + - counters : number of aggregated PMU counters 650 651 - counter-value : counter value 651 652 - unit : unit of the counter value or empty 652 653 - event : event name 653 654 - variance : optional variance if multiple values are collected (with -r) 654 - - runtime : run time of counter 655 + - event-runtime : run time of the event 656 + - pcnt-running : percentage of time the event was running 655 657 - metric-value : optional metric value 656 658 - metric-unit : optional unit of metric 657 659
+2 -2
tools/perf/tests/shell/lib/perf_json_output_lint.py
··· 45 45 46 46 def check_json_output(expected_items): 47 47 checks = { 48 - 'aggregate-number': lambda x: isfloat(x), 48 + 'counters': lambda x: isfloat(x), 49 49 'core': lambda x: True, 50 50 'counter-value': lambda x: is_counter_value(x), 51 51 'cgroup': lambda x: True, ··· 75 75 if count not in expected_items and count >= 1 and count <= 7 and 'metric-value' in item: 76 76 # Events that generate >1 metric may have isolated metric 77 77 # values and possibly other prefixes like interval, core, 78 - # aggregate-number, or event-runtime/pcnt-running from multiplexing. 78 + # counters, or event-runtime/pcnt-running from multiplexing. 79 79 pass 80 80 elif count not in expected_items and count >= 1 and count <= 5 and 'metricgroup' in item: 81 81 pass
+17 -17
tools/perf/util/stat-display.c
··· 50 50 }; 51 51 52 52 static const char *aggr_header_csv[] = { 53 - [AGGR_CORE] = "core,cpus,", 54 - [AGGR_CACHE] = "cache,cpus,", 55 - [AGGR_CLUSTER] = "cluster,cpus,", 56 - [AGGR_DIE] = "die,cpus,", 57 - [AGGR_SOCKET] = "socket,cpus,", 58 - [AGGR_NONE] = "cpu,", 59 - [AGGR_THREAD] = "comm-pid,", 60 - [AGGR_NODE] = "node,", 61 - [AGGR_GLOBAL] = "" 53 + [AGGR_CORE] = "core,ctrs,", 54 + [AGGR_CACHE] = "cache,ctrs,", 55 + [AGGR_CLUSTER] = "cluster,ctrs,", 56 + [AGGR_DIE] = "die,ctrs,", 57 + [AGGR_SOCKET] = "socket,ctrs,", 58 + [AGGR_NONE] = "cpu,", 59 + [AGGR_THREAD] = "comm-pid,", 60 + [AGGR_NODE] = "node,", 61 + [AGGR_GLOBAL] = "" 62 62 }; 63 63 64 64 static const char *aggr_header_std[] = { ··· 304 304 return; 305 305 } 306 306 307 - fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, aggr_nr); 307 + fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, /*strlen("ctrs")*/ 4, aggr_nr); 308 308 } 309 309 310 310 static void print_aggr_id_csv(struct perf_stat_config *config, ··· 366 366 { 367 367 switch (config->aggr_mode) { 368 368 case AGGR_CORE: 369 - json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d", 369 + json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"counters\" : %d", 370 370 id.socket, id.die, id.core, aggr_nr); 371 371 break; 372 372 case AGGR_CACHE: 373 - json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d", 373 + json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"counters\" : %d", 374 374 id.socket, id.die, id.cache_lvl, id.cache, aggr_nr); 375 375 break; 376 376 case AGGR_CLUSTER: 377 - json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"aggregate-number\" : %d", 377 + json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"counters\" : %d", 378 378 id.socket, id.die, id.cluster, aggr_nr); 379 379 break; 380 380 case AGGR_DIE: 381 - json_out(os, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d", 381 + json_out(os, "\"die\" : \"S%d-D%d\", \"counters\" : %d", 382 382 id.socket, id.die, aggr_nr); 383 383 break; 384 
384 case AGGR_SOCKET: 385 - json_out(os, "\"socket\" : \"S%d\", \"aggregate-number\" : %d", 385 + json_out(os, "\"socket\" : \"S%d\", \"counters\" : %d", 386 386 id.socket, aggr_nr); 387 387 break; 388 388 case AGGR_NODE: 389 - json_out(os, "\"node\" : \"N%d\", \"aggregate-number\" : %d", 389 + json_out(os, "\"node\" : \"N%d\", \"counters\" : %d", 390 390 id.node, aggr_nr); 391 391 break; 392 392 case AGGR_NONE: ··· 1317 1317 case AGGR_CLUSTER: 1318 1318 case AGGR_CACHE: 1319 1319 case AGGR_CORE: 1320 - fprintf(output, "#%*s %-*s cpus", 1320 + fprintf(output, "#%*s %-*s ctrs", 1321 1321 INTERVAL_LEN - 1, "time", 1322 1322 aggr_header_lens[config->aggr_mode], 1323 1323 aggr_header_std[config->aggr_mode]);
+1 -1
tools/perf/util/stat.c
··· 526 526 struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts; 527 527 struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts; 528 528 529 - /* NB: don't increase aggr.nr for aliases */ 529 + ps_a->aggr[i].nr += ps_b->aggr[i].nr; 530 530 531 531 aggr_counts_a->val += aggr_counts_b->val; 532 532 aggr_counts_a->ena += aggr_counts_b->ena;