Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf parse-events: Add "cpu" term to set the CPU an event is recorded on

The -C option allows the CPUs for a list of events to be specified, but
it's not possible to set the CPU for a single event. Add a term to
allow this. The term isn't a general CPU list because ',' is already
a special character in event parsing; instead, multiple cpu= terms may
be provided and they will be merged/unioned together.

An example of mixing different types of events counted on different CPUs:
```
$ perf stat -A -C 0,4-5,8 -e "instructions/cpu=0/,l1d-misses/cpu=4,cpu=5/,inst_retired.any/cpu=8/,cycles" -a sleep 0.1

Performance counter stats for 'system wide':

CPU0 6,979,225 instructions/cpu=0/ # 0.89 insn per cycle
CPU4 75,138 cpu/l1d-misses/
CPU5 1,418,939 cpu/l1d-misses/
CPU8 797,553 cpu/inst_retired.any,cpu=8/
CPU0 7,845,302 cycles
CPU4 6,546,859 cycles
CPU5 185,915,438 cycles
CPU8 2,065,668 cycles

0.112449242 seconds time elapsed
```

Committer testing:

root@number:~# grep -m1 "model name" /proc/cpuinfo
model name : AMD Ryzen 9 9950X3D 16-Core Processor
root@number:~# perf stat -A -e "instructions/cpu=0/,instructions,l1d-misses/cpu=4,cpu=5/,cycles" -a sleep 0.1

Performance counter stats for 'system wide':

CPU0 2,398,351 instructions/cpu=0/ # 0.44 insn per cycle
CPU0 2,398,152 instructions # 0.44 insn per cycle
CPU1 1,265,634 instructions # 0.49 insn per cycle
CPU2 606,087 instructions # 0.50 insn per cycle
CPU3 4,025,752 instructions # 0.52 insn per cycle
CPU4 4,236,810 instructions # 0.53 insn per cycle
CPU5 3,984,832 instructions # 0.66 insn per cycle
CPU6 434,132 instructions # 0.44 insn per cycle
CPU7 65,752 instructions # 0.41 insn per cycle
CPU8 459,083 instructions # 0.48 insn per cycle
CPU9 6,464,161 instructions # 1.31 insn per cycle
<SNIP>
root@number:~# perf stat -e "instructions/cpu=0/,instructions,l1d-misses/cpu=4,cpu=5/,cycles" -a sleep 0.

Performance counter stats for 'system wide':

144,822 instructions/cpu=0/ # 0.03 insn per cycle
4,666,114 instructions # 0.93 insn per cycle
2,583 l1d-misses
4,993,633 cycles

0.000868512 seconds time elapsed

root@number:~#

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dominique Martinet <asmadeus@codewreck.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20250403194337.40202-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ian Rogers and committed by
Arnaldo Carvalho de Melo
255f5b6d 168c7b50

+76 -17
+9
tools/perf/Documentation/perf-list.txt
··· 289 289 290 290 perf stat -e cpu/event=0,umask=0x3,percore=1/ 291 291 292 + cpu: 293 + 294 + Specifies the CPU to open the event upon. The value may be repeated to 295 + specify opening the event on multiple CPUs: 296 + 297 + 298 + perf stat -e instructions/cpu=0,cpu=2/,cycles/cpu=1,cpu=2/ -a sleep 1 299 + perf stat -e data_read/cpu=0/,data_write/cpu=1/ -a sleep 1 300 + 292 301 293 302 EVENT GROUPS 294 303 ------------
+1
tools/perf/util/evsel_config.h
··· 48 48 u32 aux_sample_size; 49 49 u64 cfg_chg; 50 50 char *str; 51 + int cpu; 51 52 } val; 52 53 bool weak; 53 54 };
+61 -15
tools/perf/util/parse-events.c
··· 7 7 #include <errno.h> 8 8 #include <sys/ioctl.h> 9 9 #include <sys/param.h> 10 + #include "cpumap.h" 10 11 #include "term.h" 11 12 #include "env.h" 12 13 #include "evlist.h" ··· 179 178 static char *get_config_name(const struct parse_events_terms *head_terms) 180 179 { 181 180 return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); 181 + } 182 + 183 + static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms) 184 + { 185 + struct parse_events_term *term; 186 + struct perf_cpu_map *cpus = NULL; 187 + 188 + if (!head_terms) 189 + return NULL; 190 + 191 + list_for_each_entry(term, &head_terms->terms, list) { 192 + if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) { 193 + struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num); 194 + 195 + perf_cpu_map__merge(&cpus, cpu); 196 + perf_cpu_map__put(cpu); 197 + } 198 + } 199 + 200 + return cpus; 182 201 } 183 202 184 203 /** ··· 464 443 bool found_supported = false; 465 444 const char *config_name = get_config_name(parsed_terms); 466 445 const char *metric_id = get_config_metric_id(parsed_terms); 446 + struct perf_cpu_map *cpus = get_config_cpu(parsed_terms); 447 + int ret = 0; 467 448 468 449 while ((pmu = perf_pmus__scan(pmu)) != NULL) { 469 450 LIST_HEAD(config_terms); 470 451 struct perf_event_attr attr; 471 - int ret; 472 452 473 453 if (parse_events__filter_pmu(parse_state, pmu)) 474 454 continue; ··· 484 462 perf_pmu__auto_merge_stats(pmu), 485 463 /*alternate_hw_config=*/PERF_COUNT_HW_MAX); 486 464 if (ret) 487 - return ret; 465 + goto out_err; 488 466 continue; 489 467 } 490 468 ··· 504 482 505 483 if (parsed_terms) { 506 484 if (config_attr(&attr, parsed_terms, parse_state->error, 507 - config_term_common)) 508 - return -EINVAL; 509 - 510 - if (get_config_terms(parsed_terms, &config_terms)) 511 - return -ENOMEM; 485 + config_term_common)) { 486 + ret = -EINVAL; 487 + goto out_err; 488 + } 489 + if (get_config_terms(parsed_terms, &config_terms)) { 490 + ret = 
-ENOMEM; 491 + goto out_err; 492 + } 512 493 } 513 494 514 495 if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, 515 496 metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, 516 - /*cpu_list=*/NULL, 517 - /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) 518 - return -ENOMEM; 497 + cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) 498 + ret = -ENOMEM; 519 499 520 500 free_config_terms(&config_terms); 501 + if (ret) 502 + goto out_err; 521 503 } 504 + out_err: 505 + perf_cpu_map__put(cpus); 522 506 return found_supported ? 0 : -EINVAL; 523 507 } 524 508 ··· 843 815 [PARSE_EVENTS__TERM_TYPE_RAW] = "raw", 844 816 [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache", 845 817 [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware", 818 + [PARSE_EVENTS__TERM_TYPE_CPU] = "cpu", 846 819 }; 847 820 if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR) 848 821 return "unknown term"; ··· 873 844 case PARSE_EVENTS__TERM_TYPE_METRIC_ID: 874 845 case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: 875 846 case PARSE_EVENTS__TERM_TYPE_PERCORE: 847 + case PARSE_EVENTS__TERM_TYPE_CPU: 876 848 return true; 877 849 case PARSE_EVENTS__TERM_TYPE_USER: 878 850 case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: ··· 1021 991 return -EINVAL; 1022 992 } 1023 993 break; 994 + case PARSE_EVENTS__TERM_TYPE_CPU: 995 + CHECK_TYPE_VAL(NUM); 996 + if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { 997 + parse_events_error__handle(err, term->err_val, 998 + strdup("too big"), 999 + NULL); 1000 + return -EINVAL; 1001 + } 1002 + break; 1024 1003 case PARSE_EVENTS__TERM_TYPE_DRV_CFG: 1025 1004 case PARSE_EVENTS__TERM_TYPE_USER: 1026 1005 case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: ··· 1157 1118 case PARSE_EVENTS__TERM_TYPE_RAW: 1158 1119 case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: 1159 1120 case PARSE_EVENTS__TERM_TYPE_HARDWARE: 1121 + case PARSE_EVENTS__TERM_TYPE_CPU: 1160 1122 default: 1161 1123 if (err) { 1162 1124 parse_events_error__handle(err, term->err_term, ··· 1292 
1252 case PARSE_EVENTS__TERM_TYPE_RAW: 1293 1253 case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: 1294 1254 case PARSE_EVENTS__TERM_TYPE_HARDWARE: 1255 + case PARSE_EVENTS__TERM_TYPE_CPU: 1295 1256 default: 1296 1257 break; 1297 1258 } ··· 1347 1306 case PARSE_EVENTS__TERM_TYPE_RAW: 1348 1307 case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: 1349 1308 case PARSE_EVENTS__TERM_TYPE_HARDWARE: 1309 + case PARSE_EVENTS__TERM_TYPE_CPU: 1350 1310 default: 1351 1311 break; 1352 1312 } ··· 1392 1350 struct perf_event_attr attr; 1393 1351 LIST_HEAD(config_terms); 1394 1352 const char *name, *metric_id; 1353 + struct perf_cpu_map *cpus; 1395 1354 int ret; 1396 1355 1397 1356 memset(&attr, 0, sizeof(attr)); ··· 1414 1371 1415 1372 name = get_config_name(head_config); 1416 1373 metric_id = get_config_metric_id(head_config); 1374 + cpus = get_config_cpu(head_config); 1417 1375 ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, 1418 - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, 1419 - /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX 1420 - ) == NULL ? -ENOMEM : 0; 1376 + metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, 1377 + cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 
0 : -ENOMEM; 1378 + perf_cpu_map__put(cpus); 1421 1379 free_config_terms(&config_terms); 1422 1380 return ret; 1423 1381 } ··· 1478 1434 LIST_HEAD(config_terms); 1479 1435 struct parse_events_terms parsed_terms; 1480 1436 bool alias_rewrote_terms = false; 1437 + struct perf_cpu_map *term_cpu = NULL; 1481 1438 1482 1439 if (verbose > 1) { 1483 1440 struct strbuf sb; ··· 1573 1528 return -EINVAL; 1574 1529 } 1575 1530 1531 + term_cpu = get_config_cpu(&parsed_terms); 1576 1532 evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, 1577 1533 get_config_name(&parsed_terms), 1578 1534 get_config_metric_id(&parsed_terms), pmu, 1579 - &config_terms, auto_merge_stats, /*cpu_list=*/NULL, 1580 - alternate_hw_config); 1535 + &config_terms, auto_merge_stats, term_cpu, alternate_hw_config); 1536 + perf_cpu_map__put(term_cpu); 1581 1537 if (!evsel) { 1582 1538 parse_events_terms__exit(&parsed_terms); 1583 1539 return -ENOMEM;
+2 -1
tools/perf/util/parse-events.h
··· 80 80 PARSE_EVENTS__TERM_TYPE_RAW, 81 81 PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, 82 82 PARSE_EVENTS__TERM_TYPE_HARDWARE, 83 - #define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1) 83 + PARSE_EVENTS__TERM_TYPE_CPU, 84 + #define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_CPU + 1) 84 85 }; 85 86 86 87 struct parse_events_term {
+1
tools/perf/util/parse-events.l
··· 335 335 aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); } 336 336 aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } 337 337 metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } 338 + cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); } 338 339 cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } 339 340 stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } 340 341 stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+2 -1
tools/perf/util/pmu.c
··· 1470 1470 break; 1471 1471 case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ 1472 1472 return -EINVAL; 1473 - case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE: 1473 + case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_CPU: 1474 1474 /* Skip non-config terms. */ 1475 1475 break; 1476 1476 default: ··· 1852 1852 "aux-output", 1853 1853 "aux-action=(pause|resume|start-paused)", 1854 1854 "aux-sample-size=number", 1855 + "cpu=number", 1855 1856 }; 1856 1857 struct perf_pmu_format *format; 1857 1858 int ret;