Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf parse-events: Use wildcard processing to set an event to merge into

The merge stat code fails for uncore events if they are repeated twice,
for example `perf stat -e clockticks,clockticks -I 1000`, because the
counts of the second set of uncore events will be merged into the first
set's counters.

Reimplement the logic with a first_wildcard_match pointer so that later
events correctly merge into the first wildcard event that they will be
aggregated into.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Chun-Tse Shao <ctshao@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dr. David Alan Gilbert <linux@treblig.org>
Cc: Howard Chu <howardchu95@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Levi Yun <yeoreum.yun@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Weilin Wang <weilin.wang@intel.com>
Link: https://lore.kernel.org/r/20250513215401.2315949-3-ctshao@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ian Rogers and committed by
Arnaldo Carvalho de Melo
137359b7 7d45f402

+88 -72
+11 -7
tools/perf/util/evsel.c
··· 552 552 553 553 evsel->exclude_GH = orig->exclude_GH; 554 554 evsel->sample_read = orig->sample_read; 555 - evsel->auto_merge_stats = orig->auto_merge_stats; 556 555 evsel->collect_stat = orig->collect_stat; 557 556 evsel->weak_group = orig->weak_group; 558 557 evsel->use_config_name = orig->use_config_name; 559 558 evsel->pmu = orig->pmu; 559 + evsel->first_wildcard_match = orig->first_wildcard_match; 560 560 561 561 if (evsel__copy_config_terms(evsel, orig) < 0) 562 562 goto out_err; ··· 3964 3964 return true; 3965 3965 } 3966 3966 3967 - if (counter->merged_stat) { 3968 - /* Counter won't be shown. */ 3969 - return false; 3970 - } 3971 - 3972 3967 if (counter->use_config_name || counter->is_libpfm_event) { 3973 3968 /* Original name will be used. */ 3974 3969 return false; ··· 3992 3997 return true; 3993 3998 } 3994 3999 4000 + if (counter->first_wildcard_match != NULL) { 4001 + /* 4002 + * If stats are merged then only the first_wildcard_match is 4003 + * displayed, there is no need to uniquify this evsel as the 4004 + * name won't be shown. 4005 + */ 4006 + return false; 4007 + } 4008 + 3995 4009 /* 3996 4010 * Do other non-merged events in the evlist have the same name? If so 3997 4011 * uniquify is necessary. 3998 4012 */ 3999 4013 evlist__for_each_entry(counter->evlist, evsel) { 4000 - if (evsel == counter || evsel->merged_stat || evsel->pmu == counter->pmu) 4014 + if (evsel == counter || evsel->first_wildcard_match || evsel->pmu == counter->pmu) 4001 4015 continue; 4002 4016 4003 4017 if (evsel__name_is(counter, evsel__name(evsel))) {
+5 -2
tools/perf/util/evsel.h
··· 70 70 const char *unit; 71 71 struct cgroup *cgrp; 72 72 const char *metric_id; 73 + /* 74 + * This point to the first evsel with the same name, intended to store the 75 + * aggregated counts in aggregation mode. 76 + */ 77 + struct evsel *first_wildcard_match; 73 78 /* parse modifier helper */ 74 79 int exclude_GH; 75 80 int sample_read; ··· 83 78 bool percore; 84 79 bool precise_max; 85 80 bool is_libpfm_event; 86 - bool auto_merge_stats; 87 81 bool collect_stat; 88 82 bool weak_group; 89 83 bool bpf_counter; ··· 119 115 bool ignore_missing_thread; 120 116 bool forced_leader; 121 117 bool cmdline_group_boundary; 122 - bool merged_stat; 123 118 bool reset_group; 124 119 bool errored; 125 120 bool needs_auxtrace_mmap;
+60 -24
tools/perf/util/parse-events.c
··· 250 250 struct perf_event_attr *attr, 251 251 bool init_attr, 252 252 const char *name, const char *metric_id, struct perf_pmu *pmu, 253 - struct list_head *config_terms, bool auto_merge_stats, 253 + struct list_head *config_terms, struct evsel *first_wildcard_match, 254 254 struct perf_cpu_map *cpu_list, u64 alternate_hw_config) 255 255 { 256 256 struct evsel *evsel; 257 257 bool is_pmu_core; 258 258 struct perf_cpu_map *cpus; 259 + 260 + /* 261 + * Ensure the first_wildcard_match's PMU matches that of the new event 262 + * being added. Otherwise try to match with another event further down 263 + * the evlist. 264 + */ 265 + if (first_wildcard_match) { 266 + struct evsel *pos = list_prev_entry(first_wildcard_match, core.node); 267 + 268 + first_wildcard_match = NULL; 269 + list_for_each_entry_continue(pos, list, core.node) { 270 + if (perf_pmu__name_no_suffix_match(pos->pmu, pmu->name)) { 271 + first_wildcard_match = pos; 272 + break; 273 + } 274 + if (pos->pmu->is_core && (!pmu || pmu->is_core)) { 275 + first_wildcard_match = pos; 276 + break; 277 + } 278 + } 279 + } 259 280 260 281 if (pmu) { 261 282 is_pmu_core = pmu->is_core; ··· 314 293 evsel->core.own_cpus = perf_cpu_map__get(cpus); 315 294 evsel->core.requires_cpu = pmu ? 
pmu->is_uncore : false; 316 295 evsel->core.is_pmu_core = is_pmu_core; 317 - evsel->auto_merge_stats = auto_merge_stats; 318 296 evsel->pmu = pmu; 319 297 evsel->alternate_hw_config = alternate_hw_config; 298 + evsel->first_wildcard_match = first_wildcard_match; 320 299 321 300 if (name) 322 301 evsel->name = strdup(name); ··· 339 318 { 340 319 return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, 341 320 metric_id, pmu, /*config_terms=*/NULL, 342 - /*auto_merge_stats=*/false, /*cpu_list=*/NULL, 321 + /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL, 343 322 /*alternate_hw_config=*/PERF_COUNT_HW_MAX); 344 323 } 345 324 ··· 350 329 { 351 330 return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, 352 331 /*pmu=*/NULL, config_terms, 353 - /*auto_merge_stats=*/false, /*cpu_list=*/NULL, 332 + /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL, 354 333 alternate_hw_config) ? 0 : -ENOMEM; 355 334 } 356 335 ··· 475 454 static int parse_events_add_pmu(struct parse_events_state *parse_state, 476 455 struct list_head *list, struct perf_pmu *pmu, 477 456 const struct parse_events_terms *const_parsed_terms, 478 - bool auto_merge_stats, u64 alternate_hw_config); 457 + struct evsel *first_wildcard_match, u64 alternate_hw_config); 479 458 480 459 int parse_events_add_cache(struct list_head *list, int *idx, const char *name, 481 460 struct parse_events_state *parse_state, ··· 487 466 const char *metric_id = get_config_metric_id(parsed_terms); 488 467 struct perf_cpu_map *cpus = get_config_cpu(parsed_terms); 489 468 int ret = 0; 469 + struct evsel *first_wildcard_match = NULL; 490 470 491 471 while ((pmu = perf_pmus__scan(pmu)) != NULL) { 492 472 LIST_HEAD(config_terms); ··· 503 481 */ 504 482 ret = parse_events_add_pmu(parse_state, list, pmu, 505 483 parsed_terms, 506 - perf_pmu__auto_merge_stats(pmu), 484 + first_wildcard_match, 507 485 /*alternate_hw_config=*/PERF_COUNT_HW_MAX); 508 486 if (ret) 509 487 goto out_err; 488 + if 
(first_wildcard_match == NULL) 489 + first_wildcard_match = 490 + container_of(list->prev, struct evsel, core.node); 510 491 continue; 511 492 } 512 493 ··· 540 515 } 541 516 542 517 if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, 543 - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, 518 + metric_id, pmu, &config_terms, first_wildcard_match, 544 519 cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) 545 520 ret = -ENOMEM; 546 521 522 + if (first_wildcard_match == NULL) 523 + first_wildcard_match = container_of(list->prev, struct evsel, core.node); 547 524 free_config_terms(&config_terms); 548 525 if (ret) 549 526 goto out_err; ··· 1414 1387 static int __parse_events_add_numeric(struct parse_events_state *parse_state, 1415 1388 struct list_head *list, 1416 1389 struct perf_pmu *pmu, u32 type, u32 extended_type, 1417 - u64 config, const struct parse_events_terms *head_config) 1390 + u64 config, const struct parse_events_terms *head_config, 1391 + struct evsel *first_wildcard_match) 1418 1392 { 1419 1393 struct perf_event_attr attr; 1420 1394 LIST_HEAD(config_terms); ··· 1444 1416 metric_id = get_config_metric_id(head_config); 1445 1417 cpus = get_config_cpu(head_config); 1446 1418 ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, 1447 - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, 1419 + metric_id, pmu, &config_terms, first_wildcard_match, 1448 1420 cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 0 : -ENOMEM; 1449 1421 perf_cpu_map__put(cpus); 1450 1422 free_config_terms(&config_terms); ··· 1462 1434 1463 1435 /* Wildcards on numeric values are only supported by core PMUs. 
*/ 1464 1436 if (wildcard && perf_pmus__supports_extended_type()) { 1437 + struct evsel *first_wildcard_match = NULL; 1465 1438 while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { 1466 1439 int ret; 1467 1440 ··· 1472 1443 1473 1444 ret = __parse_events_add_numeric(parse_state, list, pmu, 1474 1445 type, pmu->type, 1475 - config, head_config); 1446 + config, head_config, 1447 + first_wildcard_match); 1476 1448 if (ret) 1477 1449 return ret; 1450 + if (first_wildcard_match == NULL) 1451 + first_wildcard_match = 1452 + container_of(list->prev, struct evsel, core.node); 1478 1453 } 1479 1454 if (found_supported) 1480 1455 return 0; 1481 1456 } 1482 1457 return __parse_events_add_numeric(parse_state, list, perf_pmus__find_by_type(type), 1483 - type, /*extended_type=*/0, config, head_config); 1458 + type, /*extended_type=*/0, config, head_config, 1459 + /*first_wildcard_match=*/NULL); 1484 1460 } 1485 1461 1486 1462 static bool config_term_percore(struct list_head *config_terms) ··· 1503 1469 static int parse_events_add_pmu(struct parse_events_state *parse_state, 1504 1470 struct list_head *list, struct perf_pmu *pmu, 1505 1471 const struct parse_events_terms *const_parsed_terms, 1506 - bool auto_merge_stats, u64 alternate_hw_config) 1472 + struct evsel *first_wildcard_match, u64 alternate_hw_config) 1507 1473 { 1508 1474 struct perf_event_attr attr; 1509 1475 struct perf_pmu_info info; ··· 1540 1506 evsel = __add_event(list, &parse_state->idx, &attr, 1541 1507 /*init_attr=*/true, /*name=*/NULL, 1542 1508 /*metric_id=*/NULL, pmu, 1543 - /*config_terms=*/NULL, auto_merge_stats, 1509 + /*config_terms=*/NULL, first_wildcard_match, 1544 1510 /*cpu_list=*/NULL, alternate_hw_config); 1545 1511 return evsel ? 
0 : -ENOMEM; 1546 1512 } ··· 1611 1577 evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, 1612 1578 get_config_name(&parsed_terms), 1613 1579 get_config_metric_id(&parsed_terms), pmu, 1614 - &config_terms, auto_merge_stats, term_cpu, alternate_hw_config); 1580 + &config_terms, first_wildcard_match, term_cpu, alternate_hw_config); 1615 1581 perf_cpu_map__put(term_cpu); 1616 1582 if (!evsel) { 1617 1583 parse_events_terms__exit(&parsed_terms); ··· 1648 1614 int ok = 0; 1649 1615 const char *config; 1650 1616 struct parse_events_terms parsed_terms; 1617 + struct evsel *first_wildcard_match = NULL; 1651 1618 1652 1619 *listp = NULL; 1653 1620 ··· 1681 1646 INIT_LIST_HEAD(list); 1682 1647 1683 1648 while ((pmu = perf_pmus__scan(pmu)) != NULL) { 1684 - bool auto_merge_stats; 1685 - 1686 1649 if (parse_events__filter_pmu(parse_state, pmu)) 1687 1650 continue; 1688 1651 1689 1652 if (!perf_pmu__have_event(pmu, event_name)) 1690 1653 continue; 1691 1654 1692 - auto_merge_stats = perf_pmu__auto_merge_stats(pmu); 1693 1655 if (!parse_events_add_pmu(parse_state, list, pmu, 1694 - &parsed_terms, auto_merge_stats, hw_config)) { 1656 + &parsed_terms, first_wildcard_match, hw_config)) { 1695 1657 struct strbuf sb; 1696 1658 1697 1659 strbuf_init(&sb, /*hint=*/ 0); ··· 1697 1665 strbuf_release(&sb); 1698 1666 ok++; 1699 1667 } 1668 + if (first_wildcard_match == NULL) 1669 + first_wildcard_match = container_of(list->prev, struct evsel, core.node); 1700 1670 } 1701 1671 1702 1672 if (parse_state->fake_pmu) { 1703 1673 if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms, 1704 - /*auto_merge_stats=*/true, hw_config)) { 1674 + first_wildcard_match, hw_config)) { 1705 1675 struct strbuf sb; 1706 1676 1707 1677 strbuf_init(&sb, /*hint=*/ 0); ··· 1734 1700 struct perf_pmu *pmu; 1735 1701 int ok = 0; 1736 1702 char *help; 1703 + struct evsel *first_wildcard_match = NULL; 1737 1704 1738 1705 *listp = malloc(sizeof(**listp)); 1739 1706 if 
(!*listp) ··· 1745 1710 /* Attempt to add to list assuming event_or_pmu is a PMU name. */ 1746 1711 pmu = perf_pmus__find(event_or_pmu); 1747 1712 if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, 1748 - /*auto_merge_stats=*/false, 1713 + first_wildcard_match, 1749 1714 /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) 1750 1715 return 0; 1751 1716 1752 1717 if (parse_state->fake_pmu) { 1753 1718 if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(), 1754 1719 const_parsed_terms, 1755 - /*auto_merge_stats=*/false, 1720 + first_wildcard_match, 1756 1721 /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) 1757 1722 return 0; 1758 1723 } ··· 1762 1727 while ((pmu = perf_pmus__scan(pmu)) != NULL) { 1763 1728 if (!parse_events__filter_pmu(parse_state, pmu) && 1764 1729 perf_pmu__wildcard_match(pmu, event_or_pmu)) { 1765 - bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu); 1766 - 1767 1730 if (!parse_events_add_pmu(parse_state, *listp, pmu, 1768 1731 const_parsed_terms, 1769 - auto_merge_stats, 1732 + first_wildcard_match, 1770 1733 /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) { 1771 1734 ok++; 1772 1735 parse_state->wild_card_pmus = true; 1773 1736 } 1737 + if (first_wildcard_match == NULL) 1738 + first_wildcard_match = 1739 + container_of((*listp)->prev, struct evsel, core.node); 1774 1740 } 1775 1741 } 1776 1742 if (ok)
+9 -2
tools/perf/util/stat-display.c
··· 1002 1002 os->evsel = counter; 1003 1003 1004 1004 /* Skip already merged uncore/hybrid events */ 1005 - if (counter->merged_stat) 1006 - return; 1005 + if (config->aggr_mode != AGGR_NONE) { 1006 + if (evsel__is_hybrid(counter)) { 1007 + if (config->hybrid_merge && counter->first_wildcard_match != NULL) 1008 + return; 1009 + } else { 1010 + if (counter->first_wildcard_match != NULL) 1011 + return; 1012 + } 1013 + } 1007 1014 1008 1015 val = aggr->counts.val; 1009 1016 ena = aggr->counts.ena;
+3 -37
tools/perf/util/stat.c
··· 535 535 536 536 return 0; 537 537 } 538 - /* 539 - * Events should have the same name, scale, unit, cgroup but on different core 540 - * PMUs or on different but matching uncore PMUs. 541 - */ 542 - static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) 543 - { 544 - if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b))) 545 - return false; 546 - 547 - if (evsel_a->scale != evsel_b->scale) 548 - return false; 549 - 550 - if (evsel_a->cgrp != evsel_b->cgrp) 551 - return false; 552 - 553 - if (strcmp(evsel_a->unit, evsel_b->unit)) 554 - return false; 555 - 556 - if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) 557 - return false; 558 - 559 - if (evsel_a->pmu == evsel_b->pmu || evsel_a->pmu == NULL || evsel_b->pmu == NULL) 560 - return false; 561 - 562 - if (evsel_a->pmu->is_core) 563 - return evsel_b->pmu->is_core; 564 - 565 - return perf_pmu__name_no_suffix_match(evsel_a->pmu, evsel_b->pmu->name); 566 - } 567 538 568 539 static void evsel__merge_aliases(struct evsel *evsel) 569 540 { ··· 543 572 544 573 alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node); 545 574 list_for_each_entry_continue(alias, &evlist->core.entries, core.node) { 546 - /* Merge the same events on different PMUs. */ 547 - if (evsel__is_alias(evsel, alias)) { 575 + if (alias->first_wildcard_match == evsel) { 576 + /* Merge the same events on different PMUs. */ 548 577 evsel__merge_aggr_counters(evsel, alias); 549 - alias->merged_stat = true; 550 578 } 551 579 } 552 580 } ··· 558 588 559 589 static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config) 560 590 { 561 - /* this evsel is already merged */ 562 - if (evsel->merged_stat) 563 - return; 564 - 565 - if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config)) 591 + if (!evsel->pmu || !evsel->pmu->is_core || evsel__should_merge_hybrid(evsel, config)) 566 592 evsel__merge_aliases(evsel); 567 593 } 568 594