Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf jevents: Add metric DefaultShowEvents

Some Default group metrics require their events to be shown for
consistency with perf's previous behavior. Add a flag to indicate when
this is the case and use it in stat-display.

As the events now come from the Default metrics, remove the default hardware
and software events from perf stat.

Following this change, the default perf stat output on an Alder Lake system looks like:
```
$ perf stat -a -- sleep 1

Performance counter stats for 'system wide':

20,550 context-switches # nan cs/sec cs_per_second
TopdownL1 (cpu_core) # 9.0 % tma_bad_speculation
# 28.1 % tma_frontend_bound
TopdownL1 (cpu_core) # 29.2 % tma_backend_bound
# 33.7 % tma_retiring
6,685 page-faults # nan faults/sec page_faults_per_second
790,091,064 cpu_atom/cpu-cycles/
# nan GHz cycles_frequency (49.83%)
2,563,918,366 cpu_core/cpu-cycles/
# nan GHz cycles_frequency
# 12.3 % tma_bad_speculation
# 14.5 % tma_retiring (50.20%)
# 33.8 % tma_frontend_bound (50.24%)
76,390,322 cpu_atom/branches/ # nan M/sec branch_frequency (60.20%)
1,015,173,047 cpu_core/branches/ # nan M/sec branch_frequency
1,325 cpu-migrations # nan migrations/sec migrations_per_second
# 39.3 % tma_backend_bound (60.17%)
0.00 msec cpu-clock # 0.000 CPUs utilized
# 0.0 CPUs CPUs_utilized
554,347,072 cpu_atom/instructions/ # 0.64 insn per cycle
# 0.6 instructions insn_per_cycle (60.14%)
5,228,931,991 cpu_core/instructions/ # 2.04 insn per cycle
# 2.0 instructions insn_per_cycle
4,308,874 cpu_atom/branch-misses/ # 5.65% of all branches
# 5.6 % branch_miss_rate (49.76%)
9,890,606 cpu_core/branch-misses/ # 0.97% of all branches
# 1.0 % branch_miss_rate

1.005477803 seconds time elapsed
```

Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

authored by

Ian Rogers and committed by
Namhyung Kim
a3248b5b c7adeb09

+102 -107
+3 -39
tools/perf/builtin-stat.c
··· 1857 1857 return 0; 1858 1858 } 1859 1859 1860 - /* Add given software event to evlist without wildcarding. */ 1861 - static int parse_software_event(struct evlist *evlist, const char *event, 1862 - struct parse_events_error *err) 1863 - { 1864 - char buf[256]; 1865 - 1866 - snprintf(buf, sizeof(buf), "software/%s,name=%s/", event, event); 1867 - return parse_events(evlist, buf, err); 1868 - } 1869 - 1870 1860 /* Add legacy hardware/hardware-cache event to evlist for all core PMUs without wildcarding. */ 1871 1861 static int parse_hardware_event(struct evlist *evlist, const char *event, 1872 1862 struct parse_events_error *err) ··· 2001 2011 stat_config.topdown_level = 1; 2002 2012 2003 2013 if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) { 2004 - /* No events so add defaults. */ 2005 - const char *sw_events[] = { 2006 - target__has_cpu(&target) ? "cpu-clock" : "task-clock", 2007 - "context-switches", 2008 - "cpu-migrations", 2009 - "page-faults", 2010 - }; 2011 - const char *hw_events[] = { 2012 - "instructions", 2013 - "cycles", 2014 - "stalled-cycles-frontend", 2015 - "stalled-cycles-backend", 2016 - "branches", 2017 - "branch-misses", 2018 - }; 2019 - 2020 - for (size_t i = 0; i < ARRAY_SIZE(sw_events); i++) { 2021 - ret = parse_software_event(evlist, sw_events[i], &err); 2022 - if (ret) 2023 - goto out; 2024 - } 2025 - for (size_t i = 0; i < ARRAY_SIZE(hw_events); i++) { 2026 - ret = parse_hardware_event(evlist, hw_events[i], &err); 2027 - if (ret) 2028 - goto out; 2029 - } 2030 - 2031 2014 /* 2032 - * Add TopdownL1 metrics if they exist. To minimize 2033 - * multiplexing, don't request threshold computation. 2015 + * Add Default metrics. To minimize multiplexing, don't request 2016 + * threshold computation, but it will be computed if the events 2017 + * are present. 2034 2018 */ 2035 2019 if (metricgroup__has_metric_or_groups(pmu, "Default")) { 2036 2020 struct evlist *metric_evlist = evlist__new();
+22 -11
tools/perf/pmu-events/arch/common/common/metrics.json
··· 5 5 "MetricGroup": "Default", 6 6 "MetricName": "CPUs_utilized", 7 7 "ScaleUnit": "1CPUs", 8 - "MetricConstraint": "NO_GROUP_EVENTS" 8 + "MetricConstraint": "NO_GROUP_EVENTS", 9 + "DefaultShowEvents": "1" 9 10 }, 10 11 { 11 12 "BriefDescription": "Context switches per CPU second", ··· 14 13 "MetricGroup": "Default", 15 14 "MetricName": "cs_per_second", 16 15 "ScaleUnit": "1cs/sec", 17 - "MetricConstraint": "NO_GROUP_EVENTS" 16 + "MetricConstraint": "NO_GROUP_EVENTS", 17 + "DefaultShowEvents": "1" 18 18 }, 19 19 { 20 20 "BriefDescription": "Process migrations to a new CPU per CPU second", ··· 23 21 "MetricGroup": "Default", 24 22 "MetricName": "migrations_per_second", 25 23 "ScaleUnit": "1migrations/sec", 26 - "MetricConstraint": "NO_GROUP_EVENTS" 24 + "MetricConstraint": "NO_GROUP_EVENTS", 25 + "DefaultShowEvents": "1" 27 26 }, 28 27 { 29 28 "BriefDescription": "Page faults per CPU second", ··· 32 29 "MetricGroup": "Default", 33 30 "MetricName": "page_faults_per_second", 34 31 "ScaleUnit": "1faults/sec", 35 - "MetricConstraint": "NO_GROUP_EVENTS" 32 + "MetricConstraint": "NO_GROUP_EVENTS", 33 + "DefaultShowEvents": "1" 36 34 }, 37 35 { 38 36 "BriefDescription": "Instructions Per Cycle", ··· 41 37 "MetricGroup": "Default", 42 38 "MetricName": "insn_per_cycle", 43 39 "MetricThreshold": "insn_per_cycle < 1", 44 - "ScaleUnit": "1instructions" 40 + "ScaleUnit": "1instructions", 41 + "DefaultShowEvents": "1" 45 42 }, 46 43 { 47 44 "BriefDescription": "Max front or backend stalls per instruction", 48 45 "MetricExpr": "max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions", 49 46 "MetricGroup": "Default", 50 - "MetricName": "stalled_cycles_per_instruction" 47 + "MetricName": "stalled_cycles_per_instruction", 48 + "DefaultShowEvents": "1" 51 49 }, 52 50 { 53 51 "BriefDescription": "Frontend stalls per cycle", 54 52 "MetricExpr": "stalled\\-cycles\\-frontend / cpu\\-cycles", 55 53 "MetricGroup": "Default", 56 54 "MetricName": "frontend_cycles_idle", 
57 - "MetricThreshold": "frontend_cycles_idle > 0.1" 55 + "MetricThreshold": "frontend_cycles_idle > 0.1", 56 + "DefaultShowEvents": "1" 58 57 }, 59 58 { 60 59 "BriefDescription": "Backend stalls per cycle", 61 60 "MetricExpr": "stalled\\-cycles\\-backend / cpu\\-cycles", 62 61 "MetricGroup": "Default", 63 62 "MetricName": "backend_cycles_idle", 64 - "MetricThreshold": "backend_cycles_idle > 0.2" 63 + "MetricThreshold": "backend_cycles_idle > 0.2", 64 + "DefaultShowEvents": "1" 65 65 }, 66 66 { 67 67 "BriefDescription": "Cycles per CPU second", ··· 73 65 "MetricGroup": "Default", 74 66 "MetricName": "cycles_frequency", 75 67 "ScaleUnit": "1GHz", 76 - "MetricConstraint": "NO_GROUP_EVENTS" 68 + "MetricConstraint": "NO_GROUP_EVENTS", 69 + "DefaultShowEvents": "1" 77 70 }, 78 71 { 79 72 "BriefDescription": "Branches per CPU second", ··· 82 73 "MetricGroup": "Default", 83 74 "MetricName": "branch_frequency", 84 75 "ScaleUnit": "1000M/sec", 85 - "MetricConstraint": "NO_GROUP_EVENTS" 76 + "MetricConstraint": "NO_GROUP_EVENTS", 77 + "DefaultShowEvents": "1" 86 78 }, 87 79 { 88 80 "BriefDescription": "Branch miss rate", ··· 91 81 "MetricGroup": "Default", 92 82 "MetricName": "branch_miss_rate", 93 83 "MetricThreshold": "branch_miss_rate > 0.05", 94 - "ScaleUnit": "100%" 84 + "ScaleUnit": "100%", 85 + "DefaultShowEvents": "1" 95 86 } 96 87 ]
+54 -52
tools/perf/pmu-events/empty-pmu-events.c
··· 1303 1303 /* offset=127519 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000\000\000\000" 1304 1304 /* offset=127596 */ "uncore_sys_cmn_pmu\000" 1305 1305 /* offset=127615 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000\000\000\000" 1306 - /* offset=127758 */ "CPUs_utilized\000Default\000(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)\000\000Average CPU utilization\000\0001CPUs\000\000\000\00001" 1307 - /* offset=127943 */ "cs_per_second\000Default\000software@context\\-switches\\,name\\=context\\-switches@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Context switches per CPU second\000\0001cs/sec\000\000\000\00001" 1308 - /* offset=128175 */ "migrations_per_second\000Default\000software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Process migrations to a new CPU per CPU second\000\0001migrations/sec\000\000\000\00001" 1309 - /* offset=128434 */ "page_faults_per_second\000Default\000software@page\\-faults\\,name\\=page\\-faults@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Page faults per CPU second\000\0001faults/sec\000\000\000\00001" 1310 - /* offset=128664 */ "insn_per_cycle\000Default\000instructions / cpu\\-cycles\000insn_per_cycle < 1\000Instructions Per Cycle\000\0001instructions\000\000\000\00000" 1311 - /* offset=128776 */ "stalled_cycles_per_instruction\000Default\000max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions\000\000Max front or backend stalls per instruction\000\000\000\000\000\00000" 1312 
- /* offset=128939 */ "frontend_cycles_idle\000Default\000stalled\\-cycles\\-frontend / cpu\\-cycles\000frontend_cycles_idle > 0.1\000Frontend stalls per cycle\000\000\000\000\000\00000" 1313 - /* offset=129068 */ "backend_cycles_idle\000Default\000stalled\\-cycles\\-backend / cpu\\-cycles\000backend_cycles_idle > 0.2\000Backend stalls per cycle\000\000\000\000\000\00000" 1314 - /* offset=129193 */ "cycles_frequency\000Default\000cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Cycles per CPU second\000\0001GHz\000\000\000\00001" 1315 - /* offset=129368 */ "branch_frequency\000Default\000branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Branches per CPU second\000\0001000M/sec\000\000\000\00001" 1316 - /* offset=129547 */ "branch_miss_rate\000Default\000branch\\-misses / branches\000branch_miss_rate > 0.05\000Branch miss rate\000\000100%\000\000\000\00000" 1317 - /* offset=129650 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" 1318 - /* offset=129672 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" 1319 - /* offset=129735 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" 1320 - /* offset=129901 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" 1321 - /* offset=129965 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" 1322 - /* offset=130032 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" 1323 - /* offset=130103 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + 
l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" 1324 - /* offset=130197 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" 1325 - /* offset=130331 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" 1326 - /* offset=130395 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" 1327 - /* offset=130463 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" 1328 - /* offset=130533 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" 1329 - /* offset=130555 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" 1330 - /* offset=130577 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" 1331 - /* offset=130597 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" 1306 + /* offset=127758 */ "CPUs_utilized\000Default\000(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)\000\000Average CPU utilization\000\0001CPUs\000\000\000\000011" 1307 + /* offset=127944 */ "cs_per_second\000Default\000software@context\\-switches\\,name\\=context\\-switches@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Context switches per CPU second\000\0001cs/sec\000\000\000\000011" 1308 + /* offset=128177 */ "migrations_per_second\000Default\000software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Process migrations to a new CPU per CPU second\000\0001migrations/sec\000\000\000\000011" 1309 + /* offset=128437 */ 
"page_faults_per_second\000Default\000software@page\\-faults\\,name\\=page\\-faults@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Page faults per CPU second\000\0001faults/sec\000\000\000\000011" 1310 + /* offset=128668 */ "insn_per_cycle\000Default\000instructions / cpu\\-cycles\000insn_per_cycle < 1\000Instructions Per Cycle\000\0001instructions\000\000\000\000001" 1311 + /* offset=128781 */ "stalled_cycles_per_instruction\000Default\000max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions\000\000Max front or backend stalls per instruction\000\000\000\000\000\000001" 1312 + /* offset=128945 */ "frontend_cycles_idle\000Default\000stalled\\-cycles\\-frontend / cpu\\-cycles\000frontend_cycles_idle > 0.1\000Frontend stalls per cycle\000\000\000\000\000\000001" 1313 + /* offset=129075 */ "backend_cycles_idle\000Default\000stalled\\-cycles\\-backend / cpu\\-cycles\000backend_cycles_idle > 0.2\000Backend stalls per cycle\000\000\000\000\000\000001" 1314 + /* offset=129201 */ "cycles_frequency\000Default\000cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Cycles per CPU second\000\0001GHz\000\000\000\000011" 1315 + /* offset=129377 */ "branch_frequency\000Default\000branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Branches per CPU second\000\0001000M/sec\000\000\000\000011" 1316 + /* offset=129557 */ "branch_miss_rate\000Default\000branch\\-misses / branches\000branch_miss_rate > 0.05\000Branch miss rate\000\000100%\000\000\000\000001" 1317 + /* offset=129661 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\000000" 1318 + /* offset=129684 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\000000" 1319 + /* offset=129748 */ 
"Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\000000" 1320 + /* offset=129915 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000" 1321 + /* offset=129980 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000" 1322 + /* offset=130048 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\000000" 1323 + /* offset=130120 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\000000" 1324 + /* offset=130215 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\000000" 1325 + /* offset=130350 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\000000" 1326 + /* offset=130415 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\000000" 1327 + /* offset=130484 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\000000" 1328 + /* offset=130555 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\000000" 1329 + /* offset=130578 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\000000" 1330 + /* offset=130601 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\000000" 1331 + /* offset=130622 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\000000" 1332 1332 ; 1333 1333 1334 1334 static const struct compact_pmu_event pmu_events__common_default_core[] = { ··· 2615 2615 }; 2616 2616 2617 2617 static const struct compact_pmu_event pmu_metrics__common_default_core[] = { 2618 - { 127758 }, /* 
CPUs_utilized\000Default\000(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)\000\000Average CPU utilization\000\0001CPUs\000\000\000\00001 */ 2619 - { 129068 }, /* backend_cycles_idle\000Default\000stalled\\-cycles\\-backend / cpu\\-cycles\000backend_cycles_idle > 0.2\000Backend stalls per cycle\000\000\000\000\000\00000 */ 2620 - { 129368 }, /* branch_frequency\000Default\000branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Branches per CPU second\000\0001000M/sec\000\000\000\00001 */ 2621 - { 129547 }, /* branch_miss_rate\000Default\000branch\\-misses / branches\000branch_miss_rate > 0.05\000Branch miss rate\000\000100%\000\000\000\00000 */ 2622 - { 127943 }, /* cs_per_second\000Default\000software@context\\-switches\\,name\\=context\\-switches@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Context switches per CPU second\000\0001cs/sec\000\000\000\00001 */ 2623 - { 129193 }, /* cycles_frequency\000Default\000cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Cycles per CPU second\000\0001GHz\000\000\000\00001 */ 2624 - { 128939 }, /* frontend_cycles_idle\000Default\000stalled\\-cycles\\-frontend / cpu\\-cycles\000frontend_cycles_idle > 0.1\000Frontend stalls per cycle\000\000\000\000\000\00000 */ 2625 - { 128664 }, /* insn_per_cycle\000Default\000instructions / cpu\\-cycles\000insn_per_cycle < 1\000Instructions Per Cycle\000\0001instructions\000\000\000\00000 */ 2626 - { 128175 }, /* migrations_per_second\000Default\000software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Process migrations to a new CPU per CPU 
second\000\0001migrations/sec\000\000\000\00001 */ 2627 - { 128434 }, /* page_faults_per_second\000Default\000software@page\\-faults\\,name\\=page\\-faults@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Page faults per CPU second\000\0001faults/sec\000\000\000\00001 */ 2628 - { 128776 }, /* stalled_cycles_per_instruction\000Default\000max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions\000\000Max front or backend stalls per instruction\000\000\000\000\000\00000 */ 2618 + { 127758 }, /* CPUs_utilized\000Default\000(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)\000\000Average CPU utilization\000\0001CPUs\000\000\000\000011 */ 2619 + { 129075 }, /* backend_cycles_idle\000Default\000stalled\\-cycles\\-backend / cpu\\-cycles\000backend_cycles_idle > 0.2\000Backend stalls per cycle\000\000\000\000\000\000001 */ 2620 + { 129377 }, /* branch_frequency\000Default\000branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Branches per CPU second\000\0001000M/sec\000\000\000\000011 */ 2621 + { 129557 }, /* branch_miss_rate\000Default\000branch\\-misses / branches\000branch_miss_rate > 0.05\000Branch miss rate\000\000100%\000\000\000\000001 */ 2622 + { 127944 }, /* cs_per_second\000Default\000software@context\\-switches\\,name\\=context\\-switches@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Context switches per CPU second\000\0001cs/sec\000\000\000\000011 */ 2623 + { 129201 }, /* cycles_frequency\000Default\000cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Cycles per CPU second\000\0001GHz\000\000\000\000011 */ 2624 + { 128945 }, /* 
frontend_cycles_idle\000Default\000stalled\\-cycles\\-frontend / cpu\\-cycles\000frontend_cycles_idle > 0.1\000Frontend stalls per cycle\000\000\000\000\000\000001 */ 2625 + { 128668 }, /* insn_per_cycle\000Default\000instructions / cpu\\-cycles\000insn_per_cycle < 1\000Instructions Per Cycle\000\0001instructions\000\000\000\000001 */ 2626 + { 128177 }, /* migrations_per_second\000Default\000software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Process migrations to a new CPU per CPU second\000\0001migrations/sec\000\000\000\000011 */ 2627 + { 128437 }, /* page_faults_per_second\000Default\000software@page\\-faults\\,name\\=page\\-faults@ * 1e9 / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)\000\000Page faults per CPU second\000\0001faults/sec\000\000\000\000011 */ 2628 + { 128781 }, /* stalled_cycles_per_instruction\000Default\000max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions\000\000Max front or backend stalls per instruction\000\000\000\000\000\000001 */ 2629 2629 2630 2630 }; 2631 2631 ··· 2698 2698 }; 2699 2699 2700 2700 static const struct compact_pmu_event pmu_metrics__test_soc_cpu_default_core[] = { 2701 - { 129650 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ 2702 - { 130331 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ 2703 - { 130103 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ 2704 - { 130197 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ 2705 - { 130395 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 
*/ 2706 - { 130463 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ 2707 - { 129735 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ 2708 - { 129672 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ 2709 - { 130597 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ 2710 - { 130533 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ 2711 - { 130555 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ 2712 - { 130577 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ 2713 - { 130032 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ 2714 - { 129901 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ 2715 - { 129965 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ 2701 + { 129661 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\000000 */ 2702 + { 130350 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\000000 */ 2703 + { 130120 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\000000 */ 2704 + { 130215 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\000000 */ 2705 + { 130415 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\000000 */ 2706 + { 130484 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\000000 */ 
2707 + { 129748 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\000000 */ 2708 + { 129684 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\000000 */ 2709 + { 130622 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\000000 */ 2710 + { 130555 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\000000 */ 2711 + { 130578 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\000000 */ 2712 + { 130601 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\000000 */ 2713 + { 130048 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\000000 */ 2714 + { 129915 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000 */ 2715 + { 129980 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\000000 */ 2716 2716 2717 2717 }; 2718 2718 ··· 2894 2894 pm->aggr_mode = *p - '0'; 2895 2895 p++; 2896 2896 pm->event_grouping = *p - '0'; 2897 + p++; 2898 + pm->default_show_events = *p - '0'; 2897 2899 } 2898 2900 2899 2901 static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
+5 -2
tools/perf/pmu-events/jevents.py
··· 58 58 _json_metric_attributes = [ 59 59 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold', 60 60 'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group', 61 - 'default_metricgroup_name', 'aggr_mode', 'event_grouping' 61 + 'default_metricgroup_name', 'aggr_mode', 'event_grouping', 62 + 'default_show_events' 62 63 ] 63 64 # Attributes that are bools or enum int values, encoded as '0', '1',... 64 - _json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg'] 65 + _json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg', 66 + 'default_show_events'] 65 67 66 68 def removesuffix(s: str, suffix: str) -> str: 67 69 """Remove the suffix from a string ··· 358 356 self.metricgroup_no_group = jd.get('MetricgroupNoGroup') 359 357 self.default_metricgroup_name = jd.get('DefaultMetricgroupName') 360 358 self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint')) 359 + self.default_show_events = jd.get('DefaultShowEvents') 361 360 self.metric_expr = None 362 361 if 'MetricExpr' in jd: 363 362 self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify()
+1
tools/perf/pmu-events/pmu-events.h
··· 74 74 const char *default_metricgroup_name; 75 75 enum aggr_mode_class aggr_mode; 76 76 enum metric_event_groups event_grouping; 77 + bool default_show_events; 77 78 }; 78 79 79 80 struct pmu_events_table;
+1
tools/perf/util/evsel.h
··· 122 122 bool reset_group; 123 123 bool needs_auxtrace_mmap; 124 124 bool default_metricgroup; /* A member of the Default metricgroup */ 125 + bool default_show_events; /* If a default group member, show the event */ 125 126 bool needs_uniquify; 126 127 struct hashmap *per_pkg_mask; 127 128 int err;
+13
tools/perf/util/metricgroup.c
··· 152 152 * Should events of the metric be grouped? 153 153 */ 154 154 bool group_events; 155 + /** Show events even if in the Default metric group. */ 156 + bool default_show_events; 155 157 /** 156 158 * Parsed events for the metric. Optional as events may be taken from a 157 159 * different metric whose group contains all the IDs necessary for this ··· 257 255 m->pctx->sctx.runtime = runtime; 258 256 m->pctx->sctx.system_wide = system_wide; 259 257 m->group_events = !metric_no_group && metric__group_events(pm, metric_no_threshold); 258 + m->default_show_events = pm->default_show_events; 260 259 m->metric_refs = NULL; 261 260 m->evlist = NULL; 262 261 ··· 1515 1512 free(expr); 1516 1513 free(metric_events); 1517 1514 goto out; 1515 + } 1516 + if (m->default_show_events) { 1517 + struct evsel *pos; 1518 + 1519 + for (int i = 0; metric_events[i]; i++) 1520 + metric_events[i]->default_show_events = true; 1521 + evlist__for_each_entry(metric_evlist, pos) { 1522 + if (pos->metric_leader && pos->metric_leader->default_show_events) 1523 + pos->default_show_events = true; 1524 + } 1518 1525 } 1519 1526 expr->metric_threshold = m->metric_threshold; 1520 1527 expr->metric_unit = m->metric_unit;
+2 -2
tools/perf/util/stat-display.c
··· 872 872 out.ctx = os; 873 873 out.force_header = false; 874 874 875 - if (!config->metric_only && !counter->default_metricgroup) { 875 + if (!config->metric_only && (!counter->default_metricgroup || counter->default_show_events)) { 876 876 abs_printout(config, os, os->id, os->aggr_nr, counter, uval, ok); 877 877 878 878 print_noise(config, os, counter, noise, /*before_metric=*/true); ··· 880 880 } 881 881 882 882 if (ok) { 883 - if (!config->metric_only && counter->default_metricgroup) { 883 + if (!config->metric_only && counter->default_metricgroup && !counter->default_show_events) { 884 884 void *from = NULL; 885 885 886 886 aggr_printout(config, os, os->evsel, os->id, os->aggr_nr);
+1 -1
tools/perf/util/stat-shadow.c
··· 665 665 if (strcmp(name, mexp->default_metricgroup_name)) 666 666 return (void *)mexp; 667 667 /* Only print the name of the metricgroup once */ 668 - if (!header_printed) { 668 + if (!header_printed && !evsel->default_show_events) { 669 669 header_printed = true; 670 670 perf_stat__print_metricgroup_header(config, evsel, ctxp, 671 671 name, out);