Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf metricgroup: Support multiple events for metricgroup

Some uncore metrics don't work as expected. For example, on
cascadelakex:

root@lkp-csl-2sp2:~# perf stat -M UNC_M_PMM_BANDWIDTH.TOTAL -a -- sleep 1

Performance counter stats for 'system wide':

1841092 unc_m_pmm_rpq_inserts
3680816 unc_m_pmm_wpq_inserts

1.001775055 seconds time elapsed

root@lkp-csl-2sp2:~# perf stat -M UNC_M_PMM_READ_LATENCY -a -- sleep 1

Performance counter stats for 'system wide':

860649746 unc_m_pmm_rpq_occupancy.all
1840557 unc_m_pmm_rpq_inserts
12790627455 unc_m_clockticks

1.001773348 seconds time elapsed

No metrics 'UNC_M_PMM_BANDWIDTH.TOTAL' or 'UNC_M_PMM_READ_LATENCY' are
reported.

The issue is that the case of an alias expanding to multiple events is
not supported, which is typical for uncore events (see the comments in
find_evsel_group()).

For UNC_M_PMM_BANDWIDTH.TOTAL in above example, the expanded event group
is '{unc_m_pmm_rpq_inserts,unc_m_pmm_wpq_inserts}:W', but the actual
events passed to find_evsel_group are:

unc_m_pmm_rpq_inserts
unc_m_pmm_rpq_inserts
unc_m_pmm_rpq_inserts
unc_m_pmm_rpq_inserts
unc_m_pmm_rpq_inserts
unc_m_pmm_rpq_inserts
unc_m_pmm_wpq_inserts
unc_m_pmm_wpq_inserts
unc_m_pmm_wpq_inserts
unc_m_pmm_wpq_inserts
unc_m_pmm_wpq_inserts
unc_m_pmm_wpq_inserts

This multiple-events case is not supported well.

This patch introduces a new field 'metric_leader' in struct evsel. The
first event is considered the metric leader. The remaining events with
the same name point to the first event via their metric_leader field in
struct evsel.

This design allows the counting results of all same-named events to be
added to the first event in the group (the metric_leader).

With this patch,

root@lkp-csl-2sp2:~# perf stat -M UNC_M_PMM_BANDWIDTH.TOTAL -a -- sleep 1

Performance counter stats for 'system wide':

1842108 unc_m_pmm_rpq_inserts # 337.2 MB/sec UNC_M_PMM_BANDWIDTH.TOTAL
3682209 unc_m_pmm_wpq_inserts

1.001819706 seconds time elapsed

root@lkp-csl-2sp2:~# perf stat -M UNC_M_PMM_READ_LATENCY -a -- sleep 1

Performance counter stats for 'system wide':

861970685 unc_m_pmm_rpq_occupancy.all # 219.4 ns UNC_M_PMM_READ_LATENCY
1842772 unc_m_pmm_rpq_inserts
12790196356 unc_m_clockticks

1.001749103 seconds time elapsed

Now we can see the correct metrics 'UNC_M_PMM_BANDWIDTH.TOTAL' and
'UNC_M_PMM_READ_LATENCY'.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20190828055932.8269-5-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Jin Yao and committed by
Arnaldo Carvalho de Melo
f01642e4 287f2649

+68 -44
+1
tools/perf/util/evsel.h
··· 168 168 const char * metric_expr; 169 169 const char * metric_name; 170 170 struct evsel **metric_events; 171 + struct evsel *metric_leader; 171 172 bool collect_stat; 172 173 bool weak_group; 173 174 bool percore;
+44 -40
tools/perf/util/metricgroup.c
··· 90 90 const char *metric_unit; 91 91 }; 92 92 93 - static bool record_evsel(int *ind, struct evsel **start, 94 - int idnum, 95 - struct evsel **metric_events, 96 - struct evsel *ev) 97 - { 98 - metric_events[*ind] = ev; 99 - if (*ind == 0) 100 - *start = ev; 101 - if (++*ind == idnum) { 102 - metric_events[*ind] = NULL; 103 - return true; 104 - } 105 - return false; 106 - } 107 - 108 93 static struct evsel *find_evsel_group(struct evlist *perf_evlist, 109 94 const char **ids, 110 95 int idnum, 111 96 struct evsel **metric_events) 112 97 { 113 - struct evsel *ev, *start = NULL; 114 - int ind = 0; 98 + struct evsel *ev; 99 + int i = 0; 100 + bool leader_found; 115 101 116 102 evlist__for_each_entry (perf_evlist, ev) { 117 - if (ev->collect_stat) 118 - continue; 119 - if (!strcmp(ev->name, ids[ind])) { 120 - if (record_evsel(&ind, &start, idnum, 121 - metric_events, ev)) 122 - return start; 103 + if (!strcmp(ev->name, ids[i])) { 104 + if (!metric_events[i]) 105 + metric_events[i] = ev; 123 106 } else { 124 - /* 125 - * We saw some other event that is not 126 - * in our list of events. Discard 127 - * the whole match and start again. 128 - */ 129 - ind = 0; 130 - start = NULL; 131 - if (!strcmp(ev->name, ids[ind])) { 132 - if (record_evsel(&ind, &start, idnum, 133 - metric_events, ev)) 134 - return start; 107 + if (++i == idnum) { 108 + /* Discard the whole match and start again */ 109 + i = 0; 110 + memset(metric_events, 0, 111 + sizeof(struct evsel *) * idnum); 112 + continue; 113 + } 114 + 115 + if (!strcmp(ev->name, ids[i])) 116 + metric_events[i] = ev; 117 + else { 118 + /* Discard the whole match and start again */ 119 + i = 0; 120 + memset(metric_events, 0, 121 + sizeof(struct evsel *) * idnum); 122 + continue; 135 123 } 136 124 } 137 125 } 138 - /* 139 - * This can happen when an alias expands to multiple 140 - * events, like for uncore events. 141 - * We don't support this case for now. 
142 - */ 143 - return NULL; 126 + 127 + if (i != idnum - 1) { 128 + /* Not whole match */ 129 + return NULL; 130 + } 131 + 132 + metric_events[idnum] = NULL; 133 + 134 + for (i = 0; i < idnum; i++) { 135 + leader_found = false; 136 + evlist__for_each_entry(perf_evlist, ev) { 137 + if (!leader_found && (ev == metric_events[i])) 138 + leader_found = true; 139 + 140 + if (leader_found && 141 + !strcmp(ev->name, metric_events[i]->name)) { 142 + ev->metric_leader = metric_events[i]; 143 + } 144 + } 145 + } 146 + 147 + return metric_events[0]; 144 148 } 145 149 146 150 static int metricgroup__setup_events(struct list_head *groups,
+23 -4
tools/perf/util/stat-shadow.c
··· 31 31 int cpu; 32 32 struct runtime_stat *stat; 33 33 struct stats stats; 34 + u64 metric_total; 35 + int metric_other; 34 36 }; 35 37 36 38 static int saved_value_cmp(struct rb_node *rb_node, const void *entry) ··· 214 212 { 215 213 int ctx = evsel_context(counter); 216 214 u64 count_ns = count; 215 + struct saved_value *v; 217 216 218 217 count *= counter->scale; 219 218 ··· 269 266 update_runtime_stat(st, STAT_APERF, ctx, cpu, count); 270 267 271 268 if (counter->collect_stat) { 272 - struct saved_value *v = saved_value_lookup(counter, cpu, true, 273 - STAT_NONE, 0, st); 269 + v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); 274 270 update_stats(&v->stats, count); 271 + if (counter->metric_leader) 272 + v->metric_total += count; 273 + } else if (counter->metric_leader) { 274 + v = saved_value_lookup(counter->metric_leader, 275 + cpu, true, STAT_NONE, 0, st); 276 + v->metric_total += count; 277 + v->metric_other++; 275 278 } 276 279 } 277 280 ··· 738 729 char *n, *pn; 739 730 740 731 expr__ctx_init(&pctx); 741 - expr__add_id(&pctx, name, avg); 742 732 for (i = 0; metric_events[i]; i++) { 743 733 struct saved_value *v; 744 734 struct stats *stats; 735 + u64 metric_total = 0; 745 736 746 737 if (!strcmp(metric_events[i]->name, "duration_time")) { 747 738 stats = &walltime_nsecs_stats; ··· 753 744 break; 754 745 stats = &v->stats; 755 746 scale = 1.0; 747 + 748 + if (v->metric_other) 749 + metric_total = v->metric_total; 756 750 } 757 751 758 752 n = strdup(metric_events[i]->name); ··· 769 757 pn = strchr(n, ' '); 770 758 if (pn) 771 759 *pn = 0; 772 - expr__add_id(&pctx, n, avg_stats(stats)*scale); 760 + 761 + if (metric_total) 762 + expr__add_id(&pctx, n, metric_total); 763 + else 764 + expr__add_id(&pctx, n, avg_stats(stats)*scale); 773 765 } 766 + 767 + expr__add_id(&pctx, name, avg); 768 + 774 769 if (!metric_events[i]) { 775 770 const char *p = metric_expr; 776 771