perf stat: Only auto-merge events that are PMU aliases

Peter reported that when he explicitly asked for multiple events with
the same name on the command line they got coalesced into just one line,
i.e.:

# perf stat -e cycles -e cycles -e cycles usleep 1

Performance counter stats for 'usleep 1':

3,269,652 cycles

0.000884123 seconds time elapsed

#

And while there is the --no-merge option to disable that auto-merging,
this is a blunt change in behaviour for such an explicit request, so change
the code so that this auto-merging is done only when handling the
multi-PMU aliases with the same name that introduced this coalescing,
restoring the previous behaviour for the explicit case:

# perf stat -e cycles -e cycles -e cycles usleep 1

Performance counter stats for 'usleep 1':

1,472,837 cycles
1,472,837 cycles
1,472,837 cycles

0.001764870 seconds time elapsed

#

Reported-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: 430daf2dc7af ("perf stat: Collapse identically named events")
Link: http://lkml.kernel.org/r/20170831184122.GK4831@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

+18 -9
+1 -1
tools/perf/builtin-stat.c
··· 1257 if (counter->merged_stat) 1258 return false; 1259 cb(counter, data, true); 1260 - if (!no_merge) 1261 collect_all_aliases(counter, cb, data); 1262 return true; 1263 }
··· 1257 if (counter->merged_stat) 1258 return false; 1259 cb(counter, data, true); 1260 + if (!no_merge && counter->auto_merge_stats) 1261 collect_all_aliases(counter, cb, data); 1262 return true; 1263 }
+1
tools/perf/util/evsel.h
··· 131 bool cmdline_group_boundary; 132 struct list_head config_terms; 133 int bpf_fd; 134 bool merged_stat; 135 const char * metric_expr; 136 const char * metric_name;
··· 131 bool cmdline_group_boundary; 132 struct list_head config_terms; 133 int bpf_fd; 134 + bool auto_merge_stats; 135 bool merged_stat; 136 const char * metric_expr; 137 const char * metric_name;
+16 -8
tools/perf/util/parse-events.c
··· 310 __add_event(struct list_head *list, int *idx, 311 struct perf_event_attr *attr, 312 char *name, struct cpu_map *cpus, 313 - struct list_head *config_terms) 314 { 315 struct perf_evsel *evsel; 316 ··· 324 evsel->cpus = cpu_map__get(cpus); 325 evsel->own_cpus = cpu_map__get(cpus); 326 evsel->system_wide = !!cpus; 327 328 if (name) 329 evsel->name = strdup(name); ··· 340 struct perf_event_attr *attr, char *name, 341 struct list_head *config_terms) 342 { 343 - return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM; 344 } 345 346 static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) ··· 1210 get_config_name(head_config), &config_terms); 1211 } 1212 1213 - int parse_events_add_pmu(struct parse_events_state *parse_state, 1214 struct list_head *list, char *name, 1215 - struct list_head *head_config) 1216 { 1217 struct perf_event_attr attr; 1218 struct perf_pmu_info info; ··· 1233 1234 if (!head_config) { 1235 attr.type = pmu->type; 1236 - evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL); 1237 return evsel ? 0 : -ENOMEM; 1238 } 1239 ··· 1255 1256 evsel = __add_event(list, &parse_state->idx, &attr, 1257 get_config_name(head_config), pmu->cpus, 1258 - &config_terms); 1259 if (evsel) { 1260 evsel->unit = info.unit; 1261 evsel->scale = info.scale; ··· 1266 } 1267 1268 return evsel ? 0 : -ENOMEM; 1269 } 1270 1271 int parse_events_multi_pmu_add(struct parse_events_state *parse_state, ··· 1304 return -1; 1305 list_add_tail(&term->list, head); 1306 1307 - if (!parse_events_add_pmu(parse_state, list, 1308 - pmu->name, head)) { 1309 pr_debug("%s -> %s/%s/\n", str, 1310 pmu->name, alias->str); 1311 ok++;
··· 310 __add_event(struct list_head *list, int *idx, 311 struct perf_event_attr *attr, 312 char *name, struct cpu_map *cpus, 313 + struct list_head *config_terms, bool auto_merge_stats) 314 { 315 struct perf_evsel *evsel; 316 ··· 324 evsel->cpus = cpu_map__get(cpus); 325 evsel->own_cpus = cpu_map__get(cpus); 326 evsel->system_wide = !!cpus; 327 + evsel->auto_merge_stats = auto_merge_stats; 328 329 if (name) 330 evsel->name = strdup(name); ··· 339 struct perf_event_attr *attr, char *name, 340 struct list_head *config_terms) 341 { 342 + return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM; 343 } 344 345 static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) ··· 1209 get_config_name(head_config), &config_terms); 1210 } 1211 1212 + static int __parse_events_add_pmu(struct parse_events_state *parse_state, 1213 struct list_head *list, char *name, 1214 + struct list_head *head_config, bool auto_merge_stats) 1215 { 1216 struct perf_event_attr attr; 1217 struct perf_pmu_info info; ··· 1232 1233 if (!head_config) { 1234 attr.type = pmu->type; 1235 + evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats); 1236 return evsel ? 0 : -ENOMEM; 1237 } 1238 ··· 1254 1255 evsel = __add_event(list, &parse_state->idx, &attr, 1256 get_config_name(head_config), pmu->cpus, 1257 + &config_terms, auto_merge_stats); 1258 if (evsel) { 1259 evsel->unit = info.unit; 1260 evsel->scale = info.scale; ··· 1265 } 1266 1267 return evsel ? 
0 : -ENOMEM; 1268 + } 1269 + 1270 + int parse_events_add_pmu(struct parse_events_state *parse_state, 1271 + struct list_head *list, char *name, 1272 + struct list_head *head_config) 1273 + { 1274 + return __parse_events_add_pmu(parse_state, list, name, head_config, false); 1275 } 1276 1277 int parse_events_multi_pmu_add(struct parse_events_state *parse_state, ··· 1296 return -1; 1297 list_add_tail(&term->list, head); 1298 1299 + if (!__parse_events_add_pmu(parse_state, list, 1300 + pmu->name, head, true)) { 1301 pr_debug("%s -> %s/%s/\n", str, 1302 pmu->name, alias->str); 1303 ok++;