Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf diff: Support hot streams comparison

This patch adds the "--stream" option to perf diff.

"--stream": Enable hot streams comparison

Now let's see an example.

perf record -b ...    # generate perf.data.old with branch data
perf record -b ...    # generate perf.data with branch data
perf diff --stream

[ Matched hot streams ]

hot chain pair 1:
cycles: 1, hits: 27.77% cycles: 1, hits: 9.24%
--------------------------- --------------------------
main div.c:39 main div.c:39
main div.c:44 main div.c:44

hot chain pair 2:
cycles: 34, hits: 20.06% cycles: 27, hits: 16.98%
--------------------------- --------------------------
__random_r random_r.c:360 __random_r random_r.c:360
__random_r random_r.c:388 __random_r random_r.c:388
__random_r random_r.c:388 __random_r random_r.c:388
__random_r random_r.c:380 __random_r random_r.c:380
__random_r random_r.c:357 __random_r random_r.c:357
__random random.c:293 __random random.c:293
__random random.c:293 __random random.c:293
__random random.c:291 __random random.c:291
__random random.c:291 __random random.c:291
__random random.c:291 __random random.c:291
__random random.c:288 __random random.c:288
rand rand.c:27 rand rand.c:27
rand rand.c:26 rand rand.c:26
rand@plt rand@plt
rand@plt rand@plt
compute_flag div.c:25 compute_flag div.c:25
compute_flag div.c:22 compute_flag div.c:22
main div.c:40 main div.c:40
main div.c:40 main div.c:40
main div.c:39 main div.c:39

hot chain pair 3:
cycles: 9, hits: 4.48% cycles: 6, hits: 4.51%
--------------------------- --------------------------
__random_r random_r.c:360 __random_r random_r.c:360
__random_r random_r.c:388 __random_r random_r.c:388
__random_r random_r.c:388 __random_r random_r.c:388
__random_r random_r.c:380 __random_r random_r.c:380

[ Hot streams in old perf data only ]

hot chain 1:
cycles: 18, hits: 6.75%
--------------------------
__random_r random_r.c:360
__random_r random_r.c:388
__random_r random_r.c:388
__random_r random_r.c:380
__random_r random_r.c:357
__random random.c:293
__random random.c:293
__random random.c:291
__random random.c:291
__random random.c:291
__random random.c:288
rand rand.c:27
rand rand.c:26
rand@plt
rand@plt
compute_flag div.c:25
compute_flag div.c:22
main div.c:40

hot chain 2:
cycles: 29, hits: 2.78%
--------------------------
compute_flag div.c:22
main div.c:40
main div.c:40
main div.c:39

[ Hot streams in new perf data only ]

hot chain 1:
cycles: 4, hits: 4.54%
--------------------------
main div.c:42
compute_flag div.c:28

hot chain 2:
cycles: 5, hits: 3.51%
--------------------------
main div.c:39
main div.c:44
main div.c:42
compute_flag div.c:28

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20201009022845.13141-8-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Jin Yao and committed by Arnaldo Carvalho de Melo
2a09a84c 5bbd6bad

+110 -13
+4
tools/perf/Documentation/perf-diff.txt
··· 182 182 --tid=:: 183 183 Only diff samples for given thread ID (comma separated list). 184 184 185 + --stream:: 186 + Enable hot streams comparison. Stream can be a callchain which is 187 + aggregated by the branch records from samples. 188 + 185 189 COMPARISON 186 190 ---------- 187 191 The comparison is governed by the baseline file. The baseline perf.data
+106 -13
tools/perf/builtin-diff.c
··· 25 25 #include "util/map.h" 26 26 #include "util/spark.h" 27 27 #include "util/block-info.h" 28 + #include "util/stream.h" 28 29 #include <linux/err.h> 29 30 #include <linux/zalloc.h> 30 31 #include <subcmd/pager.h> ··· 43 42 int range_size; 44 43 int range_num; 45 44 bool has_br_stack; 45 + bool stream; 46 46 }; 47 47 48 48 /* Diff command specific HPP columns. */ ··· 74 72 struct perf_data data; 75 73 int idx; 76 74 struct hists *hists; 75 + struct evlist_streams *evlist_streams; 77 76 struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX]; 78 77 }; 79 78 ··· 109 106 COMPUTE_DELTA_ABS, 110 107 COMPUTE_CYCLES, 111 108 COMPUTE_MAX, 109 + COMPUTE_STREAM, /* After COMPUTE_MAX to avoid use current compute arrays */ 112 110 }; 113 111 114 112 const char *compute_names[COMPUTE_MAX] = { ··· 397 393 struct perf_diff *pdiff = container_of(tool, struct perf_diff, tool); 398 394 struct addr_location al; 399 395 struct hists *hists = evsel__hists(evsel); 396 + struct hist_entry_iter iter = { 397 + .evsel = evsel, 398 + .sample = sample, 399 + .ops = &hist_iter_normal, 400 + }; 400 401 int ret = -1; 401 402 402 403 if (perf_time__ranges_skip_sample(pdiff->ptime_range, pdiff->range_num, ··· 420 411 goto out_put; 421 412 } 422 413 423 - if (compute != COMPUTE_CYCLES) { 424 - if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, 425 - true)) { 426 - pr_warning("problem incrementing symbol period, " 427 - "skipping event\n"); 428 - goto out_put; 429 - } 430 - } else { 414 + switch (compute) { 415 + case COMPUTE_CYCLES: 431 416 if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL, 432 417 NULL, NULL, sample, true)) { 433 418 pr_warning("problem incrementing symbol period, " ··· 431 428 432 429 hist__account_cycles(sample->branch_stack, &al, sample, false, 433 430 NULL); 431 + break; 432 + 433 + case COMPUTE_STREAM: 434 + if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, 435 + NULL)) { 436 + pr_debug("problem adding hist entry, skipping event\n"); 437 + goto 
out_put; 438 + } 439 + break; 440 + 441 + default: 442 + if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, 443 + true)) { 444 + pr_warning("problem incrementing symbol period, " 445 + "skipping event\n"); 446 + goto out_put; 447 + } 434 448 } 435 449 436 450 /* ··· 1016 996 } 1017 997 } 1018 998 999 + static int process_base_stream(struct data__file *data_base, 1000 + struct data__file *data_pair, 1001 + const char *title __maybe_unused) 1002 + { 1003 + struct evlist *evlist_base = data_base->session->evlist; 1004 + struct evlist *evlist_pair = data_pair->session->evlist; 1005 + struct evsel *evsel_base, *evsel_pair; 1006 + struct evsel_streams *es_base, *es_pair; 1007 + 1008 + evlist__for_each_entry(evlist_base, evsel_base) { 1009 + evsel_pair = evsel_match(evsel_base, evlist_pair); 1010 + if (!evsel_pair) 1011 + continue; 1012 + 1013 + es_base = evsel_streams__entry(data_base->evlist_streams, 1014 + evsel_base->idx); 1015 + if (!es_base) 1016 + return -1; 1017 + 1018 + es_pair = evsel_streams__entry(data_pair->evlist_streams, 1019 + evsel_pair->idx); 1020 + if (!es_pair) 1021 + return -1; 1022 + 1023 + evsel_streams__match(es_base, es_pair); 1024 + evsel_streams__report(es_base, es_pair); 1025 + } 1026 + 1027 + return 0; 1028 + } 1029 + 1030 + static void stream_process(void) 1031 + { 1032 + /* 1033 + * Stream comparison only supports two data files. 1034 + * perf.data.old and perf.data. data__files[0] is perf.data.old, 1035 + * data__files[1] is perf.data. 
1036 + */ 1037 + process_base_stream(&data__files[0], &data__files[1], 1038 + "# Output based on old perf data:\n#\n"); 1039 + } 1040 + 1019 1041 static void data__free(struct data__file *d) 1020 1042 { 1021 1043 int col; 1044 + 1045 + if (d->evlist_streams) 1046 + evlist_streams__delete(d->evlist_streams); 1022 1047 1023 1048 for (col = 0; col < PERF_HPP_DIFF__MAX_INDEX; col++) { 1024 1049 struct diff_hpp_fmt *fmt = &d->fmt[col]; ··· 1218 1153 1219 1154 if (pdiff.ptime_range) 1220 1155 zfree(&pdiff.ptime_range); 1156 + 1157 + if (compute == COMPUTE_STREAM) { 1158 + d->evlist_streams = evlist__create_streams( 1159 + d->session->evlist, 5); 1160 + if (!d->evlist_streams) 1161 + goto out_delete; 1162 + } 1221 1163 } 1222 1164 1223 - data_process(); 1165 + if (compute == COMPUTE_STREAM) 1166 + stream_process(); 1167 + else 1168 + data_process(); 1224 1169 1225 1170 out_delete: 1226 1171 data__for_each_file(i, d) { ··· 1303 1228 "only consider symbols in these pids"), 1304 1229 OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", 1305 1230 "only consider symbols in these tids"), 1231 + OPT_BOOLEAN(0, "stream", &pdiff.stream, 1232 + "Enable hot streams comparison."), 1306 1233 OPT_END() 1307 1234 }; 1308 1235 ··· 1964 1887 if (cycles_hist && (compute != COMPUTE_CYCLES)) 1965 1888 usage_with_options(diff_usage, options); 1966 1889 1890 + if (pdiff.stream) 1891 + compute = COMPUTE_STREAM; 1892 + 1967 1893 symbol__annotation_init(); 1968 1894 1969 1895 if (symbol__init(NULL) < 0) ··· 1978 1898 if (check_file_brstack() < 0) 1979 1899 return -1; 1980 1900 1981 - if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack) 1901 + if ((compute == COMPUTE_CYCLES || compute == COMPUTE_STREAM) 1902 + && !pdiff.has_br_stack) { 1982 1903 return -1; 1904 + } 1983 1905 1984 - if (ui_init() < 0) 1985 - return -1; 1906 + if (compute == COMPUTE_STREAM) { 1907 + symbol_conf.show_branchflag_count = true; 1908 + symbol_conf.disable_add2line_warn = true; 1909 + callchain_param.mode = 
CHAIN_FLAT; 1910 + callchain_param.key = CCKEY_SRCLINE; 1911 + callchain_param.branch_callstack = 1; 1912 + symbol_conf.use_callchain = true; 1913 + callchain_register_param(&callchain_param); 1914 + sort_order = "srcline,symbol,dso"; 1915 + } else { 1916 + if (ui_init() < 0) 1917 + return -1; 1986 1918 1987 - sort__mode = SORT_MODE__DIFF; 1919 + sort__mode = SORT_MODE__DIFF; 1920 + } 1988 1921 1989 1922 if (setup_sorting(NULL) < 0) 1990 1923 usage_with_options(diff_usage, options);