Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf tools: Merge deferred user callchains

Save samples with deferred callchains in a separate list and deliver
them after merging the user callchains. If users don't want to merge
they can set tool->merge_deferred_callchains to false to prevent the
behavior.

With the previous result, perf script will now show the merged callchains.

$ perf script
...
pwd 2312 121.163435: 249113 cpu/cycles/P:
ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms])
ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms])
ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms])
ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms])
ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms])
ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms])
ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])
7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
...

The old output can be obtained using the --no-merge-callchains option.
Also, perf report now shows the user callchain entries at the end.

$ perf report --no-children --stdio -q -S __build_id_parse.isra.0
# symbol: __build_id_parse.isra.0
8.40% pwd [kernel.kallsyms]
|
---__build_id_parse.isra.0
perf_event_mmap
mprotect_fixup
do_mprotect_pkey
__x64_sys_mprotect
do_syscall_64
entry_SYSCALL_64_after_hwframe
mprotect
_dl_sysdep_start
_dl_start_user

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

+133 -1
+5
tools/perf/Documentation/perf-script.txt
··· 527 527 The known limitations include exception handling such as 528 528 setjmp/longjmp will have calls/returns not match. 529 529 530 + --merge-callchains:: 531 + Enable merging deferred user callchains if available. This is the 532 + default behavior. If you want to see separate CALLCHAIN_DEFERRED 533 + records for some reason, use --no-merge-callchains explicitly. 534 + 530 535 :GMEXAMPLECMD: script 531 536 :GMEXAMPLESUBCMD: 532 537 include::guest-files.txt[]
+1
tools/perf/builtin-inject.c
··· 2527 2527 inject.tool.auxtrace = perf_event__repipe_auxtrace; 2528 2528 inject.tool.bpf_metadata = perf_event__repipe_op2_synth; 2529 2529 inject.tool.dont_split_sample_group = true; 2530 + inject.tool.merge_deferred_callchains = false; 2530 2531 inject.session = __perf_session__new(&data, &inject.tool, 2531 2532 /*trace_event_repipe=*/inject.output.is_pipe, 2532 2533 /*host_env=*/NULL);
+1
tools/perf/builtin-report.c
··· 1614 1614 report.tool.event_update = perf_event__process_event_update; 1615 1615 report.tool.feature = process_feature_event; 1616 1616 report.tool.ordering_requires_timestamps = true; 1617 + report.tool.merge_deferred_callchains = !dump_trace; 1617 1618 1618 1619 session = perf_session__new(&data, &report.tool); 1619 1620 if (IS_ERR(session)) {
+4
tools/perf/builtin-script.c
··· 4009 4009 bool header_only = false; 4010 4010 bool script_started = false; 4011 4011 bool unsorted_dump = false; 4012 + bool merge_deferred_callchains = true; 4012 4013 char *rec_script_path = NULL; 4013 4014 char *rep_script_path = NULL; 4014 4015 struct perf_session *session; ··· 4163 4162 "Guest code can be found in hypervisor process"), 4164 4163 OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr, 4165 4164 "Enable LBR callgraph stitching approach"), 4165 + OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains, 4166 + "Enable merge deferred user callchains"), 4166 4167 OPTS_EVSWITCH(&script.evswitch), 4167 4168 OPT_END() 4168 4169 }; ··· 4421 4418 script.tool.throttle = process_throttle_event; 4422 4419 script.tool.unthrottle = process_throttle_event; 4423 4420 script.tool.ordering_requires_timestamps = true; 4421 + script.tool.merge_deferred_callchains = merge_deferred_callchains; 4424 4422 session = perf_session__new(&data, &script.tool); 4425 4423 if (IS_ERR(session)) 4426 4424 return PTR_ERR(session);
+35
tools/perf/util/callchain.c
··· 1838 1838 } 1839 1839 return 0; 1840 1840 } 1841 + 1842 + /* 1843 + * This function merges earlier samples (@sample_orig) waiting for deferred 1844 + * user callchains with the matching callchain record (@sample_callchain) 1845 + * which is delivered now. The @sample_orig->callchain should be released 1846 + * after use if ->deferred_callchain is set. 1847 + */ 1848 + int sample__merge_deferred_callchain(struct perf_sample *sample_orig, 1849 + struct perf_sample *sample_callchain) 1850 + { 1851 + u64 nr_orig = sample_orig->callchain->nr - 1; 1852 + u64 nr_deferred = sample_callchain->callchain->nr; 1853 + struct ip_callchain *callchain; 1854 + 1855 + if (sample_orig->callchain->nr < 2) { 1856 + sample_orig->deferred_callchain = false; 1857 + return -EINVAL; 1858 + } 1859 + 1860 + callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64)); 1861 + if (callchain == NULL) { 1862 + sample_orig->deferred_callchain = false; 1863 + return -ENOMEM; 1864 + } 1865 + 1866 + callchain->nr = nr_orig + nr_deferred; 1867 + /* copy original including PERF_CONTEXT_USER_DEFERRED (but not the cookie) */ 1868 + memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64)); 1869 + /* copy deferred user callchains */ 1870 + memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips, 1871 + nr_deferred * sizeof(u64)); 1872 + 1873 + sample_orig->callchain = callchain; 1874 + return 0; 1875 + }
+3
tools/perf/util/callchain.h
··· 318 318 struct perf_sample *sample, int max_stack, 319 319 bool symbols, callchain_iter_fn cb, void *data); 320 320 321 + int sample__merge_deferred_callchain(struct perf_sample *sample_orig, 322 + struct perf_sample *sample_callchain); 323 + 321 324 #endif /* __PERF_CALLCHAIN_H */
+1
tools/perf/util/evlist.c
··· 85 85 evlist->ctl_fd.pos = -1; 86 86 evlist->nr_br_cntr = -1; 87 87 metricgroup__rblist_init(&evlist->metric_events); 88 + INIT_LIST_HEAD(&evlist->deferred_samples); 88 89 } 89 90 90 91 struct evlist *evlist__new(void)
+2
tools/perf/util/evlist.h
··· 92 92 * of struct metric_expr. 93 93 */ 94 94 struct rblist metric_events; 95 + /* samples with deferred_callchain would wait here. */ 96 + struct list_head deferred_samples; 95 97 }; 96 98 97 99 struct evsel_str_handler {
+78 -1
tools/perf/util/session.c
··· 1285 1285 per_thread); 1286 1286 } 1287 1287 1288 + /* 1289 + * Samples with deferred callchains should wait for the next matching 1290 + * PERF_RECORD_CALLCHAIN_DEFERRED entries. Keep the events in a list and 1291 + * deliver them once it finds the callchains. 1292 + */ 1293 + struct deferred_event { 1294 + struct list_head list; 1295 + union perf_event *event; 1296 + }; 1297 + 1298 + static int evlist__deliver_deferred_callchain(struct evlist *evlist, 1299 + const struct perf_tool *tool, 1300 + union perf_event *event, 1301 + struct perf_sample *sample, 1302 + struct machine *machine) 1303 + { 1304 + struct deferred_event *de, *tmp; 1305 + struct evsel *evsel; 1306 + int ret = 0; 1307 + 1308 + if (!tool->merge_deferred_callchains) { 1309 + evsel = evlist__id2evsel(evlist, sample->id); 1310 + return tool->callchain_deferred(tool, event, sample, 1311 + evsel, machine); 1312 + } 1313 + 1314 + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { 1315 + struct perf_sample orig_sample; 1316 + 1317 + ret = evlist__parse_sample(evlist, de->event, &orig_sample); 1318 + if (ret < 0) { 1319 + pr_err("failed to parse original sample\n"); 1320 + break; 1321 + } 1322 + 1323 + if (sample->tid != orig_sample.tid) 1324 + continue; 1325 + 1326 + if (event->callchain_deferred.cookie == orig_sample.deferred_cookie) 1327 + sample__merge_deferred_callchain(&orig_sample, sample); 1328 + else 1329 + orig_sample.deferred_callchain = false; 1330 + 1331 + evsel = evlist__id2evsel(evlist, orig_sample.id); 1332 + ret = evlist__deliver_sample(evlist, tool, de->event, 1333 + &orig_sample, evsel, machine); 1334 + 1335 + if (orig_sample.deferred_callchain) 1336 + free(orig_sample.callchain); 1337 + 1338 + list_del(&de->list); 1339 + free(de->event); 1340 + free(de); 1341 + 1342 + if (ret) 1343 + break; 1344 + } 1345 + return ret; 1346 + } 1347 + 1288 1348 static int machines__deliver_event(struct machines *machines, 1289 1349 struct evlist *evlist, 1290 1350 union perf_event 
*event, ··· 1373 1313 return 0; 1374 1314 } 1375 1315 dump_sample(evsel, event, sample, perf_env__arch(machine->env)); 1316 + if (sample->deferred_callchain && tool->merge_deferred_callchains) { 1317 + struct deferred_event *de = malloc(sizeof(*de)); 1318 + size_t sz = event->header.size; 1319 + 1320 + if (de == NULL) 1321 + return -ENOMEM; 1322 + 1323 + de->event = malloc(sz); 1324 + if (de->event == NULL) { 1325 + free(de); 1326 + return -ENOMEM; 1327 + } 1328 + memcpy(de->event, event, sz); 1329 + list_add_tail(&de->list, &evlist->deferred_samples); 1330 + return 0; 1331 + } 1376 1332 return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); 1377 1333 case PERF_RECORD_MMAP: 1378 1334 return tool->mmap(tool, event, sample, machine); ··· 1448 1372 return tool->aux_output_hw_id(tool, event, sample, machine); 1449 1373 case PERF_RECORD_CALLCHAIN_DEFERRED: 1450 1374 dump_deferred_callchain(evsel, event, sample); 1451 - return tool->callchain_deferred(tool, event, sample, evsel, machine); 1375 + return evlist__deliver_deferred_callchain(evlist, tool, event, 1376 + sample, machine); 1452 1377 default: 1453 1378 ++evlist->stats.nr_unknown_events; 1454 1379 return -1;
+2
tools/perf/util/tool.c
··· 266 266 tool->cgroup_events = false; 267 267 tool->no_warn = false; 268 268 tool->show_feat_hdr = SHOW_FEAT_NO_HEADER; 269 + tool->merge_deferred_callchains = true; 269 270 270 271 tool->sample = process_event_sample_stub; 271 272 tool->mmap = process_event_stub; ··· 449 448 tool->tool.cgroup_events = delegate->cgroup_events; 450 449 tool->tool.no_warn = delegate->no_warn; 451 450 tool->tool.show_feat_hdr = delegate->show_feat_hdr; 451 + tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains; 452 452 453 453 tool->tool.sample = delegate_sample; 454 454 tool->tool.read = delegate_read;
+1
tools/perf/util/tool.h
··· 90 90 bool cgroup_events; 91 91 bool no_warn; 92 92 bool dont_split_sample_group; 93 + bool merge_deferred_callchains; 93 94 enum show_feature_header show_feat_hdr; 94 95 }; 95 96