Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf inject: Add --convert-callchain option

There are applications not built with frame pointers, so DWARF is needed
to get the stack traces.

`perf record --call-graph dwarf` saves the stack and register data for
each sample to get the stacktrace offline. But sometimes this data may
contain sensitive information that we don't want to keep in the file.

This new 'perf inject --convert-callchain' option creates the callchains
and discards the stack and register data after that.

This saves storage space and processing time for the new data file.

Of course, users should remove the original data file to not keep
sensitive data around. :)

The downside is that it cannot handle inlined callchain entries, as they
share the same IP as the function they are inlined into.

Maybe we can add an option to 'perf report' to look up inlined functions
using DWARF - IIUC it doesn't require stack and register data.

This is an example.

$ perf record --call-graph dwarf -- perf test -w noploop

$ perf report --stdio --no-children --percent-limit=0 > output-prev

$ perf inject -i perf.data --convert-callchain -o perf.data.out

$ perf report --stdio --no-children --percent-limit=0 -i perf.data.out > output-next

$ diff -u output-prev output-next
...
0.23% perf ld-linux-x86-64.so.2 [.] _dl_relocate_object_no_relro
|
- ---elf_dynamic_do_Rela (inlined)
- _dl_relocate_object_no_relro
+ ---_dl_relocate_object_no_relro
_dl_relocate_object
dl_main
_dl_sysdep_start
- _dl_start_final (inlined)
_dl_start
_start

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Namhyung Kim and committed by
Arnaldo Carvalho de Melo
92ea788d 28cb835f

+157
+5
tools/perf/Documentation/perf-inject.txt
··· 109 109 should be used, and also --buildid-all and --switch-events may be 110 110 useful. 111 111 112 + --convert-callchain:: 113 + Parse DWARF callchains and convert them to usual callchains. This also 114 + discards stack and register data from the samples. This will lose 115 + inlined callchain entries. 116 + 112 117 :GMEXAMPLECMD: inject 113 118 :GMEXAMPLESUBCMD: 114 119 include::guestmount.txt[]
+152
tools/perf/builtin-inject.c
··· 122 122 bool in_place_update; 123 123 bool in_place_update_dry_run; 124 124 bool copy_kcore_dir; 125 + bool convert_callchain; 125 126 const char *input_name; 126 127 struct perf_data output; 127 128 u64 bytes_written; ··· 134 133 struct guest_session guest_session; 135 134 struct strlist *known_build_ids; 136 135 const struct evsel *mmap_evsel; 136 + struct ip_callchain *raw_callchain; 137 137 }; 138 138 139 139 struct event_entry { ··· 383 381 } 384 382 385 383 return perf_event__repipe_synth(tool, event); 384 + } 385 + 386 + static int perf_event__convert_sample_callchain(const struct perf_tool *tool, 387 + union perf_event *event, 388 + struct perf_sample *sample, 389 + struct evsel *evsel, 390 + struct machine *machine) 391 + { 392 + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); 393 + struct callchain_cursor *cursor = get_tls_callchain_cursor(); 394 + union perf_event *event_copy = (void *)inject->event_copy; 395 + struct callchain_cursor_node *node; 396 + struct thread *thread; 397 + u64 sample_type = evsel->core.attr.sample_type; 398 + u32 sample_size = event->header.size; 399 + u64 i, k; 400 + int ret; 401 + 402 + if (event_copy == NULL) { 403 + inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE); 404 + if (!inject->event_copy) 405 + return -ENOMEM; 406 + 407 + event_copy = (void *)inject->event_copy; 408 + } 409 + 410 + if (cursor == NULL) 411 + return -ENOMEM; 412 + 413 + callchain_cursor_reset(cursor); 414 + 415 + thread = machine__find_thread(machine, sample->tid, sample->pid); 416 + if (thread == NULL) 417 + goto out; 418 + 419 + /* this will parse DWARF using stack and register data */ 420 + ret = thread__resolve_callchain(thread, cursor, evsel, sample, 421 + /*parent=*/NULL, /*root_al=*/NULL, 422 + PERF_MAX_STACK_DEPTH); 423 + thread__put(thread); 424 + if (ret != 0) 425 + goto out; 426 + 427 + /* copy kernel callchain and context entries */ 428 + for (i = 0; i < sample->callchain->nr; i++) { 429 + 
inject->raw_callchain->ips[i] = sample->callchain->ips[i]; 430 + if (sample->callchain->ips[i] == PERF_CONTEXT_USER) { 431 + i++; 432 + break; 433 + } 434 + } 435 + if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER) 436 + inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER; 437 + 438 + node = cursor->first; 439 + for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) { 440 + if (machine__kernel_ip(machine, node->ip)) 441 + /* kernel IPs were added already */; 442 + else if (node->ms.sym && node->ms.sym->inlined) 443 + /* we can't handle inlined callchains */; 444 + else 445 + inject->raw_callchain->ips[i++] = node->ip; 446 + 447 + node = node->next; 448 + } 449 + 450 + inject->raw_callchain->nr = i; 451 + sample->callchain = inject->raw_callchain; 452 + 453 + out: 454 + memcpy(event_copy, event, sizeof(event->header)); 455 + 456 + /* adjust sample size for stack and regs */ 457 + sample_size -= sample->user_stack.size; 458 + sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64); 459 + sample_size += (sample->callchain->nr + 1) * sizeof(u64); 460 + event_copy->header.size = sample_size; 461 + 462 + /* remove sample_type {STACK,REGS}_USER for synthesize */ 463 + sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER); 464 + 465 + perf_event__synthesize_sample(event_copy, sample_type, 466 + evsel->core.attr.read_format, sample); 467 + return perf_event__repipe_synth(tool, event_copy); 386 468 } 387 469 388 470 static struct dso *findnew_dso(int pid, int tid, const char *filename, ··· 2356 2270 /* Allow space in the header for guest attributes */ 2357 2271 output_data_offset += gs->session->header.data_offset; 2358 2272 output_data_offset = roundup(output_data_offset, 4096); 2273 + } else if (inject->convert_callchain) { 2274 + inject->tool.sample = perf_event__convert_sample_callchain; 2275 + inject->tool.fork = perf_event__repipe_fork; 2276 + inject->tool.comm = perf_event__repipe_comm; 2277 + 
inject->tool.exit = perf_event__repipe_exit; 2278 + inject->tool.mmap = perf_event__repipe_mmap; 2279 + inject->tool.mmap2 = perf_event__repipe_mmap2; 2280 + inject->tool.ordered_events = true; 2281 + inject->tool.ordering_requires_timestamps = true; 2359 2282 } 2360 2283 2361 2284 if (!inject->itrace_synth_opts.set) ··· 2417 2322 perf_header__set_feat(&session->header, 2418 2323 HEADER_BRANCH_STACK); 2419 2324 } 2325 + 2326 + /* 2327 + * The converted data file won't have stack and registers. 2328 + * Update the perf_event_attr to remove them before writing. 2329 + */ 2330 + if (inject->convert_callchain) { 2331 + struct evsel *evsel; 2332 + 2333 + evlist__for_each_entry(session->evlist, evsel) { 2334 + evsel__reset_sample_bit(evsel, REGS_USER); 2335 + evsel__reset_sample_bit(evsel, STACK_USER); 2336 + evsel->core.attr.sample_regs_user = 0; 2337 + evsel->core.attr.sample_stack_user = 0; 2338 + evsel->core.attr.exclude_callchain_user = 0; 2339 + } 2340 + } 2341 + 2420 2342 session->header.data_offset = output_data_offset; 2421 2343 session->header.data_size = inject->bytes_written; 2422 2344 perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, ··· 2456 2344 } 2457 2345 2458 2346 return ret; 2347 + } 2348 + 2349 + static bool evsel__has_dwarf_callchain(struct evsel *evsel) 2350 + { 2351 + struct perf_event_attr *attr = &evsel->core.attr; 2352 + const u64 dwarf_callchain_flags = 2353 + PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN; 2354 + 2355 + if (!attr->exclude_callchain_user) 2356 + return false; 2357 + 2358 + return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags; 2459 2359 } 2460 2360 2461 2361 int cmd_inject(int argc, const char **argv) ··· 2538 2414 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", 2539 2415 "guest mount directory under which every guest os" 2540 2416 " instance has a subdir"), 2417 + OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, 2418 + "Generate 
callchains using DWARF and drop register/stack data"), 2541 2419 OPT_END() 2542 2420 }; 2543 2421 const char * const inject_usage[] = { ··· 2555 2429 2556 2430 #ifndef HAVE_JITDUMP 2557 2431 set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); 2432 + #endif 2433 + #ifndef HAVE_LIBDW_SUPPORT 2434 + set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true); 2558 2435 #endif 2559 2436 argc = parse_options(argc, argv, options, inject_usage, 0); 2560 2437 ··· 2717 2588 } 2718 2589 } 2719 2590 2591 + if (inject.convert_callchain) { 2592 + struct evsel *evsel; 2593 + 2594 + if (inject.output.is_pipe || inject.session->data->is_pipe) { 2595 + pr_err("--convert-callchain cannot work with pipe\n"); 2596 + goto out_delete; 2597 + } 2598 + 2599 + evlist__for_each_entry(inject.session->evlist, evsel) { 2600 + if (!evsel__has_dwarf_callchain(evsel)) { 2601 + pr_err("--convert-callchain requires DWARF call graph.\n"); 2602 + goto out_delete; 2603 + } 2604 + } 2605 + 2606 + inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64)); 2607 + if (inject.raw_callchain == NULL) { 2608 + pr_err("callchain allocation failed\n"); 2609 + goto out_delete; 2610 + } 2611 + } 2612 + 2720 2613 #ifdef HAVE_JITDUMP 2721 2614 if (inject.jit_mode) { 2722 2615 inject.tool.mmap2 = perf_event__repipe_mmap2; ··· 2769 2618 free(inject.itrace_synth_opts.vm_tm_corr_args); 2770 2619 free(inject.event_copy); 2771 2620 free(inject.guest_session.ev.event_buf); 2621 + free(inject.raw_callchain); 2772 2622 return ret; 2773 2623 }