Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf record: Add --call-graph fp,defer option for deferred callchains

Add a new callchain record mode option for deferred callchains. For now
it only works with FP (frame-pointer) mode.

Also add the missing-feature detection logic so that the flag is cleared
on old kernels that do not support it.

$ perf record --call-graph fp,defer -vv true
...
------------------------------------------------------------
perf_event_attr:
type 0 (PERF_TYPE_HARDWARE)
size 136
config 0 (PERF_COUNT_HW_CPU_CYCLES)
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|CALLCHAIN|PERIOD
read_format ID|LOST
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
mmap2 1
comm_exec 1
ksymbol 1
bpf_event 1
defer_callchain 1
defer_output 1
------------------------------------------------------------
sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open failed, error -22
switching off deferred callchain support

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

+41 -3
+3
tools/perf/Documentation/perf-config.txt
··· 452 452 kernel space is controlled not by this option but by the 453 453 kernel config (CONFIG_UNWINDER_*). 454 454 455 + The 'defer' mode can be used with 'fp' mode to enable deferred 456 + user callchains (like 'fp,defer'). 457 + 455 458 call-graph.dump-size:: 456 459 The size of stack to dump in order to do post-unwinding. Default is 8192 (byte). 457 460 When using dwarf into record-mode, the default size will be used if omitted.
+4
tools/perf/Documentation/perf-record.txt
··· 325 325 by default. User can change the number by passing it after comma 326 326 like "--call-graph fp,32". 327 327 328 + Also "defer" can be used with "fp" (like "--call-graph fp,defer") to 329 + enable deferred user callchain which will collect user-space callchains 330 + when the thread returns to the user space. 331 + 328 332 -q:: 329 333 --quiet:: 330 334 Don't print any warnings or messages, useful for scripting.
+13 -3
tools/perf/util/callchain.c
··· 275 275 if (tok) { 276 276 unsigned long size; 277 277 278 - size = strtoul(tok, &name, 0); 279 - if (size < (unsigned) sysctl__max_stack()) 280 - param->max_stack = size; 278 + if (!strncmp(tok, "defer", sizeof("defer"))) { 279 + param->defer = true; 280 + } else { 281 + size = strtoul(tok, &name, 0); 282 + if (size < (unsigned) sysctl__max_stack()) 283 + param->max_stack = size; 284 + } 281 285 } 282 286 break; 283 287 ··· 318 314 } while (0); 319 315 320 316 free(buf); 317 + 318 + if (param->defer && param->record_mode != CALLCHAIN_FP) { 319 + pr_err("callchain: deferred callchain only works with FP\n"); 320 + return -EINVAL; 321 + } 322 + 321 323 return ret; 322 324 } 323 325
+1
tools/perf/util/callchain.h
··· 98 98 99 99 struct callchain_param { 100 100 bool enabled; 101 + bool defer; 101 102 enum perf_call_graph_mode record_mode; 102 103 u32 dump_size; 103 104 enum chain_mode mode;
+19
tools/perf/util/evsel.c
··· 1066 1066 pr_info("Disabling user space callchains for function trace event.\n"); 1067 1067 attr->exclude_callchain_user = 1; 1068 1068 } 1069 + 1070 + if (param->defer && !attr->exclude_callchain_user) 1071 + attr->defer_callchain = 1; 1069 1072 } 1070 1073 1071 1074 void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, ··· 1515 1512 attr->mmap2 = track && !perf_missing_features.mmap2; 1516 1513 attr->comm = track; 1517 1514 attr->build_id = track && opts->build_id; 1515 + attr->defer_output = track && callchain && callchain->defer; 1518 1516 1519 1517 /* 1520 1518 * ksymbol is tracked separately with text poke because it needs to be ··· 2204 2200 2205 2201 static void evsel__disable_missing_features(struct evsel *evsel) 2206 2202 { 2203 + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain) 2204 + evsel->core.attr.defer_callchain = 0; 2205 + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output) 2206 + evsel->core.attr.defer_output = 0; 2207 2207 if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit && 2208 2208 (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) 2209 2209 evsel->core.attr.inherit = 0; ··· 2482 2474 2483 2475 /* Please add new feature detection here. 
*/ 2484 2476 2477 + attr.defer_callchain = true; 2478 + if (has_attr_feature(&attr, /*flags=*/0)) 2479 + goto found; 2480 + perf_missing_features.defer_callchain = true; 2481 + pr_debug2("switching off deferred callchain support\n"); 2482 + attr.defer_callchain = false; 2483 + 2485 2484 attr.inherit = true; 2486 2485 attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID; 2487 2486 if (has_attr_feature(&attr, /*flags=*/0)) ··· 2600 2585 errno = old_errno; 2601 2586 2602 2587 check: 2588 + if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) && 2589 + perf_missing_features.defer_callchain) 2590 + return true; 2591 + 2603 2592 if (evsel->core.attr.inherit && 2604 2593 (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && 2605 2594 perf_missing_features.inherit_sample_read)
+1
tools/perf/util/evsel.h
··· 221 221 bool branch_counters; 222 222 bool aux_action; 223 223 bool inherit_sample_read; 224 + bool defer_callchain; 224 225 }; 225 226 226 227 extern struct perf_missing_features perf_missing_features;