Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf top: Add --branch-history option

Add a --branch-history option to perf top that behaves the same as the
option of the same name in perf report.

Example:

$ cat tcallf.c
volatile a = 10000, b = 100000, c;

__attribute__((noinline)) f2()
{
c = a / b;
}

__attribute__((noinline)) f1()
{
f2();
f2();
}
main()
{
while (1)
f1();
}
$ gcc -w -g -o tcallf tcallf.c
$ ./tcallf &
[1] 29409
$ perf top -e cycles:u -t $(pidof tcallf) --stdio --no-children --branch-history
PerfTop: 3819 irqs/sec kernel: 0.0% exact: 0.0% lost: 0/0 drop: 0/0 [4000Hz cycles:u], (target_tid: 29409)
--------------------------------------------------------------------------------------------------------------------

49.01% tcallf.c:5 [.] f2 tcallf
|
|--24.91%--f2 tcallf.c:4
| |
| |--17.14%--f1 tcallf.c:11 (cycles:1)
| | f1 tcallf.c:11
| | f2 tcallf.c:6 (cycles:3)
| | f2 tcallf.c:4
| | f1 tcallf.c:10 (cycles:2)
| | f1 tcallf.c:9
| | main tcallf.c:16 (cycles:1)
| | main tcallf.c:16
| | main tcallf.c:16 (cycles:1)
| | main tcallf.c:16
| | f1 tcallf.c:12 (cycles:1)
| | f1 tcallf.c:12
| | f2 tcallf.c:6 (cycles:3)
| | f2 tcallf.c:4
| | f1 tcallf.c:11 (cycles:1 iter:1 avg_cycles:12)
| | f1 tcallf.c:11
| | f2 tcallf.c:6 (cycles:3 iter:1 avg_cycles:12)
| | f2 tcallf.c:4
| | f1 tcallf.c:10 (cycles:2 iter:1 avg_cycles:12)
| |
| --7.78%--f1 tcallf.c:10 (cycles:2)
| f1 tcallf.c:9
| main tcallf.c:16 (cycles:1)
| main tcallf.c:16
| main tcallf.c:16 (cycles:1)
| main tcallf.c:16
| f1 tcallf.c:12 (cycles:1)
| f1 tcallf.c:12
| f2 tcallf.c:6 (cycles:3)
| f2 tcallf.c:4
| f1 tcallf.c:11 (cycles:1)
| f1 tcallf.c:11
| f2 tcallf.c:6 (cycles:3)
| f2 tcallf.c:4
| f1 tcallf.c:10 (cycles:2 iter:1 avg_cycles:12)
| f1 tcallf.c:9
| main tcallf.c:16 (cycles:1 iter:1 avg_cycles:12)
| main tcallf.c:16
| main tcallf.c:16 (cycles:1 iter:1 avg_cycles:12)
...

$ pkill tcallf
[1]+ Terminated ./tcallf

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20230330131833.12864-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Adrian Hunter and committed by
Arnaldo Carvalho de Melo
5ef50613 616b14b4

+21
+4
tools/perf/Documentation/perf-top.txt
··· 254 254 The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k 255 255 Note that this feature may not be available on all processors. 256 256 257 + --branch-history:: 258 + Add the addresses of sampled taken branches to the callstack. 259 + This allows to examine the path the program took to each sample. 260 + 257 261 --raw-trace:: 258 262 When displaying traceevent output, do not use print fmt or plugins. 259 263
+17
tools/perf/builtin-top.c
··· 1437 1437 .max_stack = sysctl__max_stack(), 1438 1438 .nr_threads_synthesize = UINT_MAX, 1439 1439 }; 1440 + bool branch_call_mode = false; 1440 1441 struct record_opts *opts = &top.record_opts; 1441 1442 struct target *target = &opts->target; 1442 1443 const char *disassembler_style = NULL, *objdump_path = NULL, *addr2line_path = NULL; ··· 1552 1551 OPT_CALLBACK('j', "branch-filter", &opts->branch_stack, 1553 1552 "branch filter mask", "branch stack filter modes", 1554 1553 parse_branch_stack), 1554 + OPT_BOOLEAN(0, "branch-history", &branch_call_mode, 1555 + "add last branch records to call history"), 1555 1556 OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace, 1556 1557 "Show raw trace event output (do not use print fmt or plugins)"), 1557 1558 OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, ··· 1678 1675 if (nr_cgroups > 0 && opts->record_cgroup) { 1679 1676 pr_err("--cgroup and --all-cgroups cannot be used together\n"); 1680 1677 goto out_delete_evlist; 1678 + } 1679 + 1680 + if (branch_call_mode) { 1681 + if (!opts->branch_stack) 1682 + opts->branch_stack = PERF_SAMPLE_BRANCH_ANY; 1683 + symbol_conf.use_callchain = true; 1684 + callchain_param.key = CCKEY_ADDRESS; 1685 + callchain_param.branch_callstack = true; 1686 + callchain_param.enabled = true; 1687 + if (callchain_param.record_mode == CALLCHAIN_NONE) 1688 + callchain_param.record_mode = CALLCHAIN_FP; 1689 + callchain_register_param(&callchain_param); 1690 + if (!sort_order) 1691 + sort_order = "srcline,symbol,dso"; 1681 1692 } 1682 1693 1683 1694 if (opts->branch_stack && callchain_param.enabled)