Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf machine: Add a mechanism to inject stack frames

Add a mechanism for platforms to inject stack frames for the leaf
frame's caller if there is enough information to determine that a frame
is missing from DWARF or other post-processing mechanisms.

Reviewed-by: James Clark <james.clark@arm.com>
Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20211217154521.80603-3-german.gomez@arm.com
Signed-off-by: German Gomez <german.gomez@arm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Alexandre Truong and committed by
Arnaldo Carvalho de Melo
32bfa5bf 7248e308

+36 -1
+36 -1
tools/perf/util/machine.c
··· 2710 2710 return err; 2711 2711 } 2712 2712 2713 + static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused, 2714 + struct thread *thread __maybe_unused, int usr_idx __maybe_unused) 2715 + { 2716 + return 0; 2717 + } 2718 + 2713 2719 static int thread__resolve_callchain_sample(struct thread *thread, 2714 2720 struct callchain_cursor *cursor, 2715 2721 struct evsel *evsel, ··· 2729 2723 struct ip_callchain *chain = sample->callchain; 2730 2724 int chain_nr = 0; 2731 2725 u8 cpumode = PERF_RECORD_MISC_USER; 2732 - int i, j, err, nr_entries; 2726 + int i, j, err, nr_entries, usr_idx; 2733 2727 int skip_idx = -1; 2734 2728 int first_call = 0; 2729 + u64 leaf_frame_caller; 2735 2730 2736 2731 if (chain) 2737 2732 chain_nr = chain->nr; ··· 2855 2848 if (err) 2856 2849 return (err < 0) ? err : 0; 2857 2850 continue; 2851 + } 2852 + 2853 + /* 2854 + * PERF_CONTEXT_USER allows us to locate where the user stack ends. 2855 + * Depending on callchain_param.order and the position of PERF_CONTEXT_USER, 2856 + * the index will be different in order to add the missing frame 2857 + * at the right place. 2858 + */ 2859 + 2860 + usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1; 2861 + 2862 + if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) { 2863 + 2864 + leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx); 2865 + 2866 + /* 2867 + * check if leaf_frame_caller != ip to not add the same 2868 + * value twice. 2869 + */ 2870 + 2871 + if (leaf_frame_caller && leaf_frame_caller != ip) { 2872 + 2873 + err = add_callchain_ip(thread, cursor, parent, 2874 + root_al, &cpumode, leaf_frame_caller, 2875 + false, NULL, NULL, 0); 2876 + if (err) 2877 + return (err < 0) ? err : 0; 2878 + } 2858 2879 } 2859 2880 2860 2881 err = add_callchain_ip(thread, cursor, parent,