Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf ftrace: Add -b/--use-bpf option for latency subcommand

The -b/--use-bpf option is to use BPF to get latency info of kernel
functions. It should have a lower performance impact, and I observed that
the latency of the same function is smaller than before when using BPF.

Committer testing:

# strace -e bpf perf ftrace latency -b -T __handle_mm_fault -a sleep 1
bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7fff51914e00, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 3
bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\20\0\0\0\20\0\0\0\5\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=45, btf_log_size=0, btf_log_level=0}, 128) = 3
bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\t\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=81, btf_log_size=0, btf_log_level=0}, 128) = 3
bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\08\0\0\08\0\0\0\t\0\0\0\0\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=89, btf_log_size=0, btf_log_level=0}, 128) = 3
bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\f\0\0\0\f\0\0\0\7\0\0\0\1\0\0\0\0\0\0\20"..., btf_log_buf=NULL, btf_size=43, btf_log_size=0, btf_log_level=0}, 128) = 3
bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\t\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=81, btf_log_size=0, btf_log_level=0}, 128) = 3
bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\5\0\0\0\0\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=77, btf_log_size=0, btf_log_level=0}, 128) = -1 EINVAL (Invalid argument)
bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\350\2\0\0\350\2\0\0\353\2\0\0\0\0\0\0\0\0\0\2"..., btf_log_buf=NULL, btf_size=1515, btf_log_size=0, btf_log_level=0}, 128) = 3
bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=32, max_entries=1, map_flags=0, inner_map_fd=0, map_name="", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4
bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=5, insns=0x7fff51914c30, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 5
bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=4, max_entries=1, map_flags=BPF_F_MMAPABLE, inner_map_fd=0, map_name="", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4
bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7fff51914a80, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="test", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 4
bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=8, value_size=8, max_entries=10000, map_flags=0, inner_map_fd=0, map_name="functime", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4
bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=1, max_entries=1, map_flags=0, inner_map_fd=0, map_name="cpu_filter", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 5
bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=1, max_entries=1, map_flags=0, inner_map_fd=0, map_name="task_filter", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 7
bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_PERCPU_ARRAY, key_size=4, value_size=8, max_entries=22, map_flags=0, inner_map_fd=0, map_name="latency", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 8
bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=4, max_entries=1, map_flags=BPF_F_MMAPABLE, inner_map_fd=0, map_name="func_lat.bss", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=30, btf_vmlinux_value_type_id=0}, 128) = 9
bpf(BPF_MAP_UPDATE_ELEM, {map_fd=9, key=0x7fff51914c40, value=0x7f6e99be2000, flags=BPF_ANY}, 128) = 0
bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=18, insns=0x11e4160, license="", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(5, 14, 16), prog_flags=0, prog_name="func_begin", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x11dfc50, func_info_cnt=1, line_info_rec_size=16, line_info=0x11e04c0, line_info_cnt=9, attach_btf_id=0, attach_prog_fd=0}, 128) = 10
bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=99, insns=0x11ded70, license="", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(5, 14, 16), prog_flags=0, prog_name="func_end", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x11dfc70, func_info_cnt=1, line_info_rec_size=16, line_info=0x11f6e10, line_info_cnt=20, attach_btf_id=0, attach_prog_fd=0}, 128) = 11
bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_TRACEPOINT, insn_cnt=2, insns=0x7fff51914a80, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 13
bpf(BPF_LINK_CREATE, {link_create={prog_fd=13, target_fd=-1, attach_type=0x29 /* BPF_??? */, flags=0}}, 128) = -1 EINVAL (Invalid argument)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=1699992, si_uid=0, si_status=0, si_utime=0, si_stime=0} ---
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0
# DURATION | COUNT | GRAPH |
0 - 1 us | 52 | ################### |
1 - 2 us | 36 | ############# |
2 - 4 us | 24 | ######### |
4 - 8 us | 7 | ## |
8 - 16 us | 1 | |
16 - 32 us | 0 | |
32 - 64 us | 0 | |
64 - 128 us | 0 | |
128 - 256 us | 0 | |
256 - 512 us | 0 | |
512 - 1024 us | 0 | |
1 - 2 ms | 0 | |
2 - 4 ms | 0 | |
4 - 8 ms | 0 | |
8 - 16 ms | 0 | |
16 - 32 ms | 0 | |
32 - 64 ms | 0 | |
64 - 128 ms | 0 | |
128 - 256 ms | 0 | |
256 - 512 ms | 0 | |
512 - 1024 ms | 0 | |
1 - ... s | 0 | |
+++ exited with 0 +++
#

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Changbin Du <changbin.du@gmail.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20211215185154.360314-5-namhyung@kernel.org
[ Added missing util/cpumap.h include and removed unused 'fd' variable ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Namhyung Kim and committed by
Arnaldo Carvalho de Melo
177f4eac 53be5028

+384 -71
+1 -1
tools/perf/Makefile.perf
··· 1041 1041 SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) 1042 1042 SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h 1043 1043 SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h 1044 - SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h 1044 + SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h 1045 1045 1046 1046 $(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): 1047 1047 $(Q)$(MKDIR) -p $@
+96 -70
tools/perf/builtin-ftrace.c
··· 30 30 #include "strfilter.h" 31 31 #include "util/cap.h" 32 32 #include "util/config.h" 33 + #include "util/ftrace.h" 33 34 #include "util/units.h" 34 35 #include "util/parse-sublevel-options.h" 35 36 36 37 #define DEFAULT_TRACER "function_graph" 37 - 38 - struct perf_ftrace { 39 - struct evlist *evlist; 40 - struct target target; 41 - const char *tracer; 42 - struct list_head filters; 43 - struct list_head notrace; 44 - struct list_head graph_funcs; 45 - struct list_head nograph_funcs; 46 - int graph_depth; 47 - unsigned long percpu_buffer_size; 48 - bool inherit; 49 - int func_stack_trace; 50 - int func_irq_info; 51 - int graph_nosleep_time; 52 - int graph_noirqs; 53 - int graph_verbose; 54 - int graph_thresh; 55 - unsigned int initial_delay; 56 - }; 57 - 58 - struct filter_entry { 59 - struct list_head list; 60 - char name[]; 61 - }; 62 38 63 39 static volatile int workload_exec_errno; 64 40 static bool done; ··· 680 704 return (done && !workload_exec_errno) ? 0 : -1; 681 705 } 682 706 683 - #define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */ 684 - 685 707 static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf) 686 708 { 687 709 char *p, *q; ··· 790 816 791 817 } 792 818 793 - static int __cmd_latency(struct perf_ftrace *ftrace) 819 + static int prepare_func_latency(struct perf_ftrace *ftrace) 794 820 { 795 821 char *trace_file; 822 + int fd; 823 + 824 + if (ftrace->target.use_bpf) 825 + return perf_ftrace__latency_prepare_bpf(ftrace); 826 + 827 + if (reset_tracing_files(ftrace) < 0) { 828 + pr_err("failed to reset ftrace\n"); 829 + return -1; 830 + } 831 + 832 + /* reset ftrace buffer */ 833 + if (write_tracing_file("trace", "0") < 0) 834 + return -1; 835 + 836 + if (set_tracing_options(ftrace) < 0) 837 + return -1; 838 + 839 + /* force to use the function_graph tracer to track duration */ 840 + if (write_tracing_file("current_tracer", "function_graph") < 0) { 841 + pr_err("failed to set current_tracer to 
function_graph\n"); 842 + return -1; 843 + } 844 + 845 + trace_file = get_tracing_file("trace_pipe"); 846 + if (!trace_file) { 847 + pr_err("failed to open trace_pipe\n"); 848 + return -1; 849 + } 850 + 851 + fd = open(trace_file, O_RDONLY); 852 + if (fd < 0) 853 + pr_err("failed to open trace_pipe\n"); 854 + 855 + put_tracing_file(trace_file); 856 + return fd; 857 + } 858 + 859 + static int start_func_latency(struct perf_ftrace *ftrace) 860 + { 861 + if (ftrace->target.use_bpf) 862 + return perf_ftrace__latency_start_bpf(ftrace); 863 + 864 + if (write_tracing_file("tracing_on", "1") < 0) { 865 + pr_err("can't enable tracing\n"); 866 + return -1; 867 + } 868 + 869 + return 0; 870 + } 871 + 872 + static int stop_func_latency(struct perf_ftrace *ftrace) 873 + { 874 + if (ftrace->target.use_bpf) 875 + return perf_ftrace__latency_stop_bpf(ftrace); 876 + 877 + write_tracing_file("tracing_on", "0"); 878 + return 0; 879 + } 880 + 881 + static int read_func_latency(struct perf_ftrace *ftrace, int buckets[]) 882 + { 883 + if (ftrace->target.use_bpf) 884 + return perf_ftrace__latency_read_bpf(ftrace, buckets); 885 + 886 + return 0; 887 + } 888 + 889 + static int cleanup_func_latency(struct perf_ftrace *ftrace) 890 + { 891 + if (ftrace->target.use_bpf) 892 + return perf_ftrace__latency_cleanup_bpf(ftrace); 893 + 894 + reset_tracing_files(ftrace); 895 + return 0; 896 + } 897 + 898 + static int __cmd_latency(struct perf_ftrace *ftrace) 899 + { 796 900 int trace_fd; 797 901 char buf[4096]; 798 902 char line[256]; ··· 891 839 return -1; 892 840 } 893 841 894 - if (reset_tracing_files(ftrace) < 0) { 895 - pr_err("failed to reset ftrace\n"); 842 + trace_fd = prepare_func_latency(ftrace); 843 + if (trace_fd < 0) 896 844 goto out; 897 - } 898 - 899 - /* reset ftrace buffer */ 900 - if (write_tracing_file("trace", "0") < 0) 901 - goto out; 902 - 903 - if (set_tracing_options(ftrace) < 0) 904 - goto out_reset; 905 - 906 - /* force to use the function_graph tracer to track duration */ 
907 - if (write_tracing_file("current_tracer", "function_graph") < 0) { 908 - pr_err("failed to set current_tracer to function_graph\n"); 909 - goto out_reset; 910 - } 911 - 912 - trace_file = get_tracing_file("trace_pipe"); 913 - if (!trace_file) { 914 - pr_err("failed to open trace_pipe\n"); 915 - goto out_reset; 916 - } 917 - 918 - trace_fd = open(trace_file, O_RDONLY); 919 - 920 - put_tracing_file(trace_file); 921 - 922 - if (trace_fd < 0) { 923 - pr_err("failed to open trace_pipe\n"); 924 - goto out_reset; 925 - } 926 845 927 846 fcntl(trace_fd, F_SETFL, O_NONBLOCK); 928 847 pollfd.fd = trace_fd; 929 848 930 - if (write_tracing_file("tracing_on", "1") < 0) { 931 - pr_err("can't enable tracing\n"); 932 - goto out_close_fd; 933 - } 849 + if (start_func_latency(ftrace) < 0) 850 + goto out; 934 851 935 852 evlist__start_workload(ftrace->evlist); 936 853 ··· 917 896 } 918 897 } 919 898 920 - write_tracing_file("tracing_on", "0"); 899 + stop_func_latency(ftrace); 921 900 922 901 if (workload_exec_errno) { 923 902 const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf)); 924 903 pr_err("workload failed: %s\n", emsg); 925 - goto out_close_fd; 904 + goto out; 926 905 } 927 906 928 907 /* read remaining buffer contents */ 929 - while (true) { 908 + while (!ftrace->target.use_bpf) { 930 909 int n = read(trace_fd, buf, sizeof(buf) - 1); 931 910 if (n <= 0) 932 911 break; 933 912 make_histogram(buckets, buf, n, line); 934 913 } 935 914 915 + read_func_latency(ftrace, buckets); 916 + 936 917 display_histogram(buckets); 937 918 938 - out_close_fd: 939 - close(trace_fd); 940 - out_reset: 941 - reset_tracing_files(ftrace); 942 919 out: 920 + close(trace_fd); 921 + cleanup_func_latency(ftrace); 922 + 943 923 return (done && !workload_exec_errno) ? 
0 : -1; 944 924 } 945 925 ··· 1166 1144 const struct option latency_options[] = { 1167 1145 OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", 1168 1146 "Show latency of given function", parse_filter_func), 1147 + #ifdef HAVE_BPF_SKEL 1148 + OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf, 1149 + "Use BPF to measure function latency"), 1150 + #endif 1169 1151 OPT_PARENT(common_options), 1170 1152 }; 1171 1153 const struct option *options = ftrace_options;
+1
tools/perf/util/Build
··· 144 144 perf-$(CONFIG_LIBBPF) += bpf_map.o 145 145 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o 146 146 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o 147 + perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o 147 148 perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o 148 149 perf-$(CONFIG_LIBELF) += symbol-elf.o 149 150 perf-$(CONFIG_LIBELF) += probe-file.o
+112
tools/perf/util/bpf_ftrace.c
··· 1 + #include <stdio.h> 2 + #include <fcntl.h> 3 + #include <stdint.h> 4 + #include <stdlib.h> 5 + 6 + #include <linux/err.h> 7 + 8 + #include "util/ftrace.h" 9 + #include "util/cpumap.h" 10 + #include "util/debug.h" 11 + #include "util/bpf_counter.h" 12 + 13 + #include "util/bpf_skel/func_latency.skel.h" 14 + 15 + static struct func_latency_bpf *skel; 16 + 17 + int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) 18 + { 19 + int err; 20 + struct filter_entry *func; 21 + 22 + if (!list_is_singular(&ftrace->filters)) { 23 + pr_err("ERROR: %s target function(s).\n", 24 + list_empty(&ftrace->filters) ? "No" : "Too many"); 25 + return -1; 26 + } 27 + 28 + func = list_first_entry(&ftrace->filters, struct filter_entry, list); 29 + 30 + skel = func_latency_bpf__open(); 31 + if (!skel) { 32 + pr_err("Failed to open func latency skeleton\n"); 33 + return -1; 34 + } 35 + 36 + set_max_rlimit(); 37 + 38 + err = func_latency_bpf__load(skel); 39 + if (err) { 40 + pr_err("Failed to load func latency skeleton\n"); 41 + goto out; 42 + } 43 + 44 + skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, 45 + false, func->name); 46 + if (IS_ERR(skel->links.func_begin)) { 47 + pr_err("Failed to attach fentry program\n"); 48 + err = PTR_ERR(skel->links.func_begin); 49 + goto out; 50 + } 51 + 52 + skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, 53 + true, func->name); 54 + if (IS_ERR(skel->links.func_end)) { 55 + pr_err("Failed to attach fexit program\n"); 56 + err = PTR_ERR(skel->links.func_end); 57 + goto out; 58 + } 59 + 60 + /* XXX: we don't actually use this fd - just for poll() */ 61 + return open("/dev/null", O_RDONLY); 62 + 63 + out: 64 + return err; 65 + } 66 + 67 + int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused) 68 + { 69 + skel->bss->enabled = 1; 70 + return 0; 71 + } 72 + 73 + int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) 74 + { 75 + skel->bss->enabled = 0; 
76 + return 0; 77 + } 78 + 79 + int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, 80 + int buckets[]) 81 + { 82 + int i, fd, err; 83 + u32 idx; 84 + u64 *hist; 85 + int ncpus = cpu__max_cpu(); 86 + 87 + fd = bpf_map__fd(skel->maps.latency); 88 + 89 + hist = calloc(ncpus, sizeof(*hist)); 90 + if (hist == NULL) 91 + return -ENOMEM; 92 + 93 + for (idx = 0; idx < NUM_BUCKET; idx++) { 94 + err = bpf_map_lookup_elem(fd, &idx, hist); 95 + if (err) { 96 + buckets[idx] = 0; 97 + continue; 98 + } 99 + 100 + for (i = 0; i < ncpus; i++) 101 + buckets[idx] += hist[i]; 102 + } 103 + 104 + free(hist); 105 + return 0; 106 + } 107 + 108 + int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused) 109 + { 110 + func_latency_bpf__destroy(skel); 111 + return 0; 112 + }
+93
tools/perf/util/bpf_skel/func_latency.bpf.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + // Copyright (c) 2021 Google 3 + #include "vmlinux.h" 4 + #include <bpf/bpf_helpers.h> 5 + #include <bpf/bpf_tracing.h> 6 + 7 + // This should be in sync with "util/ftrace.h" 8 + #define NUM_BUCKET 22 9 + 10 + struct { 11 + __uint(type, BPF_MAP_TYPE_HASH); 12 + __uint(key_size, sizeof(__u64)); 13 + __uint(value_size, sizeof(__u64)); 14 + __uint(max_entries, 10000); 15 + } functime SEC(".maps"); 16 + 17 + struct { 18 + __uint(type, BPF_MAP_TYPE_HASH); 19 + __uint(key_size, sizeof(__u32)); 20 + __uint(value_size, sizeof(__u8)); 21 + __uint(max_entries, 1); 22 + } cpu_filter SEC(".maps"); 23 + 24 + struct { 25 + __uint(type, BPF_MAP_TYPE_HASH); 26 + __uint(key_size, sizeof(__u32)); 27 + __uint(value_size, sizeof(__u8)); 28 + __uint(max_entries, 1); 29 + } task_filter SEC(".maps"); 30 + 31 + struct { 32 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 33 + __uint(key_size, sizeof(__u32)); 34 + __uint(value_size, sizeof(__u64)); 35 + __uint(max_entries, NUM_BUCKET); 36 + } latency SEC(".maps"); 37 + 38 + 39 + int enabled = 0; 40 + 41 + SEC("kprobe/func") 42 + int BPF_PROG(func_begin) 43 + { 44 + __u64 key, now; 45 + 46 + if (!enabled) 47 + return 0; 48 + 49 + key = bpf_get_current_pid_tgid(); 50 + now = bpf_ktime_get_ns(); 51 + 52 + // overwrite timestamp for nested functions 53 + bpf_map_update_elem(&functime, &key, &now, BPF_ANY); 54 + return 0; 55 + } 56 + 57 + SEC("kretprobe/func") 58 + int BPF_PROG(func_end) 59 + { 60 + __u64 tid; 61 + __u64 *start; 62 + 63 + if (!enabled) 64 + return 0; 65 + 66 + tid = bpf_get_current_pid_tgid(); 67 + 68 + start = bpf_map_lookup_elem(&functime, &tid); 69 + if (start) { 70 + __s64 delta = bpf_ktime_get_ns() - *start; 71 + __u32 key; 72 + __u64 *hist; 73 + 74 + bpf_map_delete_elem(&functime, &tid); 75 + 76 + if (delta < 0) 77 + return 0; 78 + 79 + // calculate index using delta in usec 80 + for (key = 0; key < (NUM_BUCKET - 1); key++) { 81 + if (delta < ((1000UL) << 
key)) 82 + break; 83 + } 84 + 85 + hist = bpf_map_lookup_elem(&latency, &key); 86 + if (!hist) 87 + return 0; 88 + 89 + *hist += 1; 90 + } 91 + 92 + return 0; 93 + }
+81
tools/perf/util/ftrace.h
··· 1 + #ifndef __PERF_FTRACE_H__ 2 + #define __PERF_FTRACE_H__ 3 + 4 + #include <linux/list.h> 5 + 6 + #include "target.h" 7 + 8 + struct evlist; 9 + 10 + struct perf_ftrace { 11 + struct evlist *evlist; 12 + struct target target; 13 + const char *tracer; 14 + struct list_head filters; 15 + struct list_head notrace; 16 + struct list_head graph_funcs; 17 + struct list_head nograph_funcs; 18 + unsigned long percpu_buffer_size; 19 + bool inherit; 20 + int graph_depth; 21 + int func_stack_trace; 22 + int func_irq_info; 23 + int graph_nosleep_time; 24 + int graph_noirqs; 25 + int graph_verbose; 26 + int graph_thresh; 27 + unsigned int initial_delay; 28 + }; 29 + 30 + struct filter_entry { 31 + struct list_head list; 32 + char name[]; 33 + }; 34 + 35 + #define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */ 36 + 37 + #ifdef HAVE_BPF_SKEL 38 + 39 + int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace); 40 + int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace); 41 + int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace); 42 + int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, 43 + int buckets[]); 44 + int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace); 45 + 46 + #else /* !HAVE_BPF_SKEL */ 47 + 48 + static inline int 49 + perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused) 50 + { 51 + return -1; 52 + } 53 + 54 + static inline int 55 + perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused) 56 + { 57 + return -1; 58 + } 59 + 60 + static inline int 61 + perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) 62 + { 63 + return -1; 64 + } 65 + 66 + static inline int 67 + perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, 68 + int buckets[] __maybe_unused) 69 + { 70 + return -1; 71 + } 72 + 73 + static inline int 74 + perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused) 75 + { 76 + return -1; 77 + } 78 + 79 + 
#endif /* HAVE_BPF_SKEL */ 80 + 81 + #endif /* __PERF_FTRACE_H__ */