Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf ftrace latency: Add -e option to measure time between two events

In addition to the function latency, it can measure events latencies.
Some kernel tracepoints are paired and it's meaningful to measure how
long it takes between the two events. The latency is tracked for the
same thread.

Currently it only uses BPF to do the work but it can be lifted later.
Instead of having a separate BPF program for each tracepoint, it only
uses generic 'event_begin' and 'event_end' programs to attach to any
(raw) tracepoints.

$ sudo perf ftrace latency -a -b --hide-empty \
-e i915_request_wait_begin,i915_request_wait_end -- sleep 1
# DURATION | COUNT | GRAPH |
256 - 512 us | 4 | ###### |
2 - 4 ms | 2 | ### |
4 - 8 ms | 12 | ################### |
8 - 16 ms | 10 | ################ |

# statistics (in usec)
total time: 194915
avg time: 6961
max time: 12855
min time: 373
count: 28

Reviewed-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250714052143.342851-1-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

+205 -75
+6
tools/perf/Documentation/perf-ftrace.txt
··· 139 139 Set the function name to get the histogram. Unlike perf ftrace trace, 140 140 it only allows single function to calculate the histogram. 141 141 142 + -e:: 143 + --events=:: 144 + Set the pair of events to get the histogram. The histogram is calculated 145 + by the time difference between the two events from the same thread. This 146 + requires -b/--use-bpf option. 147 + 142 148 -b:: 143 149 --use-bpf:: 144 150 Use BPF to measure function latency instead of using the ftrace (it
+48 -2
tools/perf/builtin-ftrace.c
··· 1549 1549 } 1550 1550 } 1551 1551 1552 + static int parse_filter_event(const struct option *opt, const char *str, 1553 + int unset __maybe_unused) 1554 + { 1555 + struct list_head *head = opt->value; 1556 + struct filter_entry *entry; 1557 + char *s, *p; 1558 + int ret = -ENOMEM; 1559 + 1560 + s = strdup(str); 1561 + if (s == NULL) 1562 + return -ENOMEM; 1563 + 1564 + while ((p = strsep(&s, ",")) != NULL) { 1565 + entry = malloc(sizeof(*entry) + strlen(p) + 1); 1566 + if (entry == NULL) 1567 + goto out; 1568 + 1569 + strcpy(entry->name, p); 1570 + list_add_tail(&entry->list, head); 1571 + } 1572 + ret = 0; 1573 + 1574 + out: 1575 + free(s); 1576 + return ret; 1577 + } 1578 + 1552 1579 static int parse_buffer_size(const struct option *opt, 1553 1580 const char *str, int unset) 1554 1581 { ··· 1738 1711 const struct option latency_options[] = { 1739 1712 OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", 1740 1713 "Show latency of given function", parse_filter_func), 1714 + OPT_CALLBACK('e', "events", &ftrace.event_pair, "event1,event2", 1715 + "Show latency between the two events", parse_filter_event), 1741 1716 #ifdef HAVE_BPF_SKEL 1742 1717 OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf, 1743 1718 "Use BPF to measure function latency"), ··· 1792 1763 INIT_LIST_HEAD(&ftrace.notrace); 1793 1764 INIT_LIST_HEAD(&ftrace.graph_funcs); 1794 1765 INIT_LIST_HEAD(&ftrace.nograph_funcs); 1766 + INIT_LIST_HEAD(&ftrace.event_pair); 1795 1767 1796 1768 signal(SIGINT, sig_handler); 1797 1769 signal(SIGUSR1, sig_handler); ··· 1847 1817 cmd_func = __cmd_ftrace; 1848 1818 break; 1849 1819 case PERF_FTRACE_LATENCY: 1850 - if (list_empty(&ftrace.filters)) { 1851 - pr_err("Should provide a function to measure\n"); 1820 + if (list_empty(&ftrace.filters) && list_empty(&ftrace.event_pair)) { 1821 + pr_err("Should provide a function or events to measure\n"); 1852 1822 parse_options_usage(ftrace_usage, options, "T", 1); 1823 + parse_options_usage(NULL, options, "e", 1); 1824 
+ ret = -EINVAL; 1825 + goto out_delete_filters; 1826 + } 1827 + if (!list_empty(&ftrace.filters) && !list_empty(&ftrace.event_pair)) { 1828 + pr_err("Please specify either of function or events\n"); 1829 + parse_options_usage(ftrace_usage, options, "T", 1); 1830 + parse_options_usage(NULL, options, "e", 1); 1831 + ret = -EINVAL; 1832 + goto out_delete_filters; 1833 + } 1834 + if (!list_empty(&ftrace.event_pair) && !ftrace.target.use_bpf) { 1835 + pr_err("Event processing needs BPF\n"); 1836 + parse_options_usage(ftrace_usage, options, "b", 1); 1837 + parse_options_usage(NULL, options, "e", 1); 1853 1838 ret = -EINVAL; 1854 1839 goto out_delete_filters; 1855 1840 } ··· 1955 1910 delete_filter_func(&ftrace.notrace); 1956 1911 delete_filter_func(&ftrace.graph_funcs); 1957 1912 delete_filter_func(&ftrace.nograph_funcs); 1913 + delete_filter_func(&ftrace.event_pair); 1958 1914 1959 1915 return ret; 1960 1916 }
+55 -20
tools/perf/util/bpf_ftrace.c
··· 21 21 { 22 22 int fd, err; 23 23 int i, ncpus = 1, ntasks = 1; 24 - struct filter_entry *func; 24 + struct filter_entry *func = NULL; 25 25 26 - if (!list_is_singular(&ftrace->filters)) { 27 - pr_err("ERROR: %s target function(s).\n", 28 - list_empty(&ftrace->filters) ? "No" : "Too many"); 29 - return -1; 26 + if (!list_empty(&ftrace->filters)) { 27 + if (!list_is_singular(&ftrace->filters)) { 28 + pr_err("ERROR: Too many target functions.\n"); 29 + return -1; 30 + } 31 + func = list_first_entry(&ftrace->filters, struct filter_entry, list); 32 + } else { 33 + int count = 0; 34 + struct list_head *pos; 35 + 36 + list_for_each(pos, &ftrace->event_pair) 37 + count++; 38 + 39 + if (count != 2) { 40 + pr_err("ERROR: Needs two target events.\n"); 41 + return -1; 42 + } 30 43 } 31 - 32 - func = list_first_entry(&ftrace->filters, struct filter_entry, list); 33 44 34 45 skel = func_latency_bpf__open(); 35 46 if (!skel) { ··· 104 93 105 94 skel->bss->min = INT64_MAX; 106 95 107 - skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, 108 - false, func->name); 109 - if (IS_ERR(skel->links.func_begin)) { 110 - pr_err("Failed to attach fentry program\n"); 111 - err = PTR_ERR(skel->links.func_begin); 112 - goto out; 113 - } 96 + if (func) { 97 + skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, 98 + false, func->name); 99 + if (IS_ERR(skel->links.func_begin)) { 100 + pr_err("Failed to attach fentry program\n"); 101 + err = PTR_ERR(skel->links.func_begin); 102 + goto out; 103 + } 114 104 115 - skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, 116 - true, func->name); 117 - if (IS_ERR(skel->links.func_end)) { 118 - pr_err("Failed to attach fexit program\n"); 119 - err = PTR_ERR(skel->links.func_end); 120 - goto out; 105 + skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, 106 + true, func->name); 107 + if (IS_ERR(skel->links.func_end)) { 108 + pr_err("Failed to attach fexit program\n"); 
109 + err = PTR_ERR(skel->links.func_end); 110 + goto out; 111 + } 112 + } else { 113 + struct filter_entry *event; 114 + 115 + event = list_first_entry(&ftrace->event_pair, struct filter_entry, list); 116 + 117 + skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin, 118 + event->name); 119 + if (IS_ERR(skel->links.event_begin)) { 120 + pr_err("Failed to attach first tracepoint program\n"); 121 + err = PTR_ERR(skel->links.event_begin); 122 + goto out; 123 + } 124 + 125 + event = list_next_entry(event, list); 126 + 127 + skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end, 128 + event->name); 129 + if (IS_ERR(skel->links.event_end)) { 130 + pr_err("Failed to attach second tracepoint program\n"); 131 + err = PTR_ERR(skel->links.event_end); 132 + goto out; 133 + } 121 134 } 122 135 123 136 /* XXX: we don't actually use this fd - just for poll() */
+95 -53
tools/perf/util/bpf_skel/func_latency.bpf.c
··· 52 52 const volatile unsigned int max_latency; 53 53 const volatile unsigned int bucket_num = NUM_BUCKET; 54 54 55 - SEC("kprobe/func") 56 - int BPF_PROG(func_begin) 55 + static bool can_record(void) 57 56 { 58 - __u64 key, now; 59 - 60 - if (!enabled) 61 - return 0; 62 - 63 - key = bpf_get_current_pid_tgid(); 64 - 65 57 if (has_cpu) { 66 58 __u32 cpu = bpf_get_smp_processor_id(); 67 59 __u8 *ok; 68 60 69 61 ok = bpf_map_lookup_elem(&cpu_filter, &cpu); 70 62 if (!ok) 71 - return 0; 63 + return false; 72 64 } 73 65 74 66 if (has_task) { 75 - __u32 pid = key & 0xffffffff; 67 + __u32 pid = bpf_get_current_pid_tgid(); 76 68 __u8 *ok; 77 69 78 70 ok = bpf_map_lookup_elem(&task_filter, &pid); 79 71 if (!ok) 80 - return 0; 72 + return false; 73 + } 74 + return true; 75 + } 76 + 77 + static void update_latency(__s64 delta) 78 + { 79 + __u64 val = delta; 80 + __u32 key = 0; 81 + __u64 *hist; 82 + __u64 cmp_base = use_nsec ? 1 : 1000; 83 + 84 + if (delta < 0) 85 + return; 86 + 87 + if (bucket_range != 0) { 88 + val = delta / cmp_base; 89 + 90 + if (min_latency > 0) { 91 + if (val > min_latency) 92 + val -= min_latency; 93 + else 94 + goto do_lookup; 95 + } 96 + 97 + // Less than 1 unit (ms or ns), or, in the future, 98 + // than the min latency desired. 99 + if (val > 0) { // 1st entry: [ 1 unit .. 
bucket_range units ) 100 + key = val / bucket_range + 1; 101 + if (key >= bucket_num) 102 + key = bucket_num - 1; 103 + } 104 + 105 + goto do_lookup; 106 + } 107 + // calculate index using delta 108 + for (key = 0; key < (bucket_num - 1); key++) { 109 + if (delta < (cmp_base << key)) 110 + break; 81 111 } 82 112 113 + do_lookup: 114 + hist = bpf_map_lookup_elem(&latency, &key); 115 + if (!hist) 116 + return; 117 + 118 + __sync_fetch_and_add(hist, 1); 119 + 120 + __sync_fetch_and_add(&total, delta); // always in nsec 121 + __sync_fetch_and_add(&count, 1); 122 + 123 + if (delta > max) 124 + max = delta; 125 + if (delta < min) 126 + min = delta; 127 + } 128 + 129 + SEC("kprobe/func") 130 + int BPF_PROG(func_begin) 131 + { 132 + __u64 key, now; 133 + 134 + if (!enabled || !can_record()) 135 + return 0; 136 + 137 + key = bpf_get_current_pid_tgid(); 83 138 now = bpf_ktime_get_ns(); 84 139 85 140 // overwrite timestamp for nested functions ··· 147 92 { 148 93 __u64 tid; 149 94 __u64 *start; 150 - __u64 cmp_base = use_nsec ? 1 : 1000; 151 95 152 96 if (!enabled) 153 97 return 0; ··· 155 101 156 102 start = bpf_map_lookup_elem(&functime, &tid); 157 103 if (start) { 158 - __s64 delta = bpf_ktime_get_ns() - *start; 159 - __u64 val = delta; 160 - __u32 key = 0; 161 - __u64 *hist; 162 - 104 + update_latency(bpf_ktime_get_ns() - *start); 163 105 bpf_map_delete_elem(&functime, &tid); 106 + } 164 107 165 - if (delta < 0) 166 - return 0; 108 + return 0; 109 + } 167 110 168 - if (bucket_range != 0) { 169 - val = delta / cmp_base; 111 + SEC("raw_tp") 112 + int BPF_PROG(event_begin) 113 + { 114 + __u64 key, now; 170 115 171 - if (min_latency > 0) { 172 - if (val > min_latency) 173 - val -= min_latency; 174 - else 175 - goto do_lookup; 176 - } 116 + if (!enabled || !can_record()) 117 + return 0; 177 118 178 - // Less than 1 unit (ms or ns), or, in the future, 179 - // than the min latency desired. 180 - if (val > 0) { // 1st entry: [ 1 unit .. 
bucket_range units ) 181 - key = val / bucket_range + 1; 182 - if (key >= bucket_num) 183 - key = bucket_num - 1; 184 - } 119 + key = bpf_get_current_pid_tgid(); 120 + now = bpf_ktime_get_ns(); 185 121 186 - goto do_lookup; 187 - } 188 - // calculate index using delta 189 - for (key = 0; key < (bucket_num - 1); key++) { 190 - if (delta < (cmp_base << key)) 191 - break; 192 - } 122 + // overwrite timestamp for nested events 123 + bpf_map_update_elem(&functime, &key, &now, BPF_ANY); 124 + return 0; 125 + } 193 126 194 - do_lookup: 195 - hist = bpf_map_lookup_elem(&latency, &key); 196 - if (!hist) 197 - return 0; 127 + SEC("raw_tp") 128 + int BPF_PROG(event_end) 129 + { 130 + __u64 tid; 131 + __u64 *start; 198 132 199 - __sync_fetch_and_add(hist, 1); 133 + if (!enabled) 134 + return 0; 200 135 201 - __sync_fetch_and_add(&total, delta); // always in nsec 202 - __sync_fetch_and_add(&count, 1); 136 + tid = bpf_get_current_pid_tgid(); 203 137 204 - if (delta > max) 205 - max = delta; 206 - if (delta < min) 207 - min = delta; 138 + start = bpf_map_lookup_elem(&functime, &tid); 139 + if (start) { 140 + update_latency(bpf_ktime_get_ns() - *start); 141 + bpf_map_delete_elem(&functime, &tid); 208 142 } 209 143 210 144 return 0;
+1
tools/perf/util/ftrace.h
··· 17 17 struct list_head notrace; 18 18 struct list_head graph_funcs; 19 19 struct list_head nograph_funcs; 20 + struct list_head event_pair; 20 21 struct hashmap *profile_hash; 21 22 unsigned long percpu_buffer_size; 22 23 bool inherit;