tracing: Fix irqsoff and wakeup latency tracers when using function graph

The function graph infrastructure has become generic so that kretprobes and BPF
can use it along with the function graph tracer itself. Some of the
infrastructure was specific to function graph tracing, such as recording
the calltime and return time of the functions. Calling the clock code on a
high volume function does add overhead, so the calculation of the calltime
was removed from the generic code and placed into the function graph
tracer itself. The other users, which do not need that timestamp, no
longer incur the overhead.
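
A minimal sketch of that pattern (the callback names below are illustrative,
not code from this patch, and the callback signatures are abridged and may
differ between kernel versions): a fgraph_ops user that wants per-call timing
reserves space for a timestamp in the fgraph storage on entry and retrieves
it on return, while users that do not need the timestamp never read the clock.

  #include <linux/ftrace.h>
  #include <linux/trace_clock.h>

  static int my_graph_entry(struct ftrace_graph_ent *trace,
			    struct fgraph_ops *gops)
  {
	u64 *calltime;

	/* Reserve per-call storage tied to this fgraph_ops instance */
	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
	if (!calltime)
		return 0;	/* no room: do not trace this call */

	*calltime = trace_clock_local();
	return 1;
  }

  static void my_graph_return(struct ftrace_graph_ret *trace,
			      struct fgraph_ops *gops)
  {
	u64 *calltime;
	int size;

	/* Get back what the entry callback stored for this call */
	calltime = fgraph_retrieve_data(gops->idx, &size);
	if (!calltime)
		return;

	trace->calltime = *calltime;
  }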

The calltime field was still kept in the generic return entry structure,
and the function graph return entry callback filled it in, as that
structure is passed to other code.
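
For reference, that return entry structure looks roughly like this (abridged
and version dependent; the real definition lives in include/linux/ftrace.h):

  struct ftrace_graph_ret {
	unsigned long		func;		/* the function that returned */
	int			depth;
	unsigned int		overrun;	/* depth-limit overruns */
	unsigned long long	calltime;	/* no longer set by the generic code */
	unsigned long long	rettime;
  };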

But this broke both the irqsoff and wakeup latency tracers, as they still
depend on the trace structure containing the calltime when the
display-graph option is set, since they use some of the same functions
that the function graph tracer uses. Now the calltime was never set and
remained zero. This caused the calculated function time to be the absolute
value of the return timestamp rather than the length of the function.

# cd /sys/kernel/tracing
# echo 1 > options/display-graph
# echo irqsoff > current_tracer

The tracers went from:

# REL TIME CPU TASK/PID |||| DURATION FUNCTION CALLS
# | | | | |||| | | | | | |
0 us | 4) <idle>-0 | d..1. | 0.000 us | irqentry_enter();
3 us | 4) <idle>-0 | d..2. | | irq_enter_rcu() {
4 us | 4) <idle>-0 | d..2. | 0.431 us | preempt_count_add();
5 us | 4) <idle>-0 | d.h2. | | tick_irq_enter() {
5 us | 4) <idle>-0 | d.h2. | 0.433 us | tick_check_oneshot_broadcast_this_cpu();
6 us | 4) <idle>-0 | d.h2. | 2.426 us | ktime_get();
9 us | 4) <idle>-0 | d.h2. | | tick_nohz_stop_idle() {
10 us | 4) <idle>-0 | d.h2. | 0.398 us | nr_iowait_cpu();
11 us | 4) <idle>-0 | d.h1. | 1.903 us | }
11 us | 4) <idle>-0 | d.h2. | | tick_do_update_jiffies64() {
12 us | 4) <idle>-0 | d.h2. | | _raw_spin_lock() {
12 us | 4) <idle>-0 | d.h2. | 0.360 us | preempt_count_add();
13 us | 4) <idle>-0 | d.h3. | 0.354 us | do_raw_spin_lock();
14 us | 4) <idle>-0 | d.h2. | 2.207 us | }
15 us | 4) <idle>-0 | d.h3. | 0.428 us | calc_global_load();
16 us | 4) <idle>-0 | d.h3. | | _raw_spin_unlock() {
16 us | 4) <idle>-0 | d.h3. | 0.380 us | do_raw_spin_unlock();
17 us | 4) <idle>-0 | d.h3. | 0.334 us | preempt_count_sub();
18 us | 4) <idle>-0 | d.h1. | 1.768 us | }
18 us | 4) <idle>-0 | d.h2. | | update_wall_time() {
[..]

To:

# REL TIME CPU TASK/PID |||| DURATION FUNCTION CALLS
# | | | | |||| | | | | | |
0 us | 5) <idle>-0 | d.s2. | 0.000 us | _raw_spin_lock_irqsave();
0 us | 5) <idle>-0 | d.s3. | 312159583 us | preempt_count_add();
2 us | 5) <idle>-0 | d.s4. | 312159585 us | do_raw_spin_lock();
3 us | 5) <idle>-0 | d.s4. | | _raw_spin_unlock() {
3 us | 5) <idle>-0 | d.s4. | 312159586 us | do_raw_spin_unlock();
4 us | 5) <idle>-0 | d.s4. | 312159587 us | preempt_count_sub();
4 us | 5) <idle>-0 | d.s2. | 312159587 us | }
5 us | 5) <idle>-0 | d.s3. | | _raw_spin_lock() {
5 us | 5) <idle>-0 | d.s3. | 312159588 us | preempt_count_add();
6 us | 5) <idle>-0 | d.s4. | 312159589 us | do_raw_spin_lock();
7 us | 5) <idle>-0 | d.s3. | 312159590 us | }
8 us | 5) <idle>-0 | d.s4. | 312159591 us | calc_wheel_index();
9 us | 5) <idle>-0 | d.s4. | | enqueue_timer() {
9 us | 5) <idle>-0 | d.s4. | | wake_up_nohz_cpu() {
11 us | 5) <idle>-0 | d.s4. | | native_smp_send_reschedule() {
11 us | 5) <idle>-0 | d.s4. | 312171987 us | default_send_IPI_single_phys();
12408 us | 5) <idle>-0 | d.s3. | 312171990 us | }
12408 us | 5) <idle>-0 | d.s3. | 312171991 us | }
12409 us | 5) <idle>-0 | d.s3. | 312171991 us | }

Here the time calculated for each function is the return timestamp minus
zero, not the time the function actually took to run.
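
Roughly where that bogus number comes from (a sketch, not the exact output
code): the graph output derives each printed duration from the two
timestamps carried in the return entry.

  static u64 graph_duration(struct ftrace_graph_ret *trace)
  {
	/*
	 * With calltime never filled in it stays 0, so the printed
	 * "duration" is the absolute return timestamp (e.g. the
	 * 312159583 us of uptime above) rather than the time the
	 * function actually took.
	 */
	return trace->rettime - trace->calltime;
  }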

Have these tracers also save the calltime in the fgraph data storage on
entry and retrieve it on return so that the correct timings are reported
again.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/20250113183124.61767419@gandalf.local.home
Fixes: f1f36e22bee9 ("ftrace: Have calltime be saved in the fgraph storage")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

 kernel/trace/trace_irqsoff.c      | 14 ++++++++++++++
 kernel/trace/trace_sched_wakeup.c | 14 ++++++++++++++
 2 files changed, 28 insertions(+)

--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
 ···
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	unsigned int trace_ctx;
+	u64 *calltime;
 	int ret;
 
 	if (ftrace_graph_ignore_func(gops, trace))
 ···
 	if (!func_prolog_dec(tr, &data, &flags))
 		return 0;
 
+	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
+	if (!calltime)
+		return 0;
+
+	*calltime = trace_clock_local();
+
 	trace_ctx = tracing_gen_ctx_flags(flags);
 	ret = __trace_graph_entry(tr, trace, trace_ctx);
 	atomic_dec(&data->disabled);
 ···
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	unsigned int trace_ctx;
+	u64 *calltime;
+	int size;
 
 	ftrace_graph_addr_finish(gops, trace);
 
 	if (!func_prolog_dec(tr, &data, &flags))
 		return;
+
+	calltime = fgraph_retrieve_data(gops->idx, &size);
+	if (!calltime)
+		return;
+	trace->calltime = *calltime;
 
 	trace_ctx = tracing_gen_ctx_flags(flags);
 	__trace_graph_return(tr, trace, trace_ctx);

--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
 ···
 	struct trace_array *tr = wakeup_trace;
 	struct trace_array_cpu *data;
 	unsigned int trace_ctx;
+	u64 *calltime;
 	int ret = 0;
 
 	if (ftrace_graph_ignore_func(gops, trace))
 ···
 	if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
 		return 0;
 
+	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
+	if (!calltime)
+		return 0;
+
+	*calltime = trace_clock_local();
+
 	ret = __trace_graph_entry(tr, trace, trace_ctx);
 	atomic_dec(&data->disabled);
 	preempt_enable_notrace();
 ···
 	struct trace_array *tr = wakeup_trace;
 	struct trace_array_cpu *data;
 	unsigned int trace_ctx;
+	u64 *calltime;
+	int size;
 
 	ftrace_graph_addr_finish(gops, trace);
 
 	if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
 		return;
+
+	calltime = fgraph_retrieve_data(gops->idx, &size);
+	if (!calltime)
+		return;
+	trace->calltime = *calltime;
 
 	__trace_graph_return(tr, trace, trace_ctx);
 	atomic_dec(&data->disabled);