tracing: Fix irqsoff and wakeup latency tracers when using function graph

The function graph infrastructure has become generic so that kretprobes and
BPF can use it along with the function graph tracer itself. Some of that
infrastructure was specific to function graph tracing, such as recording the
calltime and return time of each function, and calling the clock code on
high-volume functions adds real overhead. The calltime calculation was
therefore removed from the generic code and placed into the function graph
tracer itself, so that the other users, which do not need that timestamp,
no longer pay for it.
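
As context for the fix below, the fgraph per-callback storage works in
entry/return pairs: the entry handler reserves a slot on the fgraph shadow
stack and the return handler retrieves it. A minimal sketch (the my_graph_*
handler names are hypothetical; fgraph_reserve_data(), fgraph_retrieve_data()
and trace_clock_local() are the real interfaces used in the diff below):

static int my_graph_entry(struct ftrace_graph_ent *trace,
			  struct fgraph_ops *gops)
{
	u64 *calltime;

	/* Reserve per-call storage on the fgraph shadow stack */
	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
	if (!calltime)
		return 0;	/* no room: do not trace this call */

	*calltime = trace_clock_local();
	return 1;		/* trace this function's return too */
}

static void my_graph_return(struct ftrace_graph_ret *trace,
			    struct fgraph_ops *gops)
{
	int size;
	u64 *calltime = fgraph_retrieve_data(gops->idx, &size);

	/* Restore the timestamp saved by the matching entry handler */
	if (calltime)
		trace->calltime = *calltime;
}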

The calltime field was still kept in the generic return entry structure,
and the function graph return callback filled it in, since that structure
is passed to other code.

But this broke both the irqsoff and wakeup latency tracers, as they still
depended on the trace structure containing the calltime when the
display-graph option is set, because they use some of the same functions
that the function graph tracer uses. Now the calltime was never set and
remained zero, which made the computed function time the absolute value of
the return timestamp instead of the length of the function.

# cd /sys/kernel/tracing
# echo 1 > options/display-graph
# echo irqsoff > current_tracer

The tracers went from:

# REL TIME CPU TASK/PID |||| DURATION FUNCTION CALLS
# | | | | |||| | | | | | |
0 us | 4) <idle>-0 | d..1. | 0.000 us | irqentry_enter();
3 us | 4) <idle>-0 | d..2. | | irq_enter_rcu() {
4 us | 4) <idle>-0 | d..2. | 0.431 us | preempt_count_add();
5 us | 4) <idle>-0 | d.h2. | | tick_irq_enter() {
5 us | 4) <idle>-0 | d.h2. | 0.433 us | tick_check_oneshot_broadcast_this_cpu();
6 us | 4) <idle>-0 | d.h2. | 2.426 us | ktime_get();
9 us | 4) <idle>-0 | d.h2. | | tick_nohz_stop_idle() {
10 us | 4) <idle>-0 | d.h2. | 0.398 us | nr_iowait_cpu();
11 us | 4) <idle>-0 | d.h1. | 1.903 us | }
11 us | 4) <idle>-0 | d.h2. | | tick_do_update_jiffies64() {
12 us | 4) <idle>-0 | d.h2. | | _raw_spin_lock() {
12 us | 4) <idle>-0 | d.h2. | 0.360 us | preempt_count_add();
13 us | 4) <idle>-0 | d.h3. | 0.354 us | do_raw_spin_lock();
14 us | 4) <idle>-0 | d.h2. | 2.207 us | }
15 us | 4) <idle>-0 | d.h3. | 0.428 us | calc_global_load();
16 us | 4) <idle>-0 | d.h3. | | _raw_spin_unlock() {
16 us | 4) <idle>-0 | d.h3. | 0.380 us | do_raw_spin_unlock();
17 us | 4) <idle>-0 | d.h3. | 0.334 us | preempt_count_sub();
18 us | 4) <idle>-0 | d.h1. | 1.768 us | }
18 us | 4) <idle>-0 | d.h2. | | update_wall_time() {
[..]

To:

# REL TIME CPU TASK/PID |||| DURATION FUNCTION CALLS
# | | | | |||| | | | | | |
0 us | 5) <idle>-0 | d.s2. | 0.000 us | _raw_spin_lock_irqsave();
0 us | 5) <idle>-0 | d.s3. | 312159583 us | preempt_count_add();
2 us | 5) <idle>-0 | d.s4. | 312159585 us | do_raw_spin_lock();
3 us | 5) <idle>-0 | d.s4. | | _raw_spin_unlock() {
3 us | 5) <idle>-0 | d.s4. | 312159586 us | do_raw_spin_unlock();
4 us | 5) <idle>-0 | d.s4. | 312159587 us | preempt_count_sub();
4 us | 5) <idle>-0 | d.s2. | 312159587 us | }
5 us | 5) <idle>-0 | d.s3. | | _raw_spin_lock() {
5 us | 5) <idle>-0 | d.s3. | 312159588 us | preempt_count_add();
6 us | 5) <idle>-0 | d.s4. | 312159589 us | do_raw_spin_lock();
7 us | 5) <idle>-0 | d.s3. | 312159590 us | }
8 us | 5) <idle>-0 | d.s4. | 312159591 us | calc_wheel_index();
9 us | 5) <idle>-0 | d.s4. | | enqueue_timer() {
9 us | 5) <idle>-0 | d.s4. | | wake_up_nohz_cpu() {
11 us | 5) <idle>-0 | d.s4. | | native_smp_send_reschedule() {
11 us | 5) <idle>-0 | d.s4. | 312171987 us | default_send_IPI_single_phys();
12408 us | 5) <idle>-0 | d.s3. | 312171990 us | }
12408 us | 5) <idle>-0 | d.s3. | 312171991 us | }
12409 us | 5) <idle>-0 | d.s3. | 312171991 us | }

Here the duration shown for each function is the return timestamp minus
zero, i.e. the absolute time at which the function returned, not how long
the function ran.
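
In sketch form, assuming the graph output computes the duration as a plain
subtraction of the two timestamps in the return entry:

	/* duration shown in the DURATION column */
	duration = trace->rettime - trace->calltime;

	/*
	 * With calltime never filled in, calltime == 0 and "duration"
	 * degenerates to the absolute rettime timestamp, e.g. the
	 * 312159583 us values in the broken trace above.
	 */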

Have these tracers also save the calltime in the fgraph data section on
entry and retrieve it on return, restoring the correct timings.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/20250113183124.61767419@gandalf.local.home
Fixes: f1f36e22bee9 ("ftrace: Have calltime be saved in the fgraph storage")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

---
 kernel/trace/trace_irqsoff.c      | 14 ++++++++++++++
 kernel/trace/trace_sched_wakeup.c | 14 ++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -182,7 +182,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	unsigned int trace_ctx;
+	u64 *calltime;
 	int ret;
 
 	if (ftrace_graph_ignore_func(gops, trace))
@@ -199,6 +200,12 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
 	if (!func_prolog_dec(tr, &data, &flags))
 		return 0;
 
+	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
+	if (!calltime)
+		return 0;
+
+	*calltime = trace_clock_local();
+
 	trace_ctx = tracing_gen_ctx_flags(flags);
 	ret = __trace_graph_entry(tr, trace, trace_ctx);
 	atomic_dec(&data->disabled);
@@ -213,11 +220,18 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace,
 	struct trace_array_cpu *data;
 	unsigned long flags;
 	unsigned int trace_ctx;
+	u64 *calltime;
+	int size;
 
 	ftrace_graph_addr_finish(gops, trace);
 
 	if (!func_prolog_dec(tr, &data, &flags))
 		return;
+
+	calltime = fgraph_retrieve_data(gops->idx, &size);
+	if (!calltime)
+		return;
+	trace->calltime = *calltime;
 
 	trace_ctx = tracing_gen_ctx_flags(flags);
 	__trace_graph_return(tr, trace, trace_ctx);

diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -118,7 +118,8 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
 	struct trace_array *tr = wakeup_trace;
 	struct trace_array_cpu *data;
 	unsigned int trace_ctx;
+	u64 *calltime;
 	int ret = 0;
 
 	if (ftrace_graph_ignore_func(gops, trace))
@@ -135,6 +136,12 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
 	if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
 		return 0;
 
+	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
+	if (!calltime)
+		return 0;
+
+	*calltime = trace_clock_local();
+
 	ret = __trace_graph_entry(tr, trace, trace_ctx);
 	atomic_dec(&data->disabled);
 	preempt_enable_notrace();
@@ -148,11 +155,18 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace,
 	struct trace_array *tr = wakeup_trace;
 	struct trace_array_cpu *data;
 	unsigned int trace_ctx;
+	u64 *calltime;
+	int size;
 
 	ftrace_graph_addr_finish(gops, trace);
 
 	if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
 		return;
+
+	calltime = fgraph_retrieve_data(gops->idx, &size);
+	if (!calltime)
+		return;
+	trace->calltime = *calltime;
 
 	__trace_graph_return(tr, trace, trace_ctx);
 	atomic_dec(&data->disabled);