Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/fgraph,bpf: Fix stack ORC unwind from kprobe_multi return probe

Currently we don't get a stack trace via the ORC unwinder on top of the
fgraph exit handler. This is visible when generating a stack trace from a
kretprobe_multi bpf program, which is based on fprobe/fgraph.

The reason is that the ORC unwind code won't get past the return_to_handler
callback installed by the fgraph return probe machinery.

Solve this by creating the stack frame in return_to_handler that the
ftrace_graph_ret_addr function expects, so that it can recover the original
return address and continue with the unwind.

Also update the pt_regs data with cs/flags/rsp, which are needed for
successful stack retrieval from the eBPF bpf_get_stackid helper:
- in get_perf_callchain we check user_mode(regs), so CS has to be set
- in perf_callchain_kernel we call perf_hw_regs(regs), so the
  X86_EFLAGS_FIXED bit in EFLAGS has to be unset

Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20251104215405.168643-3-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>

authored by

Jiri Olsa and committed by
Alexei Starovoitov
20a0bc10 6d08340d

+21 -2
+5
arch/x86/include/asm/ftrace.h
··· 56 56 return &arch_ftrace_regs(fregs)->regs; 57 57 } 58 58 59 + #define arch_ftrace_partial_regs(regs) do { \ 60 + regs->flags &= ~X86_EFLAGS_FIXED; \ 61 + regs->cs = __KERNEL_CS; \ 62 + } while (0) 63 + 59 64 #define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ 60 65 (_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \ 61 66 (_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \
+7 -1
arch/x86/kernel/ftrace_64.S
··· 354 354 UNWIND_HINT_UNDEFINED 355 355 ANNOTATE_NOENDBR 356 356 357 + /* Restore return_to_handler value that got eaten by previous ret instruction. */ 358 + subq $8, %rsp 359 + UNWIND_HINT_FUNC 360 + 357 361 /* Save ftrace_regs for function exit context */ 358 362 subq $(FRAME_SIZE), %rsp 359 363 360 364 movq %rax, RAX(%rsp) 361 365 movq %rdx, RDX(%rsp) 362 366 movq %rbp, RBP(%rsp) 367 + movq %rsp, RSP(%rsp) 363 368 movq %rsp, %rdi 364 369 365 370 call ftrace_return_to_handler ··· 373 368 movq RDX(%rsp), %rdx 374 369 movq RAX(%rsp), %rax 375 370 376 - addq $(FRAME_SIZE), %rsp 371 + addq $(FRAME_SIZE) + 8, %rsp 372 + 377 373 /* 378 374 * Jump back to the old return address. This cannot be JMP_NOSPEC rdi 379 375 * since IBT would demand that contain ENDBR, which simply isn't so for
+9 -1
include/linux/ftrace.h
··· 193 193 #if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \ 194 194 defined(CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS) 195 195 196 + #ifndef arch_ftrace_partial_regs 197 + #define arch_ftrace_partial_regs(regs) do {} while (0) 198 + #endif 199 + 196 200 static __always_inline struct pt_regs * 197 201 ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs) 198 202 { ··· 206 202 * Since arch_ftrace_get_regs() will check some members and may return 207 203 * NULL, we can not use it. 208 204 */ 209 - return &arch_ftrace_regs(fregs)->regs; 205 + regs = &arch_ftrace_regs(fregs)->regs; 206 + 207 + /* Allow arch specific updates to regs. */ 208 + arch_ftrace_partial_regs(regs); 209 + return regs; 210 210 } 211 211 212 212 #endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */