Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/unwind: Recover kretprobe trampoline entry

Since kretprobe replaces the function return address on the stack
with the address of kretprobe_trampoline, the x86 unwinders either
cannot continue unwinding the stack at that point, or they record
kretprobe_trampoline instead of the correct return address.

To fix this issue, find the correct return address from the task's
kretprobe_instances, just as the function-graph tracer does.

With this fix, the unwinder can correctly unwind the stack
from a kretprobe event on x86, as shown below.

<...>-135 [003] ...1 6.722338: r_full_proxy_read_0: (vfs_read+0xab/0x1a0 <- full_proxy_read)
<...>-135 [003] ...1 6.722377: <stack trace>
=> kretprobe_trace_func+0x209/0x2f0
=> kretprobe_dispatcher+0x4a/0x70
=> __kretprobe_trampoline_handler+0xca/0x150
=> trampoline_handler+0x44/0x70
=> kretprobe_trampoline+0x2a/0x50
=> vfs_read+0xab/0x1a0
=> ksys_read+0x5f/0xe0
=> do_syscall_64+0x33/0x40
=> entry_SYSCALL_64_after_hwframe+0x44/0xae

Link: https://lkml.kernel.org/r/163163055130.489837.5161749078833497255.stgit@devnote2

Reported-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

authored by

Masami Hiramatsu and committed by
Steven Rostedt (VMware)
19138af1 1f368393

+42 -8
+23
arch/x86/include/asm/unwind.h
··· 4 4 5 5 #include <linux/sched.h> 6 6 #include <linux/ftrace.h> 7 + #include <linux/kprobes.h> 7 8 #include <asm/ptrace.h> 8 9 #include <asm/stacktrace.h> 9 10 ··· 16 15 unsigned long stack_mask; 17 16 struct task_struct *task; 18 17 int graph_idx; 18 + struct llist_node *kr_cur; 19 19 bool error; 20 20 #if defined(CONFIG_UNWINDER_ORC) 21 21 bool signal, full_regs; ··· 100 98 void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, 101 99 void *orc, size_t orc_size) {} 102 100 #endif 101 + 102 + static inline 103 + unsigned long unwind_recover_kretprobe(struct unwind_state *state, 104 + unsigned long addr, unsigned long *addr_p) 105 + { 106 + return is_kretprobe_trampoline(addr) ? 107 + kretprobe_find_ret_addr(state->task, addr_p, &state->kr_cur) : 108 + addr; 109 + } 110 + 111 + /* Recover the return address modified by kretprobe and ftrace_graph. */ 112 + static inline 113 + unsigned long unwind_recover_ret_addr(struct unwind_state *state, 114 + unsigned long addr, unsigned long *addr_p) 115 + { 116 + unsigned long ret; 117 + 118 + ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, 119 + addr, addr_p); 120 + return unwind_recover_kretprobe(state, ret, addr_p); 121 + } 103 122 104 123 /* 105 124 * This disables KASAN checking when reading a value from another task's stack,
+1 -2
arch/x86/kernel/unwind_frame.c
··· 240 240 else { 241 241 addr_p = unwind_get_return_address_ptr(state); 242 242 addr = READ_ONCE_TASK_STACK(state->task, *addr_p); 243 - state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, 244 - addr, addr_p); 243 + state->ip = unwind_recover_ret_addr(state, addr, addr_p); 245 244 } 246 245 247 246 /* Save the original stack pointer for unwind_dump(): */
+1 -2
arch/x86/kernel/unwind_guess.c
··· 15 15 16 16 addr = READ_ONCE_NOCHECK(*state->sp); 17 17 18 - return ftrace_graph_ret_addr(state->task, &state->graph_idx, 19 - addr, state->sp); 18 + return unwind_recover_ret_addr(state, addr, state->sp); 20 19 } 21 20 EXPORT_SYMBOL_GPL(unwind_get_return_address); 22 21
+17 -4
arch/x86/kernel/unwind_orc.c
··· 534 534 if (!deref_stack_reg(state, ip_p, &state->ip)) 535 535 goto err; 536 536 537 - state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, 538 - state->ip, (void *)ip_p); 539 - 537 + state->ip = unwind_recover_ret_addr(state, state->ip, 538 + (unsigned long *)ip_p); 540 539 state->sp = sp; 541 540 state->regs = NULL; 542 541 state->prev_regs = NULL; ··· 548 549 (void *)orig_ip); 549 550 goto err; 550 551 } 551 - 552 + /* 553 + * There is a small chance to interrupt at the entry of 554 + * __kretprobe_trampoline() where the ORC info doesn't exist. 555 + * That point is right after the RET to __kretprobe_trampoline() 556 + * which was modified return address. 557 + * At that point, the @addr_p of the unwind_recover_kretprobe() 558 + * (this has to point the address of the stack entry storing 559 + * the modified return address) must be "SP - (a stack entry)" 560 + * because SP is incremented by the RET. 561 + */ 562 + state->ip = unwind_recover_kretprobe(state, state->ip, 563 + (unsigned long *)(state->sp - sizeof(long))); 552 564 state->regs = (struct pt_regs *)sp; 553 565 state->prev_regs = NULL; 554 566 state->full_regs = true; ··· 572 562 (void *)orig_ip); 573 563 goto err; 574 564 } 565 + /* See UNWIND_HINT_TYPE_REGS case comment. */ 566 + state->ip = unwind_recover_kretprobe(state, state->ip, 567 + (unsigned long *)(state->sp - sizeof(long))); 575 568 576 569 if (state->full_regs) 577 570 state->prev_regs = state->regs;