Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: Implement stack trace termination record

Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.

We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already set up upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().

Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.

Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.

External debuggers may terminate the stack trace when FP == 0. In
pt_regs->stackframe, the PC is 0 as well, so stack traces taken in a
debugger may print an extra 0x0 record at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>

authored by

Madhavan T. Venkataraman and committed by
Will Deacon
7d7b720a c4681547

+32 -16
+1 -1
arch/arm64/kernel/entry.S
··· 285 285 stp lr, x21, [sp, #S_LR] 286 286 287 287 /* 288 - * For exceptions from EL0, create a terminal frame record. 288 + * For exceptions from EL0, create a final frame record. 289 289 * For exceptions from EL1, create a synthetic frame record so the 290 290 * interrupted code shows up in the backtrace. 291 291 */
+19 -6
arch/arm64/kernel/head.S
··· 16 16 #include <asm/asm_pointer_auth.h> 17 17 #include <asm/assembler.h> 18 18 #include <asm/boot.h> 19 + #include <asm/bug.h> 19 20 #include <asm/ptrace.h> 20 21 #include <asm/asm-offsets.h> 21 22 #include <asm/cache.h> ··· 394 393 ret x28 395 394 SYM_FUNC_END(__create_page_tables) 396 395 396 + /* 397 + * Create a final frame record at task_pt_regs(current)->stackframe, so 398 + * that the unwinder can identify the final frame record of any task by 399 + * its location in the task stack. We reserve the entire pt_regs space 400 + * for consistency with user tasks and kthreads. 401 + */ 402 + .macro setup_final_frame 403 + sub sp, sp, #PT_REGS_SIZE 404 + stp xzr, xzr, [sp, #S_STACKFRAME] 405 + add x29, sp, #S_STACKFRAME 406 + .endm 407 + 397 408 /* 398 409 * The following fragment of code is executed with the MMU enabled. 399 410 * ··· 460 447 #endif 461 448 bl switch_to_vhe // Prefer VHE if possible 462 449 add sp, sp, #16 463 - mov x29, #0 464 - mov x30, #0 465 - b start_kernel 450 + setup_final_frame 451 + bl start_kernel 452 + ASM_BUG() 466 453 SYM_FUNC_END(__primary_switched) 467 454 468 455 .pushsection ".rodata", "a" ··· 652 639 cbz x2, __secondary_too_slow 653 640 msr sp_el0, x2 654 641 scs_load x2, x3 655 - mov x29, #0 656 - mov x30, #0 642 + setup_final_frame 657 643 658 644 #ifdef CONFIG_ARM64_PTR_AUTH 659 645 ptrauth_keys_init_cpu x2, x3, x4, x5 660 646 #endif 661 647 662 - b secondary_start_kernel 648 + bl secondary_start_kernel 649 + ASM_BUG() 663 650 SYM_FUNC_END(__secondary_switched) 664 651 665 652 SYM_FUNC_START_LOCAL(__secondary_too_slow)
+5
arch/arm64/kernel/process.c
··· 435 435 } 436 436 p->thread.cpu_context.pc = (unsigned long)ret_from_fork; 437 437 p->thread.cpu_context.sp = (unsigned long)childregs; 438 + /* 439 + * For the benefit of the unwinder, set up childregs->stackframe 440 + * as the final frame for the new task. 441 + */ 442 + p->thread.cpu_context.fp = (unsigned long)childregs->stackframe; 438 443 439 444 ptrace_hw_copy_thread(p); 440 445
+7 -9
arch/arm64/kernel/stacktrace.c
··· 68 68 unsigned long fp = frame->fp; 69 69 struct stack_info info; 70 70 71 - if (fp & 0xf) 72 - return -EINVAL; 73 - 74 71 if (!tsk) 75 72 tsk = current; 73 + 74 + /* Final frame; nothing to unwind */ 75 + if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) 76 + return -ENOENT; 77 + 78 + if (fp & 0xf) 79 + return -EINVAL; 76 80 77 81 if (!on_accessible_stack(tsk, fp, &info)) 78 82 return -EINVAL; ··· 131 127 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 132 128 133 129 frame->pc = ptrauth_strip_insn_pac(frame->pc); 134 - 135 - /* 136 - * This is a terminal record, so we have finished unwinding. 137 - */ 138 - if (!frame->fp && !frame->pc) 139 - return -ENOENT; 140 130 141 131 return 0; 142 132 }