Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: stacktrace: track all stack boundaries explicitly

Currently we call an on_accessible_stack() callback for each step of the
unwinder, requiring redundant work to be performed in the core of the
unwind loop (e.g. disabling preemption around accesses to per-cpu
variables containing stack boundaries). To prevent unwind loops which go
through a stack multiple times, we have to track the set of unwound
stacks, requiring a stack_type enum which needs to cater for all the
stacks of all possible callees. To prevent loops within a stack, we must
track the prior FP values.

This patch reworks the unwinder to minimize the work in the core of the
unwinder, and to remove the need for the stack_type enum. The set of
accessible stacks (and their boundaries) is determined at the start of
the unwind, and the current stack is tracked during the unwind, with
completed stacks removed from the set of accessible stacks. This makes
the boundary checks more accurate (e.g. detecting overlapped frame
records), and removes the need for separate tracking of the prior FP and
visited stacks.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Kalesh Singh <kaleshsingh@google.com>
Reviewed-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Cc: Fuad Tabba <tabba@google.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220901130646.1316937-9-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

Authored by Mark Rutland and committed by Catalin Marinas
8df13730 bd8abd68

+132 -199
-5
arch/arm64/include/asm/stacktrace.h
··· 30 30 return (struct stack_info) { 31 31 .low = low, 32 32 .high = high, 33 - .type = STACK_TYPE_IRQ, 34 33 }; 35 34 } 36 35 ··· 47 48 return (struct stack_info) { 48 49 .low = low, 49 50 .high = high, 50 - .type = STACK_TYPE_TASK, 51 51 }; 52 52 } 53 53 ··· 68 70 return (struct stack_info) { 69 71 .low = low, 70 72 .high = high, 71 - .type = STACK_TYPE_OVERFLOW, 72 73 }; 73 74 } 74 75 #else ··· 86 89 return (struct stack_info) { 87 90 .low = low, 88 91 .high = high, 89 - .type = STACK_TYPE_SDEI_NORMAL, 90 92 }; 91 93 } 92 94 ··· 97 101 return (struct stack_info) { 98 102 .low = low, 99 103 .high = high, 100 - .type = STACK_TYPE_SDEI_CRITICAL, 101 104 }; 102 105 } 103 106 #else
+76 -88
arch/arm64/include/asm/stacktrace/common.h
··· 9 9 #ifndef __ASM_STACKTRACE_COMMON_H 10 10 #define __ASM_STACKTRACE_COMMON_H 11 11 12 - #include <linux/bitmap.h> 13 - #include <linux/bitops.h> 14 12 #include <linux/kprobes.h> 15 13 #include <linux/types.h> 16 - 17 - enum stack_type { 18 - STACK_TYPE_UNKNOWN, 19 - STACK_TYPE_TASK, 20 - STACK_TYPE_IRQ, 21 - STACK_TYPE_OVERFLOW, 22 - STACK_TYPE_SDEI_NORMAL, 23 - STACK_TYPE_SDEI_CRITICAL, 24 - STACK_TYPE_HYP, 25 - __NR_STACK_TYPES 26 - }; 27 14 28 15 struct stack_info { 29 16 unsigned long low; 30 17 unsigned long high; 31 - enum stack_type type; 32 18 }; 33 19 34 20 /** ··· 23 37 * @fp: The fp value in the frame record (or the real fp) 24 38 * @pc: The lr value in the frame record (or the real lr) 25 39 * 26 - * @stacks_done: Stacks which have been entirely unwound, for which it is no 27 - * longer valid to unwind to. 28 - * 29 - * @prev_fp: The fp that pointed to this frame record, or a synthetic value 30 - * of 0. This is used to ensure that within a stack, each 31 - * subsequent frame record is at an increasing address. 32 - * @prev_type: The type of stack this frame record was on, or a synthetic 33 - * value of STACK_TYPE_UNKNOWN. This is used to detect a 34 - * transition from one stack to another. 35 - * 36 40 * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance 37 41 * associated with the most recently encountered replacement lr 38 42 * value. 39 43 * 40 44 * @task: The task being unwound. 45 + * 46 + * @stack: The stack currently being unwound. 47 + * @stacks: An array of stacks which can be unwound. 48 + * @nr_stacks: The number of stacks in @stacks. 
41 49 */ 42 50 struct unwind_state { 43 51 unsigned long fp; 44 52 unsigned long pc; 45 - DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES); 46 - unsigned long prev_fp; 47 - enum stack_type prev_type; 48 53 #ifdef CONFIG_KRETPROBES 49 54 struct llist_node *kr_cur; 50 55 #endif 51 56 struct task_struct *task; 57 + 58 + struct stack_info stack; 59 + struct stack_info *stacks; 60 + int nr_stacks; 52 61 }; 53 62 54 63 static inline struct stack_info stackinfo_get_unknown(void) ··· 51 70 return (struct stack_info) { 52 71 .low = 0, 53 72 .high = 0, 54 - .type = STACK_TYPE_UNKNOWN, 55 73 }; 56 74 } 57 75 ··· 74 94 state->kr_cur = NULL; 75 95 #endif 76 96 77 - /* 78 - * Prime the first unwind. 79 - * 80 - * In unwind_next() we'll check that the FP points to a valid stack, 81 - * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be 82 - * treated as a transition to whichever stack that happens to be. The 83 - * prev_fp value won't be used, but we set it to 0 such that it is 84 - * definitely not an accessible stack address. 85 - */ 86 - bitmap_zero(state->stacks_done, __NR_STACK_TYPES); 87 - state->prev_fp = 0; 88 - state->prev_type = STACK_TYPE_UNKNOWN; 97 + state->stack = stackinfo_get_unknown(); 89 98 } 90 99 91 100 /** ··· 89 120 */ 90 121 typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp); 91 122 123 + static struct stack_info *unwind_find_next_stack(const struct unwind_state *state, 124 + unsigned long sp, 125 + unsigned long size) 126 + { 127 + for (int i = 0; i < state->nr_stacks; i++) { 128 + struct stack_info *info = &state->stacks[i]; 129 + 130 + if (stackinfo_on_stack(info, sp, size)) 131 + return info; 132 + } 133 + 134 + return NULL; 135 + } 136 + 92 137 /** 93 - * typedef on_accessible_stack_fn() - Check whether a stack range is on any of 94 - * the possible stacks. 
138 + * unwind_consume_stack() - Check if an object is on an accessible stack, 139 + * updating stack boundaries so that future unwind steps cannot consume this 140 + * object again. 95 141 * 96 - * @tsk: task whose stack is being unwound 97 - * @sp: stack address being checked 98 - * @size: size of the stack range being checked 99 - * @info: stack unwinding context 142 + * @state: the current unwind state. 143 + * @sp: the base address of the object. 144 + * @size: the size of the object. 100 145 * 101 - * Return: true if the stack range is accessible, false otherwise. 102 - * 103 - * Upon success @info is updated with information for the relevant stack. 104 - * 105 - * Upon failure @info is updated with the UNKNOWN stack. 146 + * Return: 0 upon success, an error code otherwise. 106 147 */ 107 - typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk, 108 - unsigned long sp, unsigned long size, 109 - struct stack_info *info); 148 + static inline int unwind_consume_stack(struct unwind_state *state, 149 + unsigned long sp, 150 + unsigned long size) 151 + { 152 + struct stack_info *next; 153 + 154 + if (stackinfo_on_stack(&state->stack, sp, size)) 155 + goto found; 156 + 157 + next = unwind_find_next_stack(state, sp, size); 158 + if (!next) 159 + return -EINVAL; 160 + 161 + /* 162 + * Stack transitions are strictly one-way, and once we've 163 + * transitioned from one stack to another, it's never valid to 164 + * unwind back to the old stack. 165 + * 166 + * Remove the current stack from the list of stacks so that it cannot 167 + * be found on a subsequent transition. 168 + * 169 + * Note that stacks can nest in several valid orders, e.g. 170 + * 171 + * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL 172 + * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW 173 + * HYP -> OVERFLOW 174 + * 175 + * ... so we do not check the specific order of stack 176 + * transitions. 
177 + */ 178 + state->stack = *next; 179 + *next = stackinfo_get_unknown(); 180 + 181 + found: 182 + /* 183 + * Future unwind steps can only consume stack above this frame record. 184 + * Update the current stack to start immediately above it. 185 + */ 186 + state->stack.low = sp + size; 187 + return 0; 188 + } 110 189 111 190 /** 112 191 * unwind_next_frame_record() - Unwind to the next frame record. 113 192 * 114 193 * @state: the current unwind state. 115 - * @accessible: determines whether the frame record is accessible 116 194 * @translate_fp: translates the fp prior to access (may be NULL) 117 195 * 118 196 * Return: 0 upon success, an error code otherwise. 119 197 */ 120 198 static inline int 121 199 unwind_next_frame_record(struct unwind_state *state, 122 - on_accessible_stack_fn accessible, 123 200 stack_trace_translate_fp_fn translate_fp) 124 201 { 125 - struct stack_info info; 126 202 unsigned long fp = state->fp, kern_fp = fp; 127 - struct task_struct *tsk = state->task; 203 + int err; 128 204 129 205 if (fp & 0x7) 130 206 return -EINVAL; 131 207 132 - if (!accessible(tsk, fp, 16, &info)) 133 - return -EINVAL; 134 - 135 - if (test_bit(info.type, state->stacks_done)) 136 - return -EINVAL; 208 + err = unwind_consume_stack(state, fp, 16); 209 + if (err) 210 + return err; 137 211 138 212 /* 139 213 * If fp is not from the current address space perform the necessary ··· 186 174 return -EINVAL; 187 175 188 176 /* 189 - * As stacks grow downward, any valid record on the same stack must be 190 - * at a strictly higher address than the prior record. 191 - * 192 - * Stacks can nest in several valid orders, e.g. 193 - * 194 - * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL 195 - * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW 196 - * HYP -> OVERFLOW 197 - * 198 - * ... but the nesting itself is strict. Once we transition from one 199 - * stack to another, it's never valid to unwind back to that first 200 - * stack. 
201 - */ 202 - if (info.type == state->prev_type) { 203 - if (fp <= state->prev_fp) 204 - return -EINVAL; 205 - } else { 206 - __set_bit(state->prev_type, state->stacks_done); 207 - } 208 - 209 - /* 210 - * Record this frame record's values and location. The prev_fp and 211 - * prev_type are only meaningful to the next unwind_next() invocation. 177 + * Record this frame record's values. 212 178 */ 213 179 state->fp = READ_ONCE(*(unsigned long *)(kern_fp)); 214 180 state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8)); 215 - state->prev_fp = fp; 216 - state->prev_type = info.type; 217 181 218 182 return 0; 219 183 }
+38 -53
arch/arm64/kernel/stacktrace.c
··· 67 67 state->pc = thread_saved_pc(task); 68 68 } 69 69 70 - static bool on_accessible_stack(const struct task_struct *tsk, 71 - unsigned long sp, unsigned long size, 72 - struct stack_info *info) 73 - { 74 - struct stack_info tmp; 75 - 76 - tmp = stackinfo_get_task(tsk); 77 - if (stackinfo_on_stack(&tmp, sp, size)) 78 - goto found; 79 - 80 - /* 81 - * We can only safely access per-cpu stacks when unwinding the current 82 - * task in a non-preemptible context. 83 - */ 84 - if (tsk != current || preemptible()) 85 - goto not_found; 86 - 87 - tmp = stackinfo_get_irq(); 88 - if (stackinfo_on_stack(&tmp, sp, size)) 89 - goto found; 90 - 91 - tmp = stackinfo_get_overflow(); 92 - if (stackinfo_on_stack(&tmp, sp, size)) 93 - goto found; 94 - 95 - /* 96 - * We can only safely access SDEI stacks which unwinding the current 97 - * task in an NMI context. 98 - */ 99 - if (!IS_ENABLED(CONFIG_VMAP_STACK) || 100 - !IS_ENABLED(CONFIG_ARM_SDE_INTERFACE) || 101 - !in_nmi()) 102 - goto not_found; 103 - 104 - tmp = stackinfo_get_sdei_normal(); 105 - if (stackinfo_on_stack(&tmp, sp, size)) 106 - goto found; 107 - 108 - tmp = stackinfo_get_sdei_critical(); 109 - if (stackinfo_on_stack(&tmp, sp, size)) 110 - goto found; 111 - 112 - not_found: 113 - *info = stackinfo_get_unknown(); 114 - return false; 115 - 116 - found: 117 - *info = tmp; 118 - return true; 119 - } 120 - 121 70 /* 122 71 * Unwind from one frame record (A) to the next frame record (B). 123 72 * ··· 84 135 if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) 85 136 return -ENOENT; 86 137 87 - err = unwind_next_frame_record(state, on_accessible_stack, NULL); 138 + err = unwind_next_frame_record(state, NULL); 88 139 if (err) 89 140 return err; 90 141 ··· 164 215 barrier(); 165 216 } 166 217 218 + /* 219 + * Per-cpu stacks are only accessible when unwinding the current task in a 220 + * non-preemptible context. 221 + */ 222 + #define STACKINFO_CPU(name) \ 223 + ({ \ 224 + ((task == current) && !preemptible()) \ 225 + ? 
stackinfo_get_##name() \ 226 + : stackinfo_get_unknown(); \ 227 + }) 228 + 229 + /* 230 + * SDEI stacks are only accessible when unwinding the current task in an NMI 231 + * context. 232 + */ 233 + #define STACKINFO_SDEI(name) \ 234 + ({ \ 235 + ((task == current) && in_nmi()) \ 236 + ? stackinfo_get_sdei_##name() \ 237 + : stackinfo_get_unknown(); \ 238 + }) 239 + 167 240 noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, 168 241 void *cookie, struct task_struct *task, 169 242 struct pt_regs *regs) 170 243 { 171 - struct unwind_state state; 244 + struct stack_info stacks[] = { 245 + stackinfo_get_task(task), 246 + STACKINFO_CPU(irq), 247 + #if defined(CONFIG_VMAP_STACK) 248 + STACKINFO_CPU(overflow), 249 + #endif 250 + #if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE) 251 + STACKINFO_SDEI(normal), 252 + STACKINFO_SDEI(critical), 253 + #endif 254 + }; 255 + struct unwind_state state = { 256 + .stacks = stacks, 257 + .nr_stacks = ARRAY_SIZE(stacks), 258 + }; 172 259 173 260 if (regs) { 174 261 if (task != current)
+9 -26
arch/arm64/kvm/hyp/nvhe/stacktrace.c
··· 47 47 return (struct stack_info) { 48 48 .low = low, 49 49 .high = high, 50 - .type = STACK_TYPE_OVERFLOW, 51 50 }; 52 51 } 53 52 ··· 59 60 return (struct stack_info) { 60 61 .low = low, 61 62 .high = high, 62 - .type = STACK_TYPE_HYP, 63 63 }; 64 - } 65 - 66 - static bool on_accessible_stack(const struct task_struct *tsk, 67 - unsigned long sp, unsigned long size, 68 - struct stack_info *info) 69 - { 70 - struct stack_info tmp; 71 - 72 - tmp = stackinfo_get_overflow(); 73 - if (stackinfo_on_stack(&tmp, sp, size)) 74 - goto found; 75 - 76 - tmp = stackinfo_get_hyp(); 77 - if (stackinfo_on_stack(&tmp, sp, size)) 78 - goto found; 79 - 80 - *info = stackinfo_get_unknown(); 81 - return false; 82 - 83 - found: 84 - *info = tmp; 85 - return true; 86 64 } 87 65 88 66 static int unwind_next(struct unwind_state *state) 89 67 { 90 - return unwind_next_frame_record(state, on_accessible_stack, NULL); 68 + return unwind_next_frame_record(state, NULL); 91 69 } 92 70 93 71 static void notrace unwind(struct unwind_state *state, ··· 120 144 */ 121 145 static void pkvm_save_backtrace(unsigned long fp, unsigned long pc) 122 146 { 123 - struct unwind_state state; 147 + struct stack_info stacks[] = { 148 + stackinfo_get_overflow(), 149 + stackinfo_get_hyp(), 150 + }; 151 + struct unwind_state state = { 152 + .stacks = stacks, 153 + .nr_stacks = ARRAY_SIZE(stacks), 154 + }; 124 155 int idx = 0; 125 156 126 157 kvm_nvhe_unwind_init(&state, fp, pc);
+9 -27
arch/arm64/kvm/stacktrace.c
··· 31 31 return (struct stack_info) { 32 32 .low = low, 33 33 .high = high, 34 - .type = STACK_TYPE_OVERFLOW, 35 34 }; 36 35 } 37 36 ··· 44 45 return (struct stack_info) { 45 46 .low = low, 46 47 .high = high, 47 - .type = STACK_TYPE_HYP, 48 48 }; 49 49 } 50 50 ··· 100 102 return kvm_nvhe_stack_kern_va(addr, 16); 101 103 } 102 104 103 - static bool on_accessible_stack(const struct task_struct *tsk, 104 - unsigned long sp, unsigned long size, 105 - struct stack_info *info) 106 - { 107 - struct stack_info tmp; 108 - 109 - tmp = stackinfo_get_overflow(); 110 - if (stackinfo_on_stack(&tmp, sp, size)) 111 - goto found; 112 - 113 - tmp = stackinfo_get_hyp(); 114 - if (stackinfo_on_stack(&tmp, sp, size)) 115 - goto found; 116 - 117 - *info = stackinfo_get_unknown(); 118 - return false; 119 - 120 - found: 121 - *info = tmp; 122 - return true; 123 - } 124 - 125 105 static int unwind_next(struct unwind_state *state) 126 106 { 127 - return unwind_next_frame_record(state, on_accessible_stack, 128 - kvm_nvhe_stack_kern_record_va); 107 + return unwind_next_frame_record(state, kvm_nvhe_stack_kern_record_va); 129 108 } 130 109 131 110 static void unwind(struct unwind_state *state, ··· 160 185 static void hyp_dump_backtrace(unsigned long hyp_offset) 161 186 { 162 187 struct kvm_nvhe_stacktrace_info *stacktrace_info; 163 - struct unwind_state state; 188 + struct stack_info stacks[] = { 189 + stackinfo_get_overflow(), 190 + stackinfo_get_hyp(), 191 + }; 192 + struct unwind_state state = { 193 + .stacks = stacks, 194 + .nr_stacks = ARRAY_SIZE(stacks), 195 + }; 164 196 165 197 stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); 166 198