Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/calldepth: Add ret/call counting for debug

Add a debugfs mechanism to validate the accounting, e.g. vs. call/ret
balance and to gather statistics about the stuffing to call ratio.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.204285506@infradead.org

authored by Thomas Gleixner and committed by Peter Zijlstra
f5c1bb2a bbaceb18

+91 -5
+32 -4
arch/x86/include/asm/nospec-branch.h
··· 57 57 #define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL 58 58 #define RET_DEPTH_CREDIT 0xffffffffffffffffULL 59 59 60 + #ifdef CONFIG_CALL_THUNKS_DEBUG 61 + # define CALL_THUNKS_DEBUG_INC_CALLS \ 62 + incq %gs:__x86_call_count; 63 + # define CALL_THUNKS_DEBUG_INC_RETS \ 64 + incq %gs:__x86_ret_count; 65 + # define CALL_THUNKS_DEBUG_INC_STUFFS \ 66 + incq %gs:__x86_stuffs_count; 67 + # define CALL_THUNKS_DEBUG_INC_CTXSW \ 68 + incq %gs:__x86_ctxsw_count; 69 + #else 70 + # define CALL_THUNKS_DEBUG_INC_CALLS 71 + # define CALL_THUNKS_DEBUG_INC_RETS 72 + # define CALL_THUNKS_DEBUG_INC_STUFFS 73 + # define CALL_THUNKS_DEBUG_INC_CTXSW 74 + #endif 75 + 60 76 #if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) 61 77 62 78 #include <asm/asm-offsets.h> ··· 91 75 #define RESET_CALL_DEPTH_FROM_CALL \ 92 76 mov $0xfc, %rax; \ 93 77 shl $56, %rax; \ 94 - movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); 78 + movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ 79 + CALL_THUNKS_DEBUG_INC_CALLS 95 80 96 81 #define INCREMENT_CALL_DEPTH \ 97 - sarq $5, %gs:pcpu_hot + X86_call_depth; 82 + sarq $5, %gs:pcpu_hot + X86_call_depth; \ 83 + CALL_THUNKS_DEBUG_INC_CALLS 98 84 99 85 #define ASM_INCREMENT_CALL_DEPTH \ 100 - sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); 86 + sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ 87 + CALL_THUNKS_DEBUG_INC_CALLS 101 88 102 89 #else 103 90 #define CREDIT_CALL_DEPTH 91 + #define ASM_CREDIT_CALL_DEPTH 104 92 #define RESET_CALL_DEPTH 105 93 #define INCREMENT_CALL_DEPTH 94 + #define ASM_INCREMENT_CALL_DEPTH 106 95 #define RESET_CALL_DEPTH_FROM_CALL 107 96 #endif 108 97 ··· 158 137 jnz 771b; \ 159 138 /* barrier for jnz misprediction */ \ 160 139 lfence; \ 161 - ASM_CREDIT_CALL_DEPTH 140 + ASM_CREDIT_CALL_DEPTH \ 141 + CALL_THUNKS_DEBUG_INC_CTXSW 162 142 #else 163 143 /* 164 144 * i386 doesn't unconditionally have LFENCE, as such it can't ··· 343 321 { 344 322 x86_return_thunk = &__x86_return_skl; 345 323 } 324 + #ifdef CONFIG_CALL_THUNKS_DEBUG 325 + DECLARE_PER_CPU(u64, __x86_call_count); 326 + DECLARE_PER_CPU(u64, __x86_ret_count); 327 + DECLARE_PER_CPU(u64, __x86_stuffs_count); 328 + DECLARE_PER_CPU(u64, __x86_ctxsw_count); 329 + #endif 346 330 #else 347 331 static inline void x86_set_skl_return_thunk(void) {} 348 332 #endif
+53
arch/x86/kernel/callthunks.c
··· 2 2 3 3 #define pr_fmt(fmt) "callthunks: " fmt 4 4 5 + #include <linux/debugfs.h> 5 6 #include <linux/kallsyms.h> 6 7 #include <linux/memory.h> 7 8 #include <linux/moduleloader.h> ··· 35 34 return 1; 36 35 } 37 36 __setup("debug-callthunks", debug_thunks); 37 + 38 + #ifdef CONFIG_CALL_THUNKS_DEBUG 39 + DEFINE_PER_CPU(u64, __x86_call_count); 40 + DEFINE_PER_CPU(u64, __x86_ret_count); 41 + DEFINE_PER_CPU(u64, __x86_stuffs_count); 42 + DEFINE_PER_CPU(u64, __x86_ctxsw_count); 43 + EXPORT_SYMBOL_GPL(__x86_ctxsw_count); 44 + EXPORT_SYMBOL_GPL(__x86_call_count); 45 + #endif 38 46 39 47 extern s32 __call_sites[], __call_sites_end[]; 40 48 ··· 293 283 mutex_unlock(&text_mutex); 294 284 } 295 285 #endif /* CONFIG_MODULES */ 286 + 287 + #if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS) 288 + static int callthunks_debug_show(struct seq_file *m, void *p) 289 + { 290 + unsigned long cpu = (unsigned long)m->private; 291 + 292 + seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n,", 293 + per_cpu(__x86_call_count, cpu), 294 + per_cpu(__x86_ret_count, cpu), 295 + per_cpu(__x86_stuffs_count, cpu), 296 + per_cpu(__x86_ctxsw_count, cpu)); 297 + return 0; 298 + } 299 + 300 + static int callthunks_debug_open(struct inode *inode, struct file *file) 301 + { 302 + return single_open(file, callthunks_debug_show, inode->i_private); 303 + } 304 + 305 + static const struct file_operations dfs_ops = { 306 + .open = callthunks_debug_open, 307 + .read = seq_read, 308 + .llseek = seq_lseek, 309 + .release = single_release, 310 + }; 311 + 312 + static int __init callthunks_debugfs_init(void) 313 + { 314 + struct dentry *dir; 315 + unsigned long cpu; 316 + 317 + dir = debugfs_create_dir("callthunks", NULL); 318 + for_each_possible_cpu(cpu) { 319 + void *arg = (void *)cpu; 320 + char name [10]; 321 + 322 + sprintf(name, "cpu%lu", cpu); 323 + debugfs_create_file(name, 0644, dir, arg, &dfs_ops); 324 + } 325 + return 0; 326 + } 327 + __initcall(callthunks_debugfs_init); 328 + #endif
+6 -1
arch/x86/lib/retpoline.S
··· 203 203 .align 64 204 204 SYM_FUNC_START(__x86_return_skl) 205 205 ANNOTATE_NOENDBR 206 - /* Keep the hotpath in a 16byte I-fetch */ 206 + /* 207 + * Keep the hotpath in a 16byte I-fetch for the non-debug 208 + * case. 209 + */ 210 + CALL_THUNKS_DEBUG_INC_RETS 207 211 shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth) 208 212 jz 1f 209 213 ANNOTATE_UNRET_SAFE 210 214 ret 211 215 int3 212 216 1: 217 + CALL_THUNKS_DEBUG_INC_STUFFS 213 218 .rept 16 214 219 ANNOTATE_INTRA_FUNCTION_CALL 215 220 call 2f