Merge branch 'csd.2020.09.04a' into HEAD

csd.2020.09.04a: CPU smp_call_function() torture tests.
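
For context, a minimal sketch (hypothetical names, not part of this series) of the API whose stalls the new debugging targets: a synchronous smp_call_function_single() call spins in csd_lock_wait() until the target CPU has run the handler, and that spin is what the CSD-lock timeout code below instruments.

#include <linux/smp.h>

/* Hypothetical IPI handler: runs on the target CPU with interrupts disabled. */
static void example_csd_handler(void *info)
{
        /* Keep per-CPU work short; this executes in IPI context. */
}

/*
 * wait=1 makes the caller spin in csd_lock_wait() until the target CPU
 * has executed example_csd_handler(); with CSD_LOCK_WAIT_DEBUG enabled,
 * that spin reports stalls longer than CSD_LOCK_TIMEOUT.
 */
static void example_sync_call(int target_cpu)
{
        smp_call_function_single(target_cpu, example_csd_handler, NULL, 1);
}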

+160 -1
+4
Documentation/admin-guide/kernel-parameters.txt
···
 			and gids from such clients. This is intended to ease
 			migration from NFSv2/v3.
 
+	nmi_backtrace.backtrace_idle	[KNL]
+			Dump stacks even of idle CPUs in response to an
+			NMI stack-backtrace request.
+
 	nmi_debug=	[KNL,SH] Specify one or more actions to take
 			when a NMI is triggered.
 			Format: [state][,regs][,debounce][,die]
+3
include/linux/smp.h
···
 		struct {
 			struct llist_node llist;
 			unsigned int flags;
+#ifdef CONFIG_64BIT
+			u16 src, dst;
+#endif
 		};
 	};
 	smp_call_func_t func;
+3
include/linux/smp_types.h
···
 		unsigned int u_flags;
 		atomic_t a_flags;
 	};
+#ifdef CONFIG_64BIT
+	u16 src, dst;
+#endif
 };
 
 #endif /* __LINUX_SMP_TYPES_H */
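
The new fields are guarded by CONFIG_64BIT (and the Kconfig option below depends on 64BIT) presumably because, on a 64-bit build, struct llist_node is a single pointer and the 32-bit flags word is followed by four bytes of tail padding, so the two u16 fields fit without growing the structure. A minimal userspace sketch of that size argument, using mock types rather than the kernel definitions:

#include <stdint.h>

/* Mock of struct llist_node: one pointer, 8 bytes on an LP64 build. */
struct llist_node_mock { struct llist_node_mock *next; };

struct csd_node_before {                /* layout without the new fields */
        struct llist_node_mock llist;   /* 8 bytes */
        unsigned int flags;             /* 4 bytes + 4 bytes tail padding */
};

struct csd_node_after {                 /* layout with the new fields */
        struct llist_node_mock llist;
        unsigned int flags;
        uint16_t src, dst;              /* occupy the former tail padding */
};

/* Holds on an LP64 (64-bit) build: both structures are 16 bytes. */
_Static_assert(sizeof(struct csd_node_before) == sizeof(struct csd_node_after),
               "src/dst reuse existing padding; structure size is unchanged");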
+134
kernel/smp.c
···
 #include <linux/sched.h>
 #include <linux/sched/idle.h>
 #include <linux/hypervisor.h>
+#include <linux/sched/clock.h>
+#include <linux/nmi.h>
+#include <linux/sched/debug.h>
 
 #include "smpboot.h"
 #include "sched/smp.h"
···
 	smpcfd_prepare_cpu(smp_processor_id());
 }
 
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+
+static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
+static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
+static DEFINE_PER_CPU(void *, cur_csd_info);
+
+#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+static atomic_t csd_bug_count = ATOMIC_INIT(0);
+
+/* Record current CSD work for current CPU, NULL to erase. */
+static void csd_lock_record(call_single_data_t *csd)
+{
+	if (!csd) {
+		smp_mb(); /* NULL cur_csd after unlock. */
+		__this_cpu_write(cur_csd, NULL);
+		return;
+	}
+	__this_cpu_write(cur_csd_func, csd->func);
+	__this_cpu_write(cur_csd_info, csd->info);
+	smp_wmb(); /* func and info before csd. */
+	__this_cpu_write(cur_csd, csd);
+	smp_mb(); /* Update cur_csd before function call. */
+		  /* Or before unlock, as the case may be. */
+}
+
+static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
+{
+	unsigned int csd_type;
+
+	csd_type = CSD_TYPE(csd);
+	if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
+		return csd->dst; /* Other CSD_TYPE_ values might not have ->dst. */
+	return -1;
+}
+
+/*
+ * Complain if too much time spent waiting.  Note that only
+ * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
+ * so waiting on other types gets much less information.
+ */
+static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
+{
+	int cpu = -1;
+	int cpux;
+	bool firsttime;
+	u64 ts2, ts_delta;
+	call_single_data_t *cpu_cur_csd;
+	unsigned int flags = READ_ONCE(csd->flags);
+
+	if (!(flags & CSD_FLAG_LOCK)) {
+		if (!unlikely(*bug_id))
+			return true;
+		cpu = csd_lock_wait_getcpu(csd);
+		pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
+			 *bug_id, raw_smp_processor_id(), cpu);
+		return true;
+	}
+
+	ts2 = sched_clock();
+	ts_delta = ts2 - *ts1;
+	if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+		return false;
+
+	firsttime = !*bug_id;
+	if (firsttime)
+		*bug_id = atomic_inc_return(&csd_bug_count);
+	cpu = csd_lock_wait_getcpu(csd);
+	if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
+		cpux = 0;
+	else
+		cpux = cpu;
+	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
+	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
+		 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
+		 cpu, csd->func, csd->info);
+	if (cpu_cur_csd && csd != cpu_cur_csd) {
+		pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
+			 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
+			 READ_ONCE(per_cpu(cur_csd_info, cpux)));
+	} else {
+		pr_alert("\tcsd: CSD lock (#%d) %s.\n",
+			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
+	}
+	if (cpu >= 0) {
+		if (!trigger_single_cpu_backtrace(cpu))
+			dump_cpu_task(cpu);
+		if (!cpu_cur_csd) {
+			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
+			arch_send_call_function_single_ipi(cpu);
+		}
+	}
+	dump_stack();
+	*ts1 = ts2;
+
+	return false;
+}
+
 /*
  * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
  *
···
  */
 static __always_inline void csd_lock_wait(call_single_data_t *csd)
 {
+	int bug_id = 0;
+	u64 ts0, ts1;
+
+	ts1 = ts0 = sched_clock();
+	for (;;) {
+		if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
+			break;
+		cpu_relax();
+	}
+	smp_acquire__after_ctrl_dep();
+}
+
+#else
+static void csd_lock_record(call_single_data_t *csd)
+{
+}
+
+static __always_inline void csd_lock_wait(call_single_data_t *csd)
+{
 	smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
 }
+#endif
 
 static __always_inline void csd_lock(call_single_data_t *csd)
 {
···
 	 * We can unlock early even for the synchronous on-stack case,
 	 * since we're doing this from the same CPU..
 	 */
+	csd_lock_record(csd);
 	csd_unlock(csd);
 	local_irq_save(flags);
 	func(info);
+	csd_lock_record(NULL);
 	local_irq_restore(flags);
 	return 0;
 }
···
 			entry = &csd_next->llist;
 		}
 
+		csd_lock_record(csd);
 		func(info);
 		csd_unlock(csd);
+		csd_lock_record(NULL);
 	} else {
 		prev = &csd->llist;
 	}
···
 			smp_call_func_t func = csd->func;
 			void *info = csd->info;
 
+			csd_lock_record(csd);
 			csd_unlock(csd);
 			func(info);
+			csd_lock_record(NULL);
 		} else if (type == CSD_TYPE_IRQ_WORK) {
 			irq_work_single(csd);
 		}
···
 
 	csd->func = func;
 	csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+	csd->src = smp_processor_id();
+	csd->dst = cpu;
+#endif
 
 	err = generic_exec_single(cpu, csd);
···
 		csd->flags |= CSD_TYPE_SYNC;
 		csd->func = func;
 		csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+		csd->src = smp_processor_id();
+		csd->dst = cpu;
+#endif
 		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
 			__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
 	}
+11
lib/Kconfig.debug
···
 	  module may be built after the fact on the running kernel to
 	  be tested, if desired.
 
+config CSD_LOCK_WAIT_DEBUG
+	bool "Debugging for csd_lock_wait(), called from smp_call_function*()"
+	depends on DEBUG_KERNEL
+	depends on 64BIT
+	default n
+	help
+	  This option enables debug prints when CPUs are slow to respond
+	  to the smp_call_function*() IPI wrappers.  These debug prints
+	  include the IPI handler function currently executing (if any)
+	  and relevant stack traces.
+
 endmenu # lock debugging
 
 config TRACE_IRQFLAGS
+5 -1
lib/nmi_backtrace.c
···
 	put_cpu();
 }
 
+// Dump stacks even for idle CPUs.
+static bool backtrace_idle;
+module_param(backtrace_idle, bool, 0644);
+
 bool nmi_cpu_backtrace(struct pt_regs *regs)
 {
 	int cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-		if (regs && cpu_in_idle(instruction_pointer(regs))) {
+		if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) {
 			pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
 				cpu, (void *)instruction_pointer(regs));
 		} else {
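
A usage sketch tying the two halves together: the CSD-lock diagnostics above call trigger_single_cpu_backtrace() on the suspect CPU, and any NMI backtrace request now honors the backtrace_idle parameter (boot parameter nmi_backtrace.backtrace_idle, or, assuming the usual module_param sysfs layout, /sys/module/nmi_backtrace/parameters/backtrace_idle at run time). The helper below is hypothetical:

#include <linux/nmi.h>
#include <linux/printk.h>

/*
 * Hypothetical debug helper: ask every online CPU to dump its stack via
 * an NMI backtrace request.  With backtrace_idle set, CPUs sitting in
 * the idle loop are dumped as well instead of being skipped with the
 * "NMI backtrace for cpu %d skipped" message.
 */
static void example_dump_all_cpus(void)
{
        if (!trigger_all_cpu_backtrace())
                pr_info("NMI backtrace not supported on this architecture\n");
}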