Merge branch 'csd.2020.09.04a' into HEAD

csd.2020.09.04a: CPU smp_call_function() torture tests.

+160 -1
Documentation/admin-guide/kernel-parameters.txt (+4)
···
 			and gids from such clients. This is intended to ease
 			migration from NFSv2/v3.
 
+	nmi_backtrace.backtrace_idle	[KNL]
+			Dump stacks even of idle CPUs in response to an
+			NMI stack-backtrace request.
+
 	nmi_debug=	[KNL,SH] Specify one or more actions to take
 			when a NMI is triggered.
 			Format: [state][,regs][,debounce][,die]
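Note (not part of the diff): backtrace_idle is a module parameter of built-in code, so it can be given at boot as nmi_backtrace.backtrace_idle=1 on the kernel command line; the parameter itself is the module_param() added in lib/nmi_backtrace.c below.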
include/linux/smp.h (+3)
···
 		struct {
 			struct llist_node llist;
 			unsigned int flags;
+#ifdef CONFIG_64BIT
+			u16 src, dst;
+#endif
 		};
 	};
 	smp_call_func_t func;
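The CONFIG_64BIT guard is there because, on 64-bit kernels, the two u16 fields fit into the four bytes of padding that already separate the 32-bit flags word from the following 8-byte-aligned func pointer, so struct __call_single_data does not grow. A user-space sketch of that layout (the mirror types below are illustrative assumptions, not the kernel definitions):

	/* Sketch: mirror of the 64-bit struct __call_single_data layout. */
	#include <stdio.h>
	#include <stddef.h>
	#include <stdint.h>

	struct llist_node_mirror { struct llist_node_mirror *next; };	/* 8 bytes */

	struct csd_mirror {
		struct llist_node_mirror llist;	/* offset 0, 8 bytes */
		unsigned int flags;		/* offset 8, 4 bytes */
		uint16_t src, dst;		/* offsets 12/14: former padding */
		void (*func)(void *);		/* 8-byte aligned, offset 16 either way */
		void *info;			/* offset 24 */
	};

	int main(void)
	{
		/* Prints sizeof=32 offsetof(func)=16 on an LP64 target,
		 * with or without the src/dst pair. */
		printf("sizeof=%zu offsetof(func)=%zu\n",
		       sizeof(struct csd_mirror), offsetof(struct csd_mirror, func));
		return 0;
	}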
include/linux/smp_types.h (+3)
···
 		unsigned int	u_flags;
 		atomic_t	a_flags;
 	};
+#ifdef CONFIG_64BIT
+	u16 src, dst;
+#endif
 };
 
 #endif /* __LINUX_SMP_TYPES_H */
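The matching #ifdef here keeps struct __call_single_node laid out identically to the anonymous struct that overlays it in the union inside struct __call_single_data (include/linux/smp.h above), so the new src/dst fields line up through either view of the shared node.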
kernel/smp.c (+134)
···
 #include <linux/sched.h>
 #include <linux/sched/idle.h>
 #include <linux/hypervisor.h>
+#include <linux/sched/clock.h>
+#include <linux/nmi.h>
+#include <linux/sched/debug.h>
 
 #include "smpboot.h"
 #include "sched/smp.h"
···
 	smpcfd_prepare_cpu(smp_processor_id());
 }
 
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+
+static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
+static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
+static DEFINE_PER_CPU(void *, cur_csd_info);
+
+#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+static atomic_t csd_bug_count = ATOMIC_INIT(0);
+
+/* Record current CSD work for current CPU, NULL to erase. */
+static void csd_lock_record(call_single_data_t *csd)
+{
+	if (!csd) {
+		smp_mb(); /* NULL cur_csd after unlock. */
+		__this_cpu_write(cur_csd, NULL);
+		return;
+	}
+	__this_cpu_write(cur_csd_func, csd->func);
+	__this_cpu_write(cur_csd_info, csd->info);
+	smp_wmb(); /* func and info before csd. */
+	__this_cpu_write(cur_csd, csd);
+	smp_mb(); /* Update cur_csd before function call. */
+		  /* Or before unlock, as the case may be. */
+}
+
+static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
+{
+	unsigned int csd_type;
+
+	csd_type = CSD_TYPE(csd);
+	if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
+		return csd->dst; /* Other CSD_TYPE_ values might not have ->dst. */
+	return -1;
+}
+
+/*
+ * Complain if too much time spent waiting.  Note that only
+ * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
+ * so waiting on other types gets much less information.
+ */
+static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
+{
+	int cpu = -1;
+	int cpux;
+	bool firsttime;
+	u64 ts2, ts_delta;
+	call_single_data_t *cpu_cur_csd;
+	unsigned int flags = READ_ONCE(csd->flags);
+
+	if (!(flags & CSD_FLAG_LOCK)) {
+		if (!unlikely(*bug_id))
+			return true;
+		cpu = csd_lock_wait_getcpu(csd);
+		pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
+			 *bug_id, raw_smp_processor_id(), cpu);
+		return true;
+	}
+
+	ts2 = sched_clock();
+	ts_delta = ts2 - *ts1;
+	if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+		return false;
+
+	firsttime = !*bug_id;
+	if (firsttime)
+		*bug_id = atomic_inc_return(&csd_bug_count);
+	cpu = csd_lock_wait_getcpu(csd);
+	if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
+		cpux = 0;
+	else
+		cpux = cpu;
+	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
+	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
+		 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
+		 cpu, csd->func, csd->info);
+	if (cpu_cur_csd && csd != cpu_cur_csd) {
+		pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
+			 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
+			 READ_ONCE(per_cpu(cur_csd_info, cpux)));
+	} else {
+		pr_alert("\tcsd: CSD lock (#%d) %s.\n",
+			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
+	}
+	if (cpu >= 0) {
+		if (!trigger_single_cpu_backtrace(cpu))
+			dump_cpu_task(cpu);
+		if (!cpu_cur_csd) {
+			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
+			arch_send_call_function_single_ipi(cpu);
+		}
+	}
+	dump_stack();
+	*ts1 = ts2;
+
+	return false;
+}
+
 /*
  * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
  *
···
  */
 static __always_inline void csd_lock_wait(call_single_data_t *csd)
 {
+	int bug_id = 0;
+	u64 ts0, ts1;
+
+	ts1 = ts0 = sched_clock();
+	for (;;) {
+		if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
+			break;
+		cpu_relax();
+	}
+	smp_acquire__after_ctrl_dep();
+}
+
+#else
+static void csd_lock_record(call_single_data_t *csd)
+{
+}
+
+static __always_inline void csd_lock_wait(call_single_data_t *csd)
+{
 	smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
 }
+#endif
 
 static __always_inline void csd_lock(call_single_data_t *csd)
 {
···
 	 * We can unlock early even for the synchronous on-stack case,
 	 * since we're doing this from the same CPU..
 	 */
+	csd_lock_record(csd);
 	csd_unlock(csd);
 	local_irq_save(flags);
 	func(info);
+	csd_lock_record(NULL);
 	local_irq_restore(flags);
 	return 0;
 }
···
 			entry = &csd_next->llist;
 		}
 
+		csd_lock_record(csd);
 		func(info);
 		csd_unlock(csd);
+		csd_lock_record(NULL);
 	} else {
 		prev = &csd->llist;
 	}
···
 			smp_call_func_t func = csd->func;
 			void *info = csd->info;
 
+			csd_lock_record(csd);
 			csd_unlock(csd);
 			func(info);
+			csd_lock_record(NULL);
 		} else if (type == CSD_TYPE_IRQ_WORK) {
 			irq_work_single(csd);
 		}
···
 
 	csd->func = func;
 	csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+	csd->src = smp_processor_id();
+	csd->dst = cpu;
+#endif
 
 	err = generic_exec_single(cpu, csd);
 
···
 			csd->flags |= CSD_TYPE_SYNC;
 		csd->func = func;
 		csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+		csd->src = smp_processor_id();
+		csd->dst = cpu;
+#endif
 		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
 			__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
 	}
lib/Kconfig.debug (+11)
···
 	  module may be built after the fact on the running kernel to
 	  be tested, if desired.
 
+config CSD_LOCK_WAIT_DEBUG
+	bool "Debugging for csd_lock_wait(), called from smp_call_function*()"
+	depends on DEBUG_KERNEL
+	depends on 64BIT
+	default n
+	help
+	  This option enables debug prints when CPUs are slow to respond
+	  to the smp_call_function*() IPI wrappers.  These debug prints
+	  include the IPI handler function currently executing (if any)
+	  and relevant stack traces.
+
 endmenu # lock debugging
 
 config TRACE_IRQFLAGS
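As the depends lines indicate, the option is only offered on 64BIT kernels with CONFIG_DEBUG_KERNEL=y; once CONFIG_CSD_LOCK_WAIT_DEBUG=y is set, the reports added in kernel/smp.c fire when a csd_lock_wait() exceeds CSD_LOCK_TIMEOUT (5 seconds).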
lib/nmi_backtrace.c (+5 -1)
···
 	put_cpu();
 }
 
+// Dump stacks even for idle CPUs.
+static bool backtrace_idle;
+module_param(backtrace_idle, bool, 0644);
+
 bool nmi_cpu_backtrace(struct pt_regs *regs)
 {
 	int cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-		if (regs && cpu_in_idle(instruction_pointer(regs))) {
+		if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) {
 			pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
 				cpu, (void *)instruction_pointer(regs));
 		} else {
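Because the parameter is declared with mode 0644, it can also be flipped at run time through /sys/module/nmi_backtrace/parameters/backtrace_idle (assuming the usual sysfs mount point); the READ_ONCE() lets each NMI see the current value rather than a compiler-cached one.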