Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

debug lockups: Improve lockup detection

When debugging a recent lockup bug I found various deficiencies
in how our current lockup detection helpers work:

- SysRq-L is not very efficient as it uses a workqueue, hence
it cannot punch through hard lockups and cannot see through
most soft lockups either.

- The SysRq-L code depends on the NMI watchdog - which is off
by default.

- We don't print backtraces from the RCU code's built-in
'RCU state machine is stuck' debug code. This debug
code tends to be one of the first (and only) mechanisms
that show that a lockup has occurred.

This patch changes the code so that we:

- Trigger the NMI backtrace code from SysRq-L instead of using
a workqueue (which cannot punch through hard lockups)

- Trigger print-all-CPU-backtraces from the RCU lockup detection
code

Also decouple the backtrace printing code from the NMI watchdog:

- Don't use variable size cpumasks (they might not be initialized
and they are a bit more fragile anyway)

- Trigger an NMI immediately via an IPI, instead of waiting
for the NMI tick to occur. This is a lot faster and can
produce more relevant backtraces. It will also work if the
NMI watchdog is disabled.

- Don't print the 'dazed and confused' message when we print
a backtrace from the NMI

- Do a show_regs() plus a dump_stack() to get maximum info
out of the dump. Worst-case we get two stacktraces - which
is not a big deal. Sometimes, if register content is
corrupted, the precise stack walker in show_regs() won't
give us a full backtrace - in this case dump_stack() will
do it.

Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

+20 -13
+12 -6
arch/x86/kernel/apic/nmi.c
··· 39 39 int unknown_nmi_panic; 40 40 int nmi_watchdog_enabled; 41 41 42 - static cpumask_var_t backtrace_mask; 42 + static cpumask_t backtrace_mask __read_mostly; 43 43 44 44 /* nmi_active: 45 45 * >0: the lapic NMI watchdog is active, but can be disabled ··· 138 138 if (!prev_nmi_count) 139 139 goto error; 140 140 141 - alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); 142 141 printk(KERN_INFO "Testing NMI watchdog ... "); 143 142 144 143 #ifdef CONFIG_SMP ··· 414 415 } 415 416 416 417 /* We can be called before check_nmi_watchdog, hence NULL check. */ 417 - if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { 418 + if (cpumask_test_cpu(cpu, &backtrace_mask)) { 418 419 static DEFINE_SPINLOCK(lock); /* Serialise the printks */ 419 420 420 421 spin_lock(&lock); 421 422 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); 423 + show_regs(regs); 422 424 dump_stack(); 423 425 spin_unlock(&lock); 424 - cpumask_clear_cpu(cpu, backtrace_mask); 426 + cpumask_clear_cpu(cpu, &backtrace_mask); 427 + 428 + rc = 1; 425 429 } 426 430 427 431 /* Could check oops_in_progress here too, but it's safer not to */ ··· 558 556 { 559 557 int i; 560 558 561 - cpumask_copy(backtrace_mask, cpu_online_mask); 559 + cpumask_copy(&backtrace_mask, cpu_online_mask); 560 + 561 + printk(KERN_INFO "sending NMI to all CPUs:\n"); 562 + apic->send_IPI_all(NMI_VECTOR); 563 + 562 564 /* Wait for up to 10 seconds for all CPUs to do the backtrace */ 563 565 for (i = 0; i < 10 * 1000; i++) { 564 - if (cpumask_empty(backtrace_mask)) 566 + if (cpumask_empty(&backtrace_mask)) 565 567 break; 566 568 mdelay(1); 567 569 }
+2 -6
drivers/char/sysrq.c
··· 24 24 #include <linux/sysrq.h> 25 25 #include <linux/kbd_kern.h> 26 26 #include <linux/proc_fs.h> 27 + #include <linux/nmi.h> 27 28 #include <linux/quotaops.h> 28 29 #include <linux/perf_counter.h> 29 30 #include <linux/kernel.h> ··· 223 222 224 223 static void sysrq_handle_showallcpus(int key, struct tty_struct *tty) 225 224 { 226 - struct pt_regs *regs = get_irq_regs(); 227 - if (regs) { 228 - printk(KERN_INFO "CPU%d:\n", smp_processor_id()); 229 - show_regs(regs); 230 - } 231 - schedule_work(&sysrq_showallcpus); 225 + trigger_all_cpu_backtrace(); 232 226 } 233 227 234 228 static struct sysrq_key_op sysrq_showallcpus_op = {
+6 -1
kernel/rcutree.c
··· 35 35 #include <linux/rcupdate.h> 36 36 #include <linux/interrupt.h> 37 37 #include <linux/sched.h> 38 + #include <linux/nmi.h> 38 39 #include <asm/atomic.h> 39 40 #include <linux/bitops.h> 40 41 #include <linux/module.h> ··· 470 469 } 471 470 printk(" (detected by %d, t=%ld jiffies)\n", 472 471 smp_processor_id(), (long)(jiffies - rsp->gp_start)); 472 + trigger_all_cpu_backtrace(); 473 + 473 474 force_quiescent_state(rsp, 0); /* Kick them all. */ 474 475 } 475 476 ··· 482 479 483 480 printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n", 484 481 smp_processor_id(), jiffies - rsp->gp_start); 485 - dump_stack(); 482 + trigger_all_cpu_backtrace(); 483 + 486 484 spin_lock_irqsave(&rnp->lock, flags); 487 485 if ((long)(jiffies - rsp->jiffies_stall) >= 0) 488 486 rsp->jiffies_stall = 489 487 jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 490 488 spin_unlock_irqrestore(&rnp->lock, flags); 489 + 491 490 set_need_resched(); /* kick ourselves to get things going. */ 492 491 } 493 492