Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

watchdog/softlockup: Report the most frequent interrupts

When the watchdog determines that the current soft lockup is due to an
interrupt storm based on CPU utilization, reporting the most frequent
interrupts could be good enough for further troubleshooting.

Below is an example of an interrupt storm. The call tree does not provide
useful information, but analyzing which interrupt caused the soft lockup by
comparing the counts of interrupts during the lockup period makes it
possible to identify the culprit.

[ 638.870231] watchdog: BUG: soft lockup - CPU#9 stuck for 26s! [swapper/9:0]
[ 638.870825] CPU#9 Utilization every 4s during lockup:
[ 638.871194] #1: 0% system, 0% softirq, 100% hardirq, 0% idle
[ 638.871652] #2: 0% system, 0% softirq, 100% hardirq, 0% idle
[ 638.872107] #3: 0% system, 0% softirq, 100% hardirq, 0% idle
[ 638.872563] #4: 0% system, 0% softirq, 100% hardirq, 0% idle
[ 638.873018] #5: 0% system, 0% softirq, 100% hardirq, 0% idle
[ 638.873494] CPU#9 Detect HardIRQ Time exceeds 50%. Most frequent HardIRQs:
[ 638.873994] #1: 330945 irq#7
[ 638.874236] #2: 31 irq#82
[ 638.874493] #3: 10 irq#10
[ 638.874744] #4: 2 irq#89
[ 638.874992] #5: 1 irq#102
...
[ 638.875313] Call trace:
[ 638.875315] __do_softirq+0xa8/0x364

Signed-off-by: Bitao Hu <yaoma@linux.alibaba.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Liu Song <liusong@linux.alibaba.com>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240411074134.30922-6-yaoma@linux.alibaba.com

authored by

Bitao Hu and committed by
Thomas Gleixner
e9a9292e d7037381

+112 -4
+112 -4
kernel/watchdog.c
··· 12 12 13 13 #define pr_fmt(fmt) "watchdog: " fmt 14 14 15 - #include <linux/mm.h> 16 15 #include <linux/cpu.h> 17 - #include <linux/nmi.h> 18 16 #include <linux/init.h> 17 + #include <linux/irq.h> 18 + #include <linux/irqdesc.h> 19 19 #include <linux/kernel_stat.h> 20 + #include <linux/kvm_para.h> 20 21 #include <linux/math64.h> 22 + #include <linux/mm.h> 21 23 #include <linux/module.h> 24 + #include <linux/nmi.h> 25 + #include <linux/stop_machine.h> 22 26 #include <linux/sysctl.h> 23 27 #include <linux/tick.h> 28 + 24 29 #include <linux/sched/clock.h> 25 30 #include <linux/sched/debug.h> 26 31 #include <linux/sched/isolation.h> 27 - #include <linux/stop_machine.h> 28 32 29 33 #include <asm/irq_regs.h> 30 - #include <linux/kvm_para.h> 31 34 32 35 static DEFINE_MUTEX(watchdog_mutex); 33 36 ··· 421 418 } 422 419 } 423 420 421 + #define HARDIRQ_PERCENT_THRESH 50 422 + #define NUM_HARDIRQ_REPORT 5 423 + struct irq_counts { 424 + int irq; 425 + u32 counts; 426 + }; 427 + 428 + static DEFINE_PER_CPU(bool, snapshot_taken); 429 + 430 + /* Tabulate the most frequent interrupts. */ 431 + static void tabulate_irq_count(struct irq_counts *irq_counts, int irq, u32 counts, int rank) 432 + { 433 + int i; 434 + struct irq_counts new_count = {irq, counts}; 435 + 436 + for (i = 0; i < rank; i++) { 437 + if (counts > irq_counts[i].counts) 438 + swap(new_count, irq_counts[i]); 439 + } 440 + } 441 + 442 + /* 443 + * If the hardirq time exceeds HARDIRQ_PERCENT_THRESH% of the sample_period, 444 + * then the cause of softlockup might be interrupt storm. In this case, it 445 + * would be useful to start interrupt counting. 
446 + */ 447 + static bool need_counting_irqs(void) 448 + { 449 + u8 util; 450 + int tail = __this_cpu_read(cpustat_tail); 451 + 452 + tail = (tail + NUM_HARDIRQ_REPORT - 1) % NUM_HARDIRQ_REPORT; 453 + util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]); 454 + return util > HARDIRQ_PERCENT_THRESH; 455 + } 456 + 457 + static void start_counting_irqs(void) 458 + { 459 + if (!__this_cpu_read(snapshot_taken)) { 460 + kstat_snapshot_irqs(); 461 + __this_cpu_write(snapshot_taken, true); 462 + } 463 + } 464 + 465 + static void stop_counting_irqs(void) 466 + { 467 + __this_cpu_write(snapshot_taken, false); 468 + } 469 + 470 + static void print_irq_counts(void) 471 + { 472 + unsigned int i, count; 473 + struct irq_counts irq_counts_sorted[NUM_HARDIRQ_REPORT] = { 474 + {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0} 475 + }; 476 + 477 + if (__this_cpu_read(snapshot_taken)) { 478 + for_each_active_irq(i) { 479 + count = kstat_get_irq_since_snapshot(i); 480 + tabulate_irq_count(irq_counts_sorted, i, count, NUM_HARDIRQ_REPORT); 481 + } 482 + 483 + /* 484 + * Outputting the "watchdog" prefix on every line is redundant and not 485 + * concise, and the original alarm information is sufficient for 486 + * positioning in logs, hence here printk() is used instead of pr_crit(). 487 + */ 488 + printk(KERN_CRIT "CPU#%d Detect HardIRQ Time exceeds %d%%. Most frequent HardIRQs:\n", 489 + smp_processor_id(), HARDIRQ_PERCENT_THRESH); 490 + 491 + for (i = 0; i < NUM_HARDIRQ_REPORT; i++) { 492 + if (irq_counts_sorted[i].irq == -1) 493 + break; 494 + 495 + printk(KERN_CRIT "\t#%u: %-10u\tirq#%d\n", 496 + i + 1, irq_counts_sorted[i].counts, 497 + irq_counts_sorted[i].irq); 498 + } 499 + 500 + /* 501 + * If the hardirq time is less than HARDIRQ_PERCENT_THRESH% in the last 502 + * sample_period, then we suspect the interrupt storm might be subsiding. 
503 + */ 504 + if (!need_counting_irqs()) 505 + stop_counting_irqs(); 506 + } 507 + } 508 + 424 509 static void report_cpu_status(void) 425 510 { 426 511 print_cpustat(); 512 + print_irq_counts(); 427 513 } 428 514 #else 429 515 static inline void update_cpustat(void) { } 430 516 static inline void report_cpu_status(void) { } 517 + static inline bool need_counting_irqs(void) { return false; } 518 + static inline void start_counting_irqs(void) { } 519 + static inline void stop_counting_irqs(void) { } 431 520 #endif 432 521 433 522 /* ··· 623 528 unsigned long now) 624 529 { 625 530 if ((watchdog_enabled & WATCHDOG_SOFTOCKUP_ENABLED) && watchdog_thresh) { 531 + /* 532 + * If period_ts has not been updated during a sample_period, then 533 + * in the subsequent few sample_periods, period_ts might also not 534 + * be updated, which could indicate a potential softlockup. In 535 + * this case, if we suspect the cause of the potential softlockup 536 + * might be interrupt storm, then we need to count the interrupts 537 + * to find which interrupt is storming. 538 + */ 539 + if (time_after_eq(now, period_ts + get_softlockup_thresh() / NUM_SAMPLE_PERIODS) && 540 + need_counting_irqs()) 541 + start_counting_irqs(); 542 + 626 543 /* Warn about unreasonable delays. */ 627 544 if (time_after(now, period_ts + get_softlockup_thresh())) 628 545 return now - touch_ts; ··· 657 550 static int softlockup_fn(void *data) 658 551 { 659 552 update_touch_ts(); 553 + stop_counting_irqs(); 660 554 complete(this_cpu_ptr(&softlockup_completion)); 661 555 662 556 return 0;