Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arch/tile: adopt the new nmi_backtrace framework

Previously tile was rolling its own method of capturing backtrace data
in the NMI handlers, but it was relying on running printk() from the NMI
handler, which is not always safe. So adopt the nmi_backtrace model
(with the new cpumask extension) instead.

So that we can call the nmi_backtrace code directly from the NMI handler,
move the nmi_enter()/exit() calls into the top-level tile NMI handler.

The semantics of the routine change slightly since it is now synchronous
with the remote cores completing the backtraces. Previously it was
asynchronous, but with protection to avoid starting a new remote
backtrace if the old one was still in progress.

Link: http://lkml.kernel.org/r/1472487169-14923-4-git-send-email-cmetcalf@mellanox.com
Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Daniel Thompson <daniel.thompson@linaro.org> [arm]
Cc: Petr Mladek <pmladek@suse.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Chris Metcalf and committed by
Linus Torvalds
511f8389 67766489

+27 -63
+3 -2
arch/tile/include/asm/irq.h
··· 79 79 void setup_irq_regs(void); 80 80 81 81 #ifdef __tilegx__ 82 - void arch_trigger_all_cpu_backtrace(bool self); 83 - #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace 82 + void arch_trigger_cpumask_backtrace(const struct cpumask *mask, 83 + bool exclude_self); 84 + #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace 84 85 #endif 85 86 86 87 #endif /* _ASM_TILE_IRQ_H */
-3
arch/tile/kernel/pmc.c
··· 16 16 #include <linux/spinlock.h> 17 17 #include <linux/module.h> 18 18 #include <linux/atomic.h> 19 - #include <linux/interrupt.h> 20 19 21 20 #include <asm/processor.h> 22 21 #include <asm/pmc.h> ··· 28 29 if (!perf_irq) 29 30 panic("Unexpected PERF_COUNT interrupt %d\n", fault); 30 31 31 - nmi_enter(); 32 32 retval = perf_irq(regs, fault); 33 - nmi_exit(); 34 33 return retval; 35 34 } 36 35
+17 -56
arch/tile/kernel/process.c
··· 22 22 #include <linux/init.h> 23 23 #include <linux/mm.h> 24 24 #include <linux/compat.h> 25 - #include <linux/hardirq.h> 25 + #include <linux/nmi.h> 26 26 #include <linux/syscalls.h> 27 27 #include <linux/kernel.h> 28 28 #include <linux/tracehook.h> ··· 594 594 tile_show_stack(&kbt); 595 595 } 596 596 597 - /* To ensure stack dump on tiles occurs one by one. */ 598 - static DEFINE_SPINLOCK(backtrace_lock); 599 - /* To ensure no backtrace occurs before all of the stack dump are done. */ 600 - static atomic_t backtrace_cpus; 601 - /* The cpu mask to avoid reentrance. */ 602 - static struct cpumask backtrace_mask; 603 - 604 - void do_nmi_dump_stack(struct pt_regs *regs) 605 - { 606 - int is_idle = is_idle_task(current) && !in_interrupt(); 607 - int cpu; 608 - 609 - nmi_enter(); 610 - cpu = smp_processor_id(); 611 - if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask))) 612 - goto done; 613 - 614 - spin_lock(&backtrace_lock); 615 - if (is_idle) 616 - pr_info("CPU: %d idle\n", cpu); 617 - else 618 - show_regs(regs); 619 - spin_unlock(&backtrace_lock); 620 - atomic_dec(&backtrace_cpus); 621 - done: 622 - nmi_exit(); 623 - } 624 - 625 597 #ifdef __tilegx__ 626 - void arch_trigger_all_cpu_backtrace(bool self) 598 + void nmi_raise_cpu_backtrace(struct cpumask *in_mask) 627 599 { 628 600 struct cpumask mask; 629 601 HV_Coord tile; 630 602 unsigned int timeout; 631 603 int cpu; 632 - int ongoing; 633 604 HV_NMI_Info info[NR_CPUS]; 634 - 635 - ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1); 636 - if (ongoing != 0) { 637 - pr_err("Trying to do all-cpu backtrace.\n"); 638 - pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n", 639 - ongoing); 640 - if (self) { 641 - pr_err("Reporting the stack on this cpu only.\n"); 642 - dump_stack(); 643 - } 644 - return; 645 - } 646 - 647 - cpumask_copy(&mask, cpu_online_mask); 648 - cpumask_clear_cpu(smp_processor_id(), &mask); 649 - cpumask_copy(&backtrace_mask, &mask); 650 - 651 - /* Backtrace for myself first. */ 652 - if (self) 653 - dump_stack(); 654 605 655 606 /* Tentatively dump stack on remote tiles via NMI. */ 656 607 timeout = 100; 608 + cpumask_copy(&mask, in_mask); 657 609 while (!cpumask_empty(&mask) && timeout) { 658 610 for_each_cpu(cpu, &mask) { 659 611 tile.x = cpu_x(cpu); ··· 616 664 } 617 665 618 666 mdelay(10); 667 + touch_softlockup_watchdog(); 619 668 timeout--; 620 669 } 621 670 622 - /* Warn about cpus stuck in ICS and decrement their counts here. */ 671 + /* Warn about cpus stuck in ICS. */ 623 672 if (!cpumask_empty(&mask)) { 624 673 for_each_cpu(cpu, &mask) { 674 + 675 + /* Clear the bit as if nmi_cpu_backtrace() ran. */ 676 + cpumask_clear_cpu(cpu, in_mask); 677 + 625 678 switch (info[cpu].result) { 626 679 case HV_NMI_RESULT_FAIL_ICS: 627 680 pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n", ··· 637 680 cpu); 638 681 break; 639 682 case HV_ENOSYS: 640 - pr_warn("Hypervisor too old to allow remote stack dumps.\n"); 641 - goto skip_for_each; 683 + WARN_ONCE(1, "Hypervisor too old to allow remote stack dumps.\n"); 684 + break; 642 685 default: /* should not happen */ 643 686 pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n", 644 687 cpu, info[cpu].result, info[cpu].pc); 645 688 break; 646 689 } 647 690 } 648 - skip_for_each: 649 - atomic_sub(cpumask_weight(&mask), &backtrace_cpus); 650 691 } 692 + } 693 + 694 + void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) 695 + { 696 + nmi_trigger_cpumask_backtrace(mask, exclude_self, 697 + nmi_raise_cpu_backtrace); 651 698 } 652 699 #endif /* __tilegx_ */
+7 -2
arch/tile/kernel/traps.c
··· 20 20 #include <linux/reboot.h> 21 21 #include <linux/uaccess.h> 22 22 #include <linux/ptrace.h> 23 + #include <linux/hardirq.h> 24 + #include <linux/nmi.h> 23 25 #include <asm/stack.h> 24 26 #include <asm/traps.h> 25 27 #include <asm/setup.h> ··· 394 392 395 393 void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason) 396 394 { 395 + nmi_enter(); 397 396 switch (reason) { 397 + #ifdef arch_trigger_cpumask_backtrace 398 398 case TILE_NMI_DUMP_STACK: 399 - do_nmi_dump_stack(regs); 399 + nmi_cpu_backtrace(regs); 400 400 break; 401 + #endif 401 402 default: 402 403 panic("Unexpected do_nmi type %ld", reason); 403 - return; 404 404 } 405 + nmi_exit(); 405 406 } 406 407 407 408 /* Deprecated function currently only used here. */