Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/entry: Fix NMI vs IRQ state tracking

While the nmi_enter() users did
trace_hardirqs_{off_finish,on_prepare}() there were no matching
lockdep_hardirqs_*() calls to complete the picture.

Introduce idtentry_{enter,exit}_nmi() to enable proper IRQ state
tracking across NMIs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20200623083721.216740948@infradead.org

+70 -29
+38 -4
arch/x86/entry/common.c
··· 592 592 * The return value must be fed into the state argument of 593 593 * idtentry_exit(). 594 594 */ 595 - idtentry_state_t noinstr idtentry_enter(struct pt_regs *regs) 595 + noinstr idtentry_state_t idtentry_enter(struct pt_regs *regs) 596 596 { 597 597 idtentry_state_t ret = { 598 598 .exit_rcu = false, ··· 687 687 * Counterpart to idtentry_enter(). The return value of the entry 688 688 * function must be fed into the @state argument. 689 689 */ 690 - void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state) 690 + noinstr void idtentry_exit(struct pt_regs *regs, idtentry_state_t state) 691 691 { 692 692 lockdep_assert_irqs_disabled(); 693 693 ··· 731 731 * Invokes enter_from_user_mode() to establish the proper context for 732 732 * NOHZ_FULL. Otherwise scheduling on exit would not be possible. 733 733 */ 734 - void noinstr idtentry_enter_user(struct pt_regs *regs) 734 + noinstr void idtentry_enter_user(struct pt_regs *regs) 735 735 { 736 736 check_user_regs(regs); 737 737 enter_from_user_mode(); ··· 749 749 * 750 750 * Counterpart to idtentry_enter_user(). 
751 751 */ 752 - void noinstr idtentry_exit_user(struct pt_regs *regs) 752 + noinstr void idtentry_exit_user(struct pt_regs *regs) 753 753 { 754 754 lockdep_assert_irqs_disabled(); 755 755 756 756 prepare_exit_to_usermode(regs); 757 + } 758 + 759 + noinstr bool idtentry_enter_nmi(struct pt_regs *regs) 760 + { 761 + bool irq_state = lockdep_hardirqs_enabled(current); 762 + 763 + __nmi_enter(); 764 + lockdep_hardirqs_off(CALLER_ADDR0); 765 + lockdep_hardirq_enter(); 766 + rcu_nmi_enter(); 767 + 768 + instrumentation_begin(); 769 + trace_hardirqs_off_finish(); 770 + ftrace_nmi_enter(); 771 + instrumentation_end(); 772 + 773 + return irq_state; 774 + } 775 + 776 + noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore) 777 + { 778 + instrumentation_begin(); 779 + ftrace_nmi_exit(); 780 + if (restore) { 781 + trace_hardirqs_on_prepare(); 782 + lockdep_hardirqs_on_prepare(CALLER_ADDR0); 783 + } 784 + instrumentation_end(); 785 + 786 + rcu_nmi_exit(); 787 + lockdep_hardirq_exit(); 788 + if (restore) 789 + lockdep_hardirqs_on(CALLER_ADDR0); 790 + __nmi_exit(); 757 791 } 758 792 759 793 #ifdef CONFIG_XEN_PV
+3
arch/x86/include/asm/idtentry.h
··· 20 20 idtentry_state_t idtentry_enter(struct pt_regs *regs); 21 21 void idtentry_exit(struct pt_regs *regs, idtentry_state_t state); 22 22 23 + bool idtentry_enter_nmi(struct pt_regs *regs); 24 + void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state); 25 + 23 26 /** 24 27 * DECLARE_IDTENTRY - Declare functions for simple IDT entry points 25 28 * No error code pushed by hardware
+4 -5
arch/x86/kernel/nmi.c
··· 330 330 __this_cpu_write(last_nmi_rip, regs->ip); 331 331 332 332 instrumentation_begin(); 333 - trace_hardirqs_off_finish(); 334 333 335 334 handled = nmi_handle(NMI_LOCAL, regs); 336 335 __this_cpu_add(nmi_stats.normal, handled); ··· 416 417 unknown_nmi_error(reason, regs); 417 418 418 419 out: 419 - if (regs->flags & X86_EFLAGS_IF) 420 - trace_hardirqs_on_prepare(); 421 420 instrumentation_end(); 422 421 } 423 422 ··· 475 478 476 479 DEFINE_IDTENTRY_RAW(exc_nmi) 477 480 { 481 + bool irq_state; 482 + 478 483 if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) 479 484 return; 480 485 ··· 490 491 491 492 this_cpu_write(nmi_dr7, local_db_save()); 492 493 493 - nmi_enter(); 494 + irq_state = idtentry_enter_nmi(regs); 494 495 495 496 inc_irq_stat(__nmi_count); 496 497 497 498 if (!ignore_nmis) 498 499 default_do_nmi(regs); 499 500 500 - nmi_exit(); 501 + idtentry_exit_nmi(regs, irq_state); 501 502 502 503 local_db_restore(this_cpu_read(nmi_dr7)); 503 504
+6 -11
arch/x86/kernel/traps.c
··· 403 403 } 404 404 #endif 405 405 406 - nmi_enter(); 406 + idtentry_enter_nmi(regs); 407 407 instrumentation_begin(); 408 408 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); 409 409 ··· 649 649 instrumentation_end(); 650 650 idtentry_exit_user(regs); 651 651 } else { 652 - nmi_enter(); 652 + bool irq_state = idtentry_enter_nmi(regs); 653 653 instrumentation_begin(); 654 - trace_hardirqs_off_finish(); 655 654 if (!do_int3(regs)) 656 655 die("int3", regs, 0); 657 - if (regs->flags & X86_EFLAGS_IF) 658 - trace_hardirqs_on_prepare(); 659 656 instrumentation_end(); 660 - nmi_exit(); 657 + idtentry_exit_nmi(regs, irq_state); 661 658 } 662 659 } 663 660 ··· 862 865 static __always_inline void exc_debug_kernel(struct pt_regs *regs, 863 866 unsigned long dr6) 864 867 { 865 - nmi_enter(); 868 + bool irq_state = idtentry_enter_nmi(regs); 866 869 instrumentation_begin(); 867 - trace_hardirqs_off_finish(); 868 870 869 871 /* 870 872 * If something gets miswired and we end up here for a user mode ··· 880 884 881 885 handle_debug(regs, dr6, false); 882 886 883 - if (regs->flags & X86_EFLAGS_IF) 884 - trace_hardirqs_on_prepare(); 885 887 instrumentation_end(); 886 - nmi_exit(); 888 + idtentry_exit_nmi(regs, irq_state); 887 889 } 888 890 889 891 static __always_inline void exc_debug_user(struct pt_regs *regs, ··· 897 903 instrumentation_begin(); 898 904 899 905 handle_debug(regs, dr6, true); 906 + 900 907 instrumentation_end(); 901 908 idtentry_exit_user(regs); 902 909 }
+19 -9
include/linux/hardirq.h
··· 111 111 /* 112 112 * nmi_enter() can nest up to 15 times; see NMI_BITS. 113 113 */ 114 - #define nmi_enter() \ 114 + #define __nmi_enter() \ 115 115 do { \ 116 + lockdep_off(); \ 116 117 arch_nmi_enter(); \ 117 118 printk_nmi_enter(); \ 118 - lockdep_off(); \ 119 119 BUG_ON(in_nmi() == NMI_MASK); \ 120 120 __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ 121 - rcu_nmi_enter(); \ 121 + } while (0) 122 + 123 + #define nmi_enter() \ 124 + do { \ 125 + __nmi_enter(); \ 122 126 lockdep_hardirq_enter(); \ 127 + rcu_nmi_enter(); \ 123 128 instrumentation_begin(); \ 124 129 ftrace_nmi_enter(); \ 125 130 instrumentation_end(); \ 131 + } while (0) 132 + 133 + #define __nmi_exit() \ 134 + do { \ 135 + BUG_ON(!in_nmi()); \ 136 + __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ 137 + printk_nmi_exit(); \ 138 + arch_nmi_exit(); \ 139 + lockdep_on(); \ 126 140 } while (0) 127 141 128 142 #define nmi_exit() \ ··· 144 130 instrumentation_begin(); \ 145 131 ftrace_nmi_exit(); \ 146 132 instrumentation_end(); \ 147 - lockdep_hardirq_exit(); \ 148 133 rcu_nmi_exit(); \ 149 - BUG_ON(!in_nmi()); \ 150 - __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ 151 - lockdep_on(); \ 152 - printk_nmi_exit(); \ 153 - arch_nmi_exit(); \ 134 + lockdep_hardirq_exit(); \ 135 + __nmi_exit(); \ 154 136 } while (0) 155 137 156 138 #endif /* LINUX_HARDIRQ_H */