Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: Fix simultaneous NMIs

If simultaneous NMIs happen, we're supposed to queue the second
and next (collapsing them), but currently we sometimes collapse
the second into the first.

Fix by using a counter for pending NMIs instead of a bool; since
the counter limit depends on whether the processor is currently
in an NMI handler, which can only be checked in vcpu context
(via the NMI mask), we add a new KVM_REQ_NMI to request recalculation
of the counter.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

+35 -19
+3 -2
arch/x86/include/asm/kvm_host.h
··· 413 413 u32 tsc_catchup_mult; 414 414 s8 tsc_catchup_shift; 415 415 416 - bool nmi_pending; 417 - bool nmi_injected; 416 + atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ 417 + unsigned nmi_pending; /* NMI queued after currently running handler */ 418 + bool nmi_injected; /* Trying to inject an NMI this entry */ 418 419 419 420 struct mtrr_state_type mtrr_state; 420 421 u32 pat;
+31 -17
arch/x86/kvm/x86.c
··· 83 83 static void update_cr8_intercept(struct kvm_vcpu *vcpu); 84 84 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 85 85 struct kvm_cpuid_entry2 __user *entries); 86 + static void process_nmi(struct kvm_vcpu *vcpu); 86 87 87 88 struct kvm_x86_ops *kvm_x86_ops; 88 89 EXPORT_SYMBOL_GPL(kvm_x86_ops); ··· 360 359 361 360 void kvm_inject_nmi(struct kvm_vcpu *vcpu) 362 361 { 363 - kvm_make_request(KVM_REQ_EVENT, vcpu); 364 - vcpu->arch.nmi_pending = 1; 362 + atomic_inc(&vcpu->arch.nmi_queued); 363 + kvm_make_request(KVM_REQ_NMI, vcpu); 365 364 } 366 365 EXPORT_SYMBOL_GPL(kvm_inject_nmi); 367 366 ··· 2828 2827 static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, 2829 2828 struct kvm_vcpu_events *events) 2830 2829 { 2830 + process_nmi(vcpu); 2831 2831 events->exception.injected = 2832 2832 vcpu->arch.exception.pending && 2833 2833 !kvm_exception_is_soft(vcpu->arch.exception.nr); ··· 2846 2844 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); 2847 2845 2848 2846 events->nmi.injected = vcpu->arch.nmi_injected; 2849 - events->nmi.pending = vcpu->arch.nmi_pending; 2847 + events->nmi.pending = vcpu->arch.nmi_pending != 0; 2850 2848 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); 2851 2849 events->nmi.pad = 0; 2852 2850 ··· 2866 2864 | KVM_VCPUEVENT_VALID_SHADOW)) 2867 2865 return -EINVAL; 2868 2866 2867 + process_nmi(vcpu); 2869 2868 vcpu->arch.exception.pending = events->exception.injected; 2870 2869 vcpu->arch.exception.nr = events->exception.nr; 2871 2870 vcpu->arch.exception.has_error_code = events->exception.has_error_code; ··· 4766 4763 kvm_set_rflags(vcpu, ctxt->eflags); 4767 4764 4768 4765 if (irq == NMI_VECTOR) 4769 - vcpu->arch.nmi_pending = false; 4766 + vcpu->arch.nmi_pending = 0; 4770 4767 else 4771 4768 vcpu->arch.interrupt.pending = false; 4772 4769 ··· 5575 5572 /* try to inject new event if pending */ 5576 5573 if (vcpu->arch.nmi_pending) { 5577 5574 if (kvm_x86_ops->nmi_allowed(vcpu)) { 5578 - vcpu->arch.nmi_pending = false; 5575 + --vcpu->arch.nmi_pending; 5579 5576 vcpu->arch.nmi_injected = true; 5580 5577 kvm_x86_ops->set_nmi(vcpu); 5581 5578 } ··· 5607 5604 } 5608 5605 } 5609 5606 5607 + static void process_nmi(struct kvm_vcpu *vcpu) 5608 + { 5609 + unsigned limit = 2; 5610 + 5611 + /* 5612 + * x86 is limited to one NMI running, and one NMI pending after it. 5613 + * If an NMI is already in progress, limit further NMIs to just one. 5614 + * Otherwise, allow two (and we'll inject the first one immediately). 5615 + */ 5616 + if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected) 5617 + limit = 1; 5618 + 5619 + vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0); 5620 + vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit); 5621 + kvm_make_request(KVM_REQ_EVENT, vcpu); 5622 + } 5623 + 5610 5624 static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5611 5625 { 5612 5626 int r; 5613 - bool nmi_pending; 5614 5627 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && 5615 5628 vcpu->run->request_interrupt_window; 5616 5629 ··· 5666 5647 } 5667 5648 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) 5668 5649 record_steal_time(vcpu); 5650 + if (kvm_check_request(KVM_REQ_NMI, vcpu)) 5651 + process_nmi(vcpu); 5669 5652 5670 5653 } 5671 5654 ··· 5675 5654 if (unlikely(r)) 5676 5655 goto out; 5677 5656 5678 - /* 5679 - * An NMI can be injected between local nmi_pending read and 5680 - * vcpu->arch.nmi_pending read inside inject_pending_event(). 5681 - * But in that case, KVM_REQ_EVENT will be set, which makes 5682 - * the race described above benign. 5683 - */ 5684 - nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); 5685 - 5686 5657 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 5687 5658 inject_pending_event(vcpu); 5688 5659 5689 5660 /* enable NMI/IRQ window open exits if needed */ 5690 - if (nmi_pending) 5661 + if (vcpu->arch.nmi_pending) 5691 5662 kvm_x86_ops->enable_nmi_window(vcpu); 5692 5663 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) 5693 5664 kvm_x86_ops->enable_irq_window(vcpu); ··· 6387 6374 6388 6375 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) 6389 6376 { 6390 - vcpu->arch.nmi_pending = false; 6377 + atomic_set(&vcpu->arch.nmi_queued, 0); 6378 + vcpu->arch.nmi_pending = 0; 6391 6379 vcpu->arch.nmi_injected = false; 6392 6380 6393 6381 vcpu->arch.switch_db_regs = 0; ··· 6663 6649 !vcpu->arch.apf.halted) 6664 6650 || !list_empty_careful(&vcpu->async_pf.done) 6665 6651 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED 6666 - || vcpu->arch.nmi_pending || 6652 + || atomic_read(&vcpu->arch.nmi_queued) || 6667 6653 (kvm_arch_interrupt_allowed(vcpu) && 6668 6654 kvm_cpu_has_interrupt(vcpu)); 6669 6655 }
+1
include/linux/kvm_host.h
··· 49 49 #define KVM_REQ_EVENT 11 50 50 #define KVM_REQ_APF_HALT 12 51 51 #define KVM_REQ_STEAL_UPDATE 13 52 + #define KVM_REQ_NMI 14 52 53 53 54 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 54 55