Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

kvm: nVMX: fix entry with pending interrupt if APICv is enabled

Commit b5861e5cf2fcf83031ea3e26b0a69d887adf7d21 introduced a check on
the interrupt-window and NMI-window CPU execution controls in order to
inject an external interrupt vmexit before the first guest instruction
executes. However, when APIC virtualization is enabled the host does not
need a vmexit in order to inject an interrupt at the next interrupt window;
instead, it just places the interrupt vector in RVI and the processor will
inject it as soon as possible. Therefore, on machines with APICv it is
not enough to check the CPU execution controls: the same scenario can also
happen if RVI > vPPR.

Fixes: b5861e5cf2fc ("KVM: nVMX: Fix loss of pending IRQ/NMI before entering L2")
Reviewed-by: Nikita Leshchenko <nikita.leshchenko@oracle.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Liran Alon <liran.alon@oracle.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

arch/x86/kvm/vmx.c | +25 -11
@@ -6162,6 +6162,11 @@
 	nested_mark_vmcs12_pages_dirty(vcpu);
 }
 
+static u8 vmx_get_rvi(void)
+{
+	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+}
+
 static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6174,7 +6179,7 @@
 	    WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
 		return false;
 
-	rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+	rvi = vmx_get_rvi();
 
 	vapic_page = kmap(vmx->nested.virtual_apic_page);
 	vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
@@ -10349,6 +10354,14 @@
 	return max_irr;
 }
 
+static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
+{
+	u8 rvi = vmx_get_rvi();
+	u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
+
+	return ((rvi & 0xf0) > (vppr & 0xf0));
+}
+
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
 	if (!kvm_vcpu_apicv_active(vcpu))
@@ -12593,9 +12606,12 @@
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	bool from_vmentry = !!exit_qual;
 	u32 dummy_exit_qual;
-	u32 vmcs01_cpu_exec_ctrl;
+	bool evaluate_pending_interrupts;
 	int r = 0;
 
-	vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
+		(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
+	if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
+		evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
 
 	enter_guest_mode(vcpu);
 
@@ -12644,16 +12660,14 @@
 	 * to L1 or delivered directly to L2 (e.g. In case L1 don't
 	 * intercept EXTERNAL_INTERRUPT).
 	 *
-	 * Usually this would be handled by L0 requesting a
-	 * IRQ/NMI window by setting VMCS accordingly. However,
-	 * this setting was done on VMCS01 and now VMCS02 is active
-	 * instead. Thus, we force L0 to perform pending event
-	 * evaluation by requesting a KVM_REQ_EVENT.
+	 * Usually this would be handled by the processor noticing an
+	 * IRQ/NMI window request, or checking RVI during evaluation of
+	 * pending virtual interrupts.  However, this setting was done
+	 * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
+	 * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
 	 */
-	if (vmcs01_cpu_exec_ctrl &
-	    (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
+	if (unlikely(evaluate_pending_interrupts))
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
-	}
 
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point