Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-x86-vmx-6.11' of https://github.com/kvm-x86/linux into HEAD

KVM VMX changes for 6.11

- Remove an unnecessary EPT TLB flush when enabling hardware.

- Fix a series of bugs that cause KVM to fail to detect nested pending posted
interrupts as valid wake events for a vCPU executing HLT in L2 (with
HLT-exiting disabled by L1).

- Misc cleanups

+78 -55
-1
arch/x86/include/asm/kvm-x86-ops.h
··· 85 85 KVM_X86_OP(refresh_apicv_exec_ctrl) 86 86 KVM_X86_OP_OPTIONAL(hwapic_irr_update) 87 87 KVM_X86_OP_OPTIONAL(hwapic_isr_update) 88 - KVM_X86_OP_OPTIONAL_RET0(guest_apic_has_interrupt) 89 88 KVM_X86_OP_OPTIONAL(load_eoi_exitmap) 90 89 KVM_X86_OP_OPTIONAL(set_virtual_apic_mode) 91 90 KVM_X86_OP_OPTIONAL(set_apic_access_page_addr)
+1 -2
arch/x86/include/asm/kvm_host.h
··· 1731 1731 void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); 1732 1732 void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); 1733 1733 void (*hwapic_isr_update)(int isr); 1734 - bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); 1735 1734 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); 1736 1735 void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); 1737 1736 void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); ··· 1836 1837 bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector, 1837 1838 u32 error_code); 1838 1839 int (*check_events)(struct kvm_vcpu *vcpu); 1839 - bool (*has_events)(struct kvm_vcpu *vcpu); 1840 + bool (*has_events)(struct kvm_vcpu *vcpu, bool for_injection); 1840 1841 void (*triple_fault)(struct kvm_vcpu *vcpu); 1841 1842 int (*get_state)(struct kvm_vcpu *vcpu, 1842 1843 struct kvm_nested_state __user *user_kvm_nested_state,
-1
arch/x86/kvm/vmx/main.c
··· 97 97 .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS, 98 98 .hwapic_irr_update = vmx_hwapic_irr_update, 99 99 .hwapic_isr_update = vmx_hwapic_isr_update, 100 - .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, 101 100 .sync_pir_to_irr = vmx_sync_pir_to_irr, 102 101 .deliver_interrupt = vmx_deliver_interrupt, 103 102 .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
+42 -5
arch/x86/kvm/vmx/nested.c
··· 12 12 #include "mmu.h" 13 13 #include "nested.h" 14 14 #include "pmu.h" 15 + #include "posted_intr.h" 15 16 #include "sgx.h" 16 17 #include "trace.h" 17 18 #include "vmx.h" ··· 3900 3899 if (!pi_test_and_clear_on(vmx->nested.pi_desc)) 3901 3900 return 0; 3902 3901 3903 - max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); 3904 - if (max_irr != 256) { 3902 + max_irr = pi_find_highest_vector(vmx->nested.pi_desc); 3903 + if (max_irr > 0) { 3905 3904 vapic_page = vmx->nested.virtual_apic_map.hva; 3906 3905 if (!vapic_page) 3907 3906 goto mmio_needed; ··· 4032 4031 to_vmx(vcpu)->nested.preemption_timer_expired; 4033 4032 } 4034 4033 4035 - static bool vmx_has_nested_events(struct kvm_vcpu *vcpu) 4034 + static bool vmx_has_nested_events(struct kvm_vcpu *vcpu, bool for_injection) 4036 4035 { 4037 - return nested_vmx_preemption_timer_pending(vcpu) || 4038 - to_vmx(vcpu)->nested.mtf_pending; 4036 + struct vcpu_vmx *vmx = to_vmx(vcpu); 4037 + void *vapic = vmx->nested.virtual_apic_map.hva; 4038 + int max_irr, vppr; 4039 + 4040 + if (nested_vmx_preemption_timer_pending(vcpu) || 4041 + vmx->nested.mtf_pending) 4042 + return true; 4043 + 4044 + /* 4045 + * Virtual Interrupt Delivery doesn't require manual injection. Either 4046 + * the interrupt is already in GUEST_RVI and will be recognized by CPU 4047 + * at VM-Entry, or there is a KVM_REQ_EVENT pending and KVM will move 4048 + * the interrupt from the PIR to RVI prior to entering the guest. 
4049 + */ 4050 + if (for_injection) 4051 + return false; 4052 + 4053 + if (!nested_cpu_has_vid(get_vmcs12(vcpu)) || 4054 + __vmx_interrupt_blocked(vcpu)) 4055 + return false; 4056 + 4057 + if (!vapic) 4058 + return false; 4059 + 4060 + vppr = *((u32 *)(vapic + APIC_PROCPRI)); 4061 + 4062 + max_irr = vmx_get_rvi(); 4063 + if ((max_irr & 0xf0) > (vppr & 0xf0)) 4064 + return true; 4065 + 4066 + if (vmx->nested.pi_pending && vmx->nested.pi_desc && 4067 + pi_test_on(vmx->nested.pi_desc)) { 4068 + max_irr = pi_find_highest_vector(vmx->nested.pi_desc); 4069 + if (max_irr > 0 && (max_irr & 0xf0) > (vppr & 0xf0)) 4070 + return true; 4071 + } 4072 + 4073 + return false; 4039 4074 } 4040 4075 4041 4076 /*
+10
arch/x86/kvm/vmx/posted_intr.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 #ifndef __KVM_X86_VMX_POSTED_INTR_H 3 3 #define __KVM_X86_VMX_POSTED_INTR_H 4 + 5 + #include <linux/find.h> 4 6 #include <asm/posted_intr.h> 5 7 6 8 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu); ··· 13 11 int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, 14 12 uint32_t guest_irq, bool set); 15 13 void vmx_pi_start_assignment(struct kvm *kvm); 14 + 15 + static inline int pi_find_highest_vector(struct pi_desc *pi_desc) 16 + { 17 + int vec; 18 + 19 + vec = find_last_bit((unsigned long *)pi_desc->pir, 256); 20 + return vec < 256 ? vec : -1; 21 + } 16 22 17 23 #endif /* __KVM_X86_VMX_POSTED_INTR_H */
+8 -6
arch/x86/kvm/vmx/vmcs12.h
··· 188 188 }; 189 189 190 190 /* 191 - * VMCS12_REVISION is an arbitrary id that should be changed if the content or 192 - * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and 193 - * VMPTRLD verifies that the VMCS region that L1 is loading contains this id. 191 + * VMCS12_REVISION is KVM's arbitrary ID for the layout of struct vmcs12. KVM 192 + * enumerates this value to L1 via MSR_IA32_VMX_BASIC, and checks the revision 193 + * ID during nested VMPTRLD to verify that L1 is loading a VMCS that adhere's 194 + * to KVM's virtual CPU definition. 194 195 * 195 - * IMPORTANT: Changing this value will break save/restore compatibility with 196 - * older kvm releases. 196 + * DO NOT change this value, as it will break save/restore compatibility with 197 + * older KVM releases. 197 198 */ 198 199 #define VMCS12_REVISION 0x11e57ed0 199 200 ··· 207 206 #define VMCS12_SIZE KVM_STATE_NESTED_VMX_VMCS_SIZE 208 207 209 208 /* 210 - * For save/restore compatibility, the vmcs12 field offsets must not change. 209 + * For save/restore compatibility, the vmcs12 field offsets must not change, 210 + * although appending fields and/or filling gaps is obviously allowed. 211 211 */ 212 212 #define CHECK_OFFSET(field, loc) \ 213 213 ASSERT_STRUCT_OFFSET(struct vmcs12, field, loc)
+9 -27
arch/x86/kvm/vmx/vmx.c
··· 2868 2868 return r; 2869 2869 } 2870 2870 2871 - if (enable_ept) 2872 - ept_sync_global(); 2873 - 2874 2871 return 0; 2875 2872 } 2876 2873 ··· 4139 4142 } 4140 4143 } 4141 4144 4142 - bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) 4143 - { 4144 - struct vcpu_vmx *vmx = to_vmx(vcpu); 4145 - void *vapic_page; 4146 - u32 vppr; 4147 - int rvi; 4148 - 4149 - if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || 4150 - !nested_cpu_has_vid(get_vmcs12(vcpu)) || 4151 - WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn)) 4152 - return false; 4153 - 4154 - rvi = vmx_get_rvi(); 4155 - 4156 - vapic_page = vmx->nested.virtual_apic_map.hva; 4157 - vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); 4158 - 4159 - return ((rvi & 0xf0) > (vppr & 0xf0)); 4160 - } 4161 - 4162 4145 void vmx_msr_filter_changed(struct kvm_vcpu *vcpu) 4163 4146 { 4164 4147 struct vcpu_vmx *vmx = to_vmx(vcpu); ··· 5063 5086 return !vmx_nmi_blocked(vcpu); 5064 5087 } 5065 5088 5089 + bool __vmx_interrupt_blocked(struct kvm_vcpu *vcpu) 5090 + { 5091 + return !(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) || 5092 + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 5093 + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); 5094 + } 5095 + 5066 5096 bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu) 5067 5097 { 5068 5098 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) 5069 5099 return false; 5070 5100 5071 - return !(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) || 5072 - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 5073 - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); 5101 + return __vmx_interrupt_blocked(vcpu); 5074 5102 } 5075 5103 5076 5104 int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) ··· 8592 8610 static void vmx_exit(void) 8593 8611 { 8594 8612 kvm_exit(); 8613 + __vmx_exit(); 8595 8614 kvm_x86_vendor_exit(); 8596 8615 8597 - __vmx_exit(); 8598 8616 } 8599 8617 module_exit(vmx_exit); 8600 8618
+1
arch/x86/kvm/vmx/vmx.h
··· 406 406 bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu); 407 407 void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); 408 408 bool vmx_nmi_blocked(struct kvm_vcpu *vcpu); 409 + bool __vmx_interrupt_blocked(struct kvm_vcpu *vcpu); 409 410 bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu); 410 411 bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); 411 412 void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
-1
arch/x86/kvm/vmx/x86_ops.h
··· 48 48 void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu); 49 49 void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); 50 50 void vmx_hwapic_isr_update(int max_isr); 51 - bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu); 52 51 int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu); 53 52 void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, 54 53 int trig_mode, int vector);
+7 -12
arch/x86/kvm/x86.c
··· 10557 10557 10558 10558 if (is_guest_mode(vcpu) && 10559 10559 kvm_x86_ops.nested_ops->has_events && 10560 - kvm_x86_ops.nested_ops->has_events(vcpu)) 10560 + kvm_x86_ops.nested_ops->has_events(vcpu, true)) 10561 10561 *req_immediate_exit = true; 10562 10562 10563 10563 /* ··· 11255 11255 * causes a spurious wakeup from HLT). 11256 11256 */ 11257 11257 if (is_guest_mode(vcpu)) { 11258 - if (kvm_check_nested_events(vcpu) < 0) 11258 + int r = kvm_check_nested_events(vcpu); 11259 + 11260 + WARN_ON_ONCE(r == -EBUSY); 11261 + if (r < 0) 11259 11262 return 0; 11260 11263 } 11261 11264 ··· 13145 13142 kvm_arch_free_memslot(kvm, old); 13146 13143 } 13147 13144 13148 - static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) 13149 - { 13150 - return (is_guest_mode(vcpu) && 13151 - static_call(kvm_x86_guest_apic_has_interrupt)(vcpu)); 13152 - } 13153 - 13154 13145 static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) 13155 13146 { 13156 13147 if (!list_empty_careful(&vcpu->async_pf.done)) ··· 13178 13181 if (kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu)) 13179 13182 return true; 13180 13183 13181 - if (kvm_arch_interrupt_allowed(vcpu) && 13182 - (kvm_cpu_has_interrupt(vcpu) || 13183 - kvm_guest_apic_has_interrupt(vcpu))) 13184 + if (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)) 13184 13185 return true; 13185 13186 13186 13187 if (kvm_hv_has_stimer_pending(vcpu)) ··· 13186 13191 13187 13192 if (is_guest_mode(vcpu) && 13188 13193 kvm_x86_ops.nested_ops->has_events && 13189 - kvm_x86_ops.nested_ops->has_events(vcpu)) 13194 + kvm_x86_ops.nested_ops->has_events(vcpu, false)) 13190 13195 return true; 13191 13196 13192 13197 if (kvm_xen_has_pending_events(vcpu))