Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM x86 fixes from Paolo Bonzini:

- Disable AVIC on SNP-enabled systems that don't allow writes to the
virtual APIC page, as such hosts will hit unexpected RMP #PFs in the
host when running VMs of any flavor.

- Fix a WARN in the hypercall completion path due to KVM trying to
determine if a guest with protected register state is in 64-bit mode
(KVM's ABI is to assume such guests only make hypercalls in 64-bit
mode).

- Allow the guest to write to supported bits in MSR_AMD64_DE_CFG to fix
a regression with Windows guests, and because KVM's read-only
behavior appears to be entirely made up.

- Treat TDP MMU faults as spurious if the faulting access is allowed
given the existing SPTE. This fixes a WARN that was benign (other than
the WARN itself) and fired due to unexpectedly replacing a writable
SPTE with a read-only SPTE.

- Emit a warning when KVM is configured with ignore_msrs=1 and
report_ignored_msrs=0, i.e. when KVM silently ignores accesses to
unknown MSRs and also hides those accesses from the kernel logs.
ignore_msrs can trick guests into assuming that certain processor
features are present, and this in turn leads to bogus bug reports
(a guest-side sketch of the problem follows the shortlog below).

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86: let it be known that ignore_msrs is a bad idea
KVM: VMX: don't include '<linux/find.h>' directly
KVM: x86/mmu: Treat TDP MMU faults as spurious if access is already allowed
KVM: SVM: Allow guest writes to set MSR_AMD64_DE_CFG bits
KVM: x86: Play nice with protected guests in complete_hypercall_exit()
KVM: SVM: Disable AVIC on SNP-enabled system without HvInUseWrAllowed feature
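
Why ignore_msrs=1 misleads guests: many guests feature-detect by checking
whether an MSR read faults. Below is a minimal guest-side sketch of that
pattern; rdmsrl_safe() is the kernel's fault-tolerant MSR read helper,
while MSR_HYPOTHETICAL_FEATURE is a made-up name used purely for
illustration.

#include <asm/msr.h>

/* Made-up MSR index, for illustration only. */
#define MSR_HYPOTHETICAL_FEATURE        0xc0010000

/*
 * rdmsrl_safe() returns non-zero if the RDMSR raised #GP, i.e. if the
 * MSR does not exist.  With ignore_msrs=1, KVM returns 0 instead of
 * injecting #GP, so this probe reports the feature as present even on
 * hardware that lacks it -- the lie that the new warning calls out.
 */
static bool guest_has_feature(void)
{
        u64 val;

        return !rdmsrl_safe(MSR_HYPOTHETICAL_FEATURE, &val);
}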

Changed files: +38 -23

arch/x86/include/asm/cpufeatures.h (+1)
@@ -452,6 +452,7 @@
 #define X86_FEATURE_SME_COHERENT	(19*32+10) /* AMD hardware-enforced cache coherency */
 #define X86_FEATURE_DEBUG_SWAP		(19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
 #define X86_FEATURE_SVSM		(19*32+28) /* "svsm" SVSM present */
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED	(19*32+30) /* Allow Write to in-use hypervisor-owned pages */
 
 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
 #define X86_FEATURE_NO_NESTED_DATA_BP	(20*32+ 0) /* No Nested Data Breakpoints */

arch/x86/kvm/mmu/mmu.c (-12)
@@ -3364,18 +3364,6 @@
 	return true;
 }
 
-static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
-{
-	if (fault->exec)
-		return is_executable_pte(spte);
-
-	if (fault->write)
-		return is_writable_pte(spte);
-
-	/* Fault was on Read access */
-	return spte & PT_PRESENT_MASK;
-}
-
 /*
  * Returns the last level spte pointer of the shadow page walk for the given
  * gpa, and sets *spte to the spte value. This spte may be non-present. If no

arch/x86/kvm/mmu/spte.h (+17)
@@ -462,6 +462,23 @@
 }
 
 /*
+ * Returns true if the access indicated by @fault is allowed by the existing
+ * SPTE protections. Note, the caller is responsible for checking that the
+ * SPTE is a shadow-present, leaf SPTE (either before or after).
+ */
+static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
+{
+	if (fault->exec)
+		return is_executable_pte(spte);
+
+	if (fault->write)
+		return is_writable_pte(spte);
+
+	/* Fault was on Read access */
+	return spte & PT_PRESENT_MASK;
+}
+
+/*
  * If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for
  * write-tracking, remote TLBs must be flushed, even if the SPTE was read-only,
  * as KVM allows stale Writable TLB entries to exist. When dirty logging, KVM

arch/x86/kvm/mmu/tdp_mmu.c (+5)
@@ -985,6 +985,11 @@
 	if (fault->prefetch && is_shadow_present_pte(iter->old_spte))
 		return RET_PF_SPURIOUS;
 
+	if (is_shadow_present_pte(iter->old_spte) &&
+	    is_access_allowed(fault, iter->old_spte) &&
+	    is_last_spte(iter->old_spte, iter->level))
+		return RET_PF_SPURIOUS;
+
 	if (unlikely(!fault->slot))
 		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
 	else

arch/x86/kvm/svm/avic.c (+6)
@@ -1199,6 +1199,12 @@
 		return false;
 	}
 
+	if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
+	    !boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) {
+		pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n");
+		return false;
+	}
+
 	if (boot_cpu_has(X86_FEATURE_AVIC)) {
 		pr_info("AVIC enabled\n");
 	} else if (force_avic) {
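
For reference, the new X86_FEATURE_HV_INUSE_WR_ALLOWED bit lives in
cpufeatures.h word 19, which corresponds to CPUID leaf 0x8000001F EAX,
bit 30. A userspace probe for the same bit might look like the sketch
below (illustrative only; the kernel itself uses boot_cpu_has(), and the
leaf/bit mapping is inferred from the cpufeatures.h hunk above).

#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

/* Probe CPUID 0x8000001F EAX[30], i.e. HvInUseWrAllowed. */
static bool hv_inuse_wr_allowed(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* __get_cpuid() fails if the extended leaf is not supported. */
        if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
                return false;

        return eax & (1u << 30);
}

int main(void)
{
        printf("HvInUseWrAllowed: %s\n", hv_inuse_wr_allowed() ? "yes" : "no");
        return 0;
}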

arch/x86/kvm/svm/svm.c (-9)
@@ -3201,15 +3201,6 @@
 		if (data & ~supported_de_cfg)
 			return 1;
 
-		/*
-		 * Don't let the guest change the host-programmed value. The
-		 * MSR is very model specific, i.e. contains multiple bits that
-		 * are completely unknown to KVM, and the one bit known to KVM
-		 * is simply a reflection of hardware capabilities.
-		 */
-		if (!msr->host_initiated && data != svm->msr_decfg)
-			return 1;
-
 		svm->msr_decfg = data;
 		break;
 	}
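
With the rejected-write check gone, a guest write that merely sets a
supported bit now succeeds instead of taking an unexpected #GP. A minimal
guest-side sketch, using the one DE_CFG bit KVM knows about
(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE is a real msr-index.h constant; the
surrounding function is invented for illustration):

#include <asm/msr.h>
#include <asm/msr-index.h>

/*
 * Set the LFENCE-serializing bit without disturbing the other,
 * model-specific DE_CFG bits.  KVM now accepts this as long as the
 * write only touches bits advertised in supported_de_cfg.
 */
static void guest_enable_lfence_serialize(void)
{
        u64 de_cfg;

        rdmsrl(MSR_AMD64_DE_CFG, de_cfg);
        wrmsrl(MSR_AMD64_DE_CFG, de_cfg | MSR_AMD64_DE_CFG_LFENCE_SERIALIZE);
}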

arch/x86/kvm/vmx/posted_intr.h (+1 -1)
@@ -2,7 +2,7 @@
 #ifndef __KVM_X86_VMX_POSTED_INTR_H
 #define __KVM_X86_VMX_POSTED_INTR_H
 
-#include <linux/find.h>
+#include <linux/bitmap.h>
 #include <asm/posted_intr.h>
 
 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);

arch/x86/kvm/x86.c (+8 -1)
@@ -9976,7 +9976,7 @@
 {
 	u64 ret = vcpu->run->hypercall.ret;
 
-	if (!is_64_bit_mode(vcpu))
+	if (!is_64_bit_hypercall(vcpu))
 		ret = (u32)ret;
 	kvm_rax_write(vcpu, ret);
 	++vcpu->stat.hypercalls;
@@ -12723,6 +12723,13 @@
 	kvm_apicv_init(kvm);
 	kvm_hv_init_vm(kvm);
 	kvm_xen_init_vm(kvm);
+
+	if (ignore_msrs && !report_ignored_msrs) {
+		pr_warn_once("Running KVM with ignore_msrs=1 and report_ignored_msrs=0 is not a\n"
+			     "supported configuration. Lying to the guest about the existence of MSRs\n"
+			     "may cause the guest operating system to hang or produce errors. If a guest\n"
+			     "does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n");
+	}
 
 	return 0;
 
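
For context on the complete_hypercall_exit() change: is_64_bit_mode()
WARNs when asked about a guest whose register state is unreadable,
whereas is_64_bit_hypercall() encodes KVM's ABI that such guests are
assumed to hypercall from 64-bit mode. Its definition in
arch/x86/kvm/x86.h is roughly the following (quoted from memory, so
treat as a sketch):

static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
{
        /*
         * Guests with protected register state (e.g. SEV-ES/SNP) have an
         * unreadable CPU mode; per KVM's ABI they are assumed to make
         * hypercalls only from 64-bit mode.
         */
        return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
}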