Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-x86-fixes-6.13-rcN' of https://github.com/kvm-x86/linux into HEAD

KVM x86 fixes for 6.13:

- Disable AVIC on SNP-enabled systems that don't allow writes to the virtual
APIC page, as such hosts will hit unexpected RMP #PFs in the host when
running VMs of any flavor.

- Fix a WARN in the hypercall completion path due to KVM trying to determine
if a guest with protected register state is in 64-bit mode (KVM's ABI is to
assume such guests only make hypercalls in 64-bit mode).

- Allow the guest to write to supported bits in MSR_AMD64_DE_CFG to fix a
regression with Windows guests, and because KVM's read-only behavior appears
to be entirely made up.

- Treat TDP MMU faults as spurious if the faulting access is allowed given the
existing SPTE. This fixes a WARN that is otherwise benign (the only visible
effect is the WARN itself), triggered by unexpectedly replacing a writable
SPTE with a read-only SPTE.

+30 -22
+1
arch/x86/include/asm/cpufeatures.h
··· 452 452 #define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ 453 453 #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ 454 454 #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ 455 + #define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ 455 456 456 457 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ 457 458 #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
-12
arch/x86/kvm/mmu/mmu.c
··· 3364 3364 return true; 3365 3365 } 3366 3366 3367 - static bool is_access_allowed(struct kvm_page_fault *fault, u64 spte) 3368 - { 3369 - if (fault->exec) 3370 - return is_executable_pte(spte); 3371 - 3372 - if (fault->write) 3373 - return is_writable_pte(spte); 3374 - 3375 - /* Fault was on Read access */ 3376 - return spte & PT_PRESENT_MASK; 3377 - } 3378 - 3379 3367 /* 3380 3368 * Returns the last level spte pointer of the shadow page walk for the given 3381 3369 * gpa, and sets *spte to the spte value. This spte may be non-present. If no
+17
arch/x86/kvm/mmu/spte.h
··· 462 462 } 463 463 464 464 /* 465 + * Returns true if the access indicated by @fault is allowed by the existing 466 + * SPTE protections. Note, the caller is responsible for checking that the 467 + * SPTE is a shadow-present, leaf SPTE (either before or after). 468 + */ 469 + static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte) 470 + { 471 + if (fault->exec) 472 + return is_executable_pte(spte); 473 + 474 + if (fault->write) 475 + return is_writable_pte(spte); 476 + 477 + /* Fault was on Read access */ 478 + return spte & PT_PRESENT_MASK; 479 + } 480 + 481 + /* 465 482 * If the MMU-writable flag is cleared, i.e. the SPTE is write-protected for 466 483 * write-tracking, remote TLBs must be flushed, even if the SPTE was read-only, 467 484 * as KVM allows stale Writable TLB entries to exist. When dirty logging, KVM
+5
arch/x86/kvm/mmu/tdp_mmu.c
··· 985 985 if (fault->prefetch && is_shadow_present_pte(iter->old_spte)) 986 986 return RET_PF_SPURIOUS; 987 987 988 + if (is_shadow_present_pte(iter->old_spte) && 989 + is_access_allowed(fault, iter->old_spte) && 990 + is_last_spte(iter->old_spte, iter->level)) 991 + return RET_PF_SPURIOUS; 992 + 988 993 if (unlikely(!fault->slot)) 989 994 new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); 990 995 else
+6
arch/x86/kvm/svm/avic.c
··· 1199 1199 return false; 1200 1200 } 1201 1201 1202 + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) && 1203 + !boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) { 1204 + pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n"); 1205 + return false; 1206 + } 1207 + 1202 1208 if (boot_cpu_has(X86_FEATURE_AVIC)) { 1203 1209 pr_info("AVIC enabled\n"); 1204 1210 } else if (force_avic) {
-9
arch/x86/kvm/svm/svm.c
··· 3201 3201 if (data & ~supported_de_cfg) 3202 3202 return 1; 3203 3203 3204 - /* 3205 - * Don't let the guest change the host-programmed value. The 3206 - * MSR is very model specific, i.e. contains multiple bits that 3207 - * are completely unknown to KVM, and the one bit known to KVM 3208 - * is simply a reflection of hardware capabilities. 3209 - */ 3210 - if (!msr->host_initiated && data != svm->msr_decfg) 3211 - return 1; 3212 - 3213 3204 svm->msr_decfg = data; 3214 3205 break; 3215 3206 }
+1 -1
arch/x86/kvm/x86.c
··· 9976 9976 { 9977 9977 u64 ret = vcpu->run->hypercall.ret; 9978 9978 9979 - if (!is_64_bit_mode(vcpu)) 9979 + if (!is_64_bit_hypercall(vcpu)) 9980 9980 ret = (u32)ret; 9981 9981 kvm_rax_write(vcpu, ret); 9982 9982 ++vcpu->stat.hypercalls;