Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-x86-vmx-6.10' of https://github.com/kvm-x86/linux into HEAD

KVM VMX changes for 6.10:

- Clear vmcs.EXIT_QUALIFICATION when synthesizing an EPT Misconfig VM-Exit to
L1, as per the SDM.

- Move kvm_vcpu_arch's exit_qualification into x86_exception, as the field is
used only when synthesizing a nested EPT violation, i.e. it's not the vCPU's
"real" exit_qualification, which is tracked elsewhere.

- Add a sanity check to assert that EPT Violations are the only sources of
nested PML Full VM-Exits.

+34 -16
-3
arch/x86/include/asm/kvm_host.h
··· 997 997 998 998 u64 msr_kvm_poll_control; 999 999 1000 - /* set at EPT violation at this point */ 1001 - unsigned long exit_qualification; 1002 - 1003 1000 /* pv related host specific info */ 1004 1001 struct { 1005 1002 bool pv_unhalted;
+1
arch/x86/kvm/kvm_emulate.h
··· 26 26 bool nested_page_fault; 27 27 u64 address; /* cr2 or nested page fault gpa */ 28 28 u8 async_page_fault; 29 + unsigned long exit_qualification; 29 30 }; 30 31 31 32 /*
+7 -7
arch/x86/kvm/mmu/paging_tmpl.h
··· 497 497 * The other bits are set to 0. 498 498 */ 499 499 if (!(errcode & PFERR_RSVD_MASK)) { 500 - vcpu->arch.exit_qualification &= (EPT_VIOLATION_GVA_IS_VALID | 501 - EPT_VIOLATION_GVA_TRANSLATED); 500 + walker->fault.exit_qualification = 0; 501 + 502 502 if (write_fault) 503 - vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE; 503 + walker->fault.exit_qualification |= EPT_VIOLATION_ACC_WRITE; 504 504 if (user_fault) 505 - vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ; 505 + walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ; 506 506 if (fetch_fault) 507 - vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR; 507 + walker->fault.exit_qualification |= EPT_VIOLATION_ACC_INSTR; 508 508 509 509 /* 510 510 * Note, pte_access holds the raw RWX bits from the EPTE, not 511 511 * ACC_*_MASK flags! 512 512 */ 513 - vcpu->arch.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) << 514 - EPT_VIOLATION_RWX_SHIFT; 513 + walker->fault.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) << 514 + EPT_VIOLATION_RWX_SHIFT; 515 515 } 516 516 #endif 517 517 walker->fault.address = addr;
+26 -4
arch/x86/kvm/vmx/nested.c
··· 409 409 { 410 410 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 411 411 struct vcpu_vmx *vmx = to_vmx(vcpu); 412 + unsigned long exit_qualification; 412 413 u32 vm_exit_reason; 413 - unsigned long exit_qualification = vcpu->arch.exit_qualification; 414 414 415 415 if (vmx->nested.pml_full) { 416 416 vm_exit_reason = EXIT_REASON_PML_FULL; 417 417 vmx->nested.pml_full = false; 418 - exit_qualification &= INTR_INFO_UNBLOCK_NMI; 418 + 419 + /* 420 + * It should be impossible to trigger a nested PML Full VM-Exit 421 + * for anything other than an EPT Violation from L2. KVM *can* 422 + * trigger nEPT page fault injection in response to an EPT 423 + * Misconfig, e.g. if the MMIO SPTE was stale and L1's EPT 424 + * tables also changed, but KVM should not treat EPT Misconfig 425 + * VM-Exits as writes. 426 + */ 427 + WARN_ON_ONCE(vmx->exit_reason.basic != EXIT_REASON_EPT_VIOLATION); 428 + 429 + /* 430 + * PML Full and EPT Violation VM-Exits both use bit 12 to report 431 + * "NMI unblocking due to IRET", i.e. the bit can be propagated 432 + * as-is from the original EXIT_QUALIFICATION. 433 + */ 434 + exit_qualification = vmx_get_exit_qual(vcpu) & INTR_INFO_UNBLOCK_NMI; 419 435 } else { 420 - if (fault->error_code & PFERR_RSVD_MASK) 436 + if (fault->error_code & PFERR_RSVD_MASK) { 421 437 vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; 422 - else 438 + exit_qualification = 0; 439 + } else { 440 + exit_qualification = fault->exit_qualification; 441 + exit_qualification |= vmx_get_exit_qual(vcpu) & 442 + (EPT_VIOLATION_GVA_IS_VALID | 443 + EPT_VIOLATION_GVA_TRANSLATED); 423 444 vm_exit_reason = EXIT_REASON_EPT_VIOLATION; 445 + } 424 446 425 447 /* 426 448 * Although the caller (kvm_inject_emulated_page_fault) would
-2
arch/x86/kvm/vmx/vmx.c
··· 5783 5783 error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ? 5784 5784 PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; 5785 5785 5786 - vcpu->arch.exit_qualification = exit_qualification; 5787 - 5788 5786 /* 5789 5787 * Check that the GPA doesn't exceed physical memory limits, as that is 5790 5788 * a guest page fault. We have to emulate the instruction here, because