Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: TDX: Explicitly set user-return MSRs that *may* be clobbered by the TDX-Module

Set all user-return MSRs to their post-TD-exit value when preparing to run
a TDX vCPU to ensure the value that KVM expects to be loaded after running
the vCPU is indeed the value that's loaded in hardware. If the TDX-Module
doesn't actually enter the guest, i.e. doesn't do VM-Enter, then it won't
"restore" VMM state, i.e. won't clobber user-return MSRs to their expected
post-run values, in which case simply updating KVM's "cached" value will
effectively corrupt the cache due to hardware still holding the original
value.

In theory, KVM could conditionally update the current user-return value if
and only if tdh_vp_enter() succeeds, but in practice "success" doesn't
guarantee the TDX-Module actually entered the guest, e.g. if the TDX-Module
synthesizes an EPT Violation because it suspects a zero-step attack.

Force-load the expected values instead of trying to decipher whether or
not the TDX-Module restored/clobbered MSRs, as the benefits don't justify
the risk. The savings from avoiding four WRMSRs once per run loop (even if
the vCPU is scheduled out, user-return MSRs only need to be reloaded if
the CPU exits to userspace or runs a non-TDX vCPU) are likely in the noise
when amortized over all entries, given the cost of running a TDX vCPU.
E.g. the cost of the WRMSRs is somewhere between ~300 and ~500 cycles,
whereas the cost of a _single_ roundtrip to/from a TDX guest is thousands
of cycles.

Fixes: e0b4f31a3c65 ("KVM: TDX: restore user ret MSRs")
Cc: stable@vger.kernel.org
Cc: Yan Zhao <yan.y.zhao@intel.com>
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://patch.msgid.link/20251030191528.3380553-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>

+23 -44
-1
arch/x86/include/asm/kvm_host.h
··· 2379 2379 int kvm_add_user_return_msr(u32 msr); 2380 2380 int kvm_find_user_return_msr(u32 msr); 2381 2381 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); 2382 - void kvm_user_return_msr_update_cache(unsigned int index, u64 val); 2383 2382 u64 kvm_get_user_return_msr(unsigned int slot); 2384 2383 2385 2384 static inline bool kvm_is_supported_user_return_msr(u32 msr)
+23 -33
arch/x86/kvm/vmx/tdx.c
··· 763 763 return tdx_vcpu_state_details_intr_pending(vcpu_state_details); 764 764 } 765 765 766 - /* 767 - * Compared to vmx_prepare_switch_to_guest(), there is not much to do 768 - * as SEAMCALL/SEAMRET calls take care of most of save and restore. 769 - */ 770 - void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) 771 - { 772 - struct vcpu_vt *vt = to_vt(vcpu); 773 - 774 - if (vt->guest_state_loaded) 775 - return; 776 - 777 - if (likely(is_64bit_mm(current->mm))) 778 - vt->msr_host_kernel_gs_base = current->thread.gsbase; 779 - else 780 - vt->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); 781 - 782 - vt->guest_state_loaded = true; 783 - } 784 - 785 766 struct tdx_uret_msr { 786 767 u32 msr; 787 768 unsigned int slot; ··· 776 795 {.msr = MSR_TSC_AUX,}, 777 796 }; 778 797 779 - static void tdx_user_return_msr_update_cache(void) 798 + void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) 780 799 { 800 + struct vcpu_vt *vt = to_vt(vcpu); 781 801 int i; 782 802 803 + if (vt->guest_state_loaded) 804 + return; 805 + 806 + if (likely(is_64bit_mm(current->mm))) 807 + vt->msr_host_kernel_gs_base = current->thread.gsbase; 808 + else 809 + vt->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); 810 + 811 + vt->guest_state_loaded = true; 812 + 813 + /* 814 + * Explicitly set user-return MSRs that are clobbered by the TDX-Module 815 + * if VP.ENTER succeeds, i.e. on TD-Exit, with the values that would be 816 + * written by the TDX-Module. Don't rely on the TDX-Module to actually 817 + * clobber the MSRs, as the contract is poorly defined and not upheld. 818 + * E.g. the TDX-Module will synthesize an EPT Violation without doing 819 + * VM-Enter if it suspects a zero-step attack, and never "restore" VMM 820 + * state. 
821 + */ 783 822 for (i = 0; i < ARRAY_SIZE(tdx_uret_msrs); i++) 784 - kvm_user_return_msr_update_cache(tdx_uret_msrs[i].slot, 785 - tdx_uret_msrs[i].defval); 823 + kvm_set_user_return_msr(tdx_uret_msrs[i].slot, 824 + tdx_uret_msrs[i].defval, -1ull); 786 825 } 787 826 788 827 static void tdx_prepare_switch_to_host(struct kvm_vcpu *vcpu) 789 828 { 790 829 struct vcpu_vt *vt = to_vt(vcpu); 791 - struct vcpu_tdx *tdx = to_tdx(vcpu); 792 830 793 831 if (!vt->guest_state_loaded) 794 832 return; 795 833 796 834 ++vcpu->stat.host_state_reload; 797 835 wrmsrl(MSR_KERNEL_GS_BASE, vt->msr_host_kernel_gs_base); 798 - 799 - if (tdx->guest_entered) { 800 - tdx_user_return_msr_update_cache(); 801 - tdx->guest_entered = false; 802 - } 803 836 804 837 vt->guest_state_loaded = false; 805 838 } ··· 1054 1059 update_debugctlmsr(vcpu->arch.host_debugctl); 1055 1060 1056 1061 tdx_load_host_xsave_state(vcpu); 1057 - tdx->guest_entered = true; 1058 1062 1059 1063 vcpu->arch.regs_avail &= TDX_REGS_AVAIL_SET; 1060 1064 ··· 3437 3443 /* 3438 3444 * Check if MSRs (tdx_uret_msrs) can be saved/restored 3439 3445 * before returning to user space. 3440 - * 3441 - * this_cpu_ptr(user_return_msrs)->registered isn't checked 3442 - * because the registration is done at vcpu runtime by 3443 - * tdx_user_return_msr_update_cache(). 3444 3446 */ 3445 3447 tdx_uret_msrs[i].slot = kvm_find_user_return_msr(tdx_uret_msrs[i].msr); 3446 3448 if (tdx_uret_msrs[i].slot == -1) {
-1
arch/x86/kvm/vmx/tdx.h
··· 67 67 u64 vp_enter_ret; 68 68 69 69 enum vcpu_tdx_state state; 70 - bool guest_entered; 71 70 72 71 u64 map_gpa_next; 73 72 u64 map_gpa_end;
-9
arch/x86/kvm/x86.c
··· 681 681 } 682 682 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_user_return_msr); 683 683 684 - void kvm_user_return_msr_update_cache(unsigned int slot, u64 value) 685 - { 686 - struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); 687 - 688 - msrs->values[slot].curr = value; 689 - kvm_user_return_register_notifier(msrs); 690 - } 691 - EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_user_return_msr_update_cache); 692 - 693 684 u64 kvm_get_user_return_msr(unsigned int slot) 694 685 { 695 686 return this_cpu_ptr(user_return_msrs)->values[slot].curr;