Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: x86: Enable CET virtualization for VMX and advertise to userspace

Add support for the LOAD_CET_STATE VM-Enter and VM-Exit controls, the
CET XFEATURE bits in XSS, and advertise support for IBT and SHSTK to
userspace. Explicitly clear IBT and SHSTK on SVM, as additional work is
needed to enable CET on SVM, e.g. to context switch S_CET and other state.

Disable the KVM CET feature if unrestricted_guest is unsupported/disabled:
KVM does not support emulating CET, and running without Unrestricted Guest
can result in KVM emulating large swaths of guest code. While it's highly
unlikely any guest will trigger emulation while also utilizing IBT or
SHSTK, there's zero reason to allow CET without Unrestricted Guest as that
combination should only be possible when explicitly disabling
unrestricted_guest for testing purposes.

Disable CET if VMX_BASIC[bit56] == 0, i.e. if hardware strictly enforces
the presence of an Error Code based on exception vector, as attempting to
inject a #CP with an Error Code (#CP architecturally has an Error Code)
will fail due to the #CP vector historically not having an Error Code.

Clear S_CET and SSP-related VMCS fields on "reset" to emulate the
architectural behavior of CET MSRs and SSP being reset to 0 after RESET,
power-up and INIT. Note,
KVM already clears guest CET state that is managed via XSTATE in
kvm_xstate_reset().

Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
Signed-off-by: Mathias Krause <minipli@grsecurity.net>
Tested-by: Mathias Krause <minipli@grsecurity.net>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
[sean: move some bits to separate patches, massage changelog]
Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250919223258.1604852-29-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>

authored by

Yang Weijiang and committed by
Sean Christopherson
e140467b 343acdd1

+45 -3
+1
arch/x86/include/asm/vmx.h
··· 134 134 #define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49) 135 135 #define VMX_BASIC_INOUT BIT_ULL(54) 136 136 #define VMX_BASIC_TRUE_CTLS BIT_ULL(55) 137 + #define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56) 137 138 138 139 static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) 139 140 {
+2
arch/x86/kvm/cpuid.c
··· 946 946 VENDOR_F(WAITPKG), 947 947 F(SGX_LC), 948 948 F(BUS_LOCK_DETECT), 949 + X86_64_F(SHSTK), 949 950 ); 950 951 951 952 /* ··· 981 980 F(AMX_INT8), 982 981 F(AMX_BF16), 983 982 F(FLUSH_L1D), 983 + F(IBT), 984 984 ); 985 985 986 986 /*
+4
arch/x86/kvm/svm/svm.c
··· 5222 5222 kvm_caps.supported_perf_cap = 0; 5223 5223 kvm_caps.supported_xss = 0; 5224 5224 5225 + /* KVM doesn't yet support CET virtualization for SVM. */ 5226 + kvm_cpu_cap_clear(X86_FEATURE_SHSTK); 5227 + kvm_cpu_cap_clear(X86_FEATURE_IBT); 5228 + 5225 5229 /* CPUID 0x80000001 and 0x8000000A (SVM features) */ 5226 5230 if (nested) { 5227 5231 kvm_cpu_cap_set(X86_FEATURE_SVM);
+5
arch/x86/kvm/vmx/capabilities.h
··· 73 73 return vmcs_config.basic & VMX_BASIC_INOUT; 74 74 } 75 75 76 + static inline bool cpu_has_vmx_basic_no_hw_errcode_cc(void) 77 + { 78 + return vmcs_config.basic & VMX_BASIC_NO_HW_ERROR_CODE_CC; 79 + } 80 + 76 81 static inline bool cpu_has_virtual_nmis(void) 77 82 { 78 83 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS &&
+29 -1
arch/x86/kvm/vmx/vmx.c
··· 2602 2602 { VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER }, 2603 2603 { VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS }, 2604 2604 { VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL }, 2605 + { VM_ENTRY_LOAD_CET_STATE, VM_EXIT_LOAD_CET_STATE }, 2605 2606 }; 2606 2607 2607 2608 memset(vmcs_conf, 0, sizeof(*vmcs_conf)); ··· 4869 4868 4870 4869 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ 4871 4870 4871 + if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) { 4872 + vmcs_writel(GUEST_SSP, 0); 4873 + vmcs_writel(GUEST_INTR_SSP_TABLE, 0); 4874 + } 4875 + if (kvm_cpu_cap_has(X86_FEATURE_IBT) || 4876 + kvm_cpu_cap_has(X86_FEATURE_SHSTK)) 4877 + vmcs_writel(GUEST_S_CET, 0); 4878 + 4872 4879 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); 4873 4880 4874 4881 vpid_sync_context(vmx->vpid); ··· 6344 6335 if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0) 6345 6336 vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest); 6346 6337 6338 + if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE) 6339 + pr_err("S_CET = 0x%016lx, SSP = 0x%016lx, SSP TABLE = 0x%016lx\n", 6340 + vmcs_readl(GUEST_S_CET), vmcs_readl(GUEST_SSP), 6341 + vmcs_readl(GUEST_INTR_SSP_TABLE)); 6347 6342 pr_err("*** Host State ***\n"); 6348 6343 pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", 6349 6344 vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); ··· 6378 6365 vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); 6379 6366 if (vmcs_read32(VM_EXIT_MSR_LOAD_COUNT) > 0) 6380 6367 vmx_dump_msrs("host autoload", &vmx->msr_autoload.host); 6368 + if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE) 6369 + pr_err("S_CET = 0x%016lx, SSP = 0x%016lx, SSP TABLE = 0x%016lx\n", 6370 + vmcs_readl(HOST_S_CET), vmcs_readl(HOST_SSP), 6371 + vmcs_readl(HOST_INTR_SSP_TABLE)); 6381 6372 6382 6373 pr_err("*** Control State ***\n"); 6383 6374 pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n", ··· 7963 7946 kvm_cpu_cap_set(X86_FEATURE_UMIP); 7964 7947 7965 7948 /* CPUID 0xD.1 */ 7966 - kvm_caps.supported_xss = 0; 7967 7949 if 
(!cpu_has_vmx_xsaves()) 7968 7950 kvm_cpu_cap_clear(X86_FEATURE_XSAVES); 7969 7951 ··· 7974 7958 7975 7959 if (cpu_has_vmx_waitpkg()) 7976 7960 kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); 7961 + 7962 + /* 7963 + * Disable CET if unrestricted_guest is unsupported as KVM doesn't 7964 + * enforce CET HW behaviors in emulator. On platforms with 7965 + * VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error code 7966 + * fails, so disable CET in this case too. 7967 + */ 7968 + if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest || 7969 + !cpu_has_vmx_basic_no_hw_errcode_cc()) { 7970 + kvm_cpu_cap_clear(X86_FEATURE_SHSTK); 7971 + kvm_cpu_cap_clear(X86_FEATURE_IBT); 7972 + } 7977 7973 } 7978 7974 7979 7975 static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,
+4 -2
arch/x86/kvm/vmx/vmx.h
··· 484 484 VM_ENTRY_LOAD_IA32_EFER | \ 485 485 VM_ENTRY_LOAD_BNDCFGS | \ 486 486 VM_ENTRY_PT_CONCEAL_PIP | \ 487 - VM_ENTRY_LOAD_IA32_RTIT_CTL) 487 + VM_ENTRY_LOAD_IA32_RTIT_CTL | \ 488 + VM_ENTRY_LOAD_CET_STATE) 488 489 489 490 #define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \ 490 491 (VM_EXIT_SAVE_DEBUG_CONTROLS | \ ··· 507 506 VM_EXIT_LOAD_IA32_EFER | \ 508 507 VM_EXIT_CLEAR_BNDCFGS | \ 509 508 VM_EXIT_PT_CONCEAL_PIP | \ 510 - VM_EXIT_CLEAR_IA32_RTIT_CTL) 509 + VM_EXIT_CLEAR_IA32_RTIT_CTL | \ 510 + VM_EXIT_LOAD_CET_STATE) 511 511 512 512 #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \ 513 513 (PIN_BASED_EXT_INTR_MASK | \