Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'x86_tdx_for_6.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull tdx updates from Dave Hansen:
"These essentially refine some interactions between TDX guests and
VMMs.

The first leverages a new TDX module feature to disable, at runtime,
the ability for a VMM to inject #VE exceptions. Before this feature,
there was only a static on/off switch and the guest had to panic if it
was configured in a bad state.

The second lets the guest opt in to be able to access the topology
CPUID leaves. Before this, accesses to those leaves would #VE.

For both of these, it would have been nicest to just change the
default behavior, but some pesky "other" OSes evidently need to retain
the legacy behavior.

Summary:

- Add new infrastructure for reading TDX metadata

- Use the newly-available metadata to:
  - Disable potentially nasty #VE exceptions
  - Get more complete CPU topology information from the VMM"

* tag 'x86_tdx_for_6.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tdx: Enable CPU topology enumeration
  x86/tdx: Dynamically disable SEPT violations from causing #VEs
  x86/tdx: Rename tdx_parse_tdinfo() to tdx_setup()
  x86/tdx: Introduce wrappers to read and write TD metadata

+128 -23
+116 -22
arch/x86/coco/tdx/tdx.c
··· 78 78 panic("TDCALL %lld failed (Buggy TDX module!)\n", fn); 79 79 } 80 80 81 + /* Read TD-scoped metadata */ 82 + static inline u64 tdg_vm_rd(u64 field, u64 *value) 83 + { 84 + struct tdx_module_args args = { 85 + .rdx = field, 86 + }; 87 + u64 ret; 88 + 89 + ret = __tdcall_ret(TDG_VM_RD, &args); 90 + *value = args.r8; 91 + 92 + return ret; 93 + } 94 + 95 + /* Write TD-scoped metadata */ 96 + static inline u64 tdg_vm_wr(u64 field, u64 value, u64 mask) 97 + { 98 + struct tdx_module_args args = { 99 + .rdx = field, 100 + .r8 = value, 101 + .r9 = mask, 102 + }; 103 + 104 + return __tdcall(TDG_VM_WR, &args); 105 + } 106 + 81 107 /** 82 108 * tdx_mcall_get_report0() - Wrapper to get TDREPORT0 (a.k.a. TDREPORT 83 109 * subtype 0) using TDG.MR.REPORT TDCALL. ··· 194 168 __tdx_hypercall(&args); 195 169 } 196 170 197 - static void tdx_parse_tdinfo(u64 *cc_mask) 171 + /* 172 + * The kernel cannot handle #VEs when accessing normal kernel memory. Ensure 173 + * that no #VE will be delivered for accesses to TD-private memory. 174 + * 175 + * TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM 176 + * controls if the guest will receive such #VE with TD attribute 177 + * ATTR_SEPT_VE_DISABLE. 178 + * 179 + * Newer TDX modules allow the guest to control if it wants to receive SEPT 180 + * violation #VEs. 181 + * 182 + * Check if the feature is available and disable SEPT #VE if possible. 183 + * 184 + * If the TD is allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE 185 + * attribute is no longer reliable. It reflects the initial state of the 186 + * control for the TD, but it will not be updated if someone (e.g. bootloader) 187 + * changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to 188 + * determine if SEPT #VEs are enabled or disabled. 
189 + */ 190 + static void disable_sept_ve(u64 td_attr) 191 + { 192 + const char *msg = "TD misconfiguration: SEPT #VE has to be disabled"; 193 + bool debug = td_attr & ATTR_DEBUG; 194 + u64 config, controls; 195 + 196 + /* Is this TD allowed to disable SEPT #VE */ 197 + tdg_vm_rd(TDCS_CONFIG_FLAGS, &config); 198 + if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) { 199 + /* No SEPT #VE controls for the guest: check the attribute */ 200 + if (td_attr & ATTR_SEPT_VE_DISABLE) 201 + return; 202 + 203 + /* Relax SEPT_VE_DISABLE check for debug TD for backtraces */ 204 + if (debug) 205 + pr_warn("%s\n", msg); 206 + else 207 + tdx_panic(msg); 208 + return; 209 + } 210 + 211 + /* Check if SEPT #VE has been disabled before us */ 212 + tdg_vm_rd(TDCS_TD_CTLS, &controls); 213 + if (controls & TD_CTLS_PENDING_VE_DISABLE) 214 + return; 215 + 216 + /* Keep #VEs enabled for splats in debugging environments */ 217 + if (debug) 218 + return; 219 + 220 + /* Disable SEPT #VEs */ 221 + tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_PENDING_VE_DISABLE, 222 + TD_CTLS_PENDING_VE_DISABLE); 223 + } 224 + 225 + /* 226 + * TDX 1.0 generates a #VE when accessing topology-related CPUID leafs (0xB and 227 + * 0x1F) and the X2APIC_APICID MSR. The kernel returns all zeros on CPUID #VEs. 228 + * In practice, this means that the kernel can only boot with a plain topology. 229 + * Any complications will cause problems. 230 + * 231 + * The ENUM_TOPOLOGY feature allows the VMM to provide topology information. 232 + * Enabling the feature eliminates topology-related #VEs: the TDX module 233 + * virtualizes accesses to the CPUID leafs and the MSR. 234 + * 235 + * Enable ENUM_TOPOLOGY if it is available. 236 + */ 237 + static void enable_cpu_topology_enumeration(void) 238 + { 239 + u64 configured; 240 + 241 + /* Has the VMM provided a valid topology configuration? 
*/ 242 + tdg_vm_rd(TDCS_TOPOLOGY_ENUM_CONFIGURED, &configured); 243 + if (!configured) { 244 + pr_err("VMM did not configure X2APIC_IDs properly\n"); 245 + return; 246 + } 247 + 248 + tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_ENUM_TOPOLOGY, TD_CTLS_ENUM_TOPOLOGY); 249 + } 250 + 251 + static void tdx_setup(u64 *cc_mask) 198 252 { 199 253 struct tdx_module_args args = {}; 200 254 unsigned int gpa_width; ··· 299 193 gpa_width = args.rcx & GENMASK(5, 0); 300 194 *cc_mask = BIT_ULL(gpa_width - 1); 301 195 302 - /* 303 - * The kernel can not handle #VE's when accessing normal kernel 304 - * memory. Ensure that no #VE will be delivered for accesses to 305 - * TD-private memory. Only VMM-shared memory (MMIO) will #VE. 306 - */ 307 196 td_attr = args.rdx; 308 - if (!(td_attr & ATTR_SEPT_VE_DISABLE)) { 309 - const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set."; 310 197 311 - /* Relax SEPT_VE_DISABLE check for debug TD. */ 312 - if (td_attr & ATTR_DEBUG) 313 - pr_warn("%s\n", msg); 314 - else 315 - tdx_panic(msg); 316 - } 198 + /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */ 199 + tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL); 200 + 201 + disable_sept_ve(td_attr); 202 + enable_cpu_topology_enumeration(); 317 203 } 318 204 319 205 /* ··· 1027 929 1028 930 void __init tdx_early_init(void) 1029 931 { 1030 - struct tdx_module_args args = { 1031 - .rdx = TDCS_NOTIFY_ENABLES, 1032 - .r9 = -1ULL, 1033 - }; 1034 932 u64 cc_mask; 1035 933 u32 eax, sig[3]; 1036 934 ··· 1041 947 setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); 1042 948 1043 949 cc_vendor = CC_VENDOR_INTEL; 1044 - tdx_parse_tdinfo(&cc_mask); 1045 - cc_set_mask(cc_mask); 1046 950 1047 - /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */ 1048 - tdcall(TDG_VM_WR, &args); 951 + /* Configure the TD */ 952 + tdx_setup(&cc_mask); 953 + 954 + cc_set_mask(cc_mask); 1049 955 1050 956 /* 1051 957 * All bits above GPA width are reserved and kernel treats shared bit
+12 -1
arch/x86/include/asm/shared/tdx.h
··· 16 16 #define TDG_VP_VEINFO_GET 3 17 17 #define TDG_MR_REPORT 4 18 18 #define TDG_MEM_PAGE_ACCEPT 6 19 + #define TDG_VM_RD 7 19 20 #define TDG_VM_WR 8 20 21 21 - /* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */ 22 + /* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */ 23 + #define TDCS_CONFIG_FLAGS 0x1110000300000016 24 + #define TDCS_TD_CTLS 0x1110000300000017 22 25 #define TDCS_NOTIFY_ENABLES 0x9100000000000010 26 + #define TDCS_TOPOLOGY_ENUM_CONFIGURED 0x9100000000000019 27 + 28 + /* TDCS_CONFIG_FLAGS bits */ 29 + #define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1) 30 + 31 + /* TDCS_TD_CTLS bits */ 32 + #define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(0) 33 + #define TD_CTLS_ENUM_TOPOLOGY BIT_ULL(1) 23 34 24 35 /* TDX hypercall Leaf IDs */ 25 36 #define TDVMCALL_MAP_GPA 0x10001