Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-x86-svm-6.19' of https://github.com/kvm-x86/linux into HEAD

KVM SVM changes for 6.19:

- Fix a few missing "VMCB dirty" bugs.

- Fix the worst of KVM's lack of EFER.LMSLE emulation.

- Add AVIC support for addressing 4k vCPUs in x2AVIC mode.

- Fix incorrect handling of selective CR0 writes when checking intercepts
during emulation of L2 instructions.

- Fix a currently-benign bug where KVM would clobber SPEC_CTRL[63:32] on
VMRUN and #VMEXIT.

- Fix a bug where KVM corrupts the guest code stream when re-injecting a soft
interrupt if the guest patched the underlying code after the VM-Exit, e.g.
when Linux patches code with a temporary INT3.

- Add KVM_X86_SNP_POLICY_BITS to advertise supported SNP policy bits to
userspace, and extend KVM "support" to all policy bits that don't require
any actual support from KVM.

+314 -89
+8 -1
Documentation/virt/kvm/x86/errata.rst
··· 48 48 Nested virtualization features 49 49 ------------------------------ 50 50 51 - TBD 51 + On AMD CPUs, when GIF is cleared, #DB exceptions or traps due to a breakpoint 52 + register match are ignored and discarded by the CPU. The CPU relies on the VMM 53 + to fully virtualize this behavior, even when vGIF is enabled for the guest 54 + (i.e. vGIF=0 does not cause the CPU to drop #DBs when the guest is running). 55 + KVM does not virtualize this behavior as the complexity is unjustified given 56 + the rarity of the use case. One way to handle this would be for KVM to 57 + intercept the #DB, temporarily disable the breakpoint, single-step over the 58 + instruction, then re-enable the breakpoint. 52 59 53 60 x2APIC 54 61 ------
+2
arch/x86/include/asm/cpufeatures.h
··· 338 338 #define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ 339 339 #define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ 340 340 #define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ 341 + #define X86_FEATURE_EFER_LMSLE_MBZ (13*32+20) /* EFER.LMSLE must be zero */ 341 342 #define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ 342 343 #define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ 343 344 #define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ ··· 505 504 * can access host MMIO (ignored for all intents 506 505 * and purposes if CLEAR_CPU_BUF_VM is set). 507 506 */ 507 + #define X86_FEATURE_X2AVIC_EXT (21*32+18) /* AMD SVM x2AVIC support for 4k vCPUs */ 508 508 509 509 /* 510 510 * BUG word(s)
+9
arch/x86/include/asm/kvm_host.h
··· 2139 2139 * the gfn, i.e. retrying the instruction will hit a 2140 2140 * !PRESENT fault, which results in a new shadow page 2141 2141 * and sends KVM back to square one. 2142 + * 2143 + * EMULTYPE_SKIP_SOFT_INT - Set in combination with EMULTYPE_SKIP to only skip 2144 + * an instruction if it could generate a given software 2145 + * interrupt, which must be encoded via 2146 + * EMULTYPE_SET_SOFT_INT_VECTOR(). 2142 2147 */ 2143 2148 #define EMULTYPE_NO_DECODE (1 << 0) 2144 2149 #define EMULTYPE_TRAP_UD (1 << 1) ··· 2154 2149 #define EMULTYPE_PF (1 << 6) 2155 2150 #define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) 2156 2151 #define EMULTYPE_WRITE_PF_TO_SP (1 << 8) 2152 + #define EMULTYPE_SKIP_SOFT_INT (1 << 9) 2153 + 2154 + #define EMULTYPE_SET_SOFT_INT_VECTOR(v) ((u32)((v) & 0xff) << 16) 2155 + #define EMULTYPE_GET_SOFT_INT_VECTOR(e) (((e) >> 16) & 0xff) 2157 2156 2158 2157 static inline bool kvm_can_emulate_event_vectoring(int emul_type) 2159 2158 {
+4 -1
arch/x86/include/asm/svm.h
··· 279 279 AVIC_IPI_FAILURE_INVALID_IPI_VECTOR, 280 280 }; 281 281 282 - #define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0) 282 + #define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(11, 0) 283 283 284 284 /* 285 285 * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as ··· 289 289 290 290 /* 291 291 * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511). 292 + * With X86_FEATURE_X2AVIC_EXT, the max index is increased to 0xfff (4095). 292 293 */ 293 294 #define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL 295 + #define X2AVIC_4K_MAX_PHYSICAL_ID 0xFFFUL 294 296 295 297 static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID); 296 298 static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID); 299 + static_assert((X2AVIC_4K_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_4K_MAX_PHYSICAL_ID); 297 300 298 301 #define SVM_SEV_FEAT_SNP_ACTIVE BIT(0) 299 302 #define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
+1
arch/x86/include/uapi/asm/kvm.h
··· 502 502 /* vendor-specific groups and attributes for system fd */ 503 503 #define KVM_X86_GRP_SEV 1 504 504 # define KVM_X86_SEV_VMSA_FEATURES 0 505 + # define KVM_X86_SNP_POLICY_BITS 1 505 506 506 507 struct kvm_vmx_nested_state_data { 507 508 __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+1
arch/x86/kernel/cpu/scattered.c
··· 49 49 { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, 50 50 { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, 51 51 { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, 52 + { X86_FEATURE_X2AVIC_EXT, CPUID_ECX, 6, 0x8000000a, 0 }, 52 53 { X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 }, 53 54 { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, 54 55 { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
+1
arch/x86/kvm/cpuid.c
··· 1135 1135 F(AMD_STIBP), 1136 1136 F(AMD_STIBP_ALWAYS_ON), 1137 1137 F(AMD_IBRS_SAME_MODE), 1138 + PASSTHROUGH_F(EFER_LMSLE_MBZ), 1138 1139 F(AMD_PSFD), 1139 1140 F(AMD_IBPB_RET), 1140 1141 );
+70 -16
arch/x86/kvm/svm/avic.c
··· 106 106 static bool next_vm_id_wrapped = 0; 107 107 static DEFINE_SPINLOCK(svm_vm_data_hash_lock); 108 108 static bool x2avic_enabled; 109 - 109 + static u32 x2avic_max_physical_id; 110 110 111 111 static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm, 112 112 bool intercept) ··· 158 158 svm->x2avic_msrs_intercepted = intercept; 159 159 } 160 160 161 + static u32 __avic_get_max_physical_id(struct kvm *kvm, struct kvm_vcpu *vcpu) 162 + { 163 + u32 arch_max; 164 + 165 + /* 166 + * Return the largest size (x2APIC) when querying without a vCPU, e.g. 167 + * to allocate the per-VM table.. 168 + */ 169 + if (x2avic_enabled && (!vcpu || apic_x2apic_mode(vcpu->arch.apic))) 170 + arch_max = x2avic_max_physical_id; 171 + else 172 + arch_max = AVIC_MAX_PHYSICAL_ID; 173 + 174 + /* 175 + * Despite its name, KVM_CAP_MAX_VCPU_ID represents the maximum APIC ID 176 + * plus one, so the max possible APIC ID is one less than that. 177 + */ 178 + return min(kvm->arch.max_vcpu_ids - 1, arch_max); 179 + } 180 + 181 + static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu) 182 + { 183 + return __avic_get_max_physical_id(vcpu->kvm, vcpu); 184 + } 185 + 161 186 static void avic_activate_vmcb(struct vcpu_svm *svm) 162 187 { 163 188 struct vmcb *vmcb = svm->vmcb01.ptr; 189 + struct kvm_vcpu *vcpu = &svm->vcpu; 164 190 165 191 vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK); 192 + 166 193 vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK; 194 + vmcb->control.avic_physical_id |= avic_get_max_physical_id(vcpu); 167 195 168 196 vmcb->control.int_ctl |= AVIC_ENABLE_MASK; 169 197 ··· 204 176 */ 205 177 if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) { 206 178 vmcb->control.int_ctl |= X2APIC_MODE_MASK; 207 - vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID; 179 + 208 180 /* Disabling MSR intercept for x2APIC registers */ 209 181 avic_set_x2apic_msr_interception(svm, false); 210 182 } else { ··· 214 186 */ 215 187 
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu); 216 188 217 - /* For xAVIC and hybrid-xAVIC modes */ 218 - vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID; 219 189 /* Enabling MSR intercept for x2APIC registers */ 220 190 avic_set_x2apic_msr_interception(svm, true); 221 191 } ··· 273 247 return 0; 274 248 } 275 249 250 + static int avic_get_physical_id_table_order(struct kvm *kvm) 251 + { 252 + /* Provision for the maximum physical ID supported in x2avic mode */ 253 + return get_order((__avic_get_max_physical_id(kvm, NULL) + 1) * sizeof(u64)); 254 + } 255 + 256 + int avic_alloc_physical_id_table(struct kvm *kvm) 257 + { 258 + struct kvm_svm *kvm_svm = to_kvm_svm(kvm); 259 + 260 + if (!irqchip_in_kernel(kvm) || !enable_apicv) 261 + return 0; 262 + 263 + if (kvm_svm->avic_physical_id_table) 264 + return 0; 265 + 266 + kvm_svm->avic_physical_id_table = (void *)__get_free_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 267 + avic_get_physical_id_table_order(kvm)); 268 + if (!kvm_svm->avic_physical_id_table) 269 + return -ENOMEM; 270 + 271 + return 0; 272 + } 273 + 276 274 void avic_vm_destroy(struct kvm *kvm) 277 275 { 278 276 unsigned long flags; ··· 306 256 return; 307 257 308 258 free_page((unsigned long)kvm_svm->avic_logical_id_table); 309 - free_page((unsigned long)kvm_svm->avic_physical_id_table); 259 + free_pages((unsigned long)kvm_svm->avic_physical_id_table, 260 + avic_get_physical_id_table_order(kvm)); 310 261 311 262 spin_lock_irqsave(&svm_vm_data_hash_lock, flags); 312 263 hash_del(&kvm_svm->hnode); ··· 324 273 325 274 if (!enable_apicv) 326 275 return 0; 327 - 328 - kvm_svm->avic_physical_id_table = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); 329 - if (!kvm_svm->avic_physical_id_table) 330 - goto free_avic; 331 276 332 277 kvm_svm->avic_logical_id_table = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); 333 278 if (!kvm_svm->avic_logical_id_table) ··· 389 342 * fully initialized AVIC. 
390 343 */ 391 344 if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) || 392 - (id > X2AVIC_MAX_PHYSICAL_ID)) { 345 + (id > x2avic_max_physical_id)) { 393 346 kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG); 394 347 vcpu->arch.apic->apicv_active = false; 395 348 return 0; ··· 609 562 u32 icrh = svm->vmcb->control.exit_info_1 >> 32; 610 563 u32 icrl = svm->vmcb->control.exit_info_1; 611 564 u32 id = svm->vmcb->control.exit_info_2 >> 32; 612 - u32 index = svm->vmcb->control.exit_info_2 & 0x1FF; 565 + u32 index = svm->vmcb->control.exit_info_2 & AVIC_PHYSICAL_MAX_INDEX_MASK; 613 566 struct kvm_lapic *apic = vcpu->arch.apic; 614 567 615 568 trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index); ··· 1009 962 if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) 1010 963 return; 1011 964 1012 - if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= PAGE_SIZE)) 965 + if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= 966 + PAGE_SIZE << avic_get_physical_id_table_order(vcpu->kvm))) 1013 967 return; 1014 968 1015 969 /* ··· 1072 1024 1073 1025 lockdep_assert_preemption_disabled(); 1074 1026 1075 - if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= PAGE_SIZE)) 1027 + if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= 1028 + PAGE_SIZE << avic_get_physical_id_table_order(vcpu->kvm))) 1076 1029 return; 1077 1030 1078 1031 /* ··· 1275 1226 1276 1227 /* AVIC is a prerequisite for x2AVIC. 
*/ 1277 1228 x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC); 1278 - if (x2avic_enabled) 1279 - pr_info("x2AVIC enabled\n"); 1280 - else 1229 + if (x2avic_enabled) { 1230 + if (cpu_feature_enabled(X86_FEATURE_X2AVIC_EXT)) 1231 + x2avic_max_physical_id = X2AVIC_4K_MAX_PHYSICAL_ID; 1232 + else 1233 + x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID; 1234 + pr_info("x2AVIC enabled (max %u vCPUs)\n", x2avic_max_physical_id + 1); 1235 + } else { 1281 1236 svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true; 1237 + } 1282 1238 1283 1239 /* 1284 1240 * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2)
+2 -10
arch/x86/kvm/svm/nested.c
··· 613 613 struct kvm_vcpu *vcpu = &svm->vcpu; 614 614 615 615 nested_vmcb02_compute_g_pat(svm); 616 + vmcb_mark_dirty(vmcb02, VMCB_NPT); 616 617 617 618 /* Load the nested guest state */ 618 619 if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) { ··· 752 751 vmcb02->control.nested_ctl = vmcb01->control.nested_ctl; 753 752 vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa; 754 753 vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa; 754 + vmcb_mark_dirty(vmcb02, VMCB_PERM_MAP); 755 755 756 756 /* 757 757 * Stash vmcb02's counter if the guest hasn't moved past the guilty ··· 1432 1430 case SVM_EXIT_IOIO: 1433 1431 vmexit = nested_svm_intercept_ioio(svm); 1434 1432 break; 1435 - case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { 1436 - if (vmcb12_is_intercept(&svm->nested.ctl, exit_code)) 1437 - vmexit = NESTED_EXIT_DONE; 1438 - break; 1439 - } 1440 - case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { 1441 - if (vmcb12_is_intercept(&svm->nested.ctl, exit_code)) 1442 - vmexit = NESTED_EXIT_DONE; 1443 - break; 1444 - } 1445 1433 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { 1446 1434 /* 1447 1435 * Host-intercepted exceptions have been checked already in
+28 -17
arch/x86/kvm/svm/sev.c
··· 65 65 #define AP_RESET_HOLD_NAE_EVENT 1 66 66 #define AP_RESET_HOLD_MSR_PROTO 2 67 67 68 - /* As defined by SEV-SNP Firmware ABI, under "Guest Policy". */ 69 - #define SNP_POLICY_MASK_API_MINOR GENMASK_ULL(7, 0) 70 - #define SNP_POLICY_MASK_API_MAJOR GENMASK_ULL(15, 8) 71 - #define SNP_POLICY_MASK_SMT BIT_ULL(16) 72 - #define SNP_POLICY_MASK_RSVD_MBO BIT_ULL(17) 73 - #define SNP_POLICY_MASK_DEBUG BIT_ULL(19) 74 - #define SNP_POLICY_MASK_SINGLE_SOCKET BIT_ULL(20) 68 + /* 69 + * SEV-SNP policy bits that can be supported by KVM. These include policy bits 70 + * that have implementation support within KVM or policy bits that do not 71 + * require implementation support within KVM to enforce the policy. 72 + */ 73 + #define KVM_SNP_POLICY_MASK_VALID (SNP_POLICY_MASK_API_MINOR | \ 74 + SNP_POLICY_MASK_API_MAJOR | \ 75 + SNP_POLICY_MASK_SMT | \ 76 + SNP_POLICY_MASK_RSVD_MBO | \ 77 + SNP_POLICY_MASK_DEBUG | \ 78 + SNP_POLICY_MASK_SINGLE_SOCKET | \ 79 + SNP_POLICY_MASK_CXL_ALLOW | \ 80 + SNP_POLICY_MASK_MEM_AES_256_XTS | \ 81 + SNP_POLICY_MASK_RAPL_DIS | \ 82 + SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM | \ 83 + SNP_POLICY_MASK_PAGE_SWAP_DISABLE) 75 84 76 - #define SNP_POLICY_MASK_VALID (SNP_POLICY_MASK_API_MINOR | \ 77 - SNP_POLICY_MASK_API_MAJOR | \ 78 - SNP_POLICY_MASK_SMT | \ 79 - SNP_POLICY_MASK_RSVD_MBO | \ 80 - SNP_POLICY_MASK_DEBUG | \ 81 - SNP_POLICY_MASK_SINGLE_SOCKET) 85 + static u64 snp_supported_policy_bits __ro_after_init; 82 86 83 87 #define INITIAL_VMSA_GPA 0xFFFFFFFFF000 84 88 ··· 2147 2143 *val = sev_supported_vmsa_features; 2148 2144 return 0; 2149 2145 2146 + case KVM_X86_SNP_POLICY_BITS: 2147 + *val = snp_supported_policy_bits; 2148 + return 0; 2149 + 2150 2150 default: 2151 2151 return -ENXIO; 2152 2152 } ··· 2215 2207 if (params.flags) 2216 2208 return -EINVAL; 2217 2209 2218 - if (params.policy & ~SNP_POLICY_MASK_VALID) 2210 + if (params.policy & ~snp_supported_policy_bits) 2219 2211 return -EINVAL; 2220 2212 2221 2213 /* Check for policy bits that 
must be set */ ··· 3108 3100 else if (sev_snp_supported) 3109 3101 sev_snp_supported = is_sev_snp_initialized(); 3110 3102 3111 - if (sev_snp_supported) 3103 + if (sev_snp_supported) { 3104 + snp_supported_policy_bits = sev_get_snp_policy_bits() & 3105 + KVM_SNP_POLICY_MASK_VALID; 3112 3106 nr_ciphertext_hiding_asids = init_args.max_snp_asid; 3107 + } 3113 3108 3114 3109 /* 3115 3110 * If ciphertext hiding is enabled, the joint SEV-ES/SEV-SNP ··· 5096 5085 5097 5086 /* Check if the SEV policy allows debugging */ 5098 5087 if (sev_snp_guest(vcpu->kvm)) { 5099 - if (!(sev->policy & SNP_POLICY_DEBUG)) 5088 + if (!(sev->policy & SNP_POLICY_MASK_DEBUG)) 5100 5089 return NULL; 5101 5090 } else { 5102 - if (sev->policy & SEV_POLICY_NODBG) 5091 + if (sev->policy & SEV_POLICY_MASK_NODBG) 5103 5092 return NULL; 5104 5093 } 5105 5094
+55 -31
arch/x86/kvm/svm/svm.c
··· 272 272 } 273 273 274 274 static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, 275 + int emul_type, 275 276 bool commit_side_effects) 276 277 { 277 278 struct vcpu_svm *svm = to_svm(vcpu); ··· 294 293 if (unlikely(!commit_side_effects)) 295 294 old_rflags = svm->vmcb->save.rflags; 296 295 297 - if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) 296 + if (!kvm_emulate_instruction(vcpu, emul_type)) 298 297 return 0; 299 298 300 299 if (unlikely(!commit_side_effects)) ··· 312 311 313 312 static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu) 314 313 { 315 - return __svm_skip_emulated_instruction(vcpu, true); 314 + return __svm_skip_emulated_instruction(vcpu, EMULTYPE_SKIP, true); 316 315 } 317 316 318 - static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu) 317 + static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu, u8 vector) 319 318 { 319 + const int emul_type = EMULTYPE_SKIP | EMULTYPE_SKIP_SOFT_INT | 320 + EMULTYPE_SET_SOFT_INT_VECTOR(vector); 320 321 unsigned long rip, old_rip = kvm_rip_read(vcpu); 321 322 struct vcpu_svm *svm = to_svm(vcpu); 322 323 ··· 334 331 * in use, the skip must not commit any side effects such as clearing 335 332 * the interrupt shadow or RFLAGS.RF. 
336 333 */ 337 - if (!__svm_skip_emulated_instruction(vcpu, !nrips)) 334 + if (!__svm_skip_emulated_instruction(vcpu, emul_type, !nrips)) 338 335 return -EIO; 339 336 340 337 rip = kvm_rip_read(vcpu); ··· 370 367 kvm_deliver_exception_payload(vcpu, ex); 371 368 372 369 if (kvm_exception_is_soft(ex->vector) && 373 - svm_update_soft_interrupt_rip(vcpu)) 370 + svm_update_soft_interrupt_rip(vcpu, ex->vector)) 374 371 return; 375 372 376 373 svm->vmcb->control.event_inj = ex->vector ··· 1199 1196 { 1200 1197 svm->current_vmcb = target_vmcb; 1201 1198 svm->vmcb = target_vmcb->ptr; 1199 + } 1200 + 1201 + static int svm_vcpu_precreate(struct kvm *kvm) 1202 + { 1203 + return avic_alloc_physical_id_table(kvm); 1202 1204 } 1203 1205 1204 1206 static int svm_vcpu_create(struct kvm_vcpu *vcpu) ··· 3636 3628 3637 3629 static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) 3638 3630 { 3631 + struct kvm_queued_interrupt *intr = &vcpu->arch.interrupt; 3639 3632 struct vcpu_svm *svm = to_svm(vcpu); 3640 3633 u32 type; 3641 3634 3642 - if (vcpu->arch.interrupt.soft) { 3643 - if (svm_update_soft_interrupt_rip(vcpu)) 3635 + if (intr->soft) { 3636 + if (svm_update_soft_interrupt_rip(vcpu, intr->nr)) 3644 3637 return; 3645 3638 3646 3639 type = SVM_EVTINJ_TYPE_SOFT; ··· 3649 3640 type = SVM_EVTINJ_TYPE_INTR; 3650 3641 } 3651 3642 3652 - trace_kvm_inj_virq(vcpu->arch.interrupt.nr, 3653 - vcpu->arch.interrupt.soft, reinjected); 3643 + trace_kvm_inj_virq(intr->nr, intr->soft, reinjected); 3654 3644 ++vcpu->stat.irq_injections; 3655 3645 3656 - svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | 3657 - SVM_EVTINJ_VALID | type; 3646 + svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type; 3658 3647 } 3659 3648 3660 3649 void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, ··· 4518 4511 case SVM_EXIT_WRITE_CR0: { 4519 4512 unsigned long cr0, val; 4520 4513 4521 - if (info->intercept == x86_intercept_cr_write) 4514 + /* 4515 + * Adjust the 
exit code accordingly if a CR other than CR0 is 4516 + * being written, and skip straight to the common handling as 4517 + * only CR0 has an additional selective intercept. 4518 + */ 4519 + if (info->intercept == x86_intercept_cr_write && info->modrm_reg) { 4522 4520 icpt_info.exit_code += info->modrm_reg; 4523 - 4524 - if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 || 4525 - info->intercept == x86_intercept_clts) 4526 4521 break; 4527 - 4528 - if (!(vmcb12_is_intercept(&svm->nested.ctl, 4529 - INTERCEPT_SELECTIVE_CR0))) 4530 - break; 4531 - 4532 - cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; 4533 - val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; 4534 - 4535 - if (info->intercept == x86_intercept_lmsw) { 4536 - cr0 &= 0xfUL; 4537 - val &= 0xfUL; 4538 - /* lmsw can't clear PE - catch this here */ 4539 - if (cr0 & X86_CR0_PE) 4540 - val |= X86_CR0_PE; 4541 4522 } 4542 4523 4524 + /* 4525 + * Convert the exit_code to SVM_EXIT_CR0_SEL_WRITE if a 4526 + * selective CR0 intercept is triggered (the common logic will 4527 + * treat the selective intercept as being enabled). Note, the 4528 + * unconditional intercept has higher priority, i.e. this is 4529 + * only relevant if *only* the selective intercept is enabled. 4530 + */ 4531 + if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_CR0_WRITE) || 4532 + !(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))) 4533 + break; 4534 + 4535 + /* CLTS never triggers INTERCEPT_SELECTIVE_CR0 */ 4536 + if (info->intercept == x86_intercept_clts) 4537 + break; 4538 + 4539 + /* LMSW always triggers INTERCEPT_SELECTIVE_CR0 */ 4540 + if (info->intercept == x86_intercept_lmsw) { 4541 + icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; 4542 + break; 4543 + } 4544 + 4545 + /* 4546 + * MOV-to-CR0 only triggers INTERCEPT_SELECTIVE_CR0 if any bit 4547 + * other than SVM_CR0_SELECTIVE_MASK is changed. 
4548 + */ 4549 + cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; 4550 + val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; 4543 4551 if (cr0 ^ val) 4544 4552 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; 4545 - 4546 4553 break; 4547 4554 } 4548 4555 case SVM_EXIT_READ_DR0: ··· 5026 5005 .emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu, 5027 5006 .has_emulated_msr = svm_has_emulated_msr, 5028 5007 5008 + .vcpu_precreate = svm_vcpu_precreate, 5029 5009 .vcpu_create = svm_vcpu_create, 5030 5010 .vcpu_free = svm_vcpu_free, 5031 5011 .vcpu_reset = svm_vcpu_reset, ··· 5331 5309 5332 5310 if (nested) { 5333 5311 pr_info("Nested Virtualization enabled\n"); 5334 - kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); 5312 + kvm_enable_efer_bits(EFER_SVME); 5313 + if (!boot_cpu_has(X86_FEATURE_EFER_LMSLE_MBZ)) 5314 + kvm_enable_efer_bits(EFER_LMSLE); 5335 5315 5336 5316 r = nested_svm_init_msrpm_merge_offsets(); 5337 5317 if (r)
+1 -3
arch/x86/kvm/svm/svm.h
··· 117 117 cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */ 118 118 }; 119 119 120 - #define SEV_POLICY_NODBG BIT_ULL(0) 121 - #define SNP_POLICY_DEBUG BIT_ULL(19) 122 - 123 120 struct kvm_svm { 124 121 struct kvm kvm; 125 122 ··· 804 807 805 808 bool __init avic_hardware_setup(void); 806 809 void avic_hardware_unsetup(void); 810 + int avic_alloc_physical_id_table(struct kvm *kvm); 807 811 void avic_vm_destroy(struct kvm *kvm); 808 812 int avic_vm_init(struct kvm *kvm); 809 813 void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
+37 -10
arch/x86/kvm/svm/vmenter.S
··· 52 52 * there must not be any returns or indirect branches between this code 53 53 * and vmentry. 54 54 */ 55 - movl SVM_spec_ctrl(%_ASM_DI), %eax 56 - cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax 55 + #ifdef CONFIG_X86_64 56 + mov SVM_spec_ctrl(%rdi), %rdx 57 + cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx 57 58 je 801b 59 + movl %edx, %eax 60 + shr $32, %rdx 61 + #else 62 + mov SVM_spec_ctrl(%edi), %eax 63 + mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx 64 + xor %eax, %ecx 65 + mov SVM_spec_ctrl + 4(%edi), %edx 66 + mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %esi 67 + xor %edx, %esi 68 + or %esi, %ecx 69 + je 801b 70 + #endif 58 71 mov $MSR_IA32_SPEC_CTRL, %ecx 59 - xor %edx, %edx 60 72 wrmsr 61 73 jmp 801b 62 74 .endm ··· 93 81 jnz 998f 94 82 rdmsr 95 83 movl %eax, SVM_spec_ctrl(%_ASM_DI) 84 + movl %edx, SVM_spec_ctrl + 4(%_ASM_DI) 96 85 998: 97 - 98 86 /* Now restore the host value of the MSR if different from the guest's. */ 99 - movl PER_CPU_VAR(x86_spec_ctrl_current), %eax 100 - cmp SVM_spec_ctrl(%_ASM_DI), %eax 87 + #ifdef CONFIG_X86_64 88 + mov PER_CPU_VAR(x86_spec_ctrl_current), %rdx 89 + cmp SVM_spec_ctrl(%rdi), %rdx 101 90 je 901b 102 - xor %edx, %edx 91 + movl %edx, %eax 92 + shr $32, %rdx 93 + #else 94 + mov PER_CPU_VAR(x86_spec_ctrl_current), %eax 95 + mov SVM_spec_ctrl(%edi), %esi 96 + xor %eax, %esi 97 + mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edx 98 + mov SVM_spec_ctrl + 4(%edi), %edi 99 + xor %edx, %edi 100 + or %edi, %esi 101 + je 901b 102 + #endif 103 103 wrmsr 104 104 jmp 901b 105 105 .endm ··· 160 136 mov %_ASM_ARG1, %_ASM_DI 161 137 .endif 162 138 163 - /* Clobbers RAX, RCX, RDX. */ 139 + /* Clobbers RAX, RCX, RDX (and ESI on 32-bit), consumes RDI (@svm). */ 164 140 RESTORE_GUEST_SPEC_CTRL 165 141 166 142 /* ··· 237 213 /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ 238 214 FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT 239 215 240 - /* Clobbers RAX, RCX, RDX. 
*/ 216 + /* 217 + * Clobbers RAX, RCX, RDX (and ESI, EDI on 32-bit), consumes RDI (@svm) 218 + * and RSP (pointer to @spec_ctrl_intercepted). 219 + */ 241 220 RESTORE_HOST_SPEC_CTRL 242 221 243 222 /* ··· 360 333 mov %rdi, SEV_ES_RDI (%rdx) 361 334 mov %rsi, SEV_ES_RSI (%rdx) 362 335 363 - /* Clobbers RAX, RCX, RDX (@hostsa). */ 336 + /* Clobbers RAX, RCX, and RDX (@hostsa), consumes RDI (@svm). */ 364 337 RESTORE_GUEST_SPEC_CTRL 365 338 366 339 /* Get svm->current_vmcb->pa into RAX. */
+21
arch/x86/kvm/x86.c
··· 9332 9332 return false; 9333 9333 } 9334 9334 9335 + static bool is_soft_int_instruction(struct x86_emulate_ctxt *ctxt, 9336 + int emulation_type) 9337 + { 9338 + u8 vector = EMULTYPE_GET_SOFT_INT_VECTOR(emulation_type); 9339 + 9340 + switch (ctxt->b) { 9341 + case 0xcc: 9342 + return vector == BP_VECTOR; 9343 + case 0xcd: 9344 + return vector == ctxt->src.val; 9345 + case 0xce: 9346 + return vector == OF_VECTOR; 9347 + default: 9348 + return false; 9349 + } 9350 + } 9351 + 9335 9352 /* 9336 9353 * Decode an instruction for emulation. The caller is responsible for handling 9337 9354 * code breakpoints. Note, manually detecting code breakpoints is unnecessary ··· 9459 9442 * injecting single-step #DBs. 9460 9443 */ 9461 9444 if (emulation_type & EMULTYPE_SKIP) { 9445 + if (emulation_type & EMULTYPE_SKIP_SOFT_INT && 9446 + !is_soft_int_instruction(ctxt, emulation_type)) 9447 + return 0; 9448 + 9462 9449 if (ctxt->mode != X86EMUL_MODE_PROT64) 9463 9450 ctxt->eip = (u32)ctxt->_eip; 9464 9451 else
+37
drivers/crypto/ccp/sev-dev.c
··· 2777 2777 } 2778 2778 EXPORT_SYMBOL_GPL(sev_platform_shutdown); 2779 2779 2780 + u64 sev_get_snp_policy_bits(void) 2781 + { 2782 + struct psp_device *psp = psp_master; 2783 + struct sev_device *sev; 2784 + u64 policy_bits; 2785 + 2786 + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) 2787 + return 0; 2788 + 2789 + if (!psp || !psp->sev_data) 2790 + return 0; 2791 + 2792 + sev = psp->sev_data; 2793 + 2794 + policy_bits = SNP_POLICY_MASK_BASE; 2795 + 2796 + if (sev->snp_plat_status.feature_info) { 2797 + if (sev->snp_feat_info_0.ecx & SNP_RAPL_DISABLE_SUPPORTED) 2798 + policy_bits |= SNP_POLICY_MASK_RAPL_DIS; 2799 + 2800 + if (sev->snp_feat_info_0.ecx & SNP_CIPHER_TEXT_HIDING_SUPPORTED) 2801 + policy_bits |= SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM; 2802 + 2803 + if (sev->snp_feat_info_0.ecx & SNP_AES_256_XTS_POLICY_SUPPORTED) 2804 + policy_bits |= SNP_POLICY_MASK_MEM_AES_256_XTS; 2805 + 2806 + if (sev->snp_feat_info_0.ecx & SNP_CXL_ALLOW_POLICY_SUPPORTED) 2807 + policy_bits |= SNP_POLICY_MASK_CXL_ALLOW; 2808 + 2809 + if (sev_version_greater_or_equal(1, 58)) 2810 + policy_bits |= SNP_POLICY_MASK_PAGE_SWAP_DISABLE; 2811 + } 2812 + 2813 + return policy_bits; 2814 + } 2815 + EXPORT_SYMBOL_GPL(sev_get_snp_policy_bits); 2816 + 2780 2817 void sev_dev_destroy(struct psp_device *psp) 2781 2818 { 2782 2819 struct sev_device *sev = psp->sev_data;
+37
include/linux/psp-sev.h
··· 14 14 15 15 #include <uapi/linux/psp-sev.h> 16 16 17 + /* As defined by SEV API, under "Guest Policy". */ 18 + #define SEV_POLICY_MASK_NODBG BIT(0) 19 + #define SEV_POLICY_MASK_NOKS BIT(1) 20 + #define SEV_POLICY_MASK_ES BIT(2) 21 + #define SEV_POLICY_MASK_NOSEND BIT(3) 22 + #define SEV_POLICY_MASK_DOMAIN BIT(4) 23 + #define SEV_POLICY_MASK_SEV BIT(5) 24 + #define SEV_POLICY_MASK_API_MAJOR GENMASK(23, 16) 25 + #define SEV_POLICY_MASK_API_MINOR GENMASK(31, 24) 26 + 27 + /* As defined by SEV-SNP Firmware ABI, under "Guest Policy". */ 28 + #define SNP_POLICY_MASK_API_MINOR GENMASK_ULL(7, 0) 29 + #define SNP_POLICY_MASK_API_MAJOR GENMASK_ULL(15, 8) 30 + #define SNP_POLICY_MASK_SMT BIT_ULL(16) 31 + #define SNP_POLICY_MASK_RSVD_MBO BIT_ULL(17) 32 + #define SNP_POLICY_MASK_MIGRATE_MA BIT_ULL(18) 33 + #define SNP_POLICY_MASK_DEBUG BIT_ULL(19) 34 + #define SNP_POLICY_MASK_SINGLE_SOCKET BIT_ULL(20) 35 + #define SNP_POLICY_MASK_CXL_ALLOW BIT_ULL(21) 36 + #define SNP_POLICY_MASK_MEM_AES_256_XTS BIT_ULL(22) 37 + #define SNP_POLICY_MASK_RAPL_DIS BIT_ULL(23) 38 + #define SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM BIT_ULL(24) 39 + #define SNP_POLICY_MASK_PAGE_SWAP_DISABLE BIT_ULL(25) 40 + 41 + /* Base SEV-SNP policy bitmask for minimum supported SEV firmware version */ 42 + #define SNP_POLICY_MASK_BASE (SNP_POLICY_MASK_API_MINOR | \ 43 + SNP_POLICY_MASK_API_MAJOR | \ 44 + SNP_POLICY_MASK_SMT | \ 45 + SNP_POLICY_MASK_RSVD_MBO | \ 46 + SNP_POLICY_MASK_MIGRATE_MA | \ 47 + SNP_POLICY_MASK_DEBUG | \ 48 + SNP_POLICY_MASK_SINGLE_SOCKET) 49 + 17 50 #define SEV_FW_BLOB_MAX_SIZE 0x4000 /* 16KB */ 18 51 19 52 /** ··· 882 849 u32 edx; 883 850 } __packed; 884 851 852 + #define SNP_RAPL_DISABLE_SUPPORTED BIT(2) 885 853 #define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3) 854 + #define SNP_AES_256_XTS_POLICY_SUPPORTED BIT(4) 855 + #define SNP_CXL_ALLOW_POLICY_SUPPORTED BIT(5) 886 856 887 857 #ifdef CONFIG_CRYPTO_DEV_SP_PSP 888 858 ··· 1031 995 void snp_free_firmware_page(void *addr); 1032 996 void 
sev_platform_shutdown(void); 1033 997 bool sev_is_snp_ciphertext_hiding_supported(void); 998 + u64 sev_get_snp_policy_bits(void); 1034 999 1035 1000 #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ 1036 1001