Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-x86-fixes-6.12-rcN' of https://github.com/kvm-x86/linux into HEAD

KVM x86 and selftests fixes for 6.12:

- Increase the timeout for the memslot performance selftest to avoid false
failures on arm64 and nested x86 platforms.

- Fix a goof in the guest_memfd selftest where a for-loop initialized a
bit mask to zero instead of BIT(0).

- Disable strict aliasing when building KVM selftests to prevent the
  compiler from treating things like "u64 *" to "uint64_t *" casts as
  undefined behavior, which can lead to nasty, hard to debug failures.

- Force -march=x86-64-v2 for KVM x86 selftests if and only if the uarch
is supported by the compiler.

- When emulating a guest TLB flush for a nested guest, flush vpid01, not
vpid02, if L2 is active but VPID is disabled in vmcs12, i.e. if L2 and
L1 are sharing VPID '0' (from L1's perspective).

- Fix a bug in the SNP initialization flow where KVM would return '0' to
userspace instead of -errno on failure.

+39 -14
+5 -2
arch/x86/kvm/svm/sev.c
··· 450 450 goto e_free; 451 451 452 452 /* This needs to happen after SEV/SNP firmware initialization. */ 453 - if (vm_type == KVM_X86_SNP_VM && snp_guest_req_init(kvm)) 454 - goto e_free; 453 + if (vm_type == KVM_X86_SNP_VM) { 454 + ret = snp_guest_req_init(kvm); 455 + if (ret) 456 + goto e_free; 457 + } 455 458 456 459 INIT_LIST_HEAD(&sev->regions_list); 457 460 INIT_LIST_HEAD(&sev->mirror_vms);
+25 -5
arch/x86/kvm/vmx/nested.c
··· 1197 1197 kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept); 1198 1198 1199 1199 /* 1200 - * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings 1201 - * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a 1202 - * full TLB flush from the guest's perspective. This is required even 1203 - * if VPID is disabled in the host as KVM may need to synchronize the 1204 - * MMU in response to the guest TLB flush. 1200 + * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the 1201 + * same VPID as the host, and so architecturally, linear and combined 1202 + * mappings for VPID=0 must be flushed at VM-Enter and VM-Exit. KVM 1203 + * emulates L2 sharing L1's VPID=0 by using vpid01 while running L2, 1204 + * and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01. This 1205 + * is required if VPID is disabled in KVM, as a TLB flush (there are no 1206 + * VPIDs) still occurs from L1's perspective, and KVM may need to 1207 + * synchronize the MMU in response to the guest TLB flush. 1205 1208 * 1206 1209 * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use. 1207 1210 * EPT is a special snowflake, as guest-physical mappings aren't ··· 2318 2315 2319 2316 vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA); 2320 2317 2318 + /* 2319 + * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the 2320 + * same VPID as the host. Emulate this behavior by using vpid01 for L2 2321 + * if VPID is disabled in vmcs12. Note, if VPID is disabled, VM-Enter 2322 + * and VM-Exit are architecturally required to flush VPID=0, but *only* 2323 + * VPID=0. I.e. using vpid02 would be ok (so long as KVM emulates the 2324 + * required flushes), but doing so would cause KVM to over-flush. E.g. 2325 + * if L1 runs L2 X with VPID12=1, then runs L2 Y with VPID12 disabled, 2326 + * and then runs L2 X again, then KVM can and should retain TLB entries 2327 + * for VPID12=1. 
2328 + */ 2321 2329 if (enable_vpid) { 2322 2330 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) 2323 2331 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); ··· 5964 5950 return nested_vmx_fail(vcpu, 5965 5951 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); 5966 5952 5953 + /* 5954 + * Always flush the effective vpid02, i.e. never flush the current VPID 5955 + * and never explicitly flush vpid01. INVVPID targets a VPID, not a 5956 + * VMCS, and so whether or not the current vmcs12 has VPID enabled is 5957 + * irrelevant (and there may not be a loaded vmcs12). 5958 + */ 5967 5959 vpid02 = nested_get_vpid02(vcpu); 5968 5960 switch (type) { 5969 5961 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+1 -1
arch/x86/kvm/vmx/vmx.c
··· 3216 3216 3217 3217 static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) 3218 3218 { 3219 - if (is_guest_mode(vcpu)) 3219 + if (is_guest_mode(vcpu) && nested_cpu_has_vpid(get_vmcs12(vcpu))) 3220 3220 return nested_get_vpid02(vcpu); 3221 3221 return to_vmx(vcpu)->vpid; 3222 3222 }
+6 -4
tools/testing/selftests/kvm/Makefile
··· 241 241 -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ 242 242 -fno-builtin-memcmp -fno-builtin-memcpy \ 243 243 -fno-builtin-memset -fno-builtin-strnlen \ 244 - -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ 245 - -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ 246 - -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ 247 - $(KHDR_INCLUDES) 244 + -fno-stack-protector -fno-PIE -fno-strict-aliasing \ 245 + -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ 246 + -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \ 247 + -I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES) 248 248 ifeq ($(ARCH),s390) 249 249 CFLAGS += -march=z10 250 250 endif 251 251 ifeq ($(ARCH),x86) 252 + ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0) 252 253 CFLAGS += -march=x86-64-v2 254 + endif 253 255 endif 254 256 ifeq ($(ARCH),arm64) 255 257 tools_dir := $(top_srcdir)/tools
+1 -1
tools/testing/selftests/kvm/guest_memfd_test.c
··· 134 134 size); 135 135 } 136 136 137 - for (flag = 0; flag; flag <<= 1) { 137 + for (flag = BIT(0); flag; flag <<= 1) { 138 138 fd = __vm_create_guest_memfd(vm, page_size, flag); 139 139 TEST_ASSERT(fd == -1 && errno == EINVAL, 140 140 "guest_memfd() with flag '0x%lx' should fail with EINVAL",
+1 -1
tools/testing/selftests/kvm/memslot_perf_test.c
··· 417 417 */ 418 418 static noinline void host_perform_sync(struct sync_area *sync) 419 419 { 420 - alarm(2); 420 + alarm(10); 421 421 422 422 atomic_store_explicit(&sync->sync_flag, true, memory_order_release); 423 423 while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))