Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:

- Allow KVM to be loaded again on 32-bit non-PAE builds

- Fixes for host SMIs on AMD

- Fixes for guest SMIs on AMD

- Fixes for selftests on s390 and ARM

- Fix a memory leak in kvm_create_vm_debugfs

- Enforce no-instrumentation area on vmentry when hardware breakpoints
are in use.

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits)
KVM: selftests: smm_test: Test SMM enter from L2
KVM: nSVM: Restore nested control upon leaving SMM
KVM: nSVM: Fix L1 state corruption upon return from SMM
KVM: nSVM: Introduce svm_copy_vmrun_state()
KVM: nSVM: Check that VM_HSAVE_PA MSR was set before VMRUN
KVM: nSVM: Check the value written to MSR_VM_HSAVE_PA
KVM: SVM: Fix sev_pin_memory() error checks in SEV migration utilities
KVM: SVM: Return -EFAULT if copy_to_user() for SEV mig packet header fails
KVM: SVM: add module param to control the #SMI interception
KVM: SVM: remove INIT intercept handler
KVM: SVM: #SMI interception must not skip the instruction
KVM: VMX: Remove vmx_msr_index from vmx.h
KVM: X86: Disable hardware breakpoints unconditionally before kvm_x86->run()
KVM: selftests: Address extra memslot parameters in vm_vaddr_alloc
kvm: debugfs: fix memory leak in kvm_create_vm_debugfs
KVM: x86/pmu: Clear anythread deprecated bit when 0xa leaf is unsupported on the SVM
KVM: mmio: Fix use-after-free Read in kvm_vm_ioctl_unregister_coalesced_mmio
KVM: SVM: Revert clearing of C-bit on GPA in #NPF handler
KVM: x86/mmu: Do not apply HPA (memory encryption) mask to GPAs
KVM: x86: Use kernel's x86_phys_bits to handle reduced MAXPHYADDR
...

+255 -64
+25 -5
arch/x86/kvm/cpuid.c
···
     edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
     edx.split.bit_width_fixed = cap.bit_width_fixed;
-    edx.split.anythread_deprecated = 1;
+    if (cap.version)
+        edx.split.anythread_deprecated = 1;
     edx.split.reserved1 = 0;
     edx.split.reserved2 = 0;
···
     unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
     unsigned phys_as = entry->eax & 0xff;

-    if (!g_phys_as)
+    /*
+     * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
+     * the guest operates in the same PA space as the host, i.e.
+     * reductions in MAXPHYADDR for memory encryption affect shadow
+     * paging, too.
+     *
+     * If TDP is enabled but an explicit guest MAXPHYADDR is not
+     * provided, use the raw bare metal MAXPHYADDR as reductions to
+     * the HPAs do not affect GPAs.
+     */
+    if (!tdp_enabled)
+        g_phys_as = boot_cpu_data.x86_phys_bits;
+    else if (!g_phys_as)
         g_phys_as = phys_as;
+
     entry->eax = g_phys_as | (virt_as << 8);
     entry->edx = 0;
     cpuid_entry_override(entry, CPUID_8000_0008_EBX);
···
     case 0x8000001a:
     case 0x8000001e:
         break;
-    /* Support memory encryption cpuid if host supports it */
     case 0x8000001F:
-        if (!kvm_cpu_cap_has(X86_FEATURE_SEV))
+        if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
             entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
-        else
+        } else {
             cpuid_entry_override(entry, CPUID_8000_001F_EAX);
+
+            /*
+             * Enumerate '0' for "PA bits reduction", the adjusted
+             * MAXPHYADDR is enumerated directly (see 0x80000008).
+             */
+            entry->ebx &= ~GENMASK(11, 6);
+        }
         break;
     /*Add support for Centaur's CPUID instruction*/
     case 0xC0000000:
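For reference: the field being zeroed in the 0x8000001F leaf is EBX bits 11:6, the SEV "PA bits reduction" field mentioned in the new comment. A stand-alone sketch of the mask arithmetic, with GENMASK() re-defined locally (assuming 64-bit unsigned long) only so it builds outside the kernel tree:

#include <stdio.h>

/* Local stand-in for the kernel's GENMASK(); sets bits h..l, assumes 64-bit long. */
#define GENMASK(h, l) ((~0UL << (l)) & (~0UL >> (63 - (h))))

int main(void)
{
    /* "PA bits reduction" lives in CPUID 0x8000001F.EBX bits 11:6. */
    printf("GENMASK(11, 6) = %#lx\n", GENMASK(11, 6)); /* prints 0xfc0 */
    return 0;
}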
+2
arch/x86/kvm/mmu/mmu.c
···
 #include <asm/kvm_page_track.h>
 #include "trace.h"

+#include "paging.h"
+
 extern bool itlb_multihit_kvm_mitigation;

 int __read_mostly nx_huge_pages = -1;
+14
arch/x86/kvm/mmu/paging.h
···
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Shadow paging constants/helpers that don't need to be #undef'd. */
+#ifndef __KVM_X86_PAGING_H
+#define __KVM_X86_PAGING_H
+
+#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#define PT64_LVL_ADDR_MASK(level) \
+    (GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+                                              * PT64_LEVEL_BITS))) - 1))
+#define PT64_LVL_OFFSET_MASK(level) \
+    (GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+                                             * PT64_LEVEL_BITS))) - 1))
+#endif /* __KVM_X86_PAGING_H */
+
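As a quick sanity check of these guest page-table masks, here is a minimal user-space sketch. PAGE_SIZE, PAGE_SHIFT and PT64_LEVEL_BITS are assumed here to be the 4 KiB / 9-bits-per-level values the 64-bit shadow MMU uses; in the real build they come from kernel headers.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* Assumed values; supplied by kernel headers in the real build. */
#define PAGE_SIZE       4096ULL
#define PAGE_SHIFT      12
#define PT64_LEVEL_BITS 9

#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE - 1))
#define PT64_LVL_ADDR_MASK(level) \
    (GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
                                              * PT64_LEVEL_BITS))) - 1))

int main(void)
{
    /* Level 1 keeps bits 51:12 (4 KiB frames), level 2 keeps bits 51:21
     * (2 MiB regions), level 3 keeps bits 51:30 (1 GiB regions). */
    for (int level = 1; level <= 3; level++)
        printf("PT64_LVL_ADDR_MASK(%d) = %#llx\n", level,
               (unsigned long long)PT64_LVL_ADDR_MASK(level));
    return 0;
}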
+2 -2
arch/x86/kvm/mmu/paging_tmpl.h
···
 #define pt_element_t u64
 #define guest_walker guest_walker64
 #define FNAME(name) paging##64_##name
-#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+#define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
 #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
 #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
 #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
···
 #define pt_element_t u64
 #define guest_walker guest_walkerEPT
 #define FNAME(name) ept_##name
-#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+#define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
 #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
 #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
 #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
-6
arch/x86/kvm/mmu/spte.h
···
 #else
 #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
 #endif
-#define PT64_LVL_ADDR_MASK(level) \
-    (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
-                                        * PT64_LEVEL_BITS))) - 1))
-#define PT64_LVL_OFFSET_MASK(level) \
-    (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
-                                       * PT64_LEVEL_BITS))) - 1))

 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
                         | shadow_x_mask | shadow_nx_mask | shadow_me_mask)
+33 -20
arch/x86/kvm/svm/nested.c
···
     for (i = 0; i < MAX_INTERCEPT; i++)
         c->intercepts[i] |= g->intercepts[i];
+
+    /* If SMI is not intercepted, ignore guest SMI intercept as well  */
+    if (!intercept_smi)
+        vmcb_clr_intercept(c, INTERCEPT_SMI);
 }

 static void copy_vmcb_control_area(struct vmcb_control_area *dst,
···
     return true;
 }

-static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
-                                            struct vmcb_control_area *control)
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+                                     struct vmcb_control_area *control)
 {
     copy_vmcb_control_area(&svm->nested.ctl, control);

···
     struct kvm_host_map map;
     u64 vmcb12_gpa;

+    if (!svm->nested.hsave_msr) {
+        kvm_inject_gp(vcpu, 0);
+        return 1;
+    }
+
     if (is_smm(vcpu)) {
         kvm_queue_exception(vcpu, UD_VECTOR);
         return 1;
···
     kvm_vcpu_unmap(vcpu, &map, true);

     return ret;
+}
+
+/* Copy state save area fields which are handled by VMRUN */
+void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
+                          struct vmcb_save_area *to_save)
+{
+    to_save->es = from_save->es;
+    to_save->cs = from_save->cs;
+    to_save->ss = from_save->ss;
+    to_save->ds = from_save->ds;
+    to_save->gdtr = from_save->gdtr;
+    to_save->idtr = from_save->idtr;
+    to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
+    to_save->efer = from_save->efer;
+    to_save->cr0 = from_save->cr0;
+    to_save->cr3 = from_save->cr3;
+    to_save->cr4 = from_save->cr4;
+    to_save->rax = from_save->rax;
+    to_save->rsp = from_save->rsp;
+    to_save->rip = from_save->rip;
+    to_save->cpl = 0;
 }

 void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
···
     svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;

-    svm->vmcb01.ptr->save.es = save->es;
-    svm->vmcb01.ptr->save.cs = save->cs;
-    svm->vmcb01.ptr->save.ss = save->ss;
-    svm->vmcb01.ptr->save.ds = save->ds;
-    svm->vmcb01.ptr->save.gdtr = save->gdtr;
-    svm->vmcb01.ptr->save.idtr = save->idtr;
-    svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
-    svm->vmcb01.ptr->save.efer = save->efer;
-    svm->vmcb01.ptr->save.cr0 = save->cr0;
-    svm->vmcb01.ptr->save.cr3 = save->cr3;
-    svm->vmcb01.ptr->save.cr4 = save->cr4;
-    svm->vmcb01.ptr->save.rax = save->rax;
-    svm->vmcb01.ptr->save.rsp = save->rsp;
-    svm->vmcb01.ptr->save.rip = save->rip;
-    svm->vmcb01.ptr->save.cpl = 0;
-
+    svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save);
     nested_load_control_from_vmcb12(svm, ctl);

     svm_switch_vmcb(svm, &svm->nested.vmcb02);
-
     nested_vmcb02_prepare_control(svm);
-
     kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
     ret = 0;
 out_free:
+8 -6
arch/x86/kvm/svm/sev.c
···
     /* Pin guest memory */
     guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
                                 PAGE_SIZE, &n, 0);
-    if (!guest_page)
-        return -EFAULT;
+    if (IS_ERR(guest_page))
+        return PTR_ERR(guest_page);

     /* allocate memory for header and transport buffer */
     ret = -ENOMEM;
···
     }

     /* Copy packet header to userspace. */
-    ret = copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
-                       params.hdr_len);
+    if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
+                     params.hdr_len))
+        ret = -EFAULT;

 e_free_trans_data:
     kfree(trans_data);
···
     data.trans_len = params.trans_len;

     /* Pin guest memory */
-    ret = -EFAULT;
     guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
                                 PAGE_SIZE, &n, 0);
-    if (!guest_page)
+    if (IS_ERR(guest_page)) {
+        ret = PTR_ERR(guest_page);
         goto e_free_trans;
+    }

     /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
     data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
+70 -7
arch/x86/kvm/svm/svm.c
···
 bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);

+
+bool intercept_smi = true;
+module_param(intercept_smi, bool, 0444);
+
+
 static bool svm_gp_erratum_intercept = true;

 static u8 rsm_ins_bytes[] = "\x0f\xaa";
···
     svm_set_intercept(svm, INTERCEPT_INTR);
     svm_set_intercept(svm, INTERCEPT_NMI);
-    svm_set_intercept(svm, INTERCEPT_SMI);
+
+    if (intercept_smi)
+        svm_set_intercept(svm, INTERCEPT_SMI);
+
     svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
     svm_set_intercept(svm, INTERCEPT_RDPMC);
     svm_set_intercept(svm, INTERCEPT_CPUID);
···
 {
     struct vcpu_svm *svm = to_svm(vcpu);

-    u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
+    u64 fault_address = svm->vmcb->control.exit_info_2;
     u64 error_code = svm->vmcb->control.exit_info_1;

     trace_kvm_page_fault(fault_address, error_code);
···
 }

 static int nmi_interception(struct kvm_vcpu *vcpu)
+{
+    return 1;
+}
+
+static int smi_interception(struct kvm_vcpu *vcpu)
 {
     return 1;
 }
···
         svm_disable_lbrv(vcpu);
         break;
     case MSR_VM_HSAVE_PA:
-        svm->nested.hsave_msr = data;
+        /*
+         * Old kernels did not validate the value written to
+         * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid
+         * value to allow live migrating buggy or malicious guests
+         * originating from those kernels.
+         */
+        if (!msr->host_initiated && !page_address_valid(vcpu, data))
+            return 1;
+
+        svm->nested.hsave_msr = data & PAGE_MASK;
         break;
     case MSR_VM_CR:
         return svm_set_vm_cr(vcpu, data);
···
     [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
     [SVM_EXIT_INTR]                  = intr_interception,
     [SVM_EXIT_NMI]                   = nmi_interception,
-    [SVM_EXIT_SMI]                   = kvm_emulate_as_nop,
-    [SVM_EXIT_INIT]                  = kvm_emulate_as_nop,
+    [SVM_EXIT_SMI]                   = smi_interception,
     [SVM_EXIT_VINTR]                 = interrupt_window_interception,
     [SVM_EXIT_RDPMC]                 = kvm_emulate_rdpmc,
     [SVM_EXIT_CPUID]                 = kvm_emulate_cpuid,
···
 static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 {
     struct vcpu_svm *svm = to_svm(vcpu);
+    struct kvm_host_map map_save;
     int ret;

     if (is_guest_mode(vcpu)) {
···
         ret = nested_svm_vmexit(svm);
         if (ret)
             return ret;
+
+        /*
+         * KVM uses VMCB01 to store L1 host state while L2 runs but
+         * VMCB01 is going to be used during SMM and thus the state will
+         * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
+         * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
+         * format of the area is identical to guest save area offsetted
+         * by 0x400 (matches the offset of 'struct vmcb_save_area'
+         * within 'struct vmcb'). Note: HSAVE area may also be used by
+         * L1 hypervisor to save additional host context (e.g. KVM does
+         * that, see svm_prepare_guest_switch()) which must be
+         * preserved.
+         */
+        if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+                         &map_save) == -EINVAL)
+            return 1;
+
+        BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
+
+        svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+                             map_save.hva + 0x400);
+
+        kvm_vcpu_unmap(vcpu, &map_save, true);
     }
     return 0;
 }
···
 static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 {
     struct vcpu_svm *svm = to_svm(vcpu);
-    struct kvm_host_map map;
+    struct kvm_host_map map, map_save;
     int ret = 0;

     if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
         u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
         u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
         u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
+        struct vmcb *vmcb12;

         if (guest) {
             if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
···
             if (svm_allocate_nested(svm))
                 return 1;

-            ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva);
+            vmcb12 = map.hva;
+
+            nested_load_control_from_vmcb12(svm, &vmcb12->control);
+
+            ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
             kvm_vcpu_unmap(vcpu, &map, true);
+
+            /*
+             * Restore L1 host state from L1 HSAVE area as VMCB01 was
+             * used during SMM (see svm_enter_smm())
+             */
+            if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+                             &map_save) == -EINVAL)
+                return 1;
+
+            svm_copy_vmrun_state(map_save.hva + 0x400,
+                                 &svm->vmcb01.ptr->save);
+
+            kvm_vcpu_unmap(vcpu, &map_save, true);
         }
     }

+5
arch/x86/kvm/svm/svm.h
···
 #define MSRPM_OFFSETS 16
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
+extern bool intercept_smi;

 /*
  * Clean bits in VMCB.
···
 void svm_free_nested(struct vcpu_svm *svm);
 int svm_allocate_nested(struct vcpu_svm *svm);
 int nested_svm_vmrun(struct kvm_vcpu *vcpu);
+void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
+                          struct vmcb_save_area *to_save);
 void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
 int nested_svm_vmexit(struct vcpu_svm *svm);
···
 int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
                                bool has_error_code, u32 error_code);
 int nested_svm_exit_special(struct vcpu_svm *svm);
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+                                     struct vmcb_control_area *control);
 void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
 void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
 void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
-2
arch/x86/kvm/vmx/vmx.h
···
 #include "vmx_ops.h"
 #include "cpuid.h"

-extern const u32 vmx_msr_index[];
-
 #define MSR_TYPE_R 1
 #define MSR_TYPE_W 2
 #define MSR_TYPE_RW 3
+2 -3
arch/x86/kvm/x86.c
···
         set_debugreg(vcpu->arch.eff_db[3], 3);
         set_debugreg(vcpu->arch.dr6, 6);
         vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
+    } else if (unlikely(hw_breakpoint_active())) {
+        set_debugreg(0, 7);
     }

     for (;;) {
···
     int r;

     rdmsrl_safe(MSR_EFER, &host_efer);
-    if (WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_NX) &&
-                     !(host_efer & EFER_NX)))
-        return -EIO;

     if (boot_cpu_has(X86_FEATURE_XSAVES))
         rdmsrl(MSR_IA32_XSS, host_xss);
+2 -1
tools/testing/selftests/kvm/include/kvm_util.h
···
     VM_MODE_P40V48_64K,
     VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
     VM_MODE_P47V64_4K,
+    VM_MODE_P44V64_4K,
     NUM_VM_MODES,
 };

···
 #elif defined(__s390x__)

-#define VM_MODE_DEFAULT VM_MODE_P47V64_4K
+#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
 #define MIN_PAGE_SHIFT 12U
 #define ptes_per_page(page_size) ((page_size) / 16)

+1 -1
tools/testing/selftests/kvm/lib/aarch64/processor.c
···
 void vm_init_descriptor_tables(struct kvm_vm *vm)
 {
     vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
-                                  vm->page_size, 0, 0);
+                                  vm->page_size);

     *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
 }
+16
tools/testing/selftests/kvm/lib/guest_modes.c
···
     }
 }
 #endif
+#ifdef __s390x__
+    {
+        int kvm_fd, vm_fd;
+        struct kvm_s390_vm_cpu_processor info;
+
+        kvm_fd = open_kvm_dev_path_or_exit();
+        vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
+        kvm_device_access(vm_fd, KVM_S390_VM_CPU_MODEL,
+                          KVM_S390_VM_CPU_PROCESSOR, &info, false);
+        close(vm_fd);
+        close(kvm_fd);
+        /* Starting with z13 we have 47bits of physical address */
+        if (info.ibc >= 0x30)
+            guest_mode_append(VM_MODE_P47V64_4K, true, true);
+    }
+#endif
 }

 void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
+5
tools/testing/selftests/kvm/lib/kvm_util.c
···
     [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
     [VM_MODE_PXXV48_4K]  = "PA-bits:ANY, VA-bits:48, 4K pages",
     [VM_MODE_P47V64_4K]  = "PA-bits:47, VA-bits:64, 4K pages",
+    [VM_MODE_P44V64_4K]  = "PA-bits:44, VA-bits:64, 4K pages",
 };
 _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
                "Missing new mode strings?");
···
     { 40, 48, 0x10000, 16 },
     {  0,  0,  0x1000, 12 },
     { 47, 64,  0x1000, 12 },
+    { 44, 64,  0x1000, 12 },
 };
 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
                "Missing new mode params?");
···
 #endif
         break;
     case VM_MODE_P47V64_4K:
+        vm->pgtable_levels = 5;
+        break;
+    case VM_MODE_P44V64_4K:
         vm->pgtable_levels = 5;
         break;
     default:
+2 -1
tools/testing/selftests/kvm/set_memory_region_test.c
···
             (max_mem_slots - 1), MEM_REGION_SIZE >> 10);

     mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
-               PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+               PROT_READ | PROT_WRITE,
+               MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
     TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
     mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));

+1 -1
tools/testing/selftests/kvm/x86_64/hyperv_features.c
···
     vm_init_descriptor_tables(vm);
     vcpu_init_descriptor_tables(vm, VCPU_ID);
-    vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+    vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);

     pr_info("Testing access to Hyper-V specific MSRs\n");
     guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
+1 -1
tools/testing/selftests/kvm/x86_64/mmu_role_test.c
···
     /* Set up a #PF handler to eat the RSVD #PF and signal all done! */
     vm_init_descriptor_tables(vm);
     vcpu_init_descriptor_tables(vm, VCPU_ID);
-    vm_handle_exception(vm, PF_VECTOR, guest_pf_handler);
+    vm_install_exception_handler(vm, PF_VECTOR, guest_pf_handler);

     r = _vcpu_run(vm, VCPU_ID);
     TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
+64 -6
tools/testing/selftests/kvm/x86_64/smm_test.c
···
                  : "+a" (phase));
 }

-void self_smi(void)
+static void self_smi(void)
 {
     x2apic_write_reg(APIC_ICR,
                      APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
 }

-void guest_code(void *arg)
+static void l2_guest_code(void)
 {
+    sync_with_host(8);
+
+    sync_with_host(10);
+
+    vmcall();
+}
+
+static void guest_code(void *arg)
+{
+#define L2_GUEST_STACK_SIZE 64
+    unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
     uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+    struct svm_test_data *svm = arg;
+    struct vmx_pages *vmx_pages = arg;

     sync_with_host(1);

···
     sync_with_host(4);

     if (arg) {
-        if (cpu_has_svm())
-            generic_svm_setup(arg, NULL, NULL);
-        else
-            GUEST_ASSERT(prepare_for_vmx_operation(arg));
+        if (cpu_has_svm()) {
+            generic_svm_setup(svm, l2_guest_code,
+                              &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+        } else {
+            GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+            GUEST_ASSERT(load_vmcs(vmx_pages));
+            prepare_vmcs(vmx_pages, l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+        }

         sync_with_host(5);

         self_smi();

         sync_with_host(7);
+
+        if (cpu_has_svm()) {
+            run_guest(svm->vmcb, svm->vmcb_gpa);
+            svm->vmcb->save.rip += 3;
+            run_guest(svm->vmcb, svm->vmcb_gpa);
+        } else {
+            vmlaunch();
+            vmresume();
+        }
+
+        /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
+        sync_with_host(12);
     }

     sync_with_host(DONE);
+}
+
+void inject_smi(struct kvm_vm *vm)
+{
+    struct kvm_vcpu_events events;
+
+    vcpu_events_get(vm, VCPU_ID, &events);
+
+    events.smi.pending = 1;
+    events.flags |= KVM_VCPUEVENT_VALID_SMM;
+
+    vcpu_events_set(vm, VCPU_ID, &events);
 }

 int main(int argc, char *argv[])
···
                     stage_reported == SMRAM_STAGE,
                     "Unexpected stage: #%x, got %x",
                     stage, stage_reported);
+
+        /*
+         * Enter SMM during L2 execution and check that we correctly
+         * return from it. Do not perform save/restore while in SMM yet.
+         */
+        if (stage == 8) {
+            inject_smi(vm);
+            continue;
+        }
+
+        /*
+         * Perform save/restore while the guest is in SMM triggered
+         * during L2 execution.
+         */
+        if (stage == 10)
+            inject_smi(vm);

         state = vcpu_save_state(vm, VCPU_ID);
         kvm_vm_release(vm);
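The new inject_smi() helper relies on the selftest library's vcpu_events_get()/vcpu_events_set() wrappers. In terms of raw KVM ioctls it corresponds roughly to the following sketch (error handling omitted; vcpu_fd is assumed to be an already-created KVM vCPU file descriptor):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Pend an SMI on a vCPU through KVM_SET_VCPU_EVENTS, mirroring what the
 * test's inject_smi() does via the selftest wrappers. */
static void inject_smi_raw(int vcpu_fd)
{
    struct kvm_vcpu_events events;

    memset(&events, 0, sizeof(events));
    ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events);

    events.smi.pending = 1;
    events.flags |= KVM_VCPUEVENT_VALID_SMM;

    ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}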
+1 -1
virt/kvm/coalesced_mmio.c
···
             coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
             r = kvm_io_bus_unregister_dev(kvm,
                 zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);
-            kvm_iodevice_destructor(&dev->dev);

             /*
              * On failure, unregister destroys all devices on the
···
              */
             if (r)
                 break;
+            kvm_iodevice_destructor(&dev->dev);
         }
     }

+1 -1
virt/kvm/kvm_main.c
···
         stat_data->kvm = kvm;
         stat_data->desc = pdesc;
         stat_data->kind = KVM_STAT_VCPU;
-        kvm->debugfs_stat_data[i] = stat_data;
+        kvm->debugfs_stat_data[i + kvm_vm_stats_header.num_desc] = stat_data;
         debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
                             kvm->debugfs_dentry, stat_data,
                             &stat_fops_per_vm);