Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

KVM: selftests: VMX preemption timer migration test

When a nested VM with a VMX-preemption timer is migrated, verify that the
nested VM and its parent VM observe the VMX-preemption timer exit close to
the original expiration deadline.

Signed-off-by: Makarand Sonare <makarandsonare@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Message-Id: <20200526215107.205814-3-makarandsonare@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Authored by Makarand Sonare, committed by Paolo Bonzini
8d7fbf01 850448f3
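
In short, the host side of the new test enforces a two-sided bound on when the preemption-timer exit is observed. A minimal sketch of that invariant, with hypothetical variable names (the real test reports these values to the host through ucalls and checks them there):

	/*
	 * Hypothetical names; the real values travel via GUEST_SYNC_ARGS.
	 * L1 must not see the exit before its own deadline (the timer did
	 * not fire early), and L2's last TSC read before the exit must
	 * still be below L2's deadline (the timer was not restarted with
	 * the full value after migration).
	 */
	TEST_ASSERT(l1_pt_expiry_tsc >= l1_tsc_deadline, "L1: timer fired too early");
	TEST_ASSERT(l2_pt_expiry_tsc < l2_tsc_deadline, "L2: timer fired too late");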

8 files changed, 298 insertions(+), 12 deletions(-)
+4 -8
arch/x86/kvm/vmx/nested.c
@@ -2091,20 +2091,16 @@
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-	u64 timer_value = 0;
 
 	u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
 			VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
 
 	if (!vmx->nested.has_preemption_timer_deadline) {
-		timer_value = vmcs12->vmx_preemption_timer_value;
-		vmx->nested.preemption_timer_deadline = timer_value +
-				l1_scaled_tsc;
+		vmx->nested.preemption_timer_deadline =
+			vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
 		vmx->nested.has_preemption_timer_deadline = true;
-	} else if (l1_scaled_tsc < vmx->nested.preemption_timer_deadline)
-		timer_value = vmx->nested.preemption_timer_deadline -
-			l1_scaled_tsc;
-	return timer_value;
+	}
+	return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
 }
 
 static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
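
With this change the helper latches an absolute deadline once and only subtracts from it afterwards. A cleaned-up view of the resulting function, assuming (as in current kernel trees) that the enclosing function is vmx_calc_preemption_timer_value; the comments are added for this write-up:

	static u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu)
	{
		struct vcpu_vmx *vmx = to_vmx(vcpu);
		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

		u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
				VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;

		/*
		 * On the first nested VM-Enter, latch an absolute deadline in
		 * L1-scaled TSC units; later entries (for example after
		 * migration) reuse it, so the timer keeps counting down
		 * instead of restarting with the full vmcs12 value.
		 */
		if (!vmx->nested.has_preemption_timer_deadline) {
			vmx->nested.preemption_timer_deadline =
				vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
			vmx->nested.has_preemption_timer_deadline = true;
		}
		return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
	}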
+1
tools/arch/x86/include/uapi/asm/kvm.h
@@ -400,6 +400,7 @@
 struct kvm_vmx_nested_state_data {
 	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
 	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+	__u64 preemption_timer_deadline;
 };
 
 struct kvm_vmx_nested_state_hdr {
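
For context, this field rides along in the nested-state blob a VMM saves and restores around migration. A rough, illustrative sketch of that flow; only the struct field and the KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE ioctls come from the uapi, while the helper name, the state_size parameter, and the omitted error handling are hypothetical:

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Hypothetical helper: carry nested VMX state, including the new
	 * preemption_timer_deadline, from a source vCPU fd to a destination
	 * vCPU fd. state_size must be large enough for the VMX data area. */
	static void migrate_nested_state(int src_vcpu_fd, int dst_vcpu_fd,
					 size_t state_size)
	{
		struct kvm_nested_state *state = calloc(1, state_size);

		state->size = state_size;
		ioctl(src_vcpu_fd, KVM_GET_NESTED_STATE, state);

		printf("saved deadline: %llu\n", (unsigned long long)
		       state->data.vmx[0].preemption_timer_deadline);

		ioctl(dst_vcpu_fd, KVM_SET_NESTED_STATE, state);
		free(state);
	}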
+1
tools/testing/selftests/kvm/.gitignore
@@ -10,6 +10,7 @@
 /x86_64/set_sregs_test
 /x86_64/smm_test
 /x86_64/state_test
+/x86_64/vmx_preemption_timer_test
 /x86_64/svm_vmcall_test
 /x86_64/sync_regs_test
 /x86_64/vmx_close_while_nested_test
+1
tools/testing/selftests/kvm/Makefile
@@ -46,6 +46,7 @@
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
 TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
+2
tools/testing/selftests/kvm/include/kvm_util.h
@@ -314,6 +314,8 @@
 void ucall(uint64_t cmd, int nargs, ...);
 uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
 
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
+				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
 #define GUEST_DONE()		ucall(UCALL_DONE, 0)
 #define __GUEST_ASSERT(_condition, _nargs, _args...) do {	\
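
GUEST_SYNC_ARGS lets the guest attach four extra payload values to a single sync point. A short sketch of both halves, mirroring how the new test uses it; on the host the values land in uc.args[2]..uc.args[5], after the "hello" marker and the stage number:

	/* Guest side: report expiry times and deadlines along with stage 2. */
	GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
			l2_vmx_pt_finish, l2_tsc_deadline);

	/* Host side: retrieve them from the ucall that caused the exit. */
	struct ucall uc;

	if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC)
		pr_info("L1 expiry %lu, L1 deadline %lu\n",
			uc.args[2], uc.args[3]);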
+7 -4
tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -79,13 +79,16 @@
 static inline uint64_t rdtsc(void)
 {
 	uint32_t eax, edx;
-
+	uint64_t tsc_val;
 	/*
 	 * The lfence is to wait (on Intel CPUs) until all previous
-	 * instructions have been executed.
+	 * instructions have been executed. If software requires RDTSC to be
+	 * executed prior to execution of any subsequent instruction, it can
+	 * execute LFENCE immediately after RDTSC
 	 */
-	__asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
-	return ((uint64_t)edx) << 32 | eax;
+	__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+	tsc_val = ((uint64_t)edx) << 32 | eax;
+	return tsc_val;
 }
 
 static inline uint64_t rdtscp(uint32_t *aux)
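
The trailing LFENCE keeps the TSC read from completing after later instructions, which matters when the result brackets a VM-entry. A related detail for reading the test below: raw TSC values are truncated to the preemption-timer granularity before deadlines are computed, roughly as in this sketch (variable names are illustrative):

	/*
	 * IA32_VMX_MISC[4:0] is the TSC-to-preemption-timer rate: the timer
	 * counts down once every 2^rate TSC cycles. Truncating the start
	 * stamp to that granularity keeps the deadline arithmetic consistent
	 * with the hardware countdown.
	 */
	u32 vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
	u64 pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
	u64 tsc_deadline = pt_start + (PREEMPTION_TIMER_VALUE << vmx_pt_rate);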
+27
tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -575,6 +575,33 @@
 	void *eptp;
 };
 
+union vmx_basic {
+	u64 val;
+	struct {
+		u32 revision;
+		u32	size:13,
+			reserved1:3,
+			width:1,
+			dual:1,
+			type:4,
+			insouts:1,
+			ctrl:1,
+			vm_entry_exception_ctrl:1,
+			reserved2:7;
+	};
+};
+
+union vmx_ctrl_msr {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+};
+
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
 struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
 bool prepare_for_vmx_operation(struct vmx_pages *vmx);
 void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
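
These unions give guest code a structured view of the VMX capability MSRs. Their intended use, shown here as a fragment mirroring l1_guest_code in the new test (it runs inside the L1 guest function, hence the bare return):

	/*
	 * Decode IA32_VMX_BASIC, then read the pin-based and VM-exit control
	 * capability MSRs (the "true" variants when basic.ctrl says they
	 * exist). Bail out if the preemption timer cannot be enabled or its
	 * value cannot be saved on VM-exit.
	 */
	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
					    : MSR_IA32_VMX_PINBASED_CTLS);
	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
					     : MSR_IA32_VMX_EXIT_CTLS);

	if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
	    !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
		return;	/* preemption timer not usable; skip the test */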
+255
tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMX-preemption timer test
 *
 * Copyright (C) 2020, Google, LLC.
 *
 * Test to ensure the VM-Enter after migration doesn't
 * incorrectly restart the timer with the full timer
 * value instead of the partially decayed timer value
 *
 */
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "test_util.h"

#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"

#define VCPU_ID		5
#define PREEMPTION_TIMER_VALUE			100000000ull
#define PREEMPTION_TIMER_VALUE_THRESHOLD1	 80000000ull

u32 vmx_pt_rate;
bool l2_save_restore_done;
static u64 l2_vmx_pt_start;
volatile u64 l2_vmx_pt_finish;

void l2_guest_code(void)
{
	u64 vmx_pt_delta;

	vmcall();
	l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;

	/*
	 * Wait until the 1st threshold has passed
	 */
	do {
		l2_vmx_pt_finish = rdtsc();
		vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
				vmx_pt_rate;
	} while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);

	/*
	 * Force L2 through Save and Restore cycle
	 */
	GUEST_SYNC(1);

	l2_save_restore_done = 1;

	/*
	 * Now wait for the preemption timer to fire and
	 * exit to L1
	 */
	while ((l2_vmx_pt_finish = rdtsc()))
		;
}

void l1_guest_code(struct vmx_pages *vmx_pages)
{
#define L2_GUEST_STACK_SIZE 64
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
	u64 l1_vmx_pt_start;
	u64 l1_vmx_pt_finish;
	u64 l1_tsc_deadline, l2_tsc_deadline;

	GUEST_ASSERT(vmx_pages->vmcs_gpa);
	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
	GUEST_ASSERT(load_vmcs(vmx_pages));
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	prepare_vmcs(vmx_pages, l2_guest_code,
		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	/*
	 * Check for Preemption timer support
	 */
	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
			: MSR_IA32_VMX_PINBASED_CTLS);
	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
			: MSR_IA32_VMX_EXIT_CTLS);

	if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
	    !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
		return;

	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));

	/*
	 * Turn on PIN control and resume the guest
	 */
	GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
			      vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
			      PIN_BASED_VMX_PREEMPTION_TIMER));

	GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
			      PREEMPTION_TIMER_VALUE));

	vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;

	l2_save_restore_done = 0;

	l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;

	GUEST_ASSERT(!vmresume());

	l1_vmx_pt_finish = rdtsc();

	/*
	 * Ensure exit from L2 happens after L2 goes through
	 * save and restore
	 */
	GUEST_ASSERT(l2_save_restore_done);

	/*
	 * Ensure the exit from L2 is due to preemption timer expiry
	 */
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);

	l1_tsc_deadline = l1_vmx_pt_start +
		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);

	l2_tsc_deadline = l2_vmx_pt_start +
		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);

	/*
	 * Sync with the host and pass the l1|l2 pt_expiry_finish times and
	 * tsc deadlines so that host can verify they are as expected
	 */
	GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
			l2_vmx_pt_finish, l2_tsc_deadline);
}

void guest_code(struct vmx_pages *vmx_pages)
{
	if (vmx_pages)
		l1_guest_code(vmx_pages);

	GUEST_DONE();
}

int main(int argc, char *argv[])
{
	vm_vaddr_t vmx_pages_gva = 0;

	struct kvm_regs regs1, regs2;
	struct kvm_vm *vm;
	struct kvm_run *run;
	struct kvm_x86_state *state;
	struct ucall uc;
	int stage;

	/*
	 * AMD currently does not implement any VMX features, so for now we
	 * just early out.
	 */
	nested_vmx_check_supported();

	/* Create VM */
	vm = vm_create_default(VCPU_ID, 0, guest_code);
	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
	run = vcpu_state(vm, VCPU_ID);

	vcpu_regs_get(vm, VCPU_ID, &regs1);

	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
		vcpu_alloc_vmx(vm, &vmx_pages_gva);
		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
	} else {
		pr_info("will skip vmx preemption timer checks\n");
		goto done;
	}

	for (stage = 1;; stage++) {
		_vcpu_run(vm, VCPU_ID);
		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
			    "Stage %d: unexpected exit reason: %u (%s),\n",
			    stage, run->exit_reason,
			    exit_reason_str(run->exit_reason));

		switch (get_ucall(vm, VCPU_ID, &uc)) {
		case UCALL_ABORT:
			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
				  __FILE__, uc.args[1]);
			/* NOT REACHED */
		case UCALL_SYNC:
			break;
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}

		/* UCALL_SYNC is handled here.  */
		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
			    stage, (ulong)uc.args[1]);
		/*
		 * If this is stage 2 then we should verify the vmx pt expiry
		 * is as expected.
		 * From L1's perspective verify Preemption timer hasn't
		 * expired too early.
		 * From L2's perspective verify Preemption timer hasn't
		 * expired too late.
		 */
		if (stage == 2) {

			pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
				stage, uc.args[2], uc.args[3]);

			pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
				stage, uc.args[4], uc.args[5]);

			TEST_ASSERT(uc.args[2] >= uc.args[3],
				    "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
				    stage, uc.args[2], uc.args[3]);

			TEST_ASSERT(uc.args[4] < uc.args[5],
				    "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
				    stage, uc.args[4], uc.args[5]);
		}

		state = vcpu_save_state(vm, VCPU_ID);
		memset(&regs1, 0, sizeof(regs1));
		vcpu_regs_get(vm, VCPU_ID, &regs1);

		kvm_vm_release(vm);

		/* Restore state in a new VM.  */
		kvm_vm_restart(vm, O_RDWR);
		vm_vcpu_add(vm, VCPU_ID);
		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
		vcpu_load_state(vm, VCPU_ID, state);
		run = vcpu_state(vm, VCPU_ID);
		free(state);

		memset(&regs2, 0, sizeof(regs2));
		vcpu_regs_get(vm, VCPU_ID, &regs2);
		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
			    (ulong) regs2.rdi, (ulong) regs2.rsi);
	}

done:
	kvm_vm_free(vm);
}