Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: VMX: Properly handle kvm_read/write_guest_virt*() result

Syzbot reports the following issue:

WARNING: CPU: 0 PID: 6819 at arch/x86/kvm/x86.c:618
kvm_inject_emulated_page_fault+0x210/0x290 arch/x86/kvm/x86.c:618
...
Call Trace:
...
RIP: 0010:kvm_inject_emulated_page_fault+0x210/0x290 arch/x86/kvm/x86.c:618
...
nested_vmx_get_vmptr+0x1f9/0x2a0 arch/x86/kvm/vmx/nested.c:4638
handle_vmon arch/x86/kvm/vmx/nested.c:4767 [inline]
handle_vmon+0x168/0x3a0 arch/x86/kvm/vmx/nested.c:4728
vmx_handle_exit+0x29c/0x1260 arch/x86/kvm/vmx/vmx.c:6067

'exception' we're trying to inject with kvm_inject_emulated_page_fault()
comes from:

nested_vmx_get_vmptr()
kvm_read_guest_virt()
kvm_read_guest_virt_helper()
vcpu->arch.walk_mmu->gva_to_gpa()

but it is only set when GVA to GPA conversion fails. In case it doesn't but
we still fail kvm_vcpu_read_guest_page(), X86EMUL_IO_NEEDED is returned and
nested_vmx_get_vmptr() calls kvm_inject_emulated_page_fault() with zeroed
'exception'. This happens when the argument is MMIO.

Paolo also noticed that nested_vmx_get_vmptr() is not the only place in
KVM code where kvm_read/write_guest_virt*() return result is mishandled.
VMX instructions along with INVPCID have the same issue. This was already
noticed before, e.g. see commit 541ab2aeb282 ("KVM: x86: work around
leak of uninitialized stack contents") but was never fully fixed.

KVM could've handled the request correctly by going to userspace and
performing I/O but there doesn't seem to be a good need for such requests
in the first place.

Introduce vmx_handle_memory_failure() as an interim solution.

Note, nested_vmx_get_vmptr() now has three possible outcomes: OK, PF,
KVM_EXIT_INTERNAL_ERROR and callers need to know if userspace exit is
needed (for KVM_EXIT_INTERNAL_ERROR) in case of failure. We don't seem
to have a good enum describing this tristate, just add "int *ret" to
nested_vmx_get_vmptr() interface to pass the information.

Reported-by: syzbot+2a7156e11dc199bdbd8a@syzkaller.appspotmail.com
Suggested-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20200605115906.532682-1-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

authored by

Vitaly Kuznetsov and committed by
Paolo Bonzini
7a35e515 34d2618d

+74 -40
+42 -36
arch/x86/kvm/vmx/nested.c
··· 4624 4624 } 4625 4625 } 4626 4626 4627 - static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) 4627 + static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer, 4628 + int *ret) 4628 4629 { 4629 4630 gva_t gva; 4630 4631 struct x86_exception e; 4632 + int r; 4631 4633 4632 4634 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), 4633 4635 vmcs_read32(VMX_INSTRUCTION_INFO), false, 4634 - sizeof(*vmpointer), &gva)) 4635 - return 1; 4636 + sizeof(*vmpointer), &gva)) { 4637 + *ret = 1; 4638 + return -EINVAL; 4639 + } 4636 4640 4637 - if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) { 4638 - kvm_inject_emulated_page_fault(vcpu, &e); 4639 - return 1; 4641 + r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e); 4642 + if (r != X86EMUL_CONTINUE) { 4643 + *ret = vmx_handle_memory_failure(vcpu, r, &e); 4644 + return -EINVAL; 4640 4645 } 4641 4646 4642 4647 return 0; ··· 4769 4764 return 1; 4770 4765 } 4771 4766 4772 - if (nested_vmx_get_vmptr(vcpu, &vmptr)) 4773 - return 1; 4767 + if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret)) 4768 + return ret; 4774 4769 4775 4770 /* 4776 4771 * SDM 3: 24.11.5 ··· 4843 4838 u32 zero = 0; 4844 4839 gpa_t vmptr; 4845 4840 u64 evmcs_gpa; 4841 + int r; 4846 4842 4847 4843 if (!nested_vmx_check_permission(vcpu)) 4848 4844 return 1; 4849 4845 4850 - if (nested_vmx_get_vmptr(vcpu, &vmptr)) 4851 - return 1; 4846 + if (nested_vmx_get_vmptr(vcpu, &vmptr, &r)) 4847 + return r; 4852 4848 4853 4849 if (!page_address_valid(vcpu, vmptr)) 4854 4850 return nested_vmx_failValid(vcpu, ··· 4908 4902 u64 value; 4909 4903 gva_t gva = 0; 4910 4904 short offset; 4911 - int len; 4905 + int len, r; 4912 4906 4913 4907 if (!nested_vmx_check_permission(vcpu)) 4914 4908 return 1; ··· 4949 4943 instr_info, true, len, &gva)) 4950 4944 return 1; 4951 4945 /* _system ok, nested_vmx_check_permission has verified cpl=0 */ 4952 - if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) { 4953 - 
kvm_inject_emulated_page_fault(vcpu, &e); 4954 - return 1; 4955 - } 4946 + r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e); 4947 + if (r != X86EMUL_CONTINUE) 4948 + return vmx_handle_memory_failure(vcpu, r, &e); 4956 4949 } 4957 4950 4958 4951 return nested_vmx_succeed(vcpu); ··· 4992 4987 unsigned long field; 4993 4988 short offset; 4994 4989 gva_t gva; 4995 - int len; 4990 + int len, r; 4996 4991 4997 4992 /* 4998 4993 * The value to write might be 32 or 64 bits, depending on L1's long ··· 5022 5017 if (get_vmx_mem_address(vcpu, exit_qualification, 5023 5018 instr_info, false, len, &gva)) 5024 5019 return 1; 5025 - if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) { 5026 - kvm_inject_emulated_page_fault(vcpu, &e); 5027 - return 1; 5028 - } 5020 + r = kvm_read_guest_virt(vcpu, gva, &value, len, &e); 5021 + if (r != X86EMUL_CONTINUE) 5022 + return vmx_handle_memory_failure(vcpu, r, &e); 5029 5023 } 5030 5024 5031 5025 field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf)); ··· 5107 5103 { 5108 5104 struct vcpu_vmx *vmx = to_vmx(vcpu); 5109 5105 gpa_t vmptr; 5106 + int r; 5110 5107 5111 5108 if (!nested_vmx_check_permission(vcpu)) 5112 5109 return 1; 5113 5110 5114 - if (nested_vmx_get_vmptr(vcpu, &vmptr)) 5115 - return 1; 5111 + if (nested_vmx_get_vmptr(vcpu, &vmptr, &r)) 5112 + return r; 5116 5113 5117 5114 if (!page_address_valid(vcpu, vmptr)) 5118 5115 return nested_vmx_failValid(vcpu, ··· 5175 5170 gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; 5176 5171 struct x86_exception e; 5177 5172 gva_t gva; 5173 + int r; 5178 5174 5179 5175 if (!nested_vmx_check_permission(vcpu)) 5180 5176 return 1; ··· 5187 5181 true, sizeof(gpa_t), &gva)) 5188 5182 return 1; 5189 5183 /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ 5190 - if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr, 5191 - sizeof(gpa_t), &e)) { 5192 - kvm_inject_emulated_page_fault(vcpu, &e); 5193 - return 1; 5194 - } 5184 + r = 
kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr, 5185 + sizeof(gpa_t), &e); 5186 + if (r != X86EMUL_CONTINUE) 5187 + return vmx_handle_memory_failure(vcpu, r, &e); 5188 + 5195 5189 return nested_vmx_succeed(vcpu); 5196 5190 } 5197 5191 ··· 5215 5209 struct { 5216 5210 u64 eptp, gpa; 5217 5211 } operand; 5218 - int i; 5212 + int i, r; 5219 5213 5220 5214 if (!(vmx->nested.msrs.secondary_ctls_high & 5221 5215 SECONDARY_EXEC_ENABLE_EPT) || ··· 5242 5236 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), 5243 5237 vmx_instruction_info, false, sizeof(operand), &gva)) 5244 5238 return 1; 5245 - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { 5246 - kvm_inject_emulated_page_fault(vcpu, &e); 5247 - return 1; 5248 - } 5239 + r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e); 5240 + if (r != X86EMUL_CONTINUE) 5241 + return vmx_handle_memory_failure(vcpu, r, &e); 5249 5242 5250 5243 /* 5251 5244 * Nested EPT roots are always held through guest_mmu, ··· 5296 5291 u64 gla; 5297 5292 } operand; 5298 5293 u16 vpid02; 5294 + int r; 5299 5295 5300 5296 if (!(vmx->nested.msrs.secondary_ctls_high & 5301 5297 SECONDARY_EXEC_ENABLE_VPID) || ··· 5324 5318 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), 5325 5319 vmx_instruction_info, false, sizeof(operand), &gva)) 5326 5320 return 1; 5327 - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { 5328 - kvm_inject_emulated_page_fault(vcpu, &e); 5329 - return 1; 5330 - } 5321 + r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e); 5322 + if (r != X86EMUL_CONTINUE) 5323 + return vmx_handle_memory_failure(vcpu, r, &e); 5324 + 5331 5325 if (operand.vpid >> 16) 5332 5326 return nested_vmx_failValid(vcpu, 5333 5327 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+30 -4
arch/x86/kvm/vmx/vmx.c
··· 1600 1600 return 1; 1601 1601 } 1602 1602 1603 + /* 1604 + * Handles kvm_read/write_guest_virt*() result and either injects #PF or returns 1605 + * KVM_EXIT_INTERNAL_ERROR for cases not currently handled by KVM. Return value 1606 + * indicates whether exit to userspace is needed. 1607 + */ 1608 + int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r, 1609 + struct x86_exception *e) 1610 + { 1611 + if (r == X86EMUL_PROPAGATE_FAULT) { 1612 + kvm_inject_emulated_page_fault(vcpu, e); 1613 + return 1; 1614 + } 1615 + 1616 + /* 1617 + * In case kvm_read/write_guest_virt*() failed with X86EMUL_IO_NEEDED 1618 + * while handling a VMX instruction KVM could've handled the request 1619 + * correctly by exiting to userspace and performing I/O but there 1620 + * doesn't seem to be a real use-case behind such requests, just return 1621 + * KVM_EXIT_INTERNAL_ERROR for now. 1622 + */ 1623 + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 1624 + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 1625 + vcpu->run->internal.ndata = 0; 1626 + 1627 + return 0; 1628 + } 1603 1629 1604 1630 /* 1605 1631 * Recognizes a pending MTF VM-exit and records the nested state for later ··· 5512 5486 u64 pcid; 5513 5487 u64 gla; 5514 5488 } operand; 5489 + int r; 5515 5490 5516 5491 if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { 5517 5492 kvm_queue_exception(vcpu, UD_VECTOR); ··· 5535 5508 sizeof(operand), &gva)) 5536 5509 return 1; 5537 5510 5538 - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { 5539 - kvm_inject_emulated_page_fault(vcpu, &e); 5540 - return 1; 5541 - } 5511 + r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e); 5512 + if (r != X86EMUL_CONTINUE) 5513 + return vmx_handle_memory_failure(vcpu, r, &e); 5542 5514 5543 5515 if (operand.pcid >> 12 != 0) { 5544 5516 kvm_inject_gp(vcpu, 0);
+2
arch/x86/kvm/vmx/vmx.h
··· 355 355 void pt_update_intercept_for_msr(struct vcpu_vmx *vmx); 356 356 void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); 357 357 int vmx_find_msr_index(struct vmx_msrs *m, u32 msr); 358 + int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r, 359 + struct x86_exception *e); 358 360 359 361 #define POSTED_INTR_ON 0 360 362 #define POSTED_INTR_SN 1