Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: async_pf: Let guest support delivery of async_pf from guest mode

Add another flag bit (bit 2) to MSR_KVM_ASYNC_PF_EN. If bit 2 is 1,
async page faults are delivered to L1 as #PF vmexits; if bit 2 is 0,
kvm_can_do_async_pf() returns false while the vCPU is in guest mode.

This is similar to what svm.c wanted to do all along, but it is only
enabled when Linux is the L1 hypervisor. Foreign hypervisors must never
receive async page faults as vmexits, because they would probably be
very confused by them.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>

Authored by Wanpeng Li and committed by Radim Krčmář
52a5c155 adfe20fb

+16 -7
+3 -2
Documentation/virtual/kvm/msr.txt
··· 166 166 MSR_KVM_ASYNC_PF_EN: 0x4b564d02 167 167 data: Bits 63-6 hold 64-byte aligned physical address of a 168 168 64 byte memory area which must be in guest RAM and must be 169 - zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1 169 + zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1 170 170 when asynchronous page faults are enabled on the vcpu 0 when 171 171 disabled. Bit 1 is 1 if asynchronous page faults can be injected 172 - when vcpu is in cpl == 0. 172 + when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults 173 + are delivered to L1 as #PF vmexits. 173 174 174 175 First 4 byte of 64 byte memory location will be written to by 175 176 the hypervisor at the time of asynchronous page fault (APF)
+1
arch/x86/include/asm/kvm_host.h
··· 653 653 bool send_user_only; 654 654 u32 host_apf_reason; 655 655 unsigned long nested_apf_token; 656 + bool delivery_as_pf_vmexit; 656 657 } apf; 657 658 658 659 /* OSVW MSRs (AMD only) */
+1
arch/x86/include/uapi/asm/kvm_para.h
··· 67 67 68 68 #define KVM_ASYNC_PF_ENABLED (1 << 0) 69 69 #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) 70 + #define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2) 70 71 71 72 /* Operations for KVM_HC_MMU_OP */ 72 73 #define KVM_MMU_OP_WRITE_PTE 1
+6 -1
arch/x86/kernel/kvm.c
··· 330 330 #ifdef CONFIG_PREEMPT 331 331 pa |= KVM_ASYNC_PF_SEND_ALWAYS; 332 332 #endif 333 - wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); 333 + pa |= KVM_ASYNC_PF_ENABLED; 334 + 335 + /* Async page fault support for L1 hypervisor is optional */ 336 + if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, 337 + (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) 338 + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); 334 339 __this_cpu_write(apf_reason.enabled, 1); 335 340 printk(KERN_INFO"KVM setup async PF for cpu %d\n", 336 341 smp_processor_id());
+1 -1
arch/x86/kvm/mmu.c
··· 3749 3749 kvm_event_needs_reinjection(vcpu))) 3750 3750 return false; 3751 3751 3752 - if (is_guest_mode(vcpu)) 3752 + if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) 3753 3753 return false; 3754 3754 3755 3755 return kvm_x86_ops->interrupt_allowed(vcpu);
+1 -1
arch/x86/kvm/vmx.c
··· 8037 8037 if (is_nmi(intr_info)) 8038 8038 return false; 8039 8039 else if (is_page_fault(intr_info)) 8040 - return enable_ept; 8040 + return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept; 8041 8041 else if (is_no_device(intr_info) && 8042 8042 !(vmcs12->guest_cr0 & X86_CR0_TS)) 8043 8043 return false;
+3 -2
arch/x86/kvm/x86.c
··· 2063 2063 { 2064 2064 gpa_t gpa = data & ~0x3f; 2065 2065 2066 - /* Bits 2:5 are reserved, Should be zero */ 2067 - if (data & 0x3c) 2066 + /* Bits 3:5 are reserved, Should be zero */ 2067 + if (data & 0x38) 2068 2068 return 1; 2069 2069 2070 2070 vcpu->arch.apf.msr_val = data; ··· 2080 2080 return 1; 2081 2081 2082 2082 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); 2083 + vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; 2083 2084 kvm_async_pf_wakeup_all(vcpu); 2084 2085 return 0; 2085 2086 }