Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: async_pf: Inject 'page ready' event only if 'page not present' was previously injected

'Page not present' event may or may not get injected depending on
guest's state. If the event wasn't injected, there is no need to
inject the corresponding 'page ready' event as the guest may get
confused. E.g. Linux thinks that the corresponding 'page not present'
event wasn't delivered *yet* and allocates a 'dummy entry' for it.
This entry is never freed.

Note, 'wakeup all' events have no corresponding 'page not present'
event and always get injected.

s390 seems to always be able to inject 'page not present', so the
change is effectively a nop there.

Suggested-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20200610175532.779793-2-vkuznets@redhat.com>
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=208081
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Authored by Vitaly Kuznetsov and committed by Paolo Bonzini
2a18b7e7 7863e346

+12 -6
+1 -1
arch/s390/include/asm/kvm_host.h
··· 978 978 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 979 979 struct kvm_async_pf *work); 980 980 981 - void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 981 + bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 982 982 struct kvm_async_pf *work); 983 983 984 984 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+3 -1
arch/s390/kvm/kvm-s390.c
··· 3923 3923 } 3924 3924 } 3925 3925 3926 - void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3926 + bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3927 3927 struct kvm_async_pf *work) 3928 3928 { 3929 3929 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3930 3930 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3931 + 3932 + return true; 3931 3933 } 3932 3934 3933 3935 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+1 -1
arch/x86/include/asm/kvm_host.h
··· 1670 1670 void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, 1671 1671 unsigned long *vcpu_bitmap); 1672 1672 1673 - void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 1673 + bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 1674 1674 struct kvm_async_pf *work); 1675 1675 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 1676 1676 struct kvm_async_pf *work);
+5 -2
arch/x86/kvm/x86.c
··· 10511 10511 return kvm_arch_interrupt_allowed(vcpu); 10512 10512 } 10513 10513 10514 - void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 10514 + bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 10515 10515 struct kvm_async_pf *work) 10516 10516 { 10517 10517 struct x86_exception fault; ··· 10528 10528 fault.address = work->arch.token; 10529 10529 fault.async_page_fault = true; 10530 10530 kvm_inject_page_fault(vcpu, &fault); 10531 + return true; 10531 10532 } else { 10532 10533 /* 10533 10534 * It is not possible to deliver a paravirtualized asynchronous ··· 10539 10538 * fault is retried, hopefully the page will be ready in the host. 10540 10539 */ 10541 10540 kvm_make_request(KVM_REQ_APF_HALT, vcpu); 10541 + return false; 10542 10542 } 10543 10543 } 10544 10544 ··· 10557 10555 kvm_del_async_pf_gfn(vcpu, work->arch.gfn); 10558 10556 trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa); 10559 10557 10560 - if (kvm_pv_async_pf_enabled(vcpu) && 10558 + if ((work->wakeup_all || work->notpresent_injected) && 10559 + kvm_pv_async_pf_enabled(vcpu) && 10561 10560 !apf_put_user_ready(vcpu, work->arch.token)) { 10562 10561 vcpu->arch.apf.pageready_pending = true; 10563 10562 kvm_apic_set_irq(vcpu, &irq, NULL);
+1
include/linux/kvm_host.h
··· 206 206 unsigned long addr; 207 207 struct kvm_arch_async_pf arch; 208 208 bool wakeup_all; 209 + bool notpresent_injected; 209 210 }; 210 211 211 212 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
+1 -1
virt/kvm/async_pf.c
··· 189 189 190 190 list_add_tail(&work->queue, &vcpu->async_pf.queue); 191 191 vcpu->async_pf.queued++; 192 - kvm_arch_async_page_not_present(vcpu, work); 192 + work->notpresent_injected = kvm_arch_async_page_not_present(vcpu, work); 193 193 194 194 schedule_work(&work->work); 195 195