Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: x86: Add KVM_GET/SET_VCPU_EVENTS

This new IOCTL exports all yet user-invisible states related to
exceptions, interrupts, and NMIs. Together with appropriate user space
changes, this fixes sporadic problems of vmsave/restore, live migration
and system reset.

[avi: future-proof abi by adding a flags field]

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

authored by

Jan Kiszka and committed by
Avi Kivity
3cfc3092 65ac7264

+214
+49
Documentation/kvm/api.txt
··· 653 653 __u32 pad[9]; 654 654 }; 655 655 656 + 4.29 KVM_GET_VCPU_EVENTS 657 + 658 + Capability: KVM_CAP_VCPU_EVENTS 659 + Architectures: x86 660 + Type: vcpu ioctl 661 + Parameters: struct kvm_vcpu_events (out) 662 + Returns: 0 on success, -1 on error 663 + 664 + Gets currently pending exceptions, interrupts, and NMIs as well as related 665 + states of the vcpu. 666 + 667 + struct kvm_vcpu_events { 668 + struct { 669 + __u8 injected; 670 + __u8 nr; 671 + __u8 has_error_code; 672 + __u8 pad; 673 + __u32 error_code; 674 + } exception; 675 + struct { 676 + __u8 injected; 677 + __u8 nr; 678 + __u8 soft; 679 + __u8 pad; 680 + } interrupt; 681 + struct { 682 + __u8 injected; 683 + __u8 pending; 684 + __u8 masked; 685 + __u8 pad; 686 + } nmi; 687 + __u32 sipi_vector; 688 + __u32 flags; /* must be zero */ 689 + }; 690 + 691 + 4.30 KVM_SET_VCPU_EVENTS 692 + 693 + Capability: KVM_CAP_VCPU_EVENTS 694 + Architectures: x86 695 + Type: vcpu ioctl 696 + Parameters: struct kvm_vcpu_events (in) 697 + Returns: 0 on success, -1 on error 698 + 699 + Sets pending exceptions, interrupts, and NMIs as well as related states of the 700 + vcpu. 701 + 702 + See KVM_GET_VCPU_EVENTS for the data structure. 703 + 704 + 656 705 5. The kvm_run structure 657 706 658 707 Application code obtains a pointer to the kvm_run structure by
+28
arch/x86/include/asm/kvm.h
··· 20 20 #define __KVM_HAVE_MCE 21 21 #define __KVM_HAVE_PIT_STATE2 22 22 #define __KVM_HAVE_XEN_HVM 23 + #define __KVM_HAVE_VCPU_EVENTS 23 24 24 25 /* Architectural interrupt line count. */ 25 26 #define KVM_NR_INTERRUPTS 256 ··· 253 252 __u8 pit_reinject; 254 253 __u8 reserved[31]; 255 254 }; 255 + 256 + /* for KVM_GET/SET_VCPU_EVENTS */ 257 + struct kvm_vcpu_events { 258 + struct { 259 + __u8 injected; 260 + __u8 nr; 261 + __u8 has_error_code; 262 + __u8 pad; 263 + __u32 error_code; 264 + } exception; 265 + struct { 266 + __u8 injected; 267 + __u8 nr; 268 + __u8 soft; 269 + __u8 pad; 270 + } interrupt; 271 + struct { 272 + __u8 injected; 273 + __u8 pending; 274 + __u8 masked; 275 + __u8 pad; 276 + } nmi; 277 + __u32 sipi_vector; 278 + __u32 flags; 279 + __u32 reserved[10]; 280 + }; 281 + 256 282 #endif /* _ASM_X86_KVM_H */
+2
arch/x86/include/asm/kvm_host.h
··· 523 523 bool has_error_code, u32 error_code); 524 524 int (*interrupt_allowed)(struct kvm_vcpu *vcpu); 525 525 int (*nmi_allowed)(struct kvm_vcpu *vcpu); 526 + bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); 527 + void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); 526 528 void (*enable_nmi_window)(struct kvm_vcpu *vcpu); 527 529 void (*enable_irq_window)(struct kvm_vcpu *vcpu); 528 530 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+22
arch/x86/kvm/svm.c
··· 2499 2499 !(svm->vcpu.arch.hflags & HF_NMI_MASK); 2500 2500 } 2501 2501 2502 + static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) 2503 + { 2504 + struct vcpu_svm *svm = to_svm(vcpu); 2505 + 2506 + return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); 2507 + } 2508 + 2509 + static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) 2510 + { 2511 + struct vcpu_svm *svm = to_svm(vcpu); 2512 + 2513 + if (masked) { 2514 + svm->vcpu.arch.hflags |= HF_NMI_MASK; 2515 + svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); 2516 + } else { 2517 + svm->vcpu.arch.hflags &= ~HF_NMI_MASK; 2518 + svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); 2519 + } 2520 + } 2521 + 2502 2522 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) 2503 2523 { 2504 2524 struct vcpu_svm *svm = to_svm(vcpu); ··· 2966 2946 .queue_exception = svm_queue_exception, 2967 2947 .interrupt_allowed = svm_interrupt_allowed, 2968 2948 .nmi_allowed = svm_nmi_allowed, 2949 + .get_nmi_mask = svm_get_nmi_mask, 2950 + .set_nmi_mask = svm_set_nmi_mask, 2969 2951 .enable_nmi_window = enable_nmi_window, 2970 2952 .enable_irq_window = enable_irq_window, 2971 2953 .update_cr8_intercept = update_cr8_intercept,
+30
arch/x86/kvm/vmx.c
··· 2639 2639 GUEST_INTR_STATE_NMI)); 2640 2640 } 2641 2641 2642 + static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) 2643 + { 2644 + if (!cpu_has_virtual_nmis()) 2645 + return to_vmx(vcpu)->soft_vnmi_blocked; 2646 + else 2647 + return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 2648 + GUEST_INTR_STATE_NMI); 2649 + } 2650 + 2651 + static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) 2652 + { 2653 + struct vcpu_vmx *vmx = to_vmx(vcpu); 2654 + 2655 + if (!cpu_has_virtual_nmis()) { 2656 + if (vmx->soft_vnmi_blocked != masked) { 2657 + vmx->soft_vnmi_blocked = masked; 2658 + vmx->vnmi_blocked_time = 0; 2659 + } 2660 + } else { 2661 + if (masked) 2662 + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 2663 + GUEST_INTR_STATE_NMI); 2664 + else 2665 + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, 2666 + GUEST_INTR_STATE_NMI); 2667 + } 2668 + } 2669 + 2642 2670 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) 2643 2671 { 2644 2672 return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && ··· 4013 3985 .queue_exception = vmx_queue_exception, 4014 3986 .interrupt_allowed = vmx_interrupt_allowed, 4015 3987 .nmi_allowed = vmx_nmi_allowed, 3988 + .get_nmi_mask = vmx_get_nmi_mask, 3989 + .set_nmi_mask = vmx_set_nmi_mask, 4016 3990 .enable_nmi_window = enable_nmi_window, 4017 3991 .enable_irq_window = enable_irq_window, 4018 3992 .update_cr8_intercept = update_cr8_intercept,
+77
arch/x86/kvm/x86.c
··· 1342 1342 case KVM_CAP_SET_IDENTITY_MAP_ADDR: 1343 1343 case KVM_CAP_XEN_HVM: 1344 1344 case KVM_CAP_ADJUST_CLOCK: 1345 + case KVM_CAP_VCPU_EVENTS: 1345 1346 r = 1; 1346 1347 break; 1347 1348 case KVM_CAP_COALESCED_MMIO: ··· 1884 1883 return 0; 1885 1884 } 1886 1885 1886 + static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, 1887 + struct kvm_vcpu_events *events) 1888 + { 1889 + vcpu_load(vcpu); 1890 + 1891 + events->exception.injected = vcpu->arch.exception.pending; 1892 + events->exception.nr = vcpu->arch.exception.nr; 1893 + events->exception.has_error_code = vcpu->arch.exception.has_error_code; 1894 + events->exception.error_code = vcpu->arch.exception.error_code; 1895 + 1896 + events->interrupt.injected = vcpu->arch.interrupt.pending; 1897 + events->interrupt.nr = vcpu->arch.interrupt.nr; 1898 + events->interrupt.soft = vcpu->arch.interrupt.soft; 1899 + 1900 + events->nmi.injected = vcpu->arch.nmi_injected; 1901 + events->nmi.pending = vcpu->arch.nmi_pending; 1902 + events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); 1903 + 1904 + events->sipi_vector = vcpu->arch.sipi_vector; 1905 + 1906 + events->flags = 0; 1907 + 1908 + vcpu_put(vcpu); 1909 + } 1910 + 1911 + static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, 1912 + struct kvm_vcpu_events *events) 1913 + { 1914 + if (events->flags) 1915 + return -EINVAL; 1916 + 1917 + vcpu_load(vcpu); 1918 + 1919 + vcpu->arch.exception.pending = events->exception.injected; 1920 + vcpu->arch.exception.nr = events->exception.nr; 1921 + vcpu->arch.exception.has_error_code = events->exception.has_error_code; 1922 + vcpu->arch.exception.error_code = events->exception.error_code; 1923 + 1924 + vcpu->arch.interrupt.pending = events->interrupt.injected; 1925 + vcpu->arch.interrupt.nr = events->interrupt.nr; 1926 + vcpu->arch.interrupt.soft = events->interrupt.soft; 1927 + if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) 1928 + kvm_pic_clear_isr_ack(vcpu->kvm); 1929 + 1930 + vcpu->arch.nmi_injected = events->nmi.injected; 1931 + vcpu->arch.nmi_pending = events->nmi.pending; 1932 + kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); 1933 + 1934 + vcpu->arch.sipi_vector = events->sipi_vector; 1935 + 1936 + vcpu_put(vcpu); 1937 + 1938 + return 0; 1939 + } 1940 + 1887 1941 long kvm_arch_vcpu_ioctl(struct file *filp, 1888 1942 unsigned int ioctl, unsigned long arg) 1889 1943 { ··· 2094 2038 if (copy_from_user(&mce, argp, sizeof mce)) 2095 2039 goto out; 2096 2040 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2041 + break; 2042 + } 2043 + case KVM_GET_VCPU_EVENTS: { 2044 + struct kvm_vcpu_events events; 2045 + 2046 + kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); 2047 + 2048 + r = -EFAULT; 2049 + if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) 2050 + break; 2051 + r = 0; 2052 + break; 2053 + } 2054 + case KVM_SET_VCPU_EVENTS: { 2055 + struct kvm_vcpu_events events; 2056 + 2057 + r = -EFAULT; 2058 + if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) 2059 + break; 2060 + 2061 + r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); 2097 2062 break; 2098 2063 } 2099 2064 default:
+6
include/linux/kvm.h
··· 489 489 #endif 490 490 #define KVM_CAP_ADJUST_CLOCK 39 491 491 #define KVM_CAP_INTERNAL_ERROR_DATA 40 492 + #ifdef __KVM_HAVE_VCPU_EVENTS 493 + #define KVM_CAP_VCPU_EVENTS 41 494 + #endif 492 495 493 496 #ifdef KVM_CAP_IRQ_ROUTING 494 497 ··· 675 672 /* IA64 stack access */ 676 673 #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) 677 674 #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) 675 + /* Available with KVM_CAP_VCPU_EVENTS */ 676 + #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) 677 + #define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) 678 678 679 679 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 680 680