Merge tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Radim Krčmář:
"Trimmed second batch of KVM changes for Linux 4.15:

- GICv4 Support for KVM/ARM

- re-introduce support for CPUs without virtual NMI (cc stable) and
allow testing of KVM without virtual NMI even on CPUs where it is
available

- fix long-standing performance issues with assigned devices on AMD
(cc stable)"

* tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
kvm: vmx: Allow disabling virtual NMI support
kvm: vmx: Reinstate support for CPUs without virtual NMI
KVM: SVM: obey guest PAT
KVM: arm/arm64: Don't queue VLPIs on INV/INVALL
KVM: arm/arm64: Fix GICv4 ITS initialization issues
KVM: arm/arm64: GICv4: Theory of operations
KVM: arm/arm64: GICv4: Enable VLPI support
KVM: arm/arm64: GICv4: Prevent userspace from changing doorbell affinity
KVM: arm/arm64: GICv4: Prevent a VM using GICv4 from being saved
KVM: arm/arm64: GICv4: Enable virtual cpuif if VLPIs can be delivered
KVM: arm/arm64: GICv4: Hook vPE scheduling into vgic flush/sync
KVM: arm/arm64: GICv4: Use the doorbell interrupt as an unblocking source
KVM: arm/arm64: GICv4: Add doorbell interrupt handling
KVM: arm/arm64: GICv4: Use pending_last as a scheduling hint
KVM: arm/arm64: GICv4: Handle INVALL applied to a vPE
KVM: arm/arm64: GICv4: Propagate property updates to VLPIs
KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE
KVM: arm/arm64: GICv4: Handle CLEAR applied to a VLPI
KVM: arm/arm64: GICv4: Propagate affinity changes to the physical ITS
KVM: arm/arm64: GICv4: Unmap VLPI when freeing an LPI
...

+819 -158
+4
Documentation/admin-guide/kernel-parameters.txt
··· 1890 1890 [KVM,ARM] Trap guest accesses to GICv3 common 1891 1891 system registers 1892 1892 1893 + kvm-arm.vgic_v4_enable= 1894 + [KVM,ARM] Allow use of GICv4 for direct injection of 1895 + LPIs. 1896 + 1893 1897 kvm-intel.ept= [KVM,Intel] Disable extended page tables 1894 1898 (virtualized MMU) support on capable Intel chips. 1895 1899 Default is 1 (enabled)
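Worth noting for anyone trying this out: direct VLPI injection stays off by default (the new gicv4_enable flag in vgic-v3.c starts out false), so a GICv4-capable host has to opt in explicitly on its kernel command line, for example:

    kvm-arm.vgic_v4_enable=1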
+2
Documentation/virtual/kvm/devices/arm-vgic-its.txt
··· 64 64 -EINVAL: Inconsistent restored data 65 65 -EFAULT: Invalid guest ram access 66 66 -EBUSY: One or more VCPUS are running 67 + -EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the 68 + state is not available 67 69 68 70 KVM_DEV_ARM_VGIC_GRP_ITS_REGS 69 71 Attributes:
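The new -EACCES return is visible to userspace: a VMM that saves the ITS tables (e.g. ahead of migration) should treat it as "state not retrievable because a physical GICv4 ITS owns it" rather than as a transient failure. A minimal, hypothetical sketch of that handling, assuming an arm64 userspace with the uapi constants from <linux/kvm.h> (KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_ITS_SAVE_TABLES) and an already-created ITS device fd:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hypothetical helper: ask KVM to flush the virtual ITS state to
     * guest memory before saving it. Returns 0, or a negative errno. */
    static int its_save_tables(int its_fd)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_ARM_VGIC_GRP_CTRL,
                    .attr  = KVM_DEV_ARM_ITS_SAVE_TABLES,
            };

            if (ioctl(its_fd, KVM_SET_DEVICE_ATTR, &attr) == 0)
                    return 0;

            if (errno == EACCES)
                    /* An LPI is forwarded as a VLPI by a physical GICv4
                     * ITS, so its state cannot be extracted; migration
                     * of this VM must be refused rather than retried. */
                    fprintf(stderr, "vITS state unavailable (GICv4 in use)\n");

            return -errno;
    }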
+5
arch/arm/kvm/Kconfig
··· 4 4 # 5 5 6 6 source "virt/kvm/Kconfig" 7 + source "virt/lib/Kconfig" 7 8 8 9 menuconfig VIRTUALIZATION 9 10 bool "Virtualization" ··· 24 23 select PREEMPT_NOTIFIERS 25 24 select ANON_INODES 26 25 select ARM_GIC 26 + select ARM_GIC_V3 27 + select ARM_GIC_V3_ITS 27 28 select HAVE_KVM_CPU_RELAX_INTERCEPT 28 29 select HAVE_KVM_ARCH_TLB_FLUSH_ALL 29 30 select KVM_MMIO ··· 39 36 select HAVE_KVM_IRQCHIP 40 37 select HAVE_KVM_IRQ_ROUTING 41 38 select HAVE_KVM_MSI 39 + select IRQ_BYPASS_MANAGER 40 + select HAVE_KVM_IRQ_BYPASS 42 41 depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER 43 42 ---help--- 44 43 Support hosting virtualized guest machines.
+1
arch/arm/kvm/Makefile
··· 32 32 obj-y += $(KVM)/arm/vgic/vgic-irqfd.o 33 33 obj-y += $(KVM)/arm/vgic/vgic-v2.o 34 34 obj-y += $(KVM)/arm/vgic/vgic-v3.o 35 + obj-y += $(KVM)/arm/vgic/vgic-v4.o 35 36 obj-y += $(KVM)/arm/vgic/vgic-mmio.o 36 37 obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o 37 38 obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o
+3
arch/arm64/kvm/Kconfig
··· 4 4 # 5 5 6 6 source "virt/kvm/Kconfig" 7 + source "virt/lib/Kconfig" 7 8 8 9 menuconfig VIRTUALIZATION 9 10 bool "Virtualization" ··· 37 36 select HAVE_KVM_MSI 38 37 select HAVE_KVM_IRQCHIP 39 38 select HAVE_KVM_IRQ_ROUTING 39 + select IRQ_BYPASS_MANAGER 40 + select HAVE_KVM_IRQ_BYPASS 40 41 ---help--- 41 42 Support hosting virtualized guest machines. 42 43 We don't support KVM with 16K page tables yet, due to the multiple
+1
arch/arm64/kvm/Makefile
··· 27 27 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o 28 28 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o 29 29 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o 30 + kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o 30 31 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o 31 32 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o 32 33 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
+7
arch/x86/kvm/svm.c
··· 3671 3671 u32 ecx = msr->index; 3672 3672 u64 data = msr->data; 3673 3673 switch (ecx) { 3674 + case MSR_IA32_CR_PAT: 3675 + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) 3676 + return 1; 3677 + vcpu->arch.pat = data; 3678 + svm->vmcb->save.g_pat = data; 3679 + mark_dirty(svm->vmcb, VMCB_NPT); 3680 + break; 3674 3681 case MSR_IA32_TSC: 3675 3682 kvm_write_tsc(vcpu, msr); 3676 3683 break;
+117 -44
arch/x86/kvm/vmx.c
··· 70 70 static bool __read_mostly enable_vpid = 1; 71 71 module_param_named(vpid, enable_vpid, bool, 0444); 72 72 73 + static bool __read_mostly enable_vnmi = 1; 74 + module_param_named(vnmi, enable_vnmi, bool, S_IRUGO); 75 + 73 76 static bool __read_mostly flexpriority_enabled = 1; 74 77 module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); 75 78 ··· 205 202 bool nmi_known_unmasked; 206 203 unsigned long vmcs_host_cr3; /* May not match real cr3 */ 207 204 unsigned long vmcs_host_cr4; /* May not match real cr4 */ 205 + /* Support for vnmi-less CPUs */ 206 + int soft_vnmi_blocked; 207 + ktime_t entry_time; 208 + s64 vnmi_blocked_time; 208 209 struct list_head loaded_vmcss_on_cpu_link; 209 210 }; 210 211 ··· 1298 1291 SECONDARY_EXEC_ENABLE_INVPCID; 1299 1292 } 1300 1293 1294 + static inline bool cpu_has_virtual_nmis(void) 1295 + { 1296 + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; 1297 + } 1298 + 1301 1299 static inline bool cpu_has_vmx_wbinvd_exit(void) 1302 1300 { 1303 1301 return vmcs_config.cpu_based_2nd_exec_ctrl & ··· 1358 1346 return (vmcs12->cpu_based_vm_exec_control & 1359 1347 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && 1360 1348 (vmcs12->secondary_vm_exec_control & bit); 1361 - } 1362 - 1363 - static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) 1364 - { 1365 - return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; 1366 1349 } 1367 1350 1368 1351 static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12) ··· 3719 3712 &_vmexit_control) < 0) 3720 3713 return -EIO; 3721 3714 3722 - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | 3723 - PIN_BASED_VIRTUAL_NMIS; 3724 - opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER; 3715 + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; 3716 + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | 3717 + PIN_BASED_VMX_PREEMPTION_TIMER; 3725 3718 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, 3726 3719 &_pin_based_exec_control) < 0) 3727 3720 return -EIO; ··· 5239 5232 5240 5233 if (!kvm_vcpu_apicv_active(&vmx->vcpu)) 5241 5234 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; 5235 + 5236 + if (!enable_vnmi) 5237 + pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; 5238 + 5242 5239 /* Enable the preemption timer dynamically */ 5243 5240 pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; 5244 5241 return pin_based_exec_ctrl; ··· 5677 5666 5678 5667 static void enable_nmi_window(struct kvm_vcpu *vcpu) 5679 5668 { 5680 - if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { 5669 + if (!enable_vnmi || 5670 + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { 5681 5671 enable_irq_window(vcpu); 5682 5672 return; 5683 5673 } ··· 5718 5706 { 5719 5707 struct vcpu_vmx *vmx = to_vmx(vcpu); 5720 5708 5709 + if (!enable_vnmi) { 5710 + /* 5711 + * Tracking the NMI-blocked state in software is built upon 5712 + * finding the next open IRQ window. This, in turn, depends on 5713 + * well-behaving guests: They have to keep IRQs disabled at 5714 + * least as long as the NMI handler runs. Otherwise we may 5715 + * cause NMI nesting, maybe breaking the guest. But as this is 5716 + * highly unlikely, we can live with the residual risk. 
5717 + */ 5718 + vmx->loaded_vmcs->soft_vnmi_blocked = 1; 5719 + vmx->loaded_vmcs->vnmi_blocked_time = 0; 5720 + } 5721 + 5721 5722 ++vcpu->stat.nmi_injections; 5722 5723 vmx->loaded_vmcs->nmi_known_unmasked = false; 5723 5724 ··· 5749 5724 struct vcpu_vmx *vmx = to_vmx(vcpu); 5750 5725 bool masked; 5751 5726 5727 + if (!enable_vnmi) 5728 + return vmx->loaded_vmcs->soft_vnmi_blocked; 5752 5729 if (vmx->loaded_vmcs->nmi_known_unmasked) 5753 5730 return false; 5754 5731 masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; ··· 5762 5735 { 5763 5736 struct vcpu_vmx *vmx = to_vmx(vcpu); 5764 5737 5765 - vmx->loaded_vmcs->nmi_known_unmasked = !masked; 5766 - if (masked) 5767 - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 5768 - GUEST_INTR_STATE_NMI); 5769 - else 5770 - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, 5771 - GUEST_INTR_STATE_NMI); 5738 + if (!enable_vnmi) { 5739 + if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { 5740 + vmx->loaded_vmcs->soft_vnmi_blocked = masked; 5741 + vmx->loaded_vmcs->vnmi_blocked_time = 0; 5742 + } 5743 + } else { 5744 + vmx->loaded_vmcs->nmi_known_unmasked = !masked; 5745 + if (masked) 5746 + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 5747 + GUEST_INTR_STATE_NMI); 5748 + else 5749 + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, 5750 + GUEST_INTR_STATE_NMI); 5751 + } 5772 5752 } 5773 5753 5774 5754 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) 5775 5755 { 5776 5756 if (to_vmx(vcpu)->nested.nested_run_pending) 5757 + return 0; 5758 + 5759 + if (!enable_vnmi && 5760 + to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) 5777 5761 return 0; 5778 5762 5779 5763 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & ··· 6514 6476 * AAK134, BY25. 6515 6477 */ 6516 6478 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && 6479 + enable_vnmi && 6517 6480 (exit_qualification & INTR_INFO_UNBLOCK_NMI)) 6518 6481 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); 6519 6482 ··· 6574 6535 6575 6536 static int handle_nmi_window(struct kvm_vcpu *vcpu) 6576 6537 { 6538 + WARN_ON_ONCE(!enable_vnmi); 6577 6539 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, 6578 6540 CPU_BASED_VIRTUAL_NMI_PENDING); 6579 6541 ++vcpu->stat.nmi_window_exits; ··· 6798 6758 if (!cpu_has_vmx_flexpriority()) 6799 6759 flexpriority_enabled = 0; 6800 6760 6761 + if (!cpu_has_virtual_nmis()) 6762 + enable_vnmi = 0; 6763 + 6801 6764 /* 6802 6765 * set_apic_access_page_addr() is used to reload apic access 6803 6766 * page upon invalidation. No need to do anything if not ··· 7005 6962 } 7006 6963 7007 6964 /* Create a new VMCS */ 7008 - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); 6965 + item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); 7009 6966 if (!item) 7010 6967 return NULL; 7011 6968 item->vmcs02.vmcs = alloc_vmcs(); ··· 8022 7979 * "blocked by NMI" bit has to be set before next VM entry. 
8023 7980 */ 8024 7981 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && 7982 + enable_vnmi && 8025 7983 (exit_qualification & INTR_INFO_UNBLOCK_NMI)) 8026 7984 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 8027 7985 GUEST_INTR_STATE_NMI); ··· 8867 8823 return 0; 8868 8824 } 8869 8825 8826 + if (unlikely(!enable_vnmi && 8827 + vmx->loaded_vmcs->soft_vnmi_blocked)) { 8828 + if (vmx_interrupt_allowed(vcpu)) { 8829 + vmx->loaded_vmcs->soft_vnmi_blocked = 0; 8830 + } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && 8831 + vcpu->arch.nmi_pending) { 8832 + /* 8833 + * This CPU don't support us in finding the end of an 8834 + * NMI-blocked window if the guest runs with IRQs 8835 + * disabled. So we pull the trigger after 1 s of 8836 + * futile waiting, but inform the user about this. 8837 + */ 8838 + printk(KERN_WARNING "%s: Breaking out of NMI-blocked " 8839 + "state on VCPU %d after 1 s timeout\n", 8840 + __func__, vcpu->vcpu_id); 8841 + vmx->loaded_vmcs->soft_vnmi_blocked = 0; 8842 + } 8843 + } 8844 + 8870 8845 if (exit_reason < kvm_vmx_max_exit_handlers 8871 8846 && kvm_vmx_exit_handlers[exit_reason]) 8872 8847 return kvm_vmx_exit_handlers[exit_reason](vcpu); ··· 9168 9105 9169 9106 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; 9170 9107 9171 - if (vmx->loaded_vmcs->nmi_known_unmasked) 9172 - return; 9173 - /* 9174 - * Can't use vmx->exit_intr_info since we're not sure what 9175 - * the exit reason is. 9176 - */ 9177 - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 9178 - unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; 9179 - vector = exit_intr_info & INTR_INFO_VECTOR_MASK; 9180 - /* 9181 - * SDM 3: 27.7.1.2 (September 2008) 9182 - * Re-set bit "block by NMI" before VM entry if vmexit caused by 9183 - * a guest IRET fault. 9184 - * SDM 3: 23.2.2 (September 2008) 9185 - * Bit 12 is undefined in any of the following cases: 9186 - * If the VM exit sets the valid bit in the IDT-vectoring 9187 - * information field. 9188 - * If the VM exit is due to a double fault. 9189 - */ 9190 - if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && 9191 - vector != DF_VECTOR && !idtv_info_valid) 9192 - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 9193 - GUEST_INTR_STATE_NMI); 9194 - else 9195 - vmx->loaded_vmcs->nmi_known_unmasked = 9196 - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) 9197 - & GUEST_INTR_STATE_NMI); 9108 + if (enable_vnmi) { 9109 + if (vmx->loaded_vmcs->nmi_known_unmasked) 9110 + return; 9111 + /* 9112 + * Can't use vmx->exit_intr_info since we're not sure what 9113 + * the exit reason is. 9114 + */ 9115 + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 9116 + unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; 9117 + vector = exit_intr_info & INTR_INFO_VECTOR_MASK; 9118 + /* 9119 + * SDM 3: 27.7.1.2 (September 2008) 9120 + * Re-set bit "block by NMI" before VM entry if vmexit caused by 9121 + * a guest IRET fault. 9122 + * SDM 3: 23.2.2 (September 2008) 9123 + * Bit 12 is undefined in any of the following cases: 9124 + * If the VM exit sets the valid bit in the IDT-vectoring 9125 + * information field. 9126 + * If the VM exit is due to a double fault. 
9127 + */ 9128 + if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && 9129 + vector != DF_VECTOR && !idtv_info_valid) 9130 + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 9131 + GUEST_INTR_STATE_NMI); 9132 + else 9133 + vmx->loaded_vmcs->nmi_known_unmasked = 9134 + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) 9135 + & GUEST_INTR_STATE_NMI); 9136 + } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked)) 9137 + vmx->loaded_vmcs->vnmi_blocked_time += 9138 + ktime_to_ns(ktime_sub(ktime_get(), 9139 + vmx->loaded_vmcs->entry_time)); 9198 9140 } 9199 9141 9200 9142 static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, ··· 9315 9247 { 9316 9248 struct vcpu_vmx *vmx = to_vmx(vcpu); 9317 9249 unsigned long debugctlmsr, cr3, cr4; 9250 + 9251 + /* Record the guest's net vcpu time for enforced NMI injections. */ 9252 + if (unlikely(!enable_vnmi && 9253 + vmx->loaded_vmcs->soft_vnmi_blocked)) 9254 + vmx->loaded_vmcs->entry_time = ktime_get(); 9318 9255 9319 9256 /* Don't enter VMX if guest state is invalid, let the exit handler 9320 9257 start emulation until we arrive back to a valid state */
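To exercise the reinstated no-vNMI path on hardware that does have virtual NMIs, the new kvm_intel parameter can be forced off. It is read-only at runtime (S_IRUGO), so it must be given at module load time, for example:

    modprobe kvm_intel vnmi=0

The default stays vnmi=1, and the flag is cleared automatically when cpu_has_virtual_nmis() fails.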
+38 -3
include/kvm/arm_vgic.h
··· 26 26 #include <linux/list.h> 27 27 #include <linux/jump_label.h> 28 28 29 + #include <linux/irqchip/arm-gic-v4.h> 30 + 29 31 #define VGIC_V3_MAX_CPUS 255 30 32 #define VGIC_V2_MAX_CPUS 8 31 33 #define VGIC_NR_IRQS_LEGACY 256 ··· 75 73 /* Only needed for the legacy KVM_CREATE_IRQCHIP */ 76 74 bool can_emulate_gicv2; 77 75 76 + /* Hardware has GICv4? */ 77 + bool has_gicv4; 78 + 78 79 /* GIC system register CPU interface */ 79 80 struct static_key_false gicv3_cpuif; 80 81 ··· 121 116 bool hw; /* Tied to HW IRQ */ 122 117 struct kref refcount; /* Used for LPIs */ 123 118 u32 hwintid; /* HW INTID number */ 119 + unsigned int host_irq; /* linux irq corresponding to hwintid */ 124 120 union { 125 121 u8 targets; /* GICv2 target VCPUs mask */ 126 122 u32 mpidr; /* GICv3 target VCPU */ ··· 238 232 239 233 /* used by vgic-debug */ 240 234 struct vgic_state_iter *iter; 235 + 236 + /* 237 + * GICv4 ITS per-VM data, containing the IRQ domain, the VPE 238 + * array, the property table pointer as well as allocation 239 + * data. This essentially ties the Linux IRQ core and ITS 240 + * together, and avoids leaking KVM's data structures anywhere 241 + * else. 242 + */ 243 + struct its_vm its_vm; 241 244 }; 242 245 243 246 struct vgic_v2_cpu_if { ··· 265 250 u32 vgic_ap0r[4]; 266 251 u32 vgic_ap1r[4]; 267 252 u64 vgic_lr[VGIC_V3_MAX_LRS]; 253 + 254 + /* 255 + * GICv4 ITS per-VPE data, containing the doorbell IRQ, the 256 + * pending table pointer, the its_vm pointer and a few other 257 + * HW specific things. As for the its_vm structure, this is 258 + * linking the Linux IRQ subsystem and the ITS together. 259 + */ 260 + struct its_vpe its_vpe; 268 261 }; 269 262 270 263 struct vgic_cpu { ··· 330 307 331 308 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, 332 309 bool level, void *owner); 333 - int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); 334 - int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); 335 - bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); 310 + int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, 311 + u32 vintid); 312 + int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); 313 + bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid); 336 314 337 315 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); 338 316 ··· 372 348 int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); 373 349 374 350 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner); 351 + 352 + struct kvm_kernel_irq_routing_entry; 353 + 354 + int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq, 355 + struct kvm_kernel_irq_routing_entry *irq_entry); 356 + 357 + int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, 358 + struct kvm_kernel_irq_routing_entry *irq_entry); 359 + 360 + void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu); 361 + void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu); 375 362 376 363 #endif /* __KVM_ARM_VGIC_H */
+1 -23
virt/kvm/arm/arch_timer.c
··· 817 817 { 818 818 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 819 819 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 820 - struct irq_desc *desc; 821 - struct irq_data *data; 822 - int phys_irq; 823 820 int ret; 824 821 825 822 if (timer->enabled) ··· 834 837 return -EINVAL; 835 838 } 836 839 837 - /* 838 - * Find the physical IRQ number corresponding to the host_vtimer_irq 839 - */ 840 - desc = irq_to_desc(host_vtimer_irq); 841 - if (!desc) { 842 - kvm_err("%s: no interrupt descriptor\n", __func__); 843 - return -EINVAL; 844 - } 845 - 846 - data = irq_desc_get_irq_data(desc); 847 - while (data->parent_data) 848 - data = data->parent_data; 849 - 850 - phys_irq = data->hwirq; 851 - 852 - /* 853 - * Tell the VGIC that the virtual interrupt is tied to a 854 - * physical interrupt. We do that once per VCPU. 855 - */ 856 - ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq); 840 + ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq); 857 841 if (ret) 858 842 return ret; 859 843
+46 -2
virt/kvm/arm/arm.c
··· 27 27 #include <linux/mman.h> 28 28 #include <linux/sched.h> 29 29 #include <linux/kvm.h> 30 + #include <linux/kvm_irqfd.h> 31 + #include <linux/irqbypass.h> 30 32 #include <trace/events/kvm.h> 31 33 #include <kvm/arm_pmu.h> 32 34 ··· 177 175 { 178 176 int i; 179 177 178 + kvm_vgic_destroy(kvm); 179 + 180 180 free_percpu(kvm->arch.last_vcpu_ran); 181 181 kvm->arch.last_vcpu_ran = NULL; 182 182 ··· 188 184 kvm->vcpus[i] = NULL; 189 185 } 190 186 } 191 - 192 - kvm_vgic_destroy(kvm); 193 187 } 194 188 195 189 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ··· 315 313 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) 316 314 { 317 315 kvm_timer_schedule(vcpu); 316 + kvm_vgic_v4_enable_doorbell(vcpu); 318 317 } 319 318 320 319 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) 321 320 { 322 321 kvm_timer_unschedule(vcpu); 322 + kvm_vgic_v4_disable_doorbell(vcpu); 323 323 } 324 324 325 325 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) ··· 1452 1448 return vcpu; 1453 1449 } 1454 1450 return NULL; 1451 + } 1452 + 1453 + bool kvm_arch_has_irq_bypass(void) 1454 + { 1455 + return true; 1456 + } 1457 + 1458 + int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, 1459 + struct irq_bypass_producer *prod) 1460 + { 1461 + struct kvm_kernel_irqfd *irqfd = 1462 + container_of(cons, struct kvm_kernel_irqfd, consumer); 1463 + 1464 + return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq, 1465 + &irqfd->irq_entry); 1466 + } 1467 + void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, 1468 + struct irq_bypass_producer *prod) 1469 + { 1470 + struct kvm_kernel_irqfd *irqfd = 1471 + container_of(cons, struct kvm_kernel_irqfd, consumer); 1472 + 1473 + kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq, 1474 + &irqfd->irq_entry); 1475 + } 1476 + 1477 + void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) 1478 + { 1479 + struct kvm_kernel_irqfd *irqfd = 1480 + container_of(cons, struct kvm_kernel_irqfd, consumer); 1481 + 1482 + kvm_arm_halt_guest(irqfd->kvm); 1483 + } 1484 + 1485 + void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons) 1486 + { 1487 + struct kvm_kernel_irqfd *irqfd = 1488 + container_of(cons, struct kvm_kernel_irqfd, consumer); 1489 + 1490 + kvm_arm_resume_guest(irqfd->kvm); 1455 1491 } 1456 1492 1457 1493 /**
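For context, the kvm_arch_irq_bypass_add/del_producer() callbacks above only run once the irq bypass manager has matched KVM's irqfd consumer with a producer registered by the device driver (VFIO PCI registers one per MSI vector). A rough, illustrative sketch of that producer side — struct my_vector and publish_vector() are invented names, not VFIO's:

    #include <linux/eventfd.h>
    #include <linux/irqbypass.h>

    struct my_vector {
            struct irq_bypass_producer producer;
            int host_irq;                   /* Linux irq of the MSI vector */
    };

    /* Pair the host interrupt with the irqfd consumer KVM registered.
     * The token must be the same eventfd context the VMM handed to
     * KVM_IRQFD, otherwise the bypass manager never matches the two. */
    static int publish_vector(struct my_vector *vec, struct eventfd_ctx *trigger)
    {
            vec->producer.token = trigger;
            vec->producer.irq = vec->host_irq;
            return irq_bypass_register_producer(&vec->producer);
    }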
+6 -3
virt/kvm/arm/hyp/vgic-v3-sr.c
··· 258 258 cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); 259 259 } 260 260 } else { 261 - if (static_branch_unlikely(&vgic_v3_cpuif_trap)) 261 + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || 262 + cpu_if->its_vpe.its_vm) 262 263 write_gicreg(0, ICH_HCR_EL2); 263 264 264 265 cpu_if->vgic_elrsr = 0xffff; ··· 338 337 /* 339 338 * If we need to trap system registers, we must write 340 339 * ICH_HCR_EL2 anyway, even if no interrupts are being 341 - * injected, 340 + * injected. Same thing if GICv4 is used, as VLPI 341 + * delivery is gated by ICH_HCR_EL2.En. 342 342 */ 343 - if (static_branch_unlikely(&vgic_v3_cpuif_trap)) 343 + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || 344 + cpu_if->its_vpe.its_vm) 344 345 write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); 345 346 } 346 347
+7
virt/kvm/arm/vgic/vgic-init.c
··· 285 285 if (ret) 286 286 goto out; 287 287 288 + ret = vgic_v4_init(kvm); 289 + if (ret) 290 + goto out; 291 + 288 292 kvm_for_each_vcpu(i, vcpu, kvm) 289 293 kvm_vgic_vcpu_enable(vcpu); 290 294 ··· 324 320 325 321 kfree(dist->spis); 326 322 dist->nr_spis = 0; 323 + 324 + if (vgic_supports_direct_msis(kvm)) 325 + vgic_v4_teardown(kvm); 327 326 } 328 327 329 328 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+136 -68
virt/kvm/arm/vgic/vgic-its.c
··· 38 38 static int vgic_its_restore_tables_v0(struct vgic_its *its); 39 39 static int vgic_its_commit_v0(struct vgic_its *its); 40 40 static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, 41 - struct kvm_vcpu *filter_vcpu); 41 + struct kvm_vcpu *filter_vcpu, bool needs_inv); 42 42 43 43 /* 44 44 * Creates a new (reference to a) struct vgic_irq for a given LPI. ··· 106 106 * However we only have those structs for mapped IRQs, so we read in 107 107 * the respective config data from memory here upon mapping the LPI. 108 108 */ 109 - ret = update_lpi_config(kvm, irq, NULL); 109 + ret = update_lpi_config(kvm, irq, NULL, false); 110 110 if (ret) 111 111 return ERR_PTR(ret); 112 112 ··· 273 273 * VCPU. Unconditionally applies if filter_vcpu is NULL. 274 274 */ 275 275 static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, 276 - struct kvm_vcpu *filter_vcpu) 276 + struct kvm_vcpu *filter_vcpu, bool needs_inv) 277 277 { 278 278 u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); 279 279 u8 prop; ··· 292 292 irq->priority = LPI_PROP_PRIORITY(prop); 293 293 irq->enabled = LPI_PROP_ENABLE_BIT(prop); 294 294 295 - vgic_queue_irq_unlock(kvm, irq, flags); 296 - } else { 297 - spin_unlock_irqrestore(&irq->irq_lock, flags); 295 + if (!irq->hw) { 296 + vgic_queue_irq_unlock(kvm, irq, flags); 297 + return 0; 298 + } 298 299 } 300 + 301 + spin_unlock_irqrestore(&irq->irq_lock, flags); 302 + 303 + if (irq->hw) 304 + return its_prop_update_vlpi(irq->host_irq, prop, needs_inv); 299 305 300 306 return 0; 301 307 } ··· 342 336 return i; 343 337 } 344 338 339 + static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) 340 + { 341 + int ret = 0; 342 + 343 + spin_lock(&irq->irq_lock); 344 + irq->target_vcpu = vcpu; 345 + spin_unlock(&irq->irq_lock); 346 + 347 + if (irq->hw) { 348 + struct its_vlpi_map map; 349 + 350 + ret = its_get_vlpi(irq->host_irq, &map); 351 + if (ret) 352 + return ret; 353 + 354 + map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; 355 + 356 + ret = its_map_vlpi(irq->host_irq, &map); 357 + } 358 + 359 + return ret; 360 + } 361 + 345 362 /* 346 363 * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI 347 364 * is targeting) to the VGIC's view, which deals with target VCPUs. ··· 379 350 return; 380 351 381 352 vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); 382 - 383 - spin_lock(&ite->irq->irq_lock); 384 - ite->irq->target_vcpu = vcpu; 385 - spin_unlock(&ite->irq->irq_lock); 353 + update_affinity(ite->irq, vcpu); 386 354 } 387 355 388 356 /* ··· 531 505 return 0; 532 506 } 533 507 534 - /* 535 - * Find the target VCPU and the LPI number for a given devid/eventid pair 536 - * and make this IRQ pending, possibly injecting it. 537 - * Must be called with the its_lock mutex held. 538 - * Returns 0 on success, a positive error value for any ITS mapping 539 - * related errors and negative error values for generic errors. 
540 - */ 541 - static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, 542 - u32 devid, u32 eventid) 508 + int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, 509 + u32 devid, u32 eventid, struct vgic_irq **irq) 543 510 { 544 511 struct kvm_vcpu *vcpu; 545 512 struct its_ite *ite; 546 - unsigned long flags; 547 513 548 514 if (!its->enabled) 549 515 return -EBUSY; ··· 551 533 if (!vcpu->arch.vgic_cpu.lpis_enabled) 552 534 return -EBUSY; 553 535 554 - spin_lock_irqsave(&ite->irq->irq_lock, flags); 555 - ite->irq->pending_latch = true; 556 - vgic_queue_irq_unlock(kvm, ite->irq, flags); 557 - 536 + *irq = ite->irq; 558 537 return 0; 559 538 } 560 539 561 - static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev) 540 + struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi) 562 541 { 542 + u64 address; 543 + struct kvm_io_device *kvm_io_dev; 563 544 struct vgic_io_device *iodev; 564 545 565 - if (dev->ops != &kvm_io_gic_ops) 566 - return NULL; 546 + if (!vgic_has_its(kvm)) 547 + return ERR_PTR(-ENODEV); 567 548 568 - iodev = container_of(dev, struct vgic_io_device, dev); 549 + if (!(msi->flags & KVM_MSI_VALID_DEVID)) 550 + return ERR_PTR(-EINVAL); 569 551 552 + address = (u64)msi->address_hi << 32 | msi->address_lo; 553 + 554 + kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); 555 + if (!kvm_io_dev) 556 + return ERR_PTR(-EINVAL); 557 + 558 + if (kvm_io_dev->ops != &kvm_io_gic_ops) 559 + return ERR_PTR(-EINVAL); 560 + 561 + iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); 570 562 if (iodev->iodev_type != IODEV_ITS) 571 - return NULL; 563 + return ERR_PTR(-EINVAL); 572 564 573 - return iodev; 565 + return iodev->its; 566 + } 567 + 568 + /* 569 + * Find the target VCPU and the LPI number for a given devid/eventid pair 570 + * and make this IRQ pending, possibly injecting it. 571 + * Must be called with the its_lock mutex held. 572 + * Returns 0 on success, a positive error value for any ITS mapping 573 + * related errors and negative error values for generic errors. 
574 + */ 575 + static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, 576 + u32 devid, u32 eventid) 577 + { 578 + struct vgic_irq *irq = NULL; 579 + unsigned long flags; 580 + int err; 581 + 582 + err = vgic_its_resolve_lpi(kvm, its, devid, eventid, &irq); 583 + if (err) 584 + return err; 585 + 586 + if (irq->hw) 587 + return irq_set_irqchip_state(irq->host_irq, 588 + IRQCHIP_STATE_PENDING, true); 589 + 590 + spin_lock_irqsave(&irq->irq_lock, flags); 591 + irq->pending_latch = true; 592 + vgic_queue_irq_unlock(kvm, irq, flags); 593 + 594 + return 0; 574 595 } 575 596 576 597 /* ··· 620 563 */ 621 564 int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) 622 565 { 623 - u64 address; 624 - struct kvm_io_device *kvm_io_dev; 625 - struct vgic_io_device *iodev; 566 + struct vgic_its *its; 626 567 int ret; 627 568 628 - if (!vgic_has_its(kvm)) 629 - return -ENODEV; 569 + its = vgic_msi_to_its(kvm, msi); 570 + if (IS_ERR(its)) 571 + return PTR_ERR(its); 630 572 631 - if (!(msi->flags & KVM_MSI_VALID_DEVID)) 632 - return -EINVAL; 633 - 634 - address = (u64)msi->address_hi << 32 | msi->address_lo; 635 - 636 - kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); 637 - if (!kvm_io_dev) 638 - return -EINVAL; 639 - 640 - iodev = vgic_get_its_iodev(kvm_io_dev); 641 - if (!iodev) 642 - return -EINVAL; 643 - 644 - mutex_lock(&iodev->its->its_lock); 645 - ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); 646 - mutex_unlock(&iodev->its->its_lock); 573 + mutex_lock(&its->its_lock); 574 + ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data); 575 + mutex_unlock(&its->its_lock); 647 576 648 577 if (ret < 0) 649 578 return ret; ··· 651 608 list_del(&ite->ite_list); 652 609 653 610 /* This put matches the get in vgic_add_lpi. 
*/ 654 - if (ite->irq) 611 + if (ite->irq) { 612 + if (ite->irq->hw) 613 + WARN_ON(its_unmap_vlpi(ite->irq->host_irq)); 614 + 655 615 vgic_put_irq(kvm, ite->irq); 616 + } 656 617 657 618 kfree(ite); 658 619 } ··· 730 683 ite->collection = collection; 731 684 vcpu = kvm_get_vcpu(kvm, collection->target_addr); 732 685 733 - spin_lock(&ite->irq->irq_lock); 734 - ite->irq->target_vcpu = vcpu; 735 - spin_unlock(&ite->irq->irq_lock); 736 - 737 - return 0; 686 + return update_affinity(ite->irq, vcpu); 738 687 } 739 688 740 689 /* ··· 1097 1054 1098 1055 ite->irq->pending_latch = false; 1099 1056 1057 + if (ite->irq->hw) 1058 + return irq_set_irqchip_state(ite->irq->host_irq, 1059 + IRQCHIP_STATE_PENDING, false); 1060 + 1100 1061 return 0; 1101 1062 } 1102 1063 ··· 1120 1073 if (!ite) 1121 1074 return E_ITS_INV_UNMAPPED_INTERRUPT; 1122 1075 1123 - return update_lpi_config(kvm, ite->irq, NULL); 1076 + return update_lpi_config(kvm, ite->irq, NULL, true); 1124 1077 } 1125 1078 1126 1079 /* ··· 1155 1108 irq = vgic_get_irq(kvm, NULL, intids[i]); 1156 1109 if (!irq) 1157 1110 continue; 1158 - update_lpi_config(kvm, irq, vcpu); 1111 + update_lpi_config(kvm, irq, vcpu, false); 1159 1112 vgic_put_irq(kvm, irq); 1160 1113 } 1161 1114 1162 1115 kfree(intids); 1116 + 1117 + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm) 1118 + its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe); 1163 1119 1164 1120 return 0; 1165 1121 } ··· 1178 1128 static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, 1179 1129 u64 *its_cmd) 1180 1130 { 1181 - struct vgic_dist *dist = &kvm->arch.vgic; 1182 1131 u32 target1_addr = its_cmd_get_target_addr(its_cmd); 1183 1132 u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); 1184 1133 struct kvm_vcpu *vcpu1, *vcpu2; 1185 1134 struct vgic_irq *irq; 1135 + u32 *intids; 1136 + int irq_count, i; 1186 1137 1187 1138 if (target1_addr >= atomic_read(&kvm->online_vcpus) || 1188 1139 target2_addr >= atomic_read(&kvm->online_vcpus)) ··· 1195 1144 vcpu1 = kvm_get_vcpu(kvm, target1_addr); 1196 1145 vcpu2 = kvm_get_vcpu(kvm, target2_addr); 1197 1146 1198 - spin_lock(&dist->lpi_list_lock); 1147 + irq_count = vgic_copy_lpi_list(vcpu1, &intids); 1148 + if (irq_count < 0) 1149 + return irq_count; 1199 1150 1200 - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { 1201 - spin_lock(&irq->irq_lock); 1151 + for (i = 0; i < irq_count; i++) { 1152 + irq = vgic_get_irq(kvm, NULL, intids[i]); 1202 1153 1203 - if (irq->target_vcpu == vcpu1) 1204 - irq->target_vcpu = vcpu2; 1154 + update_affinity(irq, vcpu2); 1205 1155 1206 - spin_unlock(&irq->irq_lock); 1156 + vgic_put_irq(kvm, irq); 1207 1157 } 1208 1158 1209 - spin_unlock(&dist->lpi_list_lock); 1210 - 1159 + kfree(intids); 1211 1160 return 0; 1212 1161 } 1213 1162 ··· 1685 1634 if (!its) 1686 1635 return -ENOMEM; 1687 1636 1637 + if (vgic_initialized(dev->kvm)) { 1638 + int ret = vgic_v4_init(dev->kvm); 1639 + if (ret < 0) { 1640 + kfree(its); 1641 + return ret; 1642 + } 1643 + } 1644 + 1688 1645 mutex_init(&its->its_lock); 1689 1646 mutex_init(&its->cmd_lock); 1690 1647 ··· 2004 1945 2005 1946 list_for_each_entry(ite, &device->itt_head, ite_list) { 2006 1947 gpa_t gpa = base + ite->event_id * ite_esz; 1948 + 1949 + /* 1950 + * If an LPI carries the HW bit, this means that this 1951 + * interrupt is controlled by GICv4, and we do not 1952 + * have direct access to that state. Let's simply fail 1953 + * the save operation... 
1954 + */ 1955 + if (ite->irq->hw) 1956 + return -EACCES; 2007 1957 2008 1958 ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz); 2009 1959 if (ret)
+5
virt/kvm/arm/vgic/vgic-mmio-v3.c
··· 54 54 return dist->has_its; 55 55 } 56 56 57 + bool vgic_supports_direct_msis(struct kvm *kvm) 58 + { 59 + return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm); 60 + } 61 + 57 62 static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, 58 63 gpa_t addr, unsigned int len) 59 64 {
+14
virt/kvm/arm/vgic/vgic-v3.c
··· 24 24 static bool group0_trap; 25 25 static bool group1_trap; 26 26 static bool common_trap; 27 + static bool gicv4_enable; 27 28 28 29 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) 29 30 { ··· 462 461 } 463 462 early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); 464 463 464 + static int __init early_gicv4_enable(char *buf) 465 + { 466 + return strtobool(buf, &gicv4_enable); 467 + } 468 + early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); 469 + 465 470 /** 466 471 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT 467 472 * @node: pointer to the DT node ··· 486 479 kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; 487 480 kvm_vgic_global_state.can_emulate_gicv2 = false; 488 481 kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2; 482 + 483 + /* GICv4 support? */ 484 + if (info->has_v4) { 485 + kvm_vgic_global_state.has_gicv4 = gicv4_enable; 486 + kvm_info("GICv4 support %sabled\n", 487 + gicv4_enable ? "en" : "dis"); 488 + } 489 489 490 490 if (!info->vcpu.start) { 491 491 kvm_info("GICv3: no GICV resource entry\n");
+364
virt/kvm/arm/vgic/vgic-v4.c
··· 1 + /* 2 + * Copyright (C) 2017 ARM Ltd. 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include <linux/interrupt.h> 19 + #include <linux/irq.h> 20 + #include <linux/irqdomain.h> 21 + #include <linux/kvm_host.h> 22 + #include <linux/irqchip/arm-gic-v3.h> 23 + 24 + #include "vgic.h" 25 + 26 + /* 27 + * How KVM uses GICv4 (insert rude comments here): 28 + * 29 + * The vgic-v4 layer acts as a bridge between several entities: 30 + * - The GICv4 ITS representation offered by the ITS driver 31 + * - VFIO, which is in charge of the PCI endpoint 32 + * - The virtual ITS, which is the only thing the guest sees 33 + * 34 + * The configuration of VLPIs is triggered by a callback from VFIO, 35 + * instructing KVM that a PCI device has been configured to deliver 36 + * MSIs to a vITS. 37 + * 38 + * kvm_vgic_v4_set_forwarding() is thus called with the routing entry, 39 + * and this is used to find the corresponding vITS data structures 40 + * (ITS instance, device, event and irq) using a process that is 41 + * extremely similar to the injection of an MSI. 42 + * 43 + * At this stage, we can link the guest's view of an LPI (uniquely 44 + * identified by the routing entry) and the host irq, using the GICv4 45 + * driver mapping operation. Should the mapping succeed, we've then 46 + * successfully upgraded the guest's LPI to a VLPI. We can then start 47 + * with updating GICv4's view of the property table and generating an 48 + * INValidation in order to kickstart the delivery of this VLPI to the 49 + * guest directly, without software intervention. Well, almost. 50 + * 51 + * When the PCI endpoint is deconfigured, this operation is reversed 52 + * with VFIO calling kvm_vgic_v4_unset_forwarding(). 53 + * 54 + * Once the VLPI has been mapped, it needs to follow any change the 55 + * guest performs on its LPI through the vITS. For that, a number of 56 + * command handlers have hooks to communicate these changes to the HW: 57 + * - Any invalidation triggers a call to its_prop_update_vlpi() 58 + * - The INT command results in a irq_set_irqchip_state(), which 59 + * generates an INT on the corresponding VLPI. 60 + * - The CLEAR command results in a irq_set_irqchip_state(), which 61 + * generates an CLEAR on the corresponding VLPI. 62 + * - DISCARD translates into an unmap, similar to a call to 63 + * kvm_vgic_v4_unset_forwarding(). 64 + * - MOVI is translated by an update of the existing mapping, changing 65 + * the target vcpu, resulting in a VMOVI being generated. 66 + * - MOVALL is translated by a string of mapping updates (similar to 67 + * the handling of MOVI). MOVALL is horrible. 68 + * 69 + * Note that a DISCARD/MAPTI sequence emitted from the guest without 70 + * reprogramming the PCI endpoint after MAPTI does not result in a 71 + * VLPI being mapped, as there is no callback from VFIO (the guest 72 + * will get the interrupt via the normal SW injection). 
Fixing this is 73 + * not trivial, and requires some horrible messing with the VFIO 74 + * internals. Not fun. Don't do that. 75 + * 76 + * Then there is the scheduling. Each time a vcpu is about to run on a 77 + * physical CPU, KVM must tell the corresponding redistributor about 78 + * it. And if we've migrated our vcpu from one CPU to another, we must 79 + * tell the ITS (so that the messages reach the right redistributor). 80 + * This is done in two steps: first issue a irq_set_affinity() on the 81 + * irq corresponding to the vcpu, then call its_schedule_vpe(). You 82 + * must be in a non-preemptible context. On exit, another call to 83 + * its_schedule_vpe() tells the redistributor that we're done with the 84 + * vcpu. 85 + * 86 + * Finally, the doorbell handling: Each vcpu is allocated an interrupt 87 + * which will fire each time a VLPI is made pending whilst the vcpu is 88 + * not running. Each time the vcpu gets blocked, the doorbell 89 + * interrupt gets enabled. When the vcpu is unblocked (for whatever 90 + * reason), the doorbell interrupt is disabled. 91 + */ 92 + 93 + #define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING) 94 + 95 + static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) 96 + { 97 + struct kvm_vcpu *vcpu = info; 98 + 99 + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; 100 + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 101 + kvm_vcpu_kick(vcpu); 102 + 103 + return IRQ_HANDLED; 104 + } 105 + 106 + /** 107 + * vgic_v4_init - Initialize the GICv4 data structures 108 + * @kvm: Pointer to the VM being initialized 109 + * 110 + * We may be called each time a vITS is created, or when the 111 + * vgic is initialized. This relies on kvm->lock to be 112 + * held. In both cases, the number of vcpus should now be 113 + * fixed. 114 + */ 115 + int vgic_v4_init(struct kvm *kvm) 116 + { 117 + struct vgic_dist *dist = &kvm->arch.vgic; 118 + struct kvm_vcpu *vcpu; 119 + int i, nr_vcpus, ret; 120 + 121 + if (!vgic_supports_direct_msis(kvm)) 122 + return 0; /* Nothing to see here... move along. */ 123 + 124 + if (dist->its_vm.vpes) 125 + return 0; 126 + 127 + nr_vcpus = atomic_read(&kvm->online_vcpus); 128 + 129 + dist->its_vm.vpes = kzalloc(sizeof(*dist->its_vm.vpes) * nr_vcpus, 130 + GFP_KERNEL); 131 + if (!dist->its_vm.vpes) 132 + return -ENOMEM; 133 + 134 + dist->its_vm.nr_vpes = nr_vcpus; 135 + 136 + kvm_for_each_vcpu(i, vcpu, kvm) 137 + dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; 138 + 139 + ret = its_alloc_vcpu_irqs(&dist->its_vm); 140 + if (ret < 0) { 141 + kvm_err("VPE IRQ allocation failure\n"); 142 + kfree(dist->its_vm.vpes); 143 + dist->its_vm.nr_vpes = 0; 144 + dist->its_vm.vpes = NULL; 145 + return ret; 146 + } 147 + 148 + kvm_for_each_vcpu(i, vcpu, kvm) { 149 + int irq = dist->its_vm.vpes[i]->irq; 150 + 151 + /* 152 + * Don't automatically enable the doorbell, as we're 153 + * flipping it back and forth when the vcpu gets 154 + * blocked. Also disable the lazy disabling, as the 155 + * doorbell could kick us out of the guest too 156 + * early... 157 + */ 158 + irq_set_status_flags(irq, DB_IRQ_FLAGS); 159 + ret = request_irq(irq, vgic_v4_doorbell_handler, 160 + 0, "vcpu", vcpu); 161 + if (ret) { 162 + kvm_err("failed to allocate vcpu IRQ%d\n", irq); 163 + /* 164 + * Trick: adjust the number of vpes so we know 165 + * how many to nuke on teardown... 
166 + */ 167 + dist->its_vm.nr_vpes = i; 168 + break; 169 + } 170 + } 171 + 172 + if (ret) 173 + vgic_v4_teardown(kvm); 174 + 175 + return ret; 176 + } 177 + 178 + /** 179 + * vgic_v4_teardown - Free the GICv4 data structures 180 + * @kvm: Pointer to the VM being destroyed 181 + * 182 + * Relies on kvm->lock to be held. 183 + */ 184 + void vgic_v4_teardown(struct kvm *kvm) 185 + { 186 + struct its_vm *its_vm = &kvm->arch.vgic.its_vm; 187 + int i; 188 + 189 + if (!its_vm->vpes) 190 + return; 191 + 192 + for (i = 0; i < its_vm->nr_vpes; i++) { 193 + struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i); 194 + int irq = its_vm->vpes[i]->irq; 195 + 196 + irq_clear_status_flags(irq, DB_IRQ_FLAGS); 197 + free_irq(irq, vcpu); 198 + } 199 + 200 + its_free_vcpu_irqs(its_vm); 201 + kfree(its_vm->vpes); 202 + its_vm->nr_vpes = 0; 203 + its_vm->vpes = NULL; 204 + } 205 + 206 + int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu) 207 + { 208 + if (!vgic_supports_direct_msis(vcpu->kvm)) 209 + return 0; 210 + 211 + return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false); 212 + } 213 + 214 + int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu) 215 + { 216 + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; 217 + int err; 218 + 219 + if (!vgic_supports_direct_msis(vcpu->kvm)) 220 + return 0; 221 + 222 + /* 223 + * Before making the VPE resident, make sure the redistributor 224 + * corresponding to our current CPU expects us here. See the 225 + * doc in drivers/irqchip/irq-gic-v4.c to understand how this 226 + * turns into a VMOVP command at the ITS level. 227 + */ 228 + err = irq_set_affinity(irq, cpumask_of(smp_processor_id())); 229 + if (err) 230 + return err; 231 + 232 + err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true); 233 + if (err) 234 + return err; 235 + 236 + /* 237 + * Now that the VPE is resident, let's get rid of a potential 238 + * doorbell interrupt that would still be pending. 239 + */ 240 + err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false); 241 + 242 + return err; 243 + } 244 + 245 + static struct vgic_its *vgic_get_its(struct kvm *kvm, 246 + struct kvm_kernel_irq_routing_entry *irq_entry) 247 + { 248 + struct kvm_msi msi = (struct kvm_msi) { 249 + .address_lo = irq_entry->msi.address_lo, 250 + .address_hi = irq_entry->msi.address_hi, 251 + .data = irq_entry->msi.data, 252 + .flags = irq_entry->msi.flags, 253 + .devid = irq_entry->msi.devid, 254 + }; 255 + 256 + return vgic_msi_to_its(kvm, &msi); 257 + } 258 + 259 + int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, 260 + struct kvm_kernel_irq_routing_entry *irq_entry) 261 + { 262 + struct vgic_its *its; 263 + struct vgic_irq *irq; 264 + struct its_vlpi_map map; 265 + int ret; 266 + 267 + if (!vgic_supports_direct_msis(kvm)) 268 + return 0; 269 + 270 + /* 271 + * Get the ITS, and escape early on error (not a valid 272 + * doorbell for any of our vITSs). 273 + */ 274 + its = vgic_get_its(kvm, irq_entry); 275 + if (IS_ERR(its)) 276 + return 0; 277 + 278 + mutex_lock(&its->its_lock); 279 + 280 + /* Perform then actual DevID/EventID -> LPI translation. */ 281 + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, 282 + irq_entry->msi.data, &irq); 283 + if (ret) 284 + goto out; 285 + 286 + /* 287 + * Emit the mapping request. If it fails, the ITS probably 288 + * isn't v4 compatible, so let's silently bail out. Holding 289 + * the ITS lock should ensure that nothing can modify the 290 + * target vcpu. 
291 + */ 292 + map = (struct its_vlpi_map) { 293 + .vm = &kvm->arch.vgic.its_vm, 294 + .vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe, 295 + .vintid = irq->intid, 296 + .properties = ((irq->priority & 0xfc) | 297 + (irq->enabled ? LPI_PROP_ENABLED : 0) | 298 + LPI_PROP_GROUP1), 299 + .db_enabled = true, 300 + }; 301 + 302 + ret = its_map_vlpi(virq, &map); 303 + if (ret) 304 + goto out; 305 + 306 + irq->hw = true; 307 + irq->host_irq = virq; 308 + 309 + out: 310 + mutex_unlock(&its->its_lock); 311 + return ret; 312 + } 313 + 314 + int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, 315 + struct kvm_kernel_irq_routing_entry *irq_entry) 316 + { 317 + struct vgic_its *its; 318 + struct vgic_irq *irq; 319 + int ret; 320 + 321 + if (!vgic_supports_direct_msis(kvm)) 322 + return 0; 323 + 324 + /* 325 + * Get the ITS, and escape early on error (not a valid 326 + * doorbell for any of our vITSs). 327 + */ 328 + its = vgic_get_its(kvm, irq_entry); 329 + if (IS_ERR(its)) 330 + return 0; 331 + 332 + mutex_lock(&its->its_lock); 333 + 334 + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, 335 + irq_entry->msi.data, &irq); 336 + if (ret) 337 + goto out; 338 + 339 + WARN_ON(!(irq->hw && irq->host_irq == virq)); 340 + irq->hw = false; 341 + ret = its_unmap_vlpi(virq); 342 + 343 + out: 344 + mutex_unlock(&its->its_lock); 345 + return ret; 346 + } 347 + 348 + void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu) 349 + { 350 + if (vgic_supports_direct_msis(vcpu->kvm)) { 351 + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; 352 + if (irq) 353 + enable_irq(irq); 354 + } 355 + } 356 + 357 + void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu) 358 + { 359 + if (vgic_supports_direct_msis(vcpu->kvm)) { 360 + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; 361 + if (irq) 362 + disable_irq(irq); 363 + } 364 + }
+52 -15
virt/kvm/arm/vgic/vgic.c
··· 17 17 #include <linux/kvm.h> 18 18 #include <linux/kvm_host.h> 19 19 #include <linux/list_sort.h> 20 + #include <linux/interrupt.h> 21 + #include <linux/irq.h> 20 22 21 23 #include "vgic.h" 22 24 ··· 411 409 return 0; 412 410 } 413 411 414 - int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) 412 + /* @irq->irq_lock must be held */ 413 + static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, 414 + unsigned int host_irq) 415 415 { 416 - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); 416 + struct irq_desc *desc; 417 + struct irq_data *data; 418 + 419 + /* 420 + * Find the physical IRQ number corresponding to @host_irq 421 + */ 422 + desc = irq_to_desc(host_irq); 423 + if (!desc) { 424 + kvm_err("%s: no interrupt descriptor\n", __func__); 425 + return -EINVAL; 426 + } 427 + data = irq_desc_get_irq_data(desc); 428 + while (data->parent_data) 429 + data = data->parent_data; 430 + 431 + irq->hw = true; 432 + irq->host_irq = host_irq; 433 + irq->hwintid = data->hwirq; 434 + return 0; 435 + } 436 + 437 + /* @irq->irq_lock must be held */ 438 + static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) 439 + { 440 + irq->hw = false; 441 + irq->hwintid = 0; 442 + } 443 + 444 + int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, 445 + u32 vintid) 446 + { 447 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); 417 448 unsigned long flags; 449 + int ret; 418 450 419 451 BUG_ON(!irq); 420 452 421 453 spin_lock_irqsave(&irq->irq_lock, flags); 422 - 423 - irq->hw = true; 424 - irq->hwintid = phys_irq; 425 - 454 + ret = kvm_vgic_map_irq(vcpu, irq, host_irq); 426 455 spin_unlock_irqrestore(&irq->irq_lock, flags); 427 456 vgic_put_irq(vcpu->kvm, irq); 428 457 429 - return 0; 458 + return ret; 430 459 } 431 460 432 - int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) 461 + int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) 433 462 { 434 463 struct vgic_irq *irq; 435 464 unsigned long flags; ··· 468 435 if (!vgic_initialized(vcpu->kvm)) 469 436 return -EAGAIN; 470 437 471 - irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); 438 + irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); 472 439 BUG_ON(!irq); 473 440 474 441 spin_lock_irqsave(&irq->irq_lock, flags); 475 - 476 - irq->hw = false; 477 - irq->hwintid = 0; 478 - 442 + kvm_vgic_unmap_irq(irq); 479 443 spin_unlock_irqrestore(&irq->irq_lock, flags); 480 444 vgic_put_irq(vcpu->kvm, irq); 481 445 ··· 718 688 { 719 689 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 720 690 691 + WARN_ON(vgic_v4_sync_hwstate(vcpu)); 692 + 721 693 /* An empty ap_list_head implies used_lrs == 0 */ 722 694 if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) 723 695 return; ··· 732 700 /* Flush our emulation state into the GIC hardware before entering the guest. 
*/ 733 701 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) 734 702 { 703 + WARN_ON(vgic_v4_flush_hwstate(vcpu)); 704 + 735 705 /* 736 706 * If there are no virtual interrupts active or pending for this 737 707 * VCPU, then there is no work to do and we can bail out without ··· 785 751 if (!vcpu->kvm->arch.vgic.enabled) 786 752 return false; 787 753 754 + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last) 755 + return true; 756 + 788 757 spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); 789 758 790 759 list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { ··· 821 784 } 822 785 } 823 786 824 - bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) 787 + bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) 825 788 { 826 - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); 789 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); 827 790 bool map_is_active; 828 791 unsigned long flags; 829 792
+10
virt/kvm/arm/vgic/vgic.h
··· 237 237 } 238 238 } 239 239 240 + int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, 241 + u32 devid, u32 eventid, struct vgic_irq **irq); 242 + struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); 243 + 244 + bool vgic_supports_direct_msis(struct kvm *kvm); 245 + int vgic_v4_init(struct kvm *kvm); 246 + void vgic_v4_teardown(struct kvm *kvm); 247 + int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu); 248 + int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu); 249 + 240 250 #endif