KVM: Fix guest shared interrupt with in-kernel irqchip

Every call to kvm_set_irq() must supply an irq_source_id, which is
allocated by kvm_request_irq_source_id(). Based on the irq_source_id, we
identify the irq source and implement a logical OR for shared
level-triggered interrupts.

The allocated irq_source_id can be freed by kvm_free_irq_source_id().

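Currently, we support at most sizeof(unsigned long) different irq sources
(the allocator bounds its bitmap search by the size in bytes), including
the userspace source, which always holds the reserved ID 0.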

[Amit: - rebase to kvm.git HEAD
- move definition of KVM_USERSPACE_IRQ_SOURCE_ID to common file
- move kvm_request_irq_source_id to the update_irq ioctl]

[Xiantao: - Add kvm/ia64 stuff and make it work for kvm/ia64 guests]

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

authored by Sheng Yang and committed by Avi Kivity 5550af4d 6ad9f15c

+79 -14
+3
arch/ia64/include/asm/kvm_host.h
··· 417 417 struct list_head assigned_dev_head; 418 418 struct dmar_domain *intel_iommu_domain; 419 419 struct hlist_head irq_ack_notifier_list; 420 + 421 + unsigned long irq_sources_bitmap; 422 + unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; 420 423 }; 421 424 422 425 union cpuid3_t {
+5 -3
arch/ia64/kvm/kvm-ia64.c
··· 778 778 kvm_build_io_pmt(kvm); 779 779 780 780 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 781 + 782 + /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 783 + set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 781 784 } 782 785 783 786 struct kvm *kvm_arch_create_vm(void) ··· 944 941 goto out; 945 942 if (irqchip_in_kernel(kvm)) { 946 943 mutex_lock(&kvm->lock); 947 - kvm_ioapic_set_irq(kvm->arch.vioapic, 948 - irq_event.irq, 949 - irq_event.level); 944 + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 945 + irq_event.irq, irq_event.level); 950 946 mutex_unlock(&kvm->lock); 951 947 r = 0; 952 948 }
+3
arch/x86/include/asm/kvm_host.h
··· 364 364 365 365 struct page *ept_identity_pagetable; 366 366 bool ept_identity_pagetable_done; 367 + 368 + unsigned long irq_sources_bitmap; 369 + unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; 367 370 }; 368 371 369 372 struct kvm_vm_stat {
+9 -2
arch/x86/kvm/i8254.c
··· 545 545 if (!pit) 546 546 return NULL; 547 547 548 + mutex_lock(&kvm->lock); 549 + pit->irq_source_id = kvm_request_irq_source_id(kvm); 550 + mutex_unlock(&kvm->lock); 551 + if (pit->irq_source_id < 0) 552 + return NULL; 553 + 548 554 mutex_init(&pit->pit_state.lock); 549 555 mutex_lock(&pit->pit_state.lock); 550 556 spin_lock_init(&pit->pit_state.inject_lock); ··· 593 587 mutex_lock(&kvm->arch.vpit->pit_state.lock); 594 588 timer = &kvm->arch.vpit->pit_state.pit_timer.timer; 595 589 hrtimer_cancel(timer); 590 + kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); 596 591 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 597 592 kfree(kvm->arch.vpit); 598 593 } ··· 602 595 static void __inject_pit_timer_intr(struct kvm *kvm) 603 596 { 604 597 mutex_lock(&kvm->lock); 605 - kvm_set_irq(kvm, 0, 1); 606 - kvm_set_irq(kvm, 0, 0); 598 + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); 599 + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); 607 600 mutex_unlock(&kvm->lock); 608 601 } 609 602
+1
arch/x86/kvm/i8254.h
··· 44 44 struct kvm_io_device speaker_dev; 45 45 struct kvm *kvm; 46 46 struct kvm_kpit_state pit_state; 47 + int irq_source_id; 47 48 }; 48 49 49 50 #define KVM_PIT_BASE_ADDRESS 0x40
+5 -1
arch/x86/kvm/x86.c
··· 1742 1742 goto out; 1743 1743 if (irqchip_in_kernel(kvm)) { 1744 1744 mutex_lock(&kvm->lock); 1745 - kvm_set_irq(kvm, irq_event.irq, irq_event.level); 1745 + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1746 + irq_event.irq, irq_event.level); 1746 1747 mutex_unlock(&kvm->lock); 1747 1748 r = 0; 1748 1749 } ··· 4013 4012 4014 4013 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 4015 4014 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 4015 + 4016 + /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 4017 + set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 4016 4018 4017 4019 return kvm; 4018 4020 }
+6 -1
include/linux/kvm_host.h
··· 37 37 #define KVM_REQ_UNHALT 6 38 38 #define KVM_REQ_MMU_SYNC 7 39 39 40 + #define KVM_USERSPACE_IRQ_SOURCE_ID 0 41 + 40 42 struct kvm_vcpu; 41 43 extern struct kmem_cache *kvm_vcpu_cache; 42 44 ··· 308 306 int host_irq; 309 307 int guest_irq; 310 308 int irq_requested; 309 + int irq_source_id; 311 310 struct pci_dev *dev; 312 311 struct kvm *kvm; 313 312 }; 314 - void kvm_set_irq(struct kvm *kvm, int irq, int level); 313 + void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); 315 314 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); 316 315 void kvm_register_irq_ack_notifier(struct kvm *kvm, 317 316 struct kvm_irq_ack_notifier *kian); 318 317 void kvm_unregister_irq_ack_notifier(struct kvm *kvm, 319 318 struct kvm_irq_ack_notifier *kian); 319 + int kvm_request_irq_source_id(struct kvm *kvm); 320 + void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 320 321 321 322 #ifdef CONFIG_DMAR 322 323 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+39 -3
virt/kvm/irq_comm.c
··· 25 25 #include "ioapic.h" 26 26 27 27 /* This should be called with the kvm->lock mutex held */ 28 - void kvm_set_irq(struct kvm *kvm, int irq, int level) 28 + void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) 29 29 { 30 + unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; 31 + 32 + /* Logical OR for level trig interrupt */ 33 + if (level) 34 + set_bit(irq_source_id, irq_state); 35 + else 36 + clear_bit(irq_source_id, irq_state); 37 + 30 38 /* Not possible to detect if the guest uses the PIC or the 31 39 * IOAPIC. So set the bit in both. The guest will ignore 32 40 * writes to the unused one. 33 41 */ 34 - kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level); 42 + kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state)); 35 43 #ifdef CONFIG_X86 36 - kvm_pic_set_irq(pic_irqchip(kvm), irq, level); 44 + kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state)); 37 45 #endif 38 46 } 39 47 ··· 65 57 struct kvm_irq_ack_notifier *kian) 66 58 { 67 59 hlist_del(&kian->link); 60 + } 61 + 62 + /* The caller must hold kvm->lock mutex */ 63 + int kvm_request_irq_source_id(struct kvm *kvm) 64 + { 65 + unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; 66 + int irq_source_id = find_first_zero_bit(bitmap, 67 + sizeof(kvm->arch.irq_sources_bitmap)); 68 + if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { 69 + printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); 70 + irq_source_id = -EFAULT; 71 + } else 72 + set_bit(irq_source_id, bitmap); 73 + return irq_source_id; 74 + } 75 + 76 + void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) 77 + { 78 + int i; 79 + 80 + if (irq_source_id <= 0 || 81 + irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { 82 + printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); 83 + return; 84 + } 85 + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) 86 + clear_bit(irq_source_id, &kvm->arch.irq_states[i]); 87 + clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); 68 88 }
+8 -4
virt/kvm/kvm_main.c
··· 105 105 */ 106 106 mutex_lock(&assigned_dev->kvm->lock); 107 107 kvm_set_irq(assigned_dev->kvm, 108 + assigned_dev->irq_source_id, 108 109 assigned_dev->guest_irq, 1); 109 110 mutex_unlock(&assigned_dev->kvm->lock); 110 111 kvm_put_kvm(assigned_dev->kvm); 111 112 } 112 113 113 - /* FIXME: Implement the OR logic needed to make shared interrupts on 114 - * this line behave properly 115 - */ 116 114 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) 117 115 { 118 116 struct kvm_assigned_dev_kernel *assigned_dev = ··· 132 134 133 135 dev = container_of(kian, struct kvm_assigned_dev_kernel, 134 136 ack_notifier); 135 - kvm_set_irq(dev->kvm, dev->guest_irq, 0); 137 + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); 136 138 enable_irq(dev->host_irq); 137 139 } 138 140 ··· 144 146 free_irq(assigned_dev->host_irq, (void *)assigned_dev); 145 147 146 148 kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); 149 + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); 147 150 148 151 if (cancel_work_sync(&assigned_dev->interrupt_work)) 149 152 /* We had pending work. That means we will have to take ··· 214 215 match->ack_notifier.gsi = assigned_irq->guest_irq; 215 216 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; 216 217 kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); 218 + r = kvm_request_irq_source_id(kvm); 219 + if (r < 0) 220 + goto out_release; 221 + else 222 + match->irq_source_id = r; 217 223 218 224 /* Even though this is PCI, we don't want to use shared 219 225 * interrupts. Sharing host devices with guest-assigned devices