Merge branch 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm:
KVM: ia64: Makefile fix for forcing to re-generate asm-offsets.h
KVM: Future-proof device assignment ABI
KVM: ia64: Fix halt emulation logic
KVM: Fix guest shared interrupt with in-kernel irqchip
KVM: MMU: sync root on paravirt TLB flush

+140 -58
+5 -1
arch/ia64/include/asm/kvm_host.h
@@ -365,7 +365,8 @@
        long itc_offset;
        unsigned long itc_check;
        unsigned long timer_check;
-       unsigned long timer_pending;
+       unsigned int timer_pending;
+       unsigned int timer_fired;
 
        unsigned long vrr[8];
        unsigned long ibr[8];
@@ -418,6 +417,9 @@
        struct list_head assigned_dev_head;
        struct dmar_domain *intel_iommu_domain;
        struct hlist_head irq_ack_notifier_list;
+
+       unsigned long irq_sources_bitmap;
+       unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 };
 
 union cpuid3_t {
+6 -2
arch/ia64/kvm/Makefile
@@ -29,12 +29,17 @@
         echo ""; \
         echo "#endif" ) > $@
 endef
+
 # We use internal rules to avoid the "is up to date" message from make
-arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c
+arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \
+                       $(wildcard $(srctree)/arch/ia64/include/asm/*.h)\
+                       $(wildcard $(srctree)/include/linux/*.h)
        $(call if_changed_dep,cc_s_c)
 
 $(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
        $(call cmd,offsets)
+
+FORCE : $(obj)/$(offsets-file)
 
 #
 # Makefile for Kernel-based Virtual Machine module
@@ -58,7 +53,6 @@
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
 obj-$(CONFIG_KVM) += kvm.o
 
-FORCE : $(obj)/$(offsets-file)
 EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
 kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
        vtlb.o process.o
+43 -41
arch/ia64/kvm/kvm-ia64.c
@@ -385,6 +385,7 @@
        struct kvm *kvm = vcpu->kvm;
        struct call_data call_data;
        int i;
+
        call_data.ptc_g_data = p->u.ptc_g_data;
 
        for (i = 0; i < KVM_MAX_VCPUS; i++) {
@@ -419,32 +418,40 @@
        ktime_t kt;
        long itc_diff;
        unsigned long vcpu_now_itc;
-
        unsigned long expires;
        struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
        unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
        struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
 
-       vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
-
-       if (time_after(vcpu_now_itc, vpd->itm)) {
-               vcpu->arch.timer_check = 1;
-               return 1;
-       }
-       itc_diff = vpd->itm - vcpu_now_itc;
-       if (itc_diff < 0)
-               itc_diff = -itc_diff;
-
-       expires = div64_u64(itc_diff, cyc_per_usec);
-       kt = ktime_set(0, 1000 * expires);
-       vcpu->arch.ht_active = 1;
-       hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
-
        if (irqchip_in_kernel(vcpu->kvm)) {
+
+               vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
+
+               if (time_after(vcpu_now_itc, vpd->itm)) {
+                       vcpu->arch.timer_check = 1;
+                       return 1;
+               }
+               itc_diff = vpd->itm - vcpu_now_itc;
+               if (itc_diff < 0)
+                       itc_diff = -itc_diff;
+
+               expires = div64_u64(itc_diff, cyc_per_usec);
+               kt = ktime_set(0, 1000 * expires);
+
+               down_read(&vcpu->kvm->slots_lock);
+               vcpu->arch.ht_active = 1;
+               hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
+
                vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
                kvm_vcpu_block(vcpu);
                hrtimer_cancel(p_ht);
                vcpu->arch.ht_active = 0;
+
+               if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+                       if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+                               vcpu->arch.mp_state =
+                                       KVM_MP_STATE_RUNNABLE;
+               up_read(&vcpu->kvm->slots_lock);
 
                if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
                        return -EINTR;
@@ -492,10 +483,6 @@
 
 static const int kvm_vti_max_exit_handlers =
        sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
-
-static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu)
-{
-}
 
 static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
 {
@@ -605,8 +600,6 @@
 
 again:
        preempt_disable();
-
-       kvm_prepare_guest_switch(vcpu);
        local_irq_disable();
 
        if (signal_pending(current)) {
@@ -617,7 +614,7 @@
 
        vcpu->guest_mode = 1;
        kvm_guest_enter();
-
+       down_read(&vcpu->kvm->slots_lock);
        r = vti_vcpu_run(vcpu, kvm_run);
        if (r < 0) {
                local_irq_enable();
@@ -637,9 +634,8 @@
         * But we need to prevent reordering, hence this barrier():
         */
        barrier();
-
        kvm_guest_exit();
-
+       up_read(&vcpu->kvm->slots_lock);
        preempt_enable();
 
        r = kvm_handle_exit(kvm_run, vcpu);
@@ -675,6 +673,7 @@
 
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                kvm_vcpu_block(vcpu);
+               clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
                vcpu_put(vcpu);
                return -EAGAIN;
        }
@@ -781,6 +778,9 @@
        kvm_build_io_pmt(kvm);
 
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+
+       /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+       set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 }
 
 struct kvm *kvm_arch_create_vm(void)
@@ -947,9 +941,8 @@
                        goto out;
                if (irqchip_in_kernel(kvm)) {
                        mutex_lock(&kvm->lock);
-                       kvm_ioapic_set_irq(kvm->arch.vioapic,
-                                   irq_event.irq,
-                                   irq_event.level);
+                       kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                                   irq_event.irq, irq_event.level);
                        mutex_unlock(&kvm->lock);
                        r = 0;
                }
@@ -1128,15 +1123,16 @@
        wait_queue_head_t *q;
 
        vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer);
+       q = &vcpu->wq;
+
        if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
                goto out;
 
-       q = &vcpu->wq;
-       if (waitqueue_active(q)) {
-               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+       if (waitqueue_active(q))
                wake_up_interruptible(q);
-       }
+
 out:
+       vcpu->arch.timer_fired = 1;
        vcpu->arch.timer_check = 1;
        return HRTIMER_NORESTART;
 }
@@ -1706,12 +1700,14 @@
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
        int ipi_pcpu = vcpu->cpu;
+       int cpu = get_cpu();
 
        if (waitqueue_active(&vcpu->wq))
                wake_up_interruptible(&vcpu->wq);
 
-       if (vcpu->guest_mode)
+       if (vcpu->guest_mode && cpu != ipi_pcpu)
                smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+       put_cpu();
 }
 
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
@@ -1723,13 +1715,7 @@
 
        if (!test_and_set_bit(vec, &vpd->irr[0])) {
                vcpu->arch.irq_new_pending = 1;
-               if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
-                       kvm_vcpu_kick(vcpu);
-               else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
-                       vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-                       if (waitqueue_active(&vcpu->wq))
-                               wake_up_interruptible(&vcpu->wq);
-               }
+               kvm_vcpu_kick(vcpu);
                return 1;
        }
        return 0;
@@ -1793,7 +1791,7 @@
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-       return 0;
+       return vcpu->arch.timer_fired;
 }
 
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+7 -2
arch/ia64/kvm/kvm_fw.c
@@ -286,5 +286,11 @@
        return index;
 }
 
+static void prepare_for_halt(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.timer_pending = 1;
+       vcpu->arch.timer_fired = 0;
+}
+
 int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
@@ -310,11 +304,10 @@
                break;
        case PAL_HALT_LIGHT:
        {
-               vcpu->arch.timer_pending = 1;
                INIT_PAL_STATUS_SUCCESS(result);
+               prepare_for_halt(vcpu);
                if (kvm_highest_pending_irq(vcpu) == -1)
                        ret = kvm_emulate_halt(vcpu);
-
        }
        break;
 
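Taken together with the kvm-ia64.c hunks above, this gives a simple two-flag handshake: PAL_HALT_LIGHT arms the halt via prepare_for_halt() (raising timer_pending and clearing any stale timer_fired), the hlt_timer expiry sets timer_fired, and kvm_cpu_has_pending_timer() now reports that flag instead of a hard-coded 0. A minimal userspace model of the handshake; all toy_* names are invented for illustration, not kernel API:

#include <stdio.h>

/* Toy vcpu state mirroring the two flags the series introduces. */
struct toy_vcpu {
        unsigned int timer_pending;
        unsigned int timer_fired;
};

/* Models prepare_for_halt(): arm the halt, forget stale expirations. */
static void toy_prepare_for_halt(struct toy_vcpu *v)
{
        v->timer_pending = 1;
        v->timer_fired = 0;
}

/* Models the hlt_timer expiry: record that the guest timer went off. */
static void toy_timer_expired(struct toy_vcpu *v)
{
        v->timer_fired = 1;
}

/* Models kvm_cpu_has_pending_timer(): previously always 0 on ia64. */
static int toy_has_pending_timer(struct toy_vcpu *v)
{
        return v->timer_fired;
}

int main(void)
{
        struct toy_vcpu v = { 0, 0 };

        toy_prepare_for_halt(&v);
        printf("halted, pending timer? %d\n", toy_has_pending_timer(&v));      /* 0 */
        toy_timer_expired(&v);
        printf("after expiry, pending timer? %d\n", toy_has_pending_timer(&v)); /* 1 */
        return 0;
}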
+1 -1
arch/ia64/kvm/process.c
@@ -713,7 +713,7 @@
                        if (!(VCPU(v, itv) & (1 << 16))) {
                                vcpu_pend_interrupt(v, VCPU(v, itv)
                                                & 0xff);
-                       VMX(v, itc_check) = 0;
+                               VMX(v, itc_check) = 0;
                        } else {
                                v->arch.timer_pending = 1;
                        }
+3
arch/x86/include/asm/kvm_host.h
@@ -364,6 +364,9 @@
 
        struct page *ept_identity_pagetable;
        bool ept_identity_pagetable_done;
+
+       unsigned long irq_sources_bitmap;
+       unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 };
 
 struct kvm_vm_stat {
+9 -2
arch/x86/kvm/i8254.c
@@ -545,6 +545,12 @@
        if (!pit)
                return NULL;
 
+       mutex_lock(&kvm->lock);
+       pit->irq_source_id = kvm_request_irq_source_id(kvm);
+       mutex_unlock(&kvm->lock);
+       if (pit->irq_source_id < 0)
+               return NULL;
+
        mutex_init(&pit->pit_state.lock);
        mutex_lock(&pit->pit_state.lock);
        spin_lock_init(&pit->pit_state.inject_lock);
@@ -593,6 +587,7 @@
        mutex_lock(&kvm->arch.vpit->pit_state.lock);
        timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
        hrtimer_cancel(timer);
+       kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
        mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        kfree(kvm->arch.vpit);
 }
@@ -602,8 +595,8 @@
 static void __inject_pit_timer_intr(struct kvm *kvm)
 {
        mutex_lock(&kvm->lock);
-       kvm_set_irq(kvm, 0, 1);
-       kvm_set_irq(kvm, 0, 0);
+       kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
+       kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
        mutex_unlock(&kvm->lock);
 }
 
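The PIT takes its own irq_source_id because __inject_pit_timer_intr() pulses the line (a 1 immediately followed by a 0); with a single shared level, that trailing 0 could deassert a GSI another source still held high. A rough standalone model of the difference; names invented, kernel locking elided:

#include <stdio.h>

/* One word of per-source bits per interrupt line, as in the series. */
static unsigned long irq_state;        /* bit n == source n asserting the line */

/* Old behaviour: last writer wins. */
static int set_irq_old(int level) { return level; }

/* New behaviour: the line is the logical OR of every source's bit. */
static int set_irq_new(int source, int level)
{
        if (level)
                irq_state |= 1UL << source;
        else
                irq_state &= ~(1UL << source);
        return irq_state != 0;
}

int main(void)
{
        /* Source 1 (say, an assigned device) holds the line high... */
        int line = set_irq_new(1, 1);

        /* ...then source 2 (say, the PIT) pulses it. */
        line = set_irq_new(2, 1);
        line = set_irq_new(2, 0);
        printf("new: line still asserted? %d\n", line);         /* 1: device intact */

        /* With last-writer-wins, the pulse's trailing 0 would have
         * deasserted the device's interrupt: */
        printf("old: line after pulse? %d\n", set_irq_old(0));  /* 0: lost */
        return 0;
}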
+1
arch/x86/kvm/i8254.h
@@ -44,6 +44,7 @@
        struct kvm_io_device speaker_dev;
        struct kvm *kvm;
        struct kvm_kpit_state pit_state;
+       int irq_source_id;
 };
 
 #define KVM_PIT_BASE_ADDRESS    0x40
+1
arch/x86/kvm/mmu.c
@@ -2634,6 +2634,7 @@
 static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
        kvm_x86_ops->tlb_flush(vcpu);
+       set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
        return 1;
 }
 
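The flush hypercall cannot resynchronize shadow roots inline, so it raises KVM_REQ_MMU_SYNC in vcpu->requests and lets the entry path service the request before the guest runs again. A sketch of that deferred-request idiom; toy types and non-atomic bit ops, not the kernel's:

#include <stdio.h>

#define TOY_REQ_TLB_FLUSH 0
#define TOY_REQ_MMU_SYNC  1

struct toy_vcpu {
        unsigned long requests;         /* one bit per deferred request */
};

/* Models set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests). */
static void toy_make_request(int req, struct toy_vcpu *v)
{
        v->requests |= 1UL << req;
}

/* Models test_and_clear_bit() as used on the guest-entry path. */
static int toy_check_request(int req, struct toy_vcpu *v)
{
        int was_set = !!(v->requests & (1UL << req));

        v->requests &= ~(1UL << req);
        return was_set;
}

int main(void)
{
        struct toy_vcpu v = { 0 };

        /* Hypercall handler: flush now, queue the root sync for later. */
        toy_make_request(TOY_REQ_MMU_SYNC, &v);

        /* vcpu_run loop, before reentering the guest: */
        if (toy_check_request(TOY_REQ_MMU_SYNC, &v))
                printf("syncing shadow roots before guest entry\n");
        if (!toy_check_request(TOY_REQ_MMU_SYNC, &v))
                printf("request consumed exactly once\n");
        return 0;
}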
+5 -1
arch/x86/kvm/x86.c
@@ -1742,7 +1742,8 @@
                goto out;
        if (irqchip_in_kernel(kvm)) {
                mutex_lock(&kvm->lock);
-               kvm_set_irq(kvm, irq_event.irq, irq_event.level);
+               kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                           irq_event.irq, irq_event.level);
                mutex_unlock(&kvm->lock);
                r = 0;
        }
@@ -4013,6 +4012,9 @@
 
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+
+       /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+       set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 
        return kvm;
 }
+6
include/linux/kvm.h
@@ -489,6 +489,9 @@
        __u32 busnr;
        __u32 devfn;
        __u32 flags;
+       union {
+               __u32 reserved[12];
+       };
 };
 
 struct kvm_assigned_irq {
@@ -496,6 +499,9 @@
        __u32 host_irq;
        __u32 guest_irq;
        __u32 flags;
+       union {
+               __u32 reserved[12];
+       };
 };
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU     (1 << 0)
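The anonymous unions reserve 48 bytes in each struct so future kernels can add fields inside the union without growing the struct, which keeps sizeof — and therefore the size encoded in the ioctl numbers — stable. A compile-and-run check of that property, using hypothetical v1/v2 layouts:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* Today's layout: nothing in the union but the reserved words. */
struct assigned_irq_v1 {
        uint32_t assigned_dev_id;
        uint32_t host_irq;
        uint32_t guest_irq;
        uint32_t flags;
        union {
                uint32_t reserved[12];
        };
};

/* A hypothetical future layout: a field carved out of the padding. */
struct assigned_irq_v2 {
        uint32_t assigned_dev_id;
        uint32_t host_irq;
        uint32_t guest_irq;
        uint32_t flags;
        union {
                uint32_t new_feature_word;      /* invented for illustration */
                uint32_t reserved[12];
        };
};

int main(void)
{
        /* Same size, so the _IOW() ioctl encoding is unchanged. */
        assert(sizeof(struct assigned_irq_v1) ==
               sizeof(struct assigned_irq_v2));
        printf("ABI size stable at %zu bytes\n",
               sizeof(struct assigned_irq_v1));
        return 0;
}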
+6 -1
include/linux/kvm_host.h
@@ -37,6 +37,8 @@
 #define KVM_REQ_UNHALT             6
 #define KVM_REQ_MMU_SYNC           7
 
+#define KVM_USERSPACE_IRQ_SOURCE_ID     0
+
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
@@ -308,15 +306,18 @@
        int host_irq;
        int guest_irq;
        int irq_requested;
+       int irq_source_id;
        struct pci_dev *dev;
        struct kvm *kvm;
 };
-void kvm_set_irq(struct kvm *kvm, int irq, int level);
+void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
                                   struct kvm_irq_ack_notifier *kian);
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
                                     struct kvm_irq_ack_notifier *kian);
+int kvm_request_irq_source_id(struct kvm *kvm);
+void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
 #ifdef CONFIG_DMAR
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+39 -3
virt/kvm/irq_comm.c
@@ -25,14 +25,22 @@
 #include "ioapic.h"
 
 /* This should be called with the kvm->lock mutex held */
-void kvm_set_irq(struct kvm *kvm, int irq, int level)
+void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 {
+       unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
+
+       /* Logical OR for level trig interrupt */
+       if (level)
+               set_bit(irq_source_id, irq_state);
+       else
+               clear_bit(irq_source_id, irq_state);
+
        /* Not possible to detect if the guest uses the PIC or the
         * IOAPIC. So set the bit in both. The guest will ignore
         * writes to the unused one.
         */
-       kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
+       kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state));
 #ifdef CONFIG_X86
-       kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
+       kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state));
 #endif
 }
@@ -57,4 +65,32 @@
                                    struct kvm_irq_ack_notifier *kian)
 {
        hlist_del(&kian->link);
 }
+
+/* The caller must hold kvm->lock mutex */
+int kvm_request_irq_source_id(struct kvm *kvm)
+{
+       unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
+       int irq_source_id = find_first_zero_bit(bitmap,
+                               sizeof(kvm->arch.irq_sources_bitmap));
+       if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
+               printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
+               irq_source_id = -EFAULT;
+       } else
+               set_bit(irq_source_id, bitmap);
+       return irq_source_id;
+}
+
+void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
+{
+       int i;
+
+       if (irq_source_id <= 0 ||
+           irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
+               printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
+               return;
+       }
+       for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
+               clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
+       clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+}
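Callers are expected to bracket use of an id with these two helpers: allocate under kvm->lock (bit 0 stays reserved for userspace), pass the id to every kvm_set_irq() call, and free it on teardown so its bit is scrubbed from every pin's state word. A condensed model of that lifecycle; toy_* names invented, locking elided:

#include <stdio.h>

#define TOY_NUM_PINS            4
#define TOY_USERSPACE_SOURCE_ID 0       /* bit 0 stays reserved */

static unsigned long sources_bitmap = 1UL << TOY_USERSPACE_SOURCE_ID;
static unsigned long pin_state[TOY_NUM_PINS];

/* Models kvm_request_irq_source_id(): grab the lowest free bit. */
static int toy_request_source_id(void)
{
        int id;

        for (id = 0; id < (int)(8 * sizeof(sources_bitmap)); id++)
                if (!(sources_bitmap & (1UL << id))) {
                        sources_bitmap |= 1UL << id;
                        return id;
                }
        return -1;                      /* exhausted */
}

/* Models kvm_free_irq_source_id(): drop the id's bit on every pin. */
static void toy_free_source_id(int id)
{
        int pin;

        for (pin = 0; pin < TOY_NUM_PINS; pin++)
                pin_state[pin] &= ~(1UL << id);
        sources_bitmap &= ~(1UL << id);
}

int main(void)
{
        int id = toy_request_source_id();

        printf("got source id %d (0 is reserved)\n", id);
        pin_state[2] |= 1UL << id;      /* assert pin 2 as this source */
        toy_free_source_id(id);
        printf("pin 2 after free: %lu (stale state scrubbed)\n",
               pin_state[2]);
        return 0;
}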
+8 -4
virt/kvm/kvm_main.c
@@ -105,14 +105,12 @@
         */
        mutex_lock(&assigned_dev->kvm->lock);
        kvm_set_irq(assigned_dev->kvm,
+                   assigned_dev->irq_source_id,
                    assigned_dev->guest_irq, 1);
        mutex_unlock(&assigned_dev->kvm->lock);
        kvm_put_kvm(assigned_dev->kvm);
 }
 
-/* FIXME: Implement the OR logic needed to make shared interrupts on
- * this line behave properly
- */
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
        struct kvm_assigned_dev_kernel *assigned_dev =
@@ -132,6 +134,6 @@
 
        dev = container_of(kian, struct kvm_assigned_dev_kernel,
                           ack_notifier);
-       kvm_set_irq(dev->kvm, dev->guest_irq, 0);
+       kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
        enable_irq(dev->host_irq);
 }
@@ -144,6 +146,7 @@
        free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
        kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+       kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 
        if (cancel_work_sync(&assigned_dev->interrupt_work))
                /* We had pending work. That means we will have to take
@@ -214,6 +215,11 @@
        match->ack_notifier.gsi = assigned_irq->guest_irq;
        match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
        kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
+       r = kvm_request_irq_source_id(kvm);
+       if (r < 0)
+               goto out_release;
+       else
+               match->irq_source_id = r;
 
        /* Even though this is PCI, we don't want to use shared
         * interrupts. Sharing host devices with guest-assigned devices