Merge branch 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: Avoid redelivery of edge interrupt before next edge
KVM: MMU: limit rmap chain length
KVM: ia64: fix build failures due to ia64/unsigned long mismatches
KVM: Make KVM_HPAGES_PER_HPAGE unsigned long to avoid build error on powerpc
KVM: fix ack not being delivered when msi present
KVM: s390: fix wait_queue handling
KVM: VMX: Fix locking imbalance on emulation failure
KVM: VMX: Fix locking order in handle_invalid_guest_state
KVM: MMU: handle n_free_mmu_pages > n_alloc_mmu_pages in kvm_mmu_change_mmu_pages
KVM: SVM: force new asid on vcpu migration
KVM: x86: verify MTRR/PAT validity
KVM: PIT: fix kpit_elapsed division by zero
KVM: Fix KVM_GET_MSR_INDEX_LIST

+110 -41
+4 -2
arch/ia64/kvm/mmio.c
···
 		vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
 		/* Write high word. FIXME: this is a kludge! */
 		v.u.bits[1] &= 0x3ffff;
-		mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
+				ma, IOREQ_WRITE);
 		data = v.u.bits[0];
 		size = 3;
 	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
···
 
 		/* Write high word.FIXME: this is a kludge! */
 		v.u.bits[1] &= 0x3ffff;
-		mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
+				8, ma, IOREQ_WRITE);
 		data = v.u.bits[0];
 		size = 3;
 	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
+3 -3
arch/ia64/kvm/vcpu.c
···
 u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
 {
 	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	u64 val;
+	unsigned long val;
 
 	if (!reg)
 		return 0;
···
 	return val;
 }
 
-void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat)
+void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
 {
 	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
 	long sof = (regs->cr_ifs) & 0x7f;
···
 	vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
 }
 
-int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
+int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
 {
 	struct thash_data *data;
 	union ia64_isr visr, pt_isr;
+7 -6
arch/ia64/kvm/vcpu.h
···
 	return highest_bits((int *)&(VMX(vcpu, insvc[0])));
 }
 
-extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
 			struct ia64_fpreg *val);
-extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
 			struct ia64_fpreg *val);
-extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg);
-extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat);
-extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu);
-extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val);
+extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
+extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
+			u64 val, int nat);
+extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
+extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
 extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
 extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
 extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
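
The three ia64 hunks above all address the same problem: prototypes, locals and pointer arguments declared as u64 where the definitions or callers use unsigned long. The two types may be the same width, but they are distinct types in C, so the mismatch produces warnings that break the build; the fix is to make the declarations agree and to cast the remaining pointer arguments in mmio.c. A minimal standalone sketch of the underlying rule, not kernel code, with made-up names and unsigned long long standing in for u64:

#include <stdio.h>

/* A callee that, like mmio_access(), takes a pointer to the 64-bit type. */
static void takes_ull_ptr(unsigned long long *p)
{
	*p += 1;
}

int main(void)
{
	unsigned long x = 41;

	printf("sizeof(unsigned long)      = %zu\n", sizeof(unsigned long));
	printf("sizeof(unsigned long long) = %zu\n", sizeof(unsigned long long));

	/* takes_ull_ptr(&x);  -- would warn: incompatible pointer type */
	takes_ull_ptr((unsigned long long *)&x);	/* explicit cast, as the
							 * (u64 *) cast in mmio.c
							 * does; relies on the two
							 * types having identical
							 * representation here */
	printf("x = %lu\n", x);
	return 0;
}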
+1 -1
arch/powerpc/include/asm/kvm_host.h
···
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
 /* We don't currently support large pages. */
-#define KVM_PAGES_PER_HPAGE (1<<31)
+#define KVM_PAGES_PER_HPAGE (1UL << 31)
 
 struct kvm;
 struct kvm_run;
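
The powerpc hunk works around the classic 1 << 31 pitfall: the shift is performed in (signed) int, which cannot represent 2^31, so the macro does not expand to the intended constant when used in unsigned long arithmetic; shifting an unsigned long is well defined. A small standalone sketch, assuming a typical LP64 host, not kernel code:

#include <stdio.h>

#define HPAGE_INT	(1 << 31)	/* overflows int (undefined behaviour);
					 * common compilers fold it to INT_MIN
					 * and warn */
#define HPAGE_UL	(1UL << 31)	/* well defined on a 64-bit host */

int main(void)
{
	unsigned long a = HPAGE_INT;	/* negative int sign-extends on conversion */
	unsigned long b = HPAGE_UL;

	printf("1 << 31   as unsigned long: %#lx\n", a);
	printf("1UL << 31 as unsigned long: %#lx\n", b);
	return 0;
}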
+1 -1
arch/s390/kvm/interrupt.c
···
 	}
 	__unset_cpu_idle(vcpu);
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&vcpu->wq, &wait);
+	remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
 	spin_unlock_bh(&vcpu->arch.local_int.lock);
 	spin_unlock(&vcpu->arch.local_int.float_int->lock);
 	hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
+3
arch/x86/kvm/i8254.c
···
 	ktime_t remaining;
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
 
+	if (!ps->pit_timer.period)
+		return 0;
+
 	/*
 	 * The Counter does not stop when it reaches zero. In
 	 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
+36 -12
arch/x86/kvm/mmu.c
···
  *
  * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
  * containing more mappings.
+ *
+ * Returns the number of rmap entries before the spte was added or zero if
+ * the spte was not added.
+ *
  */
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
+static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
 {
 	struct kvm_mmu_page *sp;
 	struct kvm_rmap_desc *desc;
 	unsigned long *rmapp;
-	int i;
+	int i, count = 0;
 
 	if (!is_rmap_pte(*spte))
-		return;
+		return count;
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	sp = page_header(__pa(spte));
 	sp->gfns[spte - sp->spt] = gfn;
···
 	} else {
 		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
 		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
-		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+		while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
 			desc = desc->more;
+			count += RMAP_EXT;
+		}
 		if (desc->shadow_ptes[RMAP_EXT-1]) {
 			desc->more = mmu_alloc_rmap_desc(vcpu);
 			desc = desc->more;
···
 			;
 		desc->shadow_ptes[i] = spte;
 	}
+	return count;
 }
 
 static void rmap_desc_remove_entry(unsigned long *rmapp,
···
 		spte = rmap_next(kvm, rmapp, spte);
 	}
 	return young;
+}
+
+#define RMAP_RECYCLE_THRESHOLD 1000
+
+static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
+{
+	unsigned long *rmapp;
+
+	gfn = unalias_gfn(vcpu->kvm, gfn);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+
+	kvm_unmap_rmapp(vcpu->kvm, rmapp);
+	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
···
  */
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
 {
+	int used_pages;
+
+	used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
+	used_pages = max(0, used_pages);
+
 	/*
 	 * If we set the number of mmu pages to be smaller be than the
 	 * number of actived pages , we must to free some mmu pages before we
 	 * change the value
 	 */
 
-	if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) >
-	    kvm_nr_mmu_pages) {
-		int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
-				       - kvm->arch.n_free_mmu_pages;
-
-		while (n_used_mmu_pages > kvm_nr_mmu_pages) {
+	if (used_pages > kvm_nr_mmu_pages) {
+		while (used_pages > kvm_nr_mmu_pages) {
 			struct kvm_mmu_page *page;
 
 			page = container_of(kvm->arch.active_mmu_pages.prev,
 					    struct kvm_mmu_page, link);
 			kvm_mmu_zap_page(kvm, page);
-			n_used_mmu_pages--;
+			used_pages--;
 		}
 		kvm->arch.n_free_mmu_pages = 0;
 	}
···
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
+	int rmap_count;
 
 	pgprintk("%s: spte %llx access %x write_fault %d"
 		 " user_fault %d gfn %lx\n",
···
 
 	page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
 	if (!was_rmapped) {
-		rmap_add(vcpu, shadow_pte, gfn, largepage);
+		rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
 		if (!is_rmap_pte(*shadow_pte))
 			kvm_release_pfn_clean(pfn);
+		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
+			rmap_recycle(vcpu, gfn, largepage);
 	} else {
 		if (was_writeble)
 			kvm_release_pfn_dirty(pfn);
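
The mmu.c changes cap how long an rmap chain may grow: rmap_add() now reports how many entries were already chained for the gfn, and mmu_set_spte() recycles the whole chain via rmap_recycle() once that count passes RMAP_RECYCLE_THRESHOLD. A much-simplified standalone sketch of the same count-then-recycle pattern, not kernel code, with invented names (chain_add, chain_recycle) and a plain linked list instead of RMAP_EXT-sized descriptors:

#include <stdio.h>
#include <stdlib.h>

#define RECYCLE_THRESHOLD 1000

struct node {
	unsigned long val;
	struct node *next;
};

/* Prepend an entry and return how many entries were already on the chain. */
static int chain_add(struct node **head, unsigned long val)
{
	struct node *n = malloc(sizeof(*n));
	struct node *it;
	int count = 0;

	if (!n) {
		perror("malloc");
		exit(1);
	}
	for (it = *head; it; it = it->next)
		count++;
	n->val = val;
	n->next = *head;
	*head = n;
	return count;
}

/* Drop every entry, as rmap_recycle() drops all sptes for the gfn. */
static void chain_recycle(struct node **head)
{
	struct node *it = *head, *next;

	while (it) {
		next = it->next;
		free(it);
		it = next;
	}
	*head = NULL;
}

int main(void)
{
	struct node *head = NULL;
	unsigned long i;
	int recycled = 0;

	for (i = 0; i < 5000; i++) {
		if (chain_add(&head, i) > RECYCLE_THRESHOLD) {
			chain_recycle(&head);
			recycled++;
		}
	}
	printf("recycled %d times\n", recycled);
	chain_recycle(&head);
	return 0;
}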
+3 -3
arch/x86/kvm/svm.c
···
 		svm->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
 		kvm_migrate_timers(vcpu);
+		svm->asid_generation = 0;
 	}
 
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
···
 		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
 	}
 
-	svm->vcpu.cpu = svm_data->cpu;
 	svm->asid_generation = svm_data->asid_generation;
 	svm->vmcb->control.asid = svm_data->next_asid++;
 }
···
 	struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
 
 	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
-	if (svm->vcpu.cpu != cpu ||
-	    svm->asid_generation != svm_data->asid_generation)
+	/* FIXME: handle wraparound of asid_generation */
+	if (svm->asid_generation != svm_data->asid_generation)
 		new_asid(svm, svm_data);
 }
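
The svm.c change drops the svm->vcpu.cpu comparison and instead forces svm->asid_generation to 0 when the vCPU is loaded on a different CPU, so the generation check above is guaranteed to miss and a fresh ASID is allocated on the new CPU. A standalone sketch of that generation-counter idiom, not kernel code, with invented names and a tiny ASID space:

#include <stdio.h>

#define MAX_ASID 4	/* tiny on purpose, to show the rollover branch */

struct cpu_asid {
	unsigned int generation;	/* starts at 1, never 0 */
	unsigned int next_asid;
};

struct vcpu_asid {
	unsigned int generation;	/* 0 means "no valid ASID" */
	unsigned int asid;
};

static void pick_asid(struct vcpu_asid *v, struct cpu_asid *c)
{
	if (v->generation == c->generation)
		return;			/* cached ASID is still valid */
	if (c->next_asid > MAX_ASID) {	/* rollover: start a new generation */
		c->generation++;
		c->next_asid = 1;
	}
	v->generation = c->generation;
	v->asid = c->next_asid++;
	printf("new asid %u (generation %u)\n", v->asid, c->generation);
}

int main(void)
{
	struct cpu_asid cpu0 = { .generation = 1, .next_asid = 1 };
	struct cpu_asid cpu1 = { .generation = 1, .next_asid = 1 };
	struct vcpu_asid vcpu = { 0 };

	pick_asid(&vcpu, &cpu0);	/* first run: allocates */
	pick_asid(&vcpu, &cpu0);	/* same generation: keeps its ASID */

	vcpu.generation = 0;		/* "migrated": force a mismatch */
	pick_asid(&vcpu, &cpu1);	/* allocates again on the new CPU */
	return 0;
}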
+3 -3
arch/x86/kvm/vmx.c
···
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	enum emulation_result err = EMULATE_DONE;
 
-	preempt_enable();
 	local_irq_enable();
+	preempt_enable();
 
 	while (!guest_state_valid(vcpu)) {
 		err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
···
 
 		if (err != EMULATE_DONE) {
 			kvm_report_emulation_failure(vcpu, "emulation failure");
-			return;
+			break;
 		}
 
 		if (signal_pending(current))
···
 			schedule();
 	}
 
-	local_irq_disable();
 	preempt_disable();
+	local_irq_disable();
 
 	vmx->invalid_state_emulation_result = err;
 }
+40 -4
arch/x86/kvm/x86.c
···
 	return false;
 }
 
+static bool valid_pat_type(unsigned t)
+{
+	return t < 8 && (1 << t) & 0xf3;	/* 0, 1, 4, 5, 6, 7 */
+}
+
+static bool valid_mtrr_type(unsigned t)
+{
+	return t < 8 && (1 << t) & 0x73;	/* 0, 1, 4, 5, 6 */
+}
+
+static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	int i;
+
+	if (!msr_mtrr_valid(msr))
+		return false;
+
+	if (msr == MSR_IA32_CR_PAT) {
+		for (i = 0; i < 8; i++)
+			if (!valid_pat_type((data >> (i * 8)) & 0xff))
+				return false;
+		return true;
+	} else if (msr == MSR_MTRRdefType) {
+		if (data & ~0xcff)
+			return false;
+		return valid_mtrr_type(data & 0xff);
+	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
+		for (i = 0; i < 8 ; i++)
+			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
+				return false;
+		return true;
+	}
+
+	/* variable MTRRs */
+	return valid_mtrr_type(data & 0xff);
+}
+
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
 
-	if (!msr_mtrr_valid(msr))
+	if (!mtrr_valid(vcpu, msr, data))
 		return 1;
 
 	if (msr == MSR_MTRRdefType) {
···
 	if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
 		goto out;
 	r = -E2BIG;
-	if (n < num_msrs_to_save)
+	if (n < msr_list.nmsrs)
 		goto out;
 	r = -EFAULT;
 	if (copy_to_user(user_msr_list->indices, &msrs_to_save,
 			 num_msrs_to_save * sizeof(u32)))
 		goto out;
-	if (copy_to_user(user_msr_list->indices
-			 + num_msrs_to_save * sizeof(u32),
+	if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
 			 &emulated_msrs,
 			 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
 		goto out;
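
The x86.c hunks do two things: set_msr_mtrr() now rejects reserved PAT/MTRR memory types via mtrr_valid(), and KVM_GET_MSR_INDEX_LIST checks the caller's buffer size against msr_list.nmsrs and fixes the destination of the second copy_to_user(). The latter is a pointer-arithmetic bug: indices is a u32 pointer, so adding num_msrs_to_save already advances by that many elements, and the old "* sizeof(u32)" overshot by a factor of four. A tiny standalone sketch of the scaling rule, not kernel code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t indices[16];
	size_t num_saved = 3;

	/* Pointer arithmetic is in elements, not bytes. */
	uint32_t *right = indices + num_saved;				/* element 3 */
	uint32_t *wrong = indices + num_saved * sizeof(uint32_t);	/* element 12 */

	printf("right offset: %td elements\n", right - indices);
	printf("wrong offset: %td elements\n", wrong - indices);
	return 0;
}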
+1
include/linux/kvm_host.h
···
 
 struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
+	u32 type;
 	int (*set)(struct kvm_kernel_irq_routing_entry *e,
 		   struct kvm *kvm, int level);
 	union {
+5 -5
virt/kvm/ioapic.c
···
 		if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
 			pent->fields.remote_irr = 1;
 	}
-	if (!pent->fields.trig_mode)
-		ioapic->irr &= ~(1 << idx);
 
 	return injected;
 }
···
 		mask_after = ioapic->redirtbl[index].fields.mask;
 		if (mask_before != mask_after)
 			kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
-		if (ioapic->irr & (1 << index))
+		if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG
+		    && ioapic->irr & (1 << index))
 			ioapic_service(ioapic, index);
 		break;
 	}
···
 		if (!level)
 			ioapic->irr &= ~mask;
 		else {
+			int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
 			ioapic->irr |= mask;
-			if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
-			    || !entry.fields.remote_irr)
+			if ((edge && old_irr != ioapic->irr) ||
+			    (!edge && !entry.fields.remote_irr))
 				ret = ioapic_service(ioapic, irq);
 		}
+3 -1
virt/kvm/irq_comm.c
···
 	unsigned gsi = pin;
 
 	list_for_each_entry(e, &kvm->irq_routing, link)
-		if (e->irqchip.irqchip == irqchip &&
+		if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
+		    e->irqchip.irqchip == irqchip &&
 		    e->irqchip.pin == pin) {
 			gsi = e->gsi;
 			break;
···
 	int delta;
 
 	e->gsi = ue->gsi;
+	e->type = ue->type;
 	switch (ue->type) {
 	case KVM_IRQ_ROUTING_IRQCHIP:
 		delta = 0;