Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
"ARM fixes. There are a couple pending x86 patches but they'll have to
wait for next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: arm/arm64: vgic: Kick VCPUs when queueing already pending IRQs
KVM: arm/arm64: vgic: Prevent access to invalid SPIs
arm/arm64: KVM: Perform local TLB invalidation when multiplexing vcpus on a single CPU

Changed files
+112 -23
+1
arch/arm/include/asm/kvm_asm.h
···
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
+3
arch/arm/include/asm/kvm_host.h
···
 	/* VTTBR value associated with below pgd and vmid */
 	u64 vttbr;
 
+	/* The last vcpu id that ran on each physical CPU */
+	int __percpu *last_vcpu_ran;
+
 	/* Timer */
 	struct arch_timer_kvm timer;
 
+1
arch/arm/include/asm/kvm_hyp.h
···
 #define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0)
 #define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0)
 #define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0)
+#define TLBIALL __ACCESS_CP15(c8, 0, c7, 0)
 #define TLBIALLNSNHIS __ACCESS_CP15(c8, 4, c3, 4)
 #define PRRR __ACCESS_CP15(c10, 0, c2, 0)
 #define NMRR __ACCESS_CP15(c10, 0, c2, 1)
+26 -1
arch/arm/kvm/arm.c
···
  */
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-	int ret = 0;
+	int ret, cpu;
 
 	if (type)
 		return -EINVAL;
+
+	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
+	if (!kvm->arch.last_vcpu_ran)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
 
 	ret = kvm_alloc_stage2_pgd(kvm);
 	if (ret)
···
 out_free_stage2_pgd:
 	kvm_free_stage2_pgd(kvm);
 out_fail_alloc:
+	free_percpu(kvm->arch.last_vcpu_ran);
+	kvm->arch.last_vcpu_ran = NULL;
 	return ret;
 }
 
···
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	int i;
+
+	free_percpu(kvm->arch.last_vcpu_ran);
+	kvm->arch.last_vcpu_ran = NULL;
 
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
···
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+	int *last_ran;
+
+	last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
+
+	/*
+	 * We might get preempted before the vCPU actually runs, but
+	 * over-invalidation doesn't affect correctness.
+	 */
+	if (*last_ran != vcpu->vcpu_id) {
+		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
+		*last_ran = vcpu->vcpu_id;
+	}
+
 	vcpu->cpu = cpu;
 	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
 
+15
arch/arm/kvm/hyp/tlb.c
···
 	__kvm_tlb_flush_vmid(kvm);
 }
 
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
+
+	/* Switch to requested VMID */
+	write_sysreg(kvm->arch.vttbr, VTTBR);
+	isb();
+
+	write_sysreg(0, TLBIALL);
+	dsb(nsh);
+	isb();
+
+	write_sysreg(0, VTTBR);
+}
+
 void __hyp_text __kvm_flush_vm_context(void)
 {
 	write_sysreg(0, TLBIALLNSNHIS);
+1
arch/arm64/include/asm/kvm_asm.h
···
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
+3
arch/arm64/include/asm/kvm_host.h
···
 	/* VTTBR value associated with above pgd and vmid */
 	u64 vttbr;
 
+	/* The last vcpu id that ran on each physical CPU */
+	int __percpu *last_vcpu_ran;
+
 	/* The maximum number of vCPUs depends on the used GIC model */
 	int max_vcpus;
 
+1 -1
arch/arm64/include/asm/kvm_mmu.h
···
 	return v;
 }
 
-#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v)))
+#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
 
 /*
  * We currently only support a 40bit IPA.
+15
arch/arm64/kvm/hyp/tlb.c
···
 	write_sysreg(0, vttbr_el2);
 }
 
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
+
+	/* Switch to requested VMID */
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	isb();
+
+	asm volatile("tlbi vmalle1" : : );
+	dsb(nsh);
+	isb();
+
+	write_sysreg(0, vttbr_el2);
+}
+
 void __hyp_text __kvm_flush_vm_context(void)
 {
 	dsb(ishst);
+27 -14
virt/kvm/arm/vgic/vgic-mmio.c
···
 	return container_of(dev, struct vgic_io_device, dev);
 }
 
-static bool check_region(const struct vgic_register_region *region,
+static bool check_region(const struct kvm *kvm,
+			 const struct vgic_register_region *region,
 			 gpa_t addr, int len)
 {
-	if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
-		return true;
-	if ((region->access_flags & VGIC_ACCESS_32bit) &&
-	    len == sizeof(u32) && !(addr & 3))
-		return true;
-	if ((region->access_flags & VGIC_ACCESS_64bit) &&
-	    len == sizeof(u64) && !(addr & 7))
-		return true;
+	int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+
+	switch (len) {
+	case sizeof(u8):
+		flags = VGIC_ACCESS_8bit;
+		break;
+	case sizeof(u32):
+		flags = VGIC_ACCESS_32bit;
+		break;
+	case sizeof(u64):
+		flags = VGIC_ACCESS_64bit;
+		break;
+	default:
+		return false;
+	}
+
+	if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) {
+		if (!region->bits_per_irq)
+			return true;
+
+		/* Do we access a non-allocated IRQ? */
+		return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs;
+	}
 
 	return false;
 }
···
 
 	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
 				       addr - iodev->base_addr);
-	if (!region || !check_region(region, addr, len)) {
+	if (!region || !check_region(vcpu->kvm, region, addr, len)) {
 		memset(val, 0, len);
 		return 0;
 	}
···
 
 	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
 				       addr - iodev->base_addr);
-	if (!region)
-		return 0;
-
-	if (!check_region(region, addr, len))
+	if (!region || !check_region(vcpu->kvm, region, addr, len))
 		return 0;
 
 	switch (iodev->iodev_type) {
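To see what the new bound buys, here is a small user-space sketch of the same logic (not kernel code): VGIC_NR_PRIVATE_IRQS is 32 as in the kernel, while the 96-SPI figure, the helper names and main() are made up for illustration, and the 1-bit-per-IRQ decode stands in for VGIC_ADDR_TO_INTID().

/*
 * Illustration only, not kernel code: models the checks done by the new
 * check_region() in user space. VGIC_NR_PRIVATE_IRQS matches the kernel;
 * everything else (names, the 96-SPI example) is made up.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VGIC_NR_PRIVATE_IRQS	32	/* SGIs + PPIs */

/* Simplified decode for a 1-bit-per-IRQ register: each byte covers 8 IRQs */
static uint32_t intid_for_offset(uint64_t addr)
{
	return addr * 8;
}

static bool access_ok(uint64_t addr, unsigned int len, unsigned int nr_spis)
{
	unsigned int nr_irqs = nr_spis + VGIC_NR_PRIVATE_IRQS;

	/* access width and alignment, like the new switch + IS_ALIGNED() */
	if (len != 1 && len != 4 && len != 8)
		return false;
	if (addr & (len - 1))
		return false;

	/* refuse accesses that decode to a non-allocated INT ID */
	return intid_for_offset(addr) < nr_irqs;
}

int main(void)
{
	/* with 96 SPIs, INT IDs 0..127 exist: offset 0x0c (INT ID 96) is
	 * accepted, offset 0x10 (INT ID 128) is now rejected */
	printf("0x0c -> %d, 0x10 -> %d\n",
	       access_ok(0x0c, 4, 96), access_ok(0x10, 4, 96));
	return 0;
}

The point of the kernel change is the same: MMIO accesses that decode to an INT ID beyond nr_spis + VGIC_NR_PRIVATE_IRQS no longer reach interrupt state that was never allocated.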
+7 -7
virt/kvm/arm/vgic/vgic-mmio.h
···
 #define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1)
 
 /*
- * (addr & mask) gives us the byte offset for the INT ID, so we want to
- * divide this with 'bytes per irq' to get the INT ID, which is given
- * by '(bits) / 8'. But we do this with fixed-point-arithmetic and
- * take advantage of the fact that division by a fraction equals
- * multiplication with the inverted fraction, and scale up both the
- * numerator and denominator with 8 to support at most 64 bits per IRQ:
+ * (addr & mask) gives us the _byte_ offset for the INT ID.
+ * We multiply this by 8 the get the _bit_ offset, then divide this by
+ * the number of bits to learn the actual INT ID.
+ * But instead of a division (which requires a "long long div" implementation),
+ * we shift by the binary logarithm of <bits>.
+ * This assumes that <bits> is a power of two.
  */
 #define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
-					64 / (bits) / 8)
+					8 >> ilog2(bits))
 
 /*
  * Some VGIC registers store per-IRQ information, with a different number
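To back up the rewritten comment, here is a quick user-space check (not from the kernel tree) that the shift form gives the same INT ID as the old fixed-point form for power-of-two <bits>; the two macros are copied from the hunk above, while the ilog2() stand-in and main() are assumptions added only for the example.

/* Standalone check that (offset * 8) >> ilog2(bits) equals the old
 * offset * 64 / bits / 8 form for power-of-two bits values. */
#include <assert.h>
#include <stdio.h>

/* Stand-in for the kernel's ilog2() on power-of-two inputs */
static unsigned int ilog2(unsigned long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

/* Copied from the hunk above */
#define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1)
#define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
					8 >> ilog2(bits))

int main(void)
{
	unsigned long addr, bits;

	for (bits = 1; bits <= 64; bits <<= 1)
		for (addr = 0; addr <= VGIC_ADDR_IRQ_MASK(bits); addr++)
			assert(VGIC_ADDR_TO_INTID(addr, bits) ==
			       (addr & VGIC_ADDR_IRQ_MASK(bits)) * 64 / bits / 8);

	/* e.g. 2 bits per IRQ: byte offset 0x10 starts at INT ID 64 */
	printf("INT ID at offset 0x10, 2 bits/irq: %lu\n",
	       VGIC_ADDR_TO_INTID(0x10UL, 2));
	return 0;
}

The shift matters because addr is a 64-bit gpa_t, and, as the comment notes, a real division there would need a "long long div" helper on 32-bit ARM.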
+12
virt/kvm/arm/vgic/vgic.c
···
 		 * no more work for us to do.
 		 */
 		spin_unlock(&irq->irq_lock);
+
+		/*
+		 * We have to kick the VCPU here, because we could be
+		 * queueing an edge-triggered interrupt for which we
+		 * get no EOI maintenance interrupt. In that case,
+		 * while the IRQ is already on the VCPU's AP list, the
+		 * VCPU could have EOI'ed the original interrupt and
+		 * won't see this one until it exits for some other
+		 * reason.
+		 */
+		if (vcpu)
+			kvm_vcpu_kick(vcpu);
 		return false;
 	}
 