Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:

- Fixes for s390 interrupt delivery

- Fixes for Xen emulator bugs showing up as debug kernel WARNs

- Fix another issue with SEV/ES string I/O VMGEXITs

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86: Take srcu lock in post_kvm_run_save()
KVM: SEV-ES: fix another issue with string I/O VMGEXITs
KVM: x86/xen: Fix kvm_xen_has_interrupt() sleeping in kvm_vcpu_block()
KVM: x86: switch pvclock_gtod_sync_lock to a raw spinlock
KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu
KVM: s390: clear kicked_mask before sleeping again

Changed files: 6 files changed, 61 insertions(+), 25 deletions(-)

arch/s390/kvm/interrupt.c (+3 -2)

         int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
         struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
         struct kvm_vcpu *vcpu;
+        u8 vcpu_isc_mask;

         for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
                 vcpu = kvm_get_vcpu(kvm, vcpu_idx);
                 if (psw_ioint_disabled(vcpu))
                         continue;
-                deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
-                if (deliverable_mask) {
+                vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+                if (deliverable_mask & vcpu_isc_mask) {
                         /* lately kicked but not yet running */
                         if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
                                 return;
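
Note on the interrupt.c hunk above: the bug is the classic one of narrowing a loop-invariant mask in place, so the first idle vCPU examined could hide interrupt subclasses from every later one. A minimal, self-contained sketch of the pattern (names are illustrative, not taken from the kernel):

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: pick the first "listener" whose mask overlaps the
 * pending mask, without destroying the pending mask for later candidates. */
static int pick_listener(uint8_t pending_mask, const uint8_t *listener_masks, int n)
{
        for (int i = 0; i < n; i++) {
                uint8_t overlap = pending_mask & listener_masks[i];

                /* Buggy variant: pending_mask &= listener_masks[i];
                 * that clobbers the loop invariant for iterations i+1.. */
                if (overlap)
                        return i;
        }
        return -1;
}

int main(void)
{
        const uint8_t masks[] = { 0x01, 0x80 };

        /* 0x80 is pending: listener 0 must not be able to hide it from listener 1. */
        printf("chosen listener: %d\n", pick_listener(0x80, masks, 2));
        return 0;
}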

arch/s390/kvm/kvm-s390.c (+1)

 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
+        clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
         return kvm_s390_vcpu_has_irq(vcpu, 0);
 }

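
The kvm-s390.c hunk re-arms the kicked_mask latch: the kicker uses test_and_set_bit() (see the interrupt.c hunk) so a sleeping vCPU is kicked at most once, which only works if the sleeper clears its bit every time it re-checks runnability. A hedged sketch of that handshake with hypothetical names:

#include <linux/bitops.h>

/* Hypothetical per-VM state: one latch bit per sleeping vCPU. */
struct vm_kick_state {
        unsigned long kicked_mask[BITS_TO_LONGS(64)];
};

/* Kicker side: deliver at most one wakeup per sleep period. */
static bool kick_vcpu_once(struct vm_kick_state *s, int vcpu_idx)
{
        if (test_and_set_bit(vcpu_idx, s->kicked_mask))
                return false;   /* already kicked, target not yet awake */
        /* ...send the actual wakeup here... */
        return true;
}

/* Sleeper side: re-arm the latch every time runnability is re-checked,
 * which is what the kvm_arch_vcpu_runnable() hunk above does. */
static void vcpu_recheck_runnable(struct vm_kick_state *s, int vcpu_idx)
{
        clear_bit(vcpu_idx, s->kicked_mask);
        /* ...then test for pending interrupts... */
}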

arch/x86/include/asm/kvm_host.h (+1 -1)

         u64 cur_tsc_generation;
         int nr_vcpus_matched_tsc;

-        spinlock_t pvclock_gtod_sync_lock;
+        raw_spinlock_t pvclock_gtod_sync_lock;
         bool use_master_clock;
         u64 master_kernel_ns;
         u64 master_cycle_now;
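
Context for the raw_spinlock_t switch (hedged, the offending call chain is not shown in this diff): on PREEMPT_RT a plain spinlock_t can sleep, so a lock that has to be taken from contexts where sleeping is not allowed must be a raw_spinlock_t. A minimal sketch of the declaration and locking idiom, independent of the KVM structures:

#include <linux/spinlock.h>
#include <linux/types.h>

/* A raw spinlock stays a busy-waiting lock even on PREEMPT_RT, so it is
 * safe to take from non-sleepable contexts. */
static DEFINE_RAW_SPINLOCK(example_sync_lock);

static u64 example_shared_ns;

static void example_update(u64 ns)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&example_sync_lock, flags);
        example_shared_ns = ns;
        raw_spin_unlock_irqrestore(&example_sync_lock, flags);
}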

arch/x86/kvm/svm/sev.c (+12 -3)

 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
 {
-        if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+        int count;
+        int bytes;
+
+        if (svm->vmcb->control.exit_info_2 > INT_MAX)
                 return -EINVAL;

-        return kvm_sev_es_string_io(&svm->vcpu, size, port,
-                                     svm->ghcb_sa, svm->ghcb_sa_len / size, in);
+        count = svm->vmcb->control.exit_info_2;
+        if (unlikely(check_mul_overflow(count, size, &bytes)))
+                return -EINVAL;
+
+        if (!setup_vmgexit_scratch(svm, in, bytes))
+                return -EINVAL;
+
+        return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
 }

 void sev_es_init_vmcb(struct vcpu_svm *svm)
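
The sev.c fix bounds the guest-controlled repeat count and then lets check_mul_overflow() (from <linux/overflow.h>) reject a count * size product that would wrap before it is used as a buffer length. A small sketch of that validation pattern, with illustrative names:

#include <linux/errno.h>
#include <linux/limits.h>
#include <linux/overflow.h>
#include <linux/types.h>

/* Validate an untrusted element count against an element size before
 * using the product as a buffer length. Returns the byte count, or
 * -EINVAL if the untrusted values do not fit in an int. */
static int checked_io_bytes(u64 untrusted_count, int size)
{
        int count, bytes;

        if (untrusted_count > INT_MAX)
                return -EINVAL;

        count = untrusted_count;
        if (check_mul_overflow(count, size, &bytes))
                return -EINVAL;

        return bytes;
}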

arch/x86/kvm/x86.c (+22 -14)

         kvm_vcpu_write_tsc_offset(vcpu, offset);
         raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);

-        spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
+        raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
         if (!matched) {
                 kvm->arch.nr_vcpus_matched_tsc = 0;
         } else if (!already_matched) {
···
         }

         kvm_track_tsc_matching(vcpu);
-        spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
+        raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
 }

 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
···
         kvm_make_mclock_inprogress_request(kvm);

         /* no guest entries from this point */
-        spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+        raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
         pvclock_update_vm_gtod_copy(kvm);
-        spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+        raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);

         kvm_for_each_vcpu(i, vcpu, kvm)
                 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
···
         unsigned long flags;
         u64 ret;

-        spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+        raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
         if (!ka->use_master_clock) {
-                spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+                raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
                 return get_kvmclock_base_ns() + ka->kvmclock_offset;
         }

         hv_clock.tsc_timestamp = ka->master_cycle_now;
         hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-        spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+        raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);

         /* both __this_cpu_read() and rdtsc() should be on the same cpu */
         get_cpu();
···
          * If the host uses TSC clock, then passthrough TSC as stable
          * to the guest.
          */
-        spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+        raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
         use_master_clock = ka->use_master_clock;
         if (use_master_clock) {
                 host_tsc = ka->master_cycle_now;
                 kernel_ns = ka->master_kernel_ns;
         }
-        spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+        raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);

         /* Keep irq disabled to prevent changes to the clock */
         local_irq_save(flags);
···
          * is slightly ahead) here we risk going negative on unsigned
          * 'system_time' when 'user_ns.clock' is very small.
          */
-        spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+        raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock);
         if (kvm->arch.use_master_clock)
                 now_ns = ka->master_kernel_ns;
         else
                 now_ns = get_kvmclock_base_ns();
         ka->kvmclock_offset = user_ns.clock - now_ns;
-        spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+        raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock);

         kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
         break;
···
         list_for_each_entry(kvm, &vm_list, vm_list) {
                 struct kvm_arch *ka = &kvm->arch;

-                spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+                raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
                 pvclock_update_vm_gtod_copy(kvm);
-                spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+                raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);

                 kvm_for_each_vcpu(cpu, vcpu, kvm)
                         kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
···

         kvm_run->cr8 = kvm_get_cr8(vcpu);
         kvm_run->apic_base = kvm_get_apic_base(vcpu);
+
+        /*
+         * The call to kvm_ready_for_interrupt_injection() may end up in
+         * kvm_xen_has_interrupt() which may require the srcu lock to be
+         * held, to protect against changes in the vcpu_info address.
+         */
+        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
         kvm_run->ready_for_interrupt_injection =
                 pic_in_kernel(vcpu->kvm) ||
                 kvm_vcpu_ready_for_interrupt_injection(vcpu);
+        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);

         if (is_smm(vcpu))
                 kvm_run->flags |= KVM_RUN_X86_SMM;
···

         raw_spin_lock_init(&kvm->arch.tsc_write_lock);
         mutex_init(&kvm->arch.apic_map_lock);
-        spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+        raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);

         kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
         pvclock_update_vm_gtod_copy(kvm);
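
The post_kvm_run_save() hunk wraps the readiness check in an SRCU read-side critical section because, as the added comment says, the Xen path it can reach needs kvm->srcu held to protect against changes in the vcpu_info address. For reference, the SRCU read-side idiom looks like this (a sketch outside KVM, using a standalone srcu_struct):

#include <linux/compiler.h>
#include <linux/srcu.h>

DEFINE_STATIC_SRCU(example_srcu);

static int example_protected_read(const int *shared)
{
        int idx, val;

        /* Readers only mark a critical section; updaters use
         * synchronize_srcu(&example_srcu) to wait them out. */
        idx = srcu_read_lock(&example_srcu);
        val = READ_ONCE(*shared);
        srcu_read_unlock(&example_srcu, idx);

        return val;
}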

arch/x86/kvm/xen.c (+22 -5)

 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
+        int err;
         u8 rc = 0;

         /*
···
         if (likely(slots->generation == ghc->generation &&
                    !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
                 /* Fast path */
-                __get_user(rc, (u8 __user *)ghc->hva + offset);
-        } else {
-                /* Slow path */
-                kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
-                                             sizeof(rc));
+                pagefault_disable();
+                err = __get_user(rc, (u8 __user *)ghc->hva + offset);
+                pagefault_enable();
+                if (!err)
+                        return rc;
         }
+
+        /* Slow path */
+
+        /*
+         * This function gets called from kvm_vcpu_block() after setting the
+         * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
+         * from a HLT. So we really mustn't sleep. If the page ended up absent
+         * at that point, just return 1 in order to trigger an immediate wake,
+         * and we'll end up getting called again from a context where we *can*
+         * fault in the page and wait for it.
+         */
+        if (in_atomic() || !task_is_running(current))
+                return 1;
+
+        kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
+                                     sizeof(rc));

         return rc;
 }
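
The xen.c fast path depends on pagefault_disable(): with page faults disabled, __get_user() fails with -EFAULT instead of sleeping when the page is absent, which is what makes it legal right after the task has been set to TASK_INTERRUPTIBLE. A hedged sketch of the "try a non-faulting read first, fall back only where sleeping is allowed" pattern, with illustrative names:

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/* Read one byte from a user address without ever sleeping; fall back to a
 * faulting read only when the caller says sleeping is permitted. */
static int read_user_byte(const u8 __user *uptr, u8 *out, bool may_sleep)
{
        int err;

        pagefault_disable();
        err = __get_user(*out, uptr);
        pagefault_enable();
        if (!err)
                return 0;

        if (!may_sleep)
                return -EAGAIN;         /* caller must retry from a sleepable context */

        return get_user(*out, uptr);    /* may fault the page in and sleep */
}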