Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"Bugfixes and a one-liner patch to silence a sparse warning"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: arm64: Stop clobbering x0 for HVC_SOFT_RESTART
KVM: arm64: PMU: Fix per-CPU access in preemptible context
KVM: VMX: Use KVM_POSSIBLE_CR*_GUEST_BITS to initialize guest/host masks
KVM: x86: Mark CR4.TSD as being possibly owned by the guest
KVM: x86: Inject #GP if guest attempts to toggle CR4.LA57 in 64-bit mode
kvm: use more precise cast and do not drop __user
KVM: x86: bit 8 of non-leaf PDPEs is not reserved
KVM: X86: Fix async pf caused null-ptr-deref
KVM: arm64: vgic-v4: Plug race between non-residency and v4.1 doorbell
KVM: arm64: pvtime: Ensure task delay accounting is enabled
KVM: arm64: Fix kvm_reset_vcpu() return code being incorrect with SVE
KVM: arm64: Annotate hyp NMI-related functions as __always_inline
KVM: s390: reduce number of IO pins to 1

+70 -30
+1 -1
arch/arm64/include/asm/arch_gicv3.h
··· 109 109 return read_sysreg_s(SYS_ICC_PMR_EL1); 110 110 } 111 111 112 - static inline void gic_write_pmr(u32 val) 112 + static __always_inline void gic_write_pmr(u32 val) 113 113 { 114 114 write_sysreg_s(val, SYS_ICC_PMR_EL1); 115 115 }
+1 -1
arch/arm64/include/asm/cpufeature.h
··· 675 675 cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH); 676 676 } 677 677 678 - static inline bool system_uses_irq_prio_masking(void) 678 + static __always_inline bool system_uses_irq_prio_masking(void) 679 679 { 680 680 return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && 681 681 cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
+7 -4
arch/arm64/kvm/hyp-init.S
··· 136 136 137 137 1: cmp x0, #HVC_RESET_VECTORS 138 138 b.ne 1f 139 - reset: 139 + 140 140 /* 141 - * Reset kvm back to the hyp stub. Do not clobber x0-x4 in 142 - * case we coming via HVC_SOFT_RESTART. 141 + * Set the HVC_RESET_VECTORS return code before entering the common 142 + * path so that we do not clobber x0-x2 in case we are coming via 143 + * HVC_SOFT_RESTART. 143 144 */ 145 + mov x0, xzr 146 + reset: 147 + /* Reset kvm back to the hyp stub. */ 144 148 mrs x5, sctlr_el2 145 149 mov_q x6, SCTLR_ELx_FLAGS 146 150 bic x5, x5, x6 // Clear SCTL_M and etc ··· 155 151 /* Install stub vectors */ 156 152 adr_l x5, __hyp_stub_vectors 157 153 msr vbar_el2, x5 158 - mov x0, xzr 159 154 eret 160 155 161 156 1: /* Bad stub call */
+6 -1
arch/arm64/kvm/pmu.c
··· 159 159 } 160 160 161 161 /* 162 - * On VHE ensure that only guest events have EL0 counting enabled 162 + * On VHE ensure that only guest events have EL0 counting enabled. 163 + * This is called from both vcpu_{load,put} and the sysreg handling. 164 + * Since the latter is preemptible, special care must be taken to 165 + * disable preemption. 163 166 */ 164 167 void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) 165 168 { ··· 172 169 if (!has_vhe()) 173 170 return; 174 171 172 + preempt_disable(); 175 173 host = this_cpu_ptr(&kvm_host_data); 176 174 events_guest = host->pmu_events.events_guest; 177 175 events_host = host->pmu_events.events_host; 178 176 179 177 kvm_vcpu_pmu_enable_el0(events_guest); 180 178 kvm_vcpu_pmu_disable_el0(events_host); 179 + preempt_enable(); 181 180 } 182 181 183 182 /*
+12 -3
arch/arm64/kvm/pvtime.c
··· 3 3 4 4 #include <linux/arm-smccc.h> 5 5 #include <linux/kvm_host.h> 6 + #include <linux/sched/stat.h> 6 7 7 8 #include <asm/kvm_mmu.h> 8 9 #include <asm/pvclock-abi.h> ··· 74 73 return base; 75 74 } 76 75 76 + static bool kvm_arm_pvtime_supported(void) 77 + { 78 + return !!sched_info_on(); 79 + } 80 + 77 81 int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, 78 82 struct kvm_device_attr *attr) 79 83 { ··· 88 82 int ret = 0; 89 83 int idx; 90 84 91 - if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) 85 + if (!kvm_arm_pvtime_supported() || 86 + attr->attr != KVM_ARM_VCPU_PVTIME_IPA) 92 87 return -ENXIO; 93 88 94 89 if (get_user(ipa, user)) ··· 117 110 u64 __user *user = (u64 __user *)attr->addr; 118 111 u64 ipa; 119 112 120 - if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) 113 + if (!kvm_arm_pvtime_supported() || 114 + attr->attr != KVM_ARM_VCPU_PVTIME_IPA) 121 115 return -ENXIO; 122 116 123 117 ipa = vcpu->arch.steal.base; ··· 133 125 { 134 126 switch (attr->attr) { 135 127 case KVM_ARM_VCPU_PVTIME_IPA: 136 - return 0; 128 + if (kvm_arm_pvtime_supported()) 129 + return 0; 137 130 } 138 131 return -ENXIO; 139 132 }
+7 -3
arch/arm64/kvm/reset.c
··· 245 245 */ 246 246 int kvm_reset_vcpu(struct kvm_vcpu *vcpu) 247 247 { 248 - int ret = -EINVAL; 248 + int ret; 249 249 bool loaded; 250 250 u32 pstate; 251 251 ··· 269 269 270 270 if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || 271 271 test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) { 272 - if (kvm_vcpu_enable_ptrauth(vcpu)) 272 + if (kvm_vcpu_enable_ptrauth(vcpu)) { 273 + ret = -EINVAL; 273 274 goto out; 275 + } 274 276 } 275 277 276 278 switch (vcpu->arch.target) { 277 279 default: 278 280 if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { 279 - if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) 281 + if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) { 282 + ret = -EINVAL; 280 283 goto out; 284 + } 281 285 pstate = VCPU_RESET_PSTATE_SVC; 282 286 } else { 283 287 pstate = VCPU_RESET_PSTATE_EL1;
+8
arch/arm64/kvm/vgic/vgic-v4.c
··· 90 90 !irqd_irq_disabled(&irq_to_desc(irq)->irq_data)) 91 91 disable_irq_nosync(irq); 92 92 93 + /* 94 + * The v4.1 doorbell can fire concurrently with the vPE being 95 + * made non-resident. Ensure we only update pending_last 96 + * *after* the non-residency sequence has completed. 97 + */ 98 + raw_spin_lock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock); 93 99 vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; 100 + raw_spin_unlock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock); 101 + 94 102 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 95 103 kvm_vcpu_kick(vcpu); 96 104
+4 -4
arch/s390/include/asm/kvm_host.h
··· 31 31 #define KVM_USER_MEM_SLOTS 32 32 32 33 33 /* 34 - * These seem to be used for allocating ->chip in the routing table, 35 - * which we don't use. 4096 is an out-of-thin-air value. If we need 36 - * to look at ->chip later on, we'll need to revisit this. 34 + * These seem to be used for allocating ->chip in the routing table, which we 35 + * don't use. 1 is as small as we can get to reduce the needed memory. If we 36 + * need to look at ->chip later on, we'll need to revisit this. 37 37 */ 38 38 #define KVM_NR_IRQCHIPS 1 39 - #define KVM_IRQCHIP_NUM_PINS 4096 39 + #define KVM_IRQCHIP_NUM_PINS 1 40 40 #define KVM_HALT_POLL_NS_DEFAULT 50000 41 41 42 42 /* s390-specific vcpu->requests bit members */
+1 -1
arch/x86/kvm/kvm_cache_regs.h
··· 7 7 #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS 8 8 #define KVM_POSSIBLE_CR4_GUEST_BITS \ 9 9 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ 10 - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) 10 + | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD) 11 11 12 12 #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ 13 13 static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
+1 -1
arch/x86/kvm/mmu/mmu.c
··· 4449 4449 nonleaf_bit8_rsvd | rsvd_bits(7, 7) | 4450 4450 rsvd_bits(maxphyaddr, 51); 4451 4451 rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd | 4452 - nonleaf_bit8_rsvd | gbpages_bit_rsvd | 4452 + gbpages_bit_rsvd | 4453 4453 rsvd_bits(maxphyaddr, 51); 4454 4454 rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | 4455 4455 rsvd_bits(maxphyaddr, 51);
+2 -2
arch/x86/kvm/vmx/nested.c
··· 4109 4109 * CR0_GUEST_HOST_MASK is already set in the original vmcs01 4110 4110 * (KVM doesn't change it); 4111 4111 */ 4112 - vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; 4112 + vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; 4113 4113 vmx_set_cr0(vcpu, vmcs12->host_cr0); 4114 4114 4115 4115 /* Same as above - no reason to call set_cr4_guest_host_mask(). */ ··· 4259 4259 */ 4260 4260 vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); 4261 4261 4262 - vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; 4262 + vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; 4263 4263 vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); 4264 4264 4265 4265 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
+5 -8
arch/x86/kvm/vmx/vmx.c
··· 133 133 #define KVM_VM_CR0_ALWAYS_ON \ 134 134 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ 135 135 X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) 136 - #define KVM_CR4_GUEST_OWNED_BITS \ 137 - (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ 138 - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) 139 136 140 137 #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE 141 138 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) ··· 4031 4034 4032 4035 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) 4033 4036 { 4034 - vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; 4035 - if (enable_ept) 4036 - vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; 4037 + vmx->vcpu.arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS; 4038 + if (!enable_ept) 4039 + vmx->vcpu.arch.cr4_guest_owned_bits &= ~X86_CR4_PGE; 4037 4040 if (is_guest_mode(&vmx->vcpu)) 4038 4041 vmx->vcpu.arch.cr4_guest_owned_bits &= 4039 4042 ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask; ··· 4330 4333 /* 22.2.1, 20.8.1 */ 4331 4334 vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); 4332 4335 4333 - vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; 4334 - vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); 4336 + vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; 4337 + vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits); 4335 4338 4336 4339 set_cr4_guest_host_mask(vmx); 4337 4340
+5
arch/x86/kvm/x86.c
··· 975 975 if (is_long_mode(vcpu)) { 976 976 if (!(cr4 & X86_CR4_PAE)) 977 977 return 1; 978 + if ((cr4 ^ old_cr4) & X86_CR4_LA57) 979 + return 1; 978 980 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 979 981 && ((cr4 ^ old_cr4) & pdptr_bits) 980 982 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, ··· 2693 2691 2694 2692 /* Bits 4:5 are reserved, Should be zero */ 2695 2693 if (data & 0x30) 2694 + return 1; 2695 + 2696 + if (!lapic_in_kernel(vcpu)) 2696 2697 return 1; 2697 2698 2698 2699 vcpu->arch.apf.msr_en_val = data;
+8
drivers/irqchip/irq-gic-v3-its.c
··· 4054 4054 u64 val; 4055 4055 4056 4056 if (info->req_db) { 4057 + unsigned long flags; 4058 + 4057 4059 /* 4058 4060 * vPE is going to block: make the vPE non-resident with 4059 4061 * PendingLast clear and DB set. The GIC guarantees that if 4060 4062 * we read-back PendingLast clear, then a doorbell will be 4061 4063 * delivered when an interrupt comes. 4064 + * 4065 + * Note the locking to deal with the concurrent update of 4066 + * pending_last from the doorbell interrupt handler that can 4067 + * run concurrently. 4062 4068 */ 4069 + raw_spin_lock_irqsave(&vpe->vpe_lock, flags); 4063 4070 val = its_clear_vpend_valid(vlpi_base, 4064 4071 GICR_VPENDBASER_PendingLast, 4065 4072 GICR_VPENDBASER_4_1_DB); 4066 4073 vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast); 4074 + raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags); 4067 4075 } else { 4068 4076 /* 4069 4077 * We're not blocking, so just make the vPE non-resident
+2 -1
virt/kvm/kvm_main.c
··· 3350 3350 if (kvm_sigmask.len != sizeof(compat_sigset_t)) 3351 3351 goto out; 3352 3352 r = -EFAULT; 3353 - if (get_compat_sigset(&sigset, (void *)sigmask_arg->sigset)) 3353 + if (get_compat_sigset(&sigset, 3354 + (compat_sigset_t __user *)sigmask_arg->sigset)) 3354 3355 goto out; 3355 3356 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); 3356 3357 } else