Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-x86-xen-6.8' of https://github.com/kvm-x86/linux into HEAD

KVM Xen change for 6.8:

To work around Xen guests that don't expect Xen PV clocks to be marked as being
based on a stable TSC, add a Xen config knob to allow userspace to opt out of
KVM setting the "TSC stable" bit in Xen PV clocks. Note, the "TSC stable" bit
was added to the PVCLOCK ABI by KVM without an ack from Xen, i.e. KVM isn't
entirely blameless for the buggy guest behavior.

+38 -6
+6
Documentation/virt/kvm/api.rst
··· 8550 8550 #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) 8551 8551 #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) 8552 8552 #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) 8553 + #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) 8553 8554 8554 8555 The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG 8555 8556 ioctl is available, for the guest to set its hypercall page. ··· 8593 8592 behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless 8594 8593 specifically enabled (by the guest making the hypercall, causing the VMM 8595 8594 to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute). 8595 + 8596 + The KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag indicates that KVM supports 8597 + clearing the PVCLOCK_TSC_STABLE_BIT flag in Xen pvclock sources. This will be 8598 + done when the KVM_CAP_XEN_HVM ioctl sets the 8599 + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag. 8596 8600 8597 8601 8.31 KVM_CAP_PPC_MULTITCE 8598 8602 -------------------------
+23 -5
arch/x86/kvm/x86.c
··· 3110 3110 3111 3111 static void kvm_setup_guest_pvclock(struct kvm_vcpu *v, 3112 3112 struct gfn_to_pfn_cache *gpc, 3113 - unsigned int offset) 3113 + unsigned int offset, 3114 + bool force_tsc_unstable) 3114 3115 { 3115 3116 struct kvm_vcpu_arch *vcpu = &v->arch; 3116 3117 struct pvclock_vcpu_time_info *guest_hv_clock; ··· 3148 3147 } 3149 3148 3150 3149 memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock)); 3150 + 3151 + if (force_tsc_unstable) 3152 + guest_hv_clock->flags &= ~PVCLOCK_TSC_STABLE_BIT; 3153 + 3151 3154 smp_wmb(); 3152 3155 3153 3156 guest_hv_clock->version = ++vcpu->hv_clock.version; ··· 3172 3167 u64 tsc_timestamp, host_tsc; 3173 3168 u8 pvclock_flags; 3174 3169 bool use_master_clock; 3170 + #ifdef CONFIG_KVM_XEN 3171 + /* 3172 + * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT as unless 3173 + * explicitly told to use TSC as its clocksource Xen will not set this bit. 3174 + * This default behaviour led to bugs in some guest kernels which cause 3175 + * problems if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags. 
3176 + */ 3177 + bool xen_pvclock_tsc_unstable = 3178 + ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; 3179 + #endif 3175 3180 3176 3181 kernel_ns = 0; 3177 3182 host_tsc = 0; ··· 3260 3245 vcpu->hv_clock.flags = pvclock_flags; 3261 3246 3262 3247 if (vcpu->pv_time.active) 3263 - kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0); 3248 + kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false); 3264 3249 #ifdef CONFIG_KVM_XEN 3265 3250 if (vcpu->xen.vcpu_info_cache.active) 3266 3251 kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache, 3267 - offsetof(struct compat_vcpu_info, time)); 3252 + offsetof(struct compat_vcpu_info, time), 3253 + xen_pvclock_tsc_unstable); 3268 3254 if (vcpu->xen.vcpu_time_info_cache.active) 3269 - kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0); 3255 + kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0, 3256 + xen_pvclock_tsc_unstable); 3270 3257 #endif 3271 3258 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); 3272 3259 return 0; ··· 4677 4660 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | 4678 4661 KVM_XEN_HVM_CONFIG_SHARED_INFO | 4679 4662 KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL | 4680 - KVM_XEN_HVM_CONFIG_EVTCHN_SEND; 4663 + KVM_XEN_HVM_CONFIG_EVTCHN_SEND | 4664 + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; 4681 4665 if (sched_info_on()) 4682 4666 r |= KVM_XEN_HVM_CONFIG_RUNSTATE | 4683 4667 KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
+8 -1
arch/x86/kvm/xen.c
··· 1162 1162 { 1163 1163 /* Only some feature flags need to be *enabled* by userspace */ 1164 1164 u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | 1165 - KVM_XEN_HVM_CONFIG_EVTCHN_SEND; 1165 + KVM_XEN_HVM_CONFIG_EVTCHN_SEND | 1166 + KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; 1167 + u32 old_flags; 1166 1168 1167 1169 if (xhc->flags & ~permitted_flags) 1168 1170 return -EINVAL; ··· 1185 1183 else if (!xhc->msr && kvm->arch.xen_hvm_config.msr) 1186 1184 static_branch_slow_dec_deferred(&kvm_xen_enabled); 1187 1185 1186 + old_flags = kvm->arch.xen_hvm_config.flags; 1188 1187 memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc)); 1189 1188 1190 1189 mutex_unlock(&kvm->arch.xen.xen_lock); 1190 + 1191 + if ((old_flags ^ xhc->flags) & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE) 1192 + kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); 1193 + 1191 1194 return 0; 1192 1195 } 1193 1196
+1
include/uapi/linux/kvm.h
··· 1245 1245 #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) 1246 1246 #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) 1247 1247 #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) 1248 + #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) 1248 1249 1249 1250 struct kvm_xen_hvm_config { 1250 1251 __u32 flags;