Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: kvmclock: abolish PVCLOCK_COUNTS_FROM_ZERO

Newer KVM won't be exposing PVCLOCK_COUNTS_FROM_ZERO anymore.
The purpose of that flag was to start counting system time from 0 when
the KVM clock has been initialized.
We can achieve the same by selecting one read as the initial point.

A simple subtraction will work unless the KVM clock count overflows
earlier (has smaller width) than scheduler's cycle count. We should be
safe till x86_128.

Because PVCLOCK_COUNTS_FROM_ZERO was enabled only on new hypervisors,
setting sched clock as stable based on PVCLOCK_TSC_STABLE_BIT might
regress on older ones.

I presume we don't need to change kvm_clock_read instead of introducing
kvm_sched_clock_read. A problem could arise in case sched_clock is
expected to return the same value as get_cycles, but we should have
merged those clocks in that case.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Authored by Radim Krčmář; committed by Paolo Bonzini.
72c930dc 1cea0ce6

+35 -11
arch/x86/kernel/kvmclock.c
···
 static int kvmclock = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
+static cycle_t kvm_sched_clock_offset;
 
 static int parse_no_kvmclock(char *arg)
 {
···
 static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
 {
 	return kvm_clock_read();
+}
+
+static cycle_t kvm_sched_clock_read(void)
+{
+	return kvm_clock_read() - kvm_sched_clock_offset;
+}
+
+static inline void kvm_sched_clock_init(bool stable)
+{
+	if (!stable) {
+		pv_time_ops.sched_clock = kvm_clock_read;
+		return;
+	}
+
+	kvm_sched_clock_offset = kvm_clock_read();
+	pv_time_ops.sched_clock = kvm_sched_clock_read;
+	set_sched_clock_stable();
+
+	printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n",
+			kvm_sched_clock_offset);
+
+	BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) >
+		sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time));
 }
 
 /*
···
 		memblock_free(mem, size);
 		return;
 	}
-	pv_time_ops.sched_clock = kvm_clock_read;
+
+	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
+		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+
+	cpu = get_cpu();
+	vcpu_time = &hv_clock[cpu].pvti;
+	flags = pvclock_read_flags(vcpu_time);
+
+	kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);
+	put_cpu();
+
 	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
 	x86_platform.get_wallclock = kvm_get_wallclock;
 	x86_platform.set_wallclock = kvm_set_wallclock;
···
 	kvm_get_preset_lpj();
 	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
 	pv_info.name = "KVM";
-
-	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
-		pvclock_set_flags(~0);
-
-	cpu = get_cpu();
-	vcpu_time = &hv_clock[cpu].pvti;
-	flags = pvclock_read_flags(vcpu_time);
-	if (flags & PVCLOCK_COUNTS_FROM_ZERO)
-		set_sched_clock_stable();
-	put_cpu();
 }
 
 int __init kvm_setup_vsyscall_timeinfo(void)