Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: Increase precision of x86_platform.get/set_wallclock()

All the virtualized platforms (KVM, lguest and Xen) have persistent
wallclocks with better than one-second precision.

read_persistent_clock() and update_persistent_clock() allow
for nanosecond precision but their implementation on x86 with
x86_platform.get/set_wallclock() only allows for one second precision.
This means guests may see a wallclock time that is off by up to 1
second.

Make set_wallclock() and get_wallclock() take a struct timespec
parameter (which allows for nanosecond precision) so KVM and Xen
guests may start with a more accurate wallclock time and a Xen dom0
can maintain a more accurate wallclock for guests.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>

authored by

David Vrabel and committed by
John Stultz
3565184e 0a0a7e66

+32 -41
+2 -2
arch/x86/include/asm/mc146818rtc.h
··· 95 95 unsigned char rtc_cmos_read(unsigned char addr); 96 96 void rtc_cmos_write(unsigned char val, unsigned char addr); 97 97 98 - extern int mach_set_rtc_mmss(unsigned long nowtime); 99 - extern unsigned long mach_get_cmos_time(void); 98 + extern int mach_set_rtc_mmss(const struct timespec *now); 99 + extern void mach_get_cmos_time(struct timespec *now); 100 100 101 101 #define RTC_IRQ 8 102 102
+4 -2
arch/x86/include/asm/x86_init.h
··· 142 142 void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); 143 143 }; 144 144 145 + struct timespec; 146 + 145 147 /** 146 148 * struct x86_platform_ops - platform specific runtime functions 147 149 * @calibrate_tsc: calibrate TSC ··· 158 156 */ 159 157 struct x86_platform_ops { 160 158 unsigned long (*calibrate_tsc)(void); 161 - unsigned long (*get_wallclock)(void); 162 - int (*set_wallclock)(unsigned long nowtime); 159 + void (*get_wallclock)(struct timespec *ts); 160 + int (*set_wallclock)(const struct timespec *ts); 163 161 void (*iommu_shutdown)(void); 164 162 bool (*is_untracked_pat_range)(u64 start, u64 end); 165 163 void (*nmi_init)(void);
+3 -6
arch/x86/kernel/kvmclock.c
··· 48 48 * have elapsed since the hypervisor wrote the data. So we try to account for 49 49 * that with system time 50 50 */ 51 - static unsigned long kvm_get_wallclock(void) 51 + static void kvm_get_wallclock(struct timespec *now) 52 52 { 53 53 struct pvclock_vcpu_time_info *vcpu_time; 54 - struct timespec ts; 55 54 int low, high; 56 55 int cpu; 57 56 ··· 63 64 cpu = smp_processor_id(); 64 65 65 66 vcpu_time = &hv_clock[cpu].pvti; 66 - pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); 67 + pvclock_read_wallclock(&wall_clock, vcpu_time, now); 67 68 68 69 preempt_enable(); 69 - 70 - return ts.tv_sec; 71 70 } 72 71 73 - static int kvm_set_wallclock(unsigned long now) 72 + static int kvm_set_wallclock(const struct timespec *now) 74 73 { 75 74 return -1; 76 75 }
+7 -10
arch/x86/kernel/rtc.c
··· 38 38 * jump to the next second precisely 500 ms later. Check the Motorola 39 39 * MC146818A or Dallas DS12887 data sheet for details. 40 40 */ 41 - int mach_set_rtc_mmss(unsigned long nowtime) 41 + int mach_set_rtc_mmss(const struct timespec *now) 42 42 { 43 + unsigned long nowtime = now->tv_sec; 43 44 struct rtc_time tm; 44 45 int retval = 0; 45 46 ··· 59 58 return retval; 60 59 } 61 60 62 - unsigned long mach_get_cmos_time(void) 61 + void mach_get_cmos_time(struct timespec *now) 63 62 { 64 63 unsigned int status, year, mon, day, hour, min, sec, century = 0; 65 64 unsigned long flags; ··· 108 107 } else 109 108 year += CMOS_YEARS_OFFS; 110 109 111 - return mktime(year, mon, day, hour, min, sec); 110 + now->tv_sec = mktime(year, mon, day, hour, min, sec); 111 + now->tv_nsec = 0; 112 112 } 113 113 114 114 /* Routines for accessing the CMOS RAM/RTC. */ ··· 137 135 138 136 int update_persistent_clock(struct timespec now) 139 137 { 140 - return x86_platform.set_wallclock(now.tv_sec); 138 + return x86_platform.set_wallclock(&now); 141 139 } 142 140 143 141 /* not static: needed by APM */ 144 142 void read_persistent_clock(struct timespec *ts) 145 143 { 146 - unsigned long retval; 147 - 148 - retval = x86_platform.get_wallclock(); 149 - 150 - ts->tv_sec = retval; 151 - ts->tv_nsec = 0; 144 + x86_platform.get_wallclock(ts); 152 145 } 153 146 154 147
+2 -2
arch/x86/lguest/boot.c
··· 882 882 * It would be far better for everyone if the Guest had its own clock, but 883 883 * until then the Host gives us the time on every interrupt. 884 884 */ 885 - static unsigned long lguest_get_wallclock(void) 885 + static void lguest_get_wallclock(struct timespec *now) 886 886 { 887 - return lguest_data.time.tv_sec; 887 + *now = lguest_data.time; 888 888 } 889 889 890 890 /*
+6 -4
arch/x86/platform/efi/efi.c
··· 352 352 return status; 353 353 } 354 354 355 - int efi_set_rtc_mmss(unsigned long nowtime) 355 + int efi_set_rtc_mmss(const struct timespec *now) 356 356 { 357 + unsigned long nowtime = now->tv_sec; 357 358 efi_status_t status; 358 359 efi_time_t eft; 359 360 efi_time_cap_t cap; ··· 389 388 return 0; 390 389 } 391 390 392 - unsigned long efi_get_time(void) 391 + void efi_get_time(struct timespec *now) 393 392 { 394 393 efi_status_t status; 395 394 efi_time_t eft; ··· 399 398 if (status != EFI_SUCCESS) 400 399 pr_err("Oops: efitime: can't read time!\n"); 401 400 402 - return mktime(eft.year, eft.month, eft.day, eft.hour, 403 - eft.minute, eft.second); 401 + now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour, 402 + eft.minute, eft.second); 403 + now->tv_nsec = 0; 404 404 } 405 405 406 406 /*
+6 -13
arch/x86/xen/time.c
··· 191 191 put_cpu_var(xen_vcpu); 192 192 } 193 193 194 - static unsigned long xen_get_wallclock(void) 194 + static void xen_get_wallclock(struct timespec *now) 195 195 { 196 - struct timespec ts; 197 - 198 - xen_read_wallclock(&ts); 199 - return ts.tv_sec; 196 + xen_read_wallclock(now); 200 197 } 201 198 202 - static int xen_set_wallclock(unsigned long now) 199 + static int xen_set_wallclock(const struct timespec *now) 203 200 { 204 201 struct xen_platform_op op; 205 - int rc; 206 202 207 203 /* do nothing for domU */ 208 204 if (!xen_initial_domain()) 209 205 return -1; 210 206 211 207 op.cmd = XENPF_settime; 212 - op.u.settime.secs = now; 213 - op.u.settime.nsecs = 0; 208 + op.u.settime.secs = now->tv_sec; 209 + op.u.settime.nsecs = now->tv_nsec; 214 210 op.u.settime.system_time = xen_clocksource_read(); 215 211 216 - rc = HYPERVISOR_dom0_op(&op); 217 - WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now); 218 - 219 - return rc; 212 + return HYPERVISOR_dom0_op(&op); 220 213 } 221 214 222 215 static struct clocksource xen_clocksource __read_mostly = {
+2 -2
include/linux/efi.h
··· 594 594 extern int __init efi_uart_console_only (void); 595 595 extern void efi_initialize_iomem_resources(struct resource *code_resource, 596 596 struct resource *data_resource, struct resource *bss_resource); 597 - extern unsigned long efi_get_time(void); 598 - extern int efi_set_rtc_mmss(unsigned long nowtime); 597 + extern void efi_get_time(struct timespec *now); 598 + extern int efi_set_rtc_mmss(const struct timespec *now); 599 599 extern void efi_reserve_boot_services(void); 600 600 extern struct efi_memory_map memmap; 601 601