Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

timekeeping: Increase granularity of read_persistent_clock()

The persistent clocks of some architectures (e.g. s390) have a
finer granularity than seconds. To reduce the delta between the
host clock and the guest clock in a virtualized system, change the
read_persistent_clock function to fill in a struct timespec.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: John Stultz <johnstul@us.ibm.com>
Cc: Daniel Walker <dwalker@fifo99.com>
LKML-Reference: <20090814134811.013873340@de.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

authored by

Martin Schwidefsky and committed by
Thomas Gleixner
d4f587c6 75c5158f

+83 -74
+3 -2
arch/m68knommu/kernel/time.c
··· 72 72 return mktime(year, mon, day, hour, min, sec);; 73 73 } 74 74 75 - unsigned long read_persistent_clock(void) 75 + void read_persistent_clock(struct timespec *ts) 76 76 { 77 - return read_rtc_mmss(); 77 + ts->tv_sec = read_rtc_mmss(); 78 + ts->tv_nsec = 0; 78 79 } 79 80 80 81 int update_persistent_clock(struct timespec now)
+3 -2
arch/mips/dec/time.c
··· 18 18 #include <asm/dec/ioasic.h> 19 19 #include <asm/dec/machtype.h> 20 20 21 - unsigned long read_persistent_clock(void) 21 + void read_persistent_clock(struct timespec *ts) 22 22 { 23 23 unsigned int year, mon, day, hour, min, sec, real_year; 24 24 unsigned long flags; ··· 53 53 54 54 year += real_year - 72 + 2000; 55 55 56 - return mktime(year, mon, day, hour, min, sec); 56 + ts->tv_sec = mktime(year, mon, day, hour, min, sec); 57 + ts->tv_nsec = 0; 57 58 } 58 59 59 60 /*
+3 -2
arch/mips/lasat/ds1603.c
··· 135 135 lasat_ndelay(1000); 136 136 } 137 137 138 - unsigned long read_persistent_clock(void) 138 + void read_persistent_clock(struct timespec *ts) 139 139 { 140 140 unsigned long word; 141 141 unsigned long flags; ··· 147 147 rtc_end_op(); 148 148 spin_unlock_irqrestore(&rtc_lock, flags); 149 149 150 - return word; 150 + ts->tv_sec = word; 151 + ts->tv_nsec = 0; 151 152 } 152 153 153 154 int rtc_mips_set_mmss(unsigned long time)
+6 -2
arch/mips/lasat/sysctl.c
··· 92 92 int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, 93 93 void *buffer, size_t *lenp, loff_t *ppos) 94 94 { 95 + struct timespec ts; 95 96 int r; 96 97 97 98 if (!write) { 98 - rtctmp = read_persistent_clock(); 99 + read_persistent_clock(&ts); 100 + rtctmp = ts.tv_sec; 99 101 /* check for time < 0 and set to 0 */ 100 102 if (rtctmp < 0) 101 103 rtctmp = 0; ··· 136 134 void *oldval, size_t *oldlenp, 137 135 void *newval, size_t newlen) 138 136 { 137 + struct timespec ts; 139 138 int r; 140 139 141 - rtctmp = read_persistent_clock(); 140 + read_persistent_clock(&ts); 141 + rtctmp = ts.tv_sec; 142 142 if (rtctmp < 0) 143 143 rtctmp = 0; 144 144 r = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
+3 -2
arch/mips/lemote/lm2e/setup.c
··· 54 54 mips_hpt_frequency = cpu_clock_freq / 2; 55 55 } 56 56 57 - unsigned long read_persistent_clock(void) 57 + void read_persistent_clock(struct timespec *ts) 58 58 { 59 - return mc146818_get_cmos_time(); 59 + ts->tv_sec = mc146818_get_cmos_time(); 60 + ts->tv_nsec = 0; 60 61 } 61 62 62 63 void (*__wbflush)(void);
+3 -2
arch/mips/mti-malta/malta-time.c
··· 100 100 return count; 101 101 } 102 102 103 - unsigned long read_persistent_clock(void) 103 + void read_persistent_clock(struct timespec *ts) 104 104 { 105 - return mc146818_get_cmos_time(); 105 + ts->tv_sec = mc146818_get_cmos_time(); 106 + ts->tv_nsec = 0; 106 107 } 107 108 108 109 static void __init plat_perf_setup(void)
+3 -2
arch/mips/pmc-sierra/yosemite/setup.c
··· 70 70 } 71 71 72 72 73 - unsigned long read_persistent_clock(void) 73 + void read_persistent_clock(struct timespec *ts) 74 74 { 75 75 unsigned int year, month, day, hour, min, sec; 76 76 unsigned long flags; ··· 92 92 m48t37_base->control = 0x00; 93 93 spin_unlock_irqrestore(&rtc_lock, flags); 94 94 95 - return mktime(year, month, day, hour, min, sec); 95 + ts->tv_sec = mktime(year, month, day, hour, min, sec); 96 + ts->tv_nsec = 0; 96 97 } 97 98 98 99 int rtc_mips_set_time(unsigned long tim)
+11 -4
arch/mips/sibyte/swarm/setup.c
··· 87 87 88 88 enum swarm_rtc_type swarm_rtc_type; 89 89 90 - unsigned long read_persistent_clock(void) 90 + void read_persistent_clock(struct timespec *ts) 91 91 { 92 + unsigned long sec; 93 + 92 94 switch (swarm_rtc_type) { 93 95 case RTC_XICOR: 94 - return xicor_get_time(); 96 + sec = xicor_get_time(); 97 + break; 95 98 96 99 case RTC_M4LT81: 97 - return m41t81_get_time(); 100 + sec = m41t81_get_time(); 101 + break; 98 102 99 103 case RTC_NONE: 100 104 default: 101 - return mktime(2000, 1, 1, 0, 0, 0); 105 + sec = mktime(2000, 1, 1, 0, 0, 0); 106 + break; 102 107 } 108 + ts->tv_sec = sec; 109 + ts->tv_nsec = 0; 103 110 } 104 111 105 112 int rtc_mips_set_time(unsigned long sec)
+3 -2
arch/mips/sni/time.c
··· 182 182 setup_pit_timer(); 183 183 } 184 184 185 - unsigned long read_persistent_clock(void) 185 + void read_persistent_clock(struct timespec *ts) 186 186 { 187 - return -1; 187 + ts->tv_sec = -1; 188 + ts->tv_nsec = 0; 188 189 }
+4 -3
arch/powerpc/kernel/time.c
··· 769 769 return ppc_md.set_rtc_time(&tm); 770 770 } 771 771 772 - unsigned long read_persistent_clock(void) 772 + void read_persistent_clock(struct timespec *ts) 773 773 { 774 774 struct rtc_time tm; 775 775 static int first = 1; ··· 787 787 if (!ppc_md.get_rtc_time) 788 788 return 0; 789 789 ppc_md.get_rtc_time(&tm); 790 - return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, 791 - tm.tm_hour, tm.tm_min, tm.tm_sec); 790 + ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, 791 + tm.tm_hour, tm.tm_min, tm.tm_sec); 792 + ts->tv_nsec = 0; 792 793 } 793 794 794 795 /* clocksource code */
+4 -18
arch/s390/kernel/time.c
··· 182 182 static void etr_reset(void); 183 183 static void stp_reset(void); 184 184 185 - unsigned long read_persistent_clock(void) 185 + void read_persistent_clock(struct timespec *ts) 186 186 { 187 - struct timespec ts; 188 - 189 - tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, &ts); 190 - return ts.tv_sec; 187 + tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); 191 188 } 192 189 193 190 static cycle_t read_tod_clock(struct clocksource *cs) ··· 245 248 { 246 249 struct timespec ts; 247 250 unsigned long flags; 248 - cycle_t now; 249 251 250 252 /* Reset time synchronization interfaces. */ 251 253 etr_reset(); ··· 262 266 panic("Could not register TOD clock source"); 263 267 264 268 /* 265 - * The TOD clock is an accurate clock. The xtime should be 266 - * initialized in a way that the difference between TOD and 267 - * xtime is reasonably small. Too bad that timekeeping_init 268 - * sets xtime.tv_nsec to zero. In addition the clock source 269 - * change from the jiffies clock source to the TOD clock 270 - * source add another error of up to 1/HZ second. The same 271 - * function sets wall_to_monotonic to a value that is too 272 - * small for /proc/uptime to be accurate. 273 - * Reset xtime and wall_to_monotonic to sane values. 269 + * Reset wall_to_monotonic to the initial timestamp created 270 + * in head.S to get a precise value in /proc/uptime. 274 271 */ 275 272 write_seqlock_irqsave(&xtime_lock, flags); 276 - now = get_clock(); 277 - tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); 278 - clocksource_tod.cycle_last = now; 279 273 tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); 280 274 set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); 281 275 write_sequnlock_irqrestore(&xtime_lock, flags);
+2 -4
arch/sh/kernel/time.c
··· 39 39 int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time; 40 40 41 41 #ifdef CONFIG_GENERIC_CMOS_UPDATE 42 - unsigned long read_persistent_clock(void) 42 + void read_persistent_clock(struct timespec *ts) 43 43 { 44 - struct timespec tv; 45 - rtc_sh_get_time(&tv); 46 - return tv.tv_sec; 44 + rtc_sh_get_time(ts); 47 45 } 48 46 49 47 int update_persistent_clock(struct timespec now)
+3 -2
arch/x86/kernel/rtc.c
··· 178 178 } 179 179 180 180 /* not static: needed by APM */ 181 - unsigned long read_persistent_clock(void) 181 + void read_persistent_clock(struct timespec *ts) 182 182 { 183 183 unsigned long retval, flags; 184 184 ··· 186 186 retval = get_wallclock(); 187 187 spin_unlock_irqrestore(&rtc_lock, flags); 188 188 189 - return retval; 189 + ts->tv_sec = retval; 190 + ts->tv_nsec = 0; 190 191 } 191 192 192 193 int update_persistent_clock(struct timespec now)
+2 -3
arch/xtensa/kernel/time.c
··· 59 59 60 60 void __init time_init(void) 61 61 { 62 - xtime.tv_nsec = 0; 63 - xtime.tv_sec = read_persistent_clock(); 64 - 62 + /* FIXME: xtime&wall_to_monotonic are set in timekeeping_init. */ 63 + read_persistent_clock(&xtime); 65 64 set_normalized_timespec(&wall_to_monotonic, 66 65 -xtime.tv_sec, -xtime.tv_nsec); 67 66
+1 -1
include/linux/time.h
··· 101 101 extern struct timespec wall_to_monotonic; 102 102 extern seqlock_t xtime_lock; 103 103 104 - extern unsigned long read_persistent_clock(void); 104 + extern void read_persistent_clock(struct timespec *ts); 105 105 extern int update_persistent_clock(struct timespec now); 106 106 extern int no_sync_cmos_clock __read_mostly; 107 107 void timekeeping_init(void);
+29 -23
kernel/time/timekeeping.c
··· 154 154 */ 155 155 struct timespec xtime __attribute__ ((aligned (16))); 156 156 struct timespec wall_to_monotonic __attribute__ ((aligned (16))); 157 - static unsigned long total_sleep_time; /* seconds */ 157 + static struct timespec total_sleep_time; 158 158 159 159 /* 160 160 * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. ··· 487 487 } 488 488 489 489 /** 490 - * read_persistent_clock - Return time in seconds from the persistent clock. 490 + * read_persistent_clock - Return time from the persistent clock. 491 491 * 492 492 * Weak dummy function for arches that do not yet support it. 493 - * Returns seconds from epoch using the battery backed persistent clock. 494 - * Returns zero if unsupported. 493 + * Reads the time from the battery backed persistent clock. 494 + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. 495 495 * 496 496 * XXX - Do be sure to remove it once all arches implement it. 497 497 */ 498 - unsigned long __attribute__((weak)) read_persistent_clock(void) 498 + void __attribute__((weak)) read_persistent_clock(struct timespec *ts) 499 499 { 500 - return 0; 500 + ts->tv_sec = 0; 501 + ts->tv_nsec = 0; 501 502 } 502 503 503 504 /* ··· 508 507 { 509 508 struct clocksource *clock; 510 509 unsigned long flags; 511 - unsigned long sec = read_persistent_clock(); 510 + struct timespec now; 511 + 512 + read_persistent_clock(&now); 512 513 513 514 write_seqlock_irqsave(&xtime_lock, flags); 514 515 ··· 521 518 clock->enable(clock); 522 519 timekeeper_setup_internals(clock); 523 520 524 - xtime.tv_sec = sec; 525 - xtime.tv_nsec = 0; 521 + xtime.tv_sec = now.tv_sec; 522 + xtime.tv_nsec = now.tv_nsec; 526 523 raw_time.tv_sec = 0; 527 524 raw_time.tv_nsec = 0; 528 525 set_normalized_timespec(&wall_to_monotonic, 529 526 -xtime.tv_sec, -xtime.tv_nsec); 530 527 update_xtime_cache(0); 531 - total_sleep_time = 0; 528 + total_sleep_time.tv_sec = 0; 529 + total_sleep_time.tv_nsec = 0; 532 530 
write_sequnlock_irqrestore(&xtime_lock, flags); 533 531 } 534 532 535 533 /* time in seconds when suspend began */ 536 - static unsigned long timekeeping_suspend_time; 534 + static struct timespec timekeeping_suspend_time; 537 535 538 536 /** 539 537 * timekeeping_resume - Resumes the generic timekeeping subsystem. ··· 547 543 static int timekeeping_resume(struct sys_device *dev) 548 544 { 549 545 unsigned long flags; 550 - unsigned long now = read_persistent_clock(); 546 + struct timespec ts; 547 + 548 + read_persistent_clock(&ts); 551 549 552 550 clocksource_resume(); 553 551 554 552 write_seqlock_irqsave(&xtime_lock, flags); 555 553 556 - if (now && (now > timekeeping_suspend_time)) { 557 - unsigned long sleep_length = now - timekeeping_suspend_time; 558 - 559 - xtime.tv_sec += sleep_length; 560 - wall_to_monotonic.tv_sec -= sleep_length; 561 - total_sleep_time += sleep_length; 554 + if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { 555 + ts = timespec_sub(ts, timekeeping_suspend_time); 556 + xtime = timespec_add_safe(xtime, ts); 557 + wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); 558 + total_sleep_time = timespec_add_safe(total_sleep_time, ts); 562 559 } 563 560 update_xtime_cache(0); 564 561 /* re-base the last cycle value */ ··· 582 577 { 583 578 unsigned long flags; 584 579 585 - timekeeping_suspend_time = read_persistent_clock(); 580 + read_persistent_clock(&timekeeping_suspend_time); 586 581 587 582 write_seqlock_irqsave(&xtime_lock, flags); 588 583 timekeeping_forward_now(); ··· 806 801 */ 807 802 void getboottime(struct timespec *ts) 808 803 { 809 - set_normalized_timespec(ts, 810 - - (wall_to_monotonic.tv_sec + total_sleep_time), 811 - - wall_to_monotonic.tv_nsec); 804 + struct timespec boottime; 805 + 806 + boottime = timespec_add_safe(wall_to_monotonic, total_sleep_time); 807 + set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); 812 808 } 813 809 814 810 /** ··· 818 812 */ 819 813 void monotonic_to_bootbased(struct 
timespec *ts) 820 814 { 821 - ts->tv_sec += total_sleep_time; 815 + *ts = timespec_add_safe(*ts, total_sleep_time); 822 816 } 823 817 824 818 unsigned long get_seconds(void)