Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

clocksource: introduce CLOCK_MONOTONIC_RAW

While discussing his work on clock synchronization (see btime.sf.net),
Josip Loncaric mentioned that for really close synchronization it is
useful to have access to "hardware time", that is, a notion of time that is
not in any way adjusted by the clock slewing done to keep close time sync.

Part of the issue is that if we use the kernel's NTP-adjusted
representation of time to measure how we should correct time, we
can run into what Paul McKenney aptly described as "Painting a road using
the lines we're painting as the guide".

I had been thinking of a similar problem, and was trying to come up with a
way to give users access to a purely hardware based time representation
that avoided users having to know the underlying frequency and mask values
needed to deal with the wide variety of possible underlying hardware
counters.

My solution is to introduce CLOCK_MONOTONIC_RAW. This exposes a
nanosecond-based time value that increments starting at bootup and has no
frequency adjustments made to it whatsoever.

The time is accessed from userspace via the clock_gettime() syscall,
passing CLOCK_MONOTONIC_RAW as the clock_id.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

authored by

John Stultz and committed by
Ingo Molnar
2d42244a 9a055117

+64
+3
include/linux/clocksource.h
··· 79 79 /* timekeeping specific data, ignore */ 80 80 cycle_t cycle_interval; 81 81 u64 xtime_interval; 82 + u32 raw_interval; 82 83 /* 83 84 * Second part is written at each timer interrupt 84 85 * Keep it in a different cache line to dirty no ··· 88 87 cycle_t cycle_last ____cacheline_aligned_in_smp; 89 88 u64 xtime_nsec; 90 89 s64 error; 90 + struct timespec raw_time; 91 91 92 92 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG 93 93 /* Watchdog related data, used by the framework */ ··· 217 215 218 216 /* Go back from cycles -> shifted ns, this time use ntp adjused mult */ 219 217 c->xtime_interval = (u64)c->cycle_interval * c->mult; 218 + c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift; 220 219 } 221 220 222 221
+2
include/linux/time.h
··· 117 117 extern unsigned int alarm_setitimer(unsigned int seconds); 118 118 extern int do_getitimer(int which, struct itimerval *value); 119 119 extern void getnstimeofday(struct timespec *tv); 120 + extern void getrawmonotonic(struct timespec *ts); 120 121 extern void getboottime(struct timespec *ts); 121 122 extern void monotonic_to_bootbased(struct timespec *ts); 122 123 ··· 215 214 #define CLOCK_MONOTONIC 1 216 215 #define CLOCK_PROCESS_CPUTIME_ID 2 217 216 #define CLOCK_THREAD_CPUTIME_ID 3 217 + #define CLOCK_MONOTONIC_RAW 4 218 218 219 219 /* 220 220 * The IDs of various hardware clocks:
+15
kernel/posix-timers.c
··· 223 223 } 224 224 225 225 /* 226 + * Get monotonic time for posix timers 227 + */ 228 + static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) 229 + { 230 + getrawmonotonic(tp); 231 + return 0; 232 + } 233 + 234 + /* 226 235 * Initialize everything, well, just everything in Posix clocks/timers ;) 227 236 */ 228 237 static __init int init_posix_timers(void) ··· 244 235 .clock_get = posix_ktime_get_ts, 245 236 .clock_set = do_posix_clock_nosettime, 246 237 }; 238 + struct k_clock clock_monotonic_raw = { 239 + .clock_getres = hrtimer_get_res, 240 + .clock_get = posix_get_monotonic_raw, 241 + .clock_set = do_posix_clock_nosettime, 242 + }; 247 243 248 244 register_posix_clock(CLOCK_REALTIME, &clock_realtime); 249 245 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); 246 + register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); 250 247 251 248 posix_timers_cache = kmem_cache_create("posix_timers_cache", 252 249 sizeof (struct k_itimer), 0, SLAB_PANIC,
+44
kernel/time/timekeeping.c
··· 75 75 76 76 nsec = cyc2ns(clock, cycle_delta); 77 77 timespec_add_ns(&xtime, nsec); 78 + 79 + nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; 80 + clock->raw_time.tv_nsec += nsec; 78 81 } 79 82 80 83 /** ··· 186 183 187 184 clocksource_forward_now(); 188 185 186 + new->raw_time = clock->raw_time; 187 + 189 188 clock = new; 190 189 clock->cycle_last = 0; 191 190 clock->cycle_last = clocksource_read(new); ··· 208 203 static inline void clocksource_forward_now(void) { } 209 204 static inline void change_clocksource(void) { } 210 205 #endif 206 + 207 + /** 208 + * getrawmonotonic - Returns the raw monotonic time in a timespec 209 + * @ts: pointer to the timespec to be set 210 + * 211 + * Returns the raw monotonic time (completely un-modified by ntp) 212 + */ 213 + void getrawmonotonic(struct timespec *ts) 214 + { 215 + unsigned long seq; 216 + s64 nsecs; 217 + cycle_t cycle_now, cycle_delta; 218 + 219 + do { 220 + seq = read_seqbegin(&xtime_lock); 221 + 222 + /* read clocksource: */ 223 + cycle_now = clocksource_read(clock); 224 + 225 + /* calculate the delta since the last update_wall_time: */ 226 + cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 227 + 228 + /* convert to nanoseconds: */ 229 + nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; 230 + 231 + *ts = clock->raw_time; 232 + 233 + } while (read_seqretry(&xtime_lock, seq)); 234 + 235 + timespec_add_ns(ts, nsecs); 236 + } 237 + EXPORT_SYMBOL(getrawmonotonic); 238 + 211 239 212 240 /** 213 241 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres ··· 502 464 clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; 503 465 xtime.tv_sec++; 504 466 second_overflow(); 467 + } 468 + 469 + clock->raw_time.tv_nsec += clock->raw_interval; 470 + if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) { 471 + clock->raw_time.tv_nsec -= NSEC_PER_SEC; 472 + clock->raw_time.tv_sec++; 505 473 } 506 474 507 475 /* accumulate error between NTP and clock interval */