Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer fixes from Thomas Gleixner:
"A few fixes for timekeeping and timers:

- Plug a subtle race due to a missing READ_ONCE() in the timekeeping
core, where a reload of the clocksource pointer could supply an
inconsistent callback argument to the clocksource->read function
(a minimal sketch of the pattern follows this list).

- Correct the CLOCK_MONOTONIC_RAW sub-nanosecond accounting in the
timekeeping core code, to prevent a possible discontinuity.

- Apply a similar fix to the arm64 vdso clock_gettime()
implementation.

- Add missing includes to clocksource drivers that relied on
indirect includes, which fail in certain configs.

- Use the proper iomem pointer for read/iounmap in a probe function"
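
To make the READ_ONCE() race concrete, here is a minimal userspace sketch of the pattern (the struct names and the READ_ONCE() macro are modeled on the kernel's, but this is illustrative GCC-style C, not the kernel source): loading tkr->clock twice lets a concurrent clocksource switch pair one clock's read function with another clock's state, while a single snapshotted load keeps the pair consistent.

#include <stdint.h>

/* Poor man's READ_ONCE(): forces a single, non-reloaded access. */
#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

struct clocksource {
	uint64_t (*read)(struct clocksource *cs);
};

struct tk_read_base {
	struct clocksource *clock;	/* swapped by concurrent updaters */
};

/*
 * Racy: tkr->clock may be loaded twice, so after a concurrent
 * clocksource change this can end up calling old_cs->read(new_cs) -
 * the "inconsistent callback argument" the fix refers to.
 */
static uint64_t racy_read(struct tk_read_base *tkr)
{
	return tkr->clock->read(tkr->clock);
}

/*
 * Fixed: snapshot the pointer once, so the read function is always
 * paired with its own clocksource. This mirrors the tk_clock_read()
 * helper added in the timekeeping.c diff below.
 */
static uint64_t safe_read(struct tk_read_base *tkr)
{
	struct clocksource *clock = READ_ONCE(tkr->clock);

	return clock->read(clock);
}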

* 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
arm64/vdso: Fix nsec handling for CLOCK_MONOTONIC_RAW
time: Fix CLOCK_MONOTONIC_RAW sub-nanosecond accounting
time: Fix clock->read(clock) race around clocksource changes
clocksource: Explicitly include linux/clocksource.h when needed
clocksource/drivers/arm_arch_timer: Fix read and iounmap of incorrect variable

Changed files: +55 -33
arch/arm64/kernel/vdso.c (+3 -2)
···
 	/* tkr_mono.cycle_last == tkr_raw.cycle_last */
 	vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
 	vdso_data->raw_time_sec		= tk->raw_time.tv_sec;
-	vdso_data->raw_time_nsec	= tk->raw_time.tv_nsec;
+	vdso_data->raw_time_nsec	= (tk->raw_time.tv_nsec <<
+					   tk->tkr_raw.shift) +
+					  tk->tkr_raw.xtime_nsec;
 	vdso_data->xtime_clock_sec	= tk->xtime_sec;
 	vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
-	/* tkr_raw.xtime_nsec == 0 */
 	vdso_data->cs_mono_mult		= tk->tkr_mono.mult;
 	vdso_data->cs_raw_mult		= tk->tkr_raw.mult;
 	/* tkr_mono.shift == tkr_raw.shift */
arch/arm64/kernel/vdso/gettimeofday.S (-1)
···
 	seqcnt_check fail=monotonic_raw

 	/* All computations are done with left-shifted nsecs. */
-	lsl	x14, x14, x12
 	get_nsec_per_sec res=x9
 	lsl	x9, x9, x12
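
The two vdso hunks are one logical change: the kernel now publishes raw_time_nsec already left-shifted, with the sub-nanosecond remainder from tkr_raw.xtime_nsec folded in, so the assembly fast path no longer shifts it itself (the dropped lsl). A simplified C rendering of the resulting CLOCK_MONOTONIC_RAW math, assuming the vdso_data fields from the hunk above (illustrative only; the real implementation is the assembly in gettimeofday.S):

#include <stdint.h>

/*
 * All intermediate math is in left-shifted nanoseconds; only the
 * final result is shifted back down.
 */
static uint64_t raw_nsec(uint64_t cycle_now, uint64_t cs_cycle_last,
			 uint32_t cs_raw_mult, uint32_t shift,
			 uint64_t raw_time_nsec /* already shifted */)
{
	/* cycles since the last timekeeper update */
	uint64_t delta = cycle_now - cs_cycle_last;

	/* shifted ns = published shifted remainder + delta * mult */
	return (raw_time_nsec + delta * cs_raw_mult) >> shift;
}

Previously the assembly shifted an unshifted tv_nsec up itself and the sub-nanosecond remainder was assumed to be zero; once the core keeps that remainder (see the timekeeping.c changes below), pre-shifting on the kernel side keeps the vdso in agreement with it.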
drivers/clocksource/arm_arch_timer.c (+2 -2)
···
 		return 0;
 	}

-	rate = readl_relaxed(frame + CNTFRQ);
+	rate = readl_relaxed(base + CNTFRQ);

-	iounmap(frame);
+	iounmap(base);

 	return rate;
 }
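
The bug here was using frame (the frame descriptor) where base (the cookie returned by ioremap()) was meant, so the register read went through the wrong pointer and the wrong pointer was unmapped. A hedged sketch of the corrected pattern, with simplified, hypothetical names and an illustrative register offset:

#include <linux/io.h>
#include <linux/types.h>

#define CNTFRQ	0x10	/* illustrative register offset */

/* hypothetical, simplified probe helper */
static u32 frame_rate(phys_addr_t cntbase, size_t size)
{
	void __iomem *base = ioremap(cntbase, size);
	u32 rate;

	if (!base)
		return 0;

	rate = readl_relaxed(base + CNTFRQ);	/* read via the mapping... */
	iounmap(base);				/* ...and unmap that same cookie */

	return rate;
}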
drivers/clocksource/cadence_ttc_timer.c (+1)
···
 #include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/slab.h>
drivers/clocksource/timer-sun5i.c (+1)
···
 #include <linux/clk.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
include/linux/timekeeper_internal.h (+2 -3)
···
  */
 struct tk_read_base {
 	struct clocksource	*clock;
-	u64			(*read)(struct clocksource *cs);
 	u64			mask;
 	u64			cycle_last;
 	u32			mult;
···
  *			interval.
  * @xtime_remainder:	Shifted nano seconds left over when rounding
  *			@cycle_interval
- * @raw_interval:	Raw nano seconds accumulated per NTP interval.
+ * @raw_interval:	Shifted raw nano seconds accumulated per NTP interval.
  * @ntp_error:		Difference between accumulated time and NTP time in ntp
  *			shifted nano seconds.
  * @ntp_error_shift:	Shift conversion between clock shifted nano seconds and
···
 	u64			cycle_interval;
 	u64			xtime_interval;
 	s64			xtime_remainder;
-	u32			raw_interval;
+	u64			raw_interval;
 	/* The ntp_tick_length() value currently being used.
 	 * This cached copy ensures we consistently apply the tick
 	 * length for an entire tick, as ntp_tick_length may change
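
The raw_interval changes (u32 -> u64, unshifted -> shifted nanoseconds) exist because the old representation truncated the sub-nanosecond part of every NTP interval. A rough userspace illustration with made-up clocksource numbers (not taken from real hardware):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Made-up clocksource parameters, for illustration only. */
	uint64_t interval = 40000;	/* cycles per NTP interval */
	uint64_t mult = 104857;		/* cycles -> shifted ns */
	uint32_t shift = 22;

	uint64_t shifted = interval * mult;	/* new: keeps sub-ns bits */
	uint64_t old = shifted >> shift;	/* old: whole ns only */

	/* Sub-ns remainder the old u32 field dropped every interval. */
	double lost = (double)(shifted - (old << shift)) / (double)(1u << shift);

	printf("accumulated %llu ns, dropped %.6f ns this interval\n",
	       (unsigned long long)old, lost);
	return 0;
}

A fraction of a nanosecond per interval is tiny, but it is exactly the kind of bounded error that can surface as the "possible discontinuity" the pull message mentions.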
kernel/time/timekeeping.c (+46 -25)
···
 	tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }

+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function. This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+	struct clocksource *clock = READ_ONCE(tkr->clock);
+
+	return clock->read(clock);
+}
+
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
···
 	 */
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		now = tkr->read(tkr->clock);
+		now = tk_clock_read(tkr);
 		last = tkr->cycle_last;
 		mask = tkr->mask;
 		max = tkr->clock->max_cycles;
···
 	u64 cycle_now, delta;

 	/* read clocksource */
-	cycle_now = tkr->read(tkr->clock);
+	cycle_now = tk_clock_read(tkr);

 	/* calculate the delta since the last update_wall_time */
 	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
···
 	++tk->cs_was_changed_seq;
 	old_clock = tk->tkr_mono.clock;
 	tk->tkr_mono.clock = clock;
-	tk->tkr_mono.read = clock->read;
 	tk->tkr_mono.mask = clock->mask;
-	tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);

 	tk->tkr_raw.clock = clock;
-	tk->tkr_raw.read = clock->read;
 	tk->tkr_raw.mask = clock->mask;
 	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
···
 	/* Go back from cycles -> shifted ns */
 	tk->xtime_interval = interval * clock->mult;
 	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
-	tk->raw_interval = (interval * clock->mult) >> clock->shift;
+	tk->raw_interval = interval * clock->mult;

 	/* if changing clocks, convert xtime_nsec shift units */
 	if (old_clock) {
···
 		now += timekeeping_delta_to_ns(tkr,
 				clocksource_delta(
-					tkr->read(tkr->clock),
+					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
 	} while (read_seqcount_retry(&tkf->seq, seq));
···
 	return cycles_at_suspend;
 }

+static struct clocksource dummy_clock = {
+	.read = dummy_clock_read,
+};
+
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  * @tk: Timekeeper to snapshot.
···
 	struct tk_read_base *tkr = &tk->tkr_mono;

 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	cycles_at_suspend = tkr->read(tkr->clock);
-	tkr_dummy.read = dummy_clock_read;
+	cycles_at_suspend = tk_clock_read(tkr);
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);

 	tkr = &tk->tkr_raw;
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	tkr_dummy.read = dummy_clock_read;
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
···
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-	struct clocksource *clock = tk->tkr_mono.clock;
 	u64 cycle_now, delta;
 	u64 nsec;

-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 	tk->tkr_mono.cycle_last = cycle_now;
 	tk->tkr_raw.cycle_last = cycle_now;
···

 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
 		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 		base_real = ktime_add(tk->tkr_mono.base,
···
 	 * Check whether the system counter value provided by the
 	 * device driver is on the current timekeeping interval.
 	 */
-	now = tk->tkr_mono.read(tk->tkr_mono.clock);
+	now = tk_clock_read(&tk->tkr_mono);
 	interval_start = tk->tkr_mono.cycle_last;
 	if (!cycle_between(interval_start, cycles, now)) {
 		clock_was_set_seq = tk->clock_was_set_seq;
···
 	 * The less preferred source will only be tried if there is no better
 	 * usable source. The rtc part is handled separately in rtc core code.
 	 */
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
 		cycle_now > tk->tkr_mono.cycle_last) {
 		u64 nsec, cyc_delta;
···
 				    u32 shift, unsigned int *clock_set)
 {
 	u64 interval = tk->cycle_interval << shift;
-	u64 raw_nsecs;
+	u64 snsec_per_sec;

 	/* If the offset is smaller than a shifted interval, do nothing */
 	if (offset < interval)
···
 	*clock_set |= accumulate_nsecs_to_secs(tk);

 	/* Accumulate raw time */
-	raw_nsecs = (u64)tk->raw_interval << shift;
-	raw_nsecs += tk->raw_time.tv_nsec;
-	if (raw_nsecs >= NSEC_PER_SEC) {
-		u64 raw_secs = raw_nsecs;
-		raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
-		tk->raw_time.tv_sec += raw_secs;
+	tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
+	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+		tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+		tk->raw_time.tv_sec++;
 	}
-	tk->raw_time.tv_nsec = raw_nsecs;
+	tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+	tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;

 	/* Accumulate error between NTP and clock interval */
 	tk->ntp_error += tk->ntp_tick << shift;
···
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	offset = real_tk->cycle_interval;
 #else
-	offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
 				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
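
The reworked raw-time accumulation above keeps the sub-nanosecond remainder in tkr_raw.xtime_nsec between calls instead of discarding it. The arithmetic can be exercised standalone; field names mirror the hunk, but this is a userspace model with made-up inputs, not the kernel code:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

struct raw_time {
	uint64_t xtime_nsec;	/* shifted ns, carries the sub-ns remainder */
	uint64_t tv_sec;
	uint64_t tv_nsec;
	uint32_t shift;
};

/* Model of the "Accumulate raw time" block from the hunk above. */
static void accumulate_raw(struct raw_time *t, uint64_t raw_interval,
			   uint32_t shift)
{
	uint64_t snsec_per_sec = NSEC_PER_SEC << t->shift;

	t->xtime_nsec += t->tv_nsec << t->shift;	/* rebuild shifted total */
	t->xtime_nsec += raw_interval << shift;
	while (t->xtime_nsec >= snsec_per_sec) {	/* carry into seconds */
		t->xtime_nsec -= snsec_per_sec;
		t->tv_sec++;
	}
	t->tv_nsec = t->xtime_nsec >> t->shift;		/* publish whole ns */
	t->xtime_nsec -= t->tv_nsec << t->shift;	/* keep sub-ns remainder */
}

int main(void)
{
	struct raw_time t = { .shift = 8 };

	/* 0.4 s per call plus a sub-ns sliver that now survives the round trip */
	for (int i = 0; i < 5; i++) {
		accumulate_raw(&t, (400000000ULL << 8) + 129, 0);
		printf("sec=%llu nsec=%llu sub-ns(shifted)=%llu\n",
		       (unsigned long long)t.tv_sec,
		       (unsigned long long)t.tv_nsec,
		       (unsigned long long)t.xtime_nsec);
	}
	return 0;
}

Running this shows the shifted remainder eventually carrying into whole nanoseconds, which is exactly what the old code, truncating once per interval, could never reproduce.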