clocksource: Make negative motion detection more robust

Guenter reported boot stalls on an emulated ARM 32-bit platform, which has a
24-bit wide clocksource.

It turns out that the calculated maximal idle time, which limits idle
sleeps to prevent clocksource wrap arounds, is close to the point where the
negative motion detection triggers.

max_idle_ns: 597268854 ns
negative motion tripping point: 671088640 ns

If the idle wakeup is delayed beyond that point, the clocksource
advances far enough to trigger the negative motion detection. This
prevents the clock from advancing and, in the worst case, the system
stalls completely if the consecutive sleeps based on the stale clock are
delayed as well.

Cure this by calculating a more robust cut-off value for negative motion,
which covers 87.5% of the actual clocksource counter width. Compare the
delta against this value to catch negative motion. This is specifically for
clock sources with a small counter width as their wrap around time is close
to the half counter width. For clock sources with wide counters this is not
a problem because the maximum idle time is far from the half counter width
due to the math overflow protection constraints.

For the case at hand this results in a tripping point of 1174405120ns.

Note that this cannot prevent issues when the delay exceeds the 87.5%
margin, but that is no different from the previous unchecked version which
allowed arbitrary time jumps.

Systems with small counter width are prone to invalid results, but this
problem is unlikely to be seen on real hardware. If such a system
completely stalls for more than half a second, then there are other more
urgent problems than the counter wrapping around.

Fixes: c163e40af9b2 ("timekeeping: Always check for negative motion")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/all/8734j5ul4x.ffs@tglx
Closes: https://lore.kernel.org/all/387b120b-d68a-45e8-b6ab-768cd95d11c2@roeck-us.net

+20 -7
+2
include/linux/clocksource.h
··· 49 * @archdata: Optional arch-specific data 50 * @max_cycles: Maximum safe cycle value which won't overflow on 51 * multiplication 52 * @name: Pointer to clocksource name 53 * @list: List head for registration (internal) 54 * @freq_khz: Clocksource frequency in khz. ··· 110 struct arch_clocksource_data archdata; 111 #endif 112 u64 max_cycles; 113 const char *name; 114 struct list_head list; 115 u32 freq_khz;
··· 49 * @archdata: Optional arch-specific data 50 * @max_cycles: Maximum safe cycle value which won't overflow on 51 * multiplication 52 + * @max_raw_delta: Maximum safe delta value for negative motion detection 53 * @name: Pointer to clocksource name 54 * @list: List head for registration (internal) 55 * @freq_khz: Clocksource frequency in khz. ··· 109 struct arch_clocksource_data archdata; 110 #endif 111 u64 max_cycles; 112 + u64 max_raw_delta; 113 const char *name; 114 struct list_head list; 115 u32 freq_khz;
+10 -1
kernel/time/clocksource.c
··· 24 25 static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end) 26 { 27 - u64 delta = clocksource_delta(end, start, cs->mask); 28 29 if (likely(delta < cs->max_cycles)) 30 return clocksource_cyc2ns(delta, cs->mult, cs->shift); ··· 993 cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift, 994 cs->maxadj, cs->mask, 995 &cs->max_cycles); 996 } 997 998 static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
··· 24 25 static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end) 26 { 27 + u64 delta = clocksource_delta(end, start, cs->mask, cs->max_raw_delta); 28 29 if (likely(delta < cs->max_cycles)) 30 return clocksource_cyc2ns(delta, cs->mult, cs->shift); ··· 993 cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift, 994 cs->maxadj, cs->mask, 995 &cs->max_cycles); 996 + 997 + /* 998 + * Threshold for detecting negative motion in clocksource_delta(). 999 + * 1000 + * Allow for 0.875 of the counter width so that overly long idle 1001 + * sleeps, which go slightly over mask/2, do not trigger the 1002 + * negative motion detection. 1003 + */ 1004 + cs->max_raw_delta = (cs->mask >> 1) + (cs->mask >> 2) + (cs->mask >> 3); 1005 } 1006 1007 static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
+4 -2
kernel/time/timekeeping.c
··· 755 u64 cycle_now, delta; 756 757 cycle_now = tk_clock_read(&tk->tkr_mono); 758 - delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); 759 tk->tkr_mono.cycle_last = cycle_now; 760 tk->tkr_raw.cycle_last = cycle_now; 761 ··· 2231 return false; 2232 2233 offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), 2234 - tk->tkr_mono.cycle_last, tk->tkr_mono.mask); 2235 2236 /* Check if there's really nothing to do */ 2237 if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
··· 755 u64 cycle_now, delta; 756 757 cycle_now = tk_clock_read(&tk->tkr_mono); 758 + delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, 759 + tk->tkr_mono.clock->max_raw_delta); 760 tk->tkr_mono.cycle_last = cycle_now; 761 tk->tkr_raw.cycle_last = cycle_now; 762 ··· 2230 return false; 2231 2232 offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), 2233 + tk->tkr_mono.cycle_last, tk->tkr_mono.mask, 2234 + tk->tkr_mono.clock->max_raw_delta); 2235 2236 /* Check if there's really nothing to do */ 2237 if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
+4 -4
kernel/time/timekeeping_internal.h
··· 30 31 #endif 32 33 - static inline u64 clocksource_delta(u64 now, u64 last, u64 mask) 34 { 35 u64 ret = (now - last) & mask; 36 37 /* 38 - * Prevent time going backwards by checking the MSB of mask in 39 - * the result. If set, return 0. 40 */ 41 - return ret & ~(mask >> 1) ? 0 : ret; 42 } 43 44 /* Semi public for serialization of non timekeeper VDSO updates. */
··· 30 31 #endif 32 33 + static inline u64 clocksource_delta(u64 now, u64 last, u64 mask, u64 max_delta) 34 { 35 u64 ret = (now - last) & mask; 36 37 /* 38 + * Prevent time going backwards by checking the result against 39 + * @max_delta. If greater, return 0. 40 */ 41 + return ret > max_delta ? 0 : ret; 42 } 43 44 /* Semi public for serialization of non timekeeper VDSO updates. */