Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer fixes from Thomas Gleixner:
"Two fixes from the timer department:

- Fix a long standing issue in the NOHZ tick code which causes RB
tree corruption, delayed timers and other malfunctions. The cause
for this is code which modifies the expiry time of an enqueued
hrtimer.

- Revert the CLOCK_MONOTONIC/CLOCK_BOOTTIME unification due to
regression reports. Seems userspace _is_ relying on the documented
behaviour despite our hope that it won't"

* 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
Revert: Unify CLOCK_MONOTONIC and CLOCK_BOOTTIME
tick/sched: Do not mess with an enqueued hrtimer

+119 -109
+11 -3
Documentation/trace/ftrace.rst
··· 461 461 and ticks at the same rate as the hardware clocksource. 462 462 463 463 boot: 464 - Same as mono. Used to be a separate clock which accounted 465 - for the time spent in suspend while CLOCK_MONOTONIC did 466 - not. 464 + This is the boot clock (CLOCK_BOOTTIME) and is based on the 465 + fast monotonic clock, but also accounts for time spent in 466 + suspend. Since the clock access is designed for use in 467 + tracing in the suspend path, some side effects are possible 468 + if clock is accessed after the suspend time is accounted before 469 + the fast mono clock is updated. In this case, the clock update 470 + appears to happen slightly sooner than it normally would have. 471 + Also on 32-bit systems, it's possible that the 64-bit boot offset 472 + sees a partial update. These effects are rare and post 473 + processing should be able to handle them. See comments in the 474 + ktime_get_boot_fast_ns() function for more information. 467 475 468 476 To set a clock, simply echo the clock name into this file:: 469 477
+6 -1
drivers/input/evdev.c
··· 31 31 enum evdev_clock_type { 32 32 EV_CLK_REAL = 0, 33 33 EV_CLK_MONO, 34 + EV_CLK_BOOT, 34 35 EV_CLK_MAX 35 36 }; 36 37 ··· 198 197 case CLOCK_REALTIME: 199 198 clk_type = EV_CLK_REAL; 200 199 break; 201 - case CLOCK_BOOTTIME: 202 200 case CLOCK_MONOTONIC: 203 201 clk_type = EV_CLK_MONO; 202 + break; 203 + case CLOCK_BOOTTIME: 204 + clk_type = EV_CLK_BOOT; 204 205 break; 205 206 default: 206 207 return -EINVAL; ··· 314 311 315 312 ev_time[EV_CLK_MONO] = ktime_get(); 316 313 ev_time[EV_CLK_REAL] = ktime_mono_to_real(ev_time[EV_CLK_MONO]); 314 + ev_time[EV_CLK_BOOT] = ktime_mono_to_any(ev_time[EV_CLK_MONO], 315 + TK_OFFS_BOOT); 317 316 318 317 rcu_read_lock(); 319 318
+2
include/linux/hrtimer.h
··· 161 161 enum hrtimer_base_type { 162 162 HRTIMER_BASE_MONOTONIC, 163 163 HRTIMER_BASE_REALTIME, 164 + HRTIMER_BASE_BOOTTIME, 164 165 HRTIMER_BASE_TAI, 165 166 HRTIMER_BASE_MONOTONIC_SOFT, 166 167 HRTIMER_BASE_REALTIME_SOFT, 168 + HRTIMER_BASE_BOOTTIME_SOFT, 167 169 HRTIMER_BASE_TAI_SOFT, 168 170 HRTIMER_MAX_CLOCK_BASES, 169 171 };
-2
include/linux/timekeeper_internal.h
··· 52 52 * @offs_real: Offset clock monotonic -> clock realtime 53 53 * @offs_boot: Offset clock monotonic -> clock boottime 54 54 * @offs_tai: Offset clock monotonic -> clock tai 55 - * @time_suspended: Accumulated suspend time 56 55 * @tai_offset: The current UTC to TAI offset in seconds 57 56 * @clock_was_set_seq: The sequence number of clock was set events 58 57 * @cs_was_changed_seq: The sequence number of clocksource change events ··· 94 95 ktime_t offs_real; 95 96 ktime_t offs_boot; 96 97 ktime_t offs_tai; 97 - ktime_t time_suspended; 98 98 s32 tai_offset; 99 99 unsigned int clock_was_set_seq; 100 100 u8 cs_was_changed_seq;
+25 -12
include/linux/timekeeping.h
··· 33 33 extern time64_t ktime_get_seconds(void); 34 34 extern time64_t __ktime_get_real_seconds(void); 35 35 extern time64_t ktime_get_real_seconds(void); 36 - extern void ktime_get_active_ts64(struct timespec64 *ts); 37 36 38 37 extern int __getnstimeofday64(struct timespec64 *tv); 39 38 extern void getnstimeofday64(struct timespec64 *tv); 40 39 extern void getboottime64(struct timespec64 *ts); 41 40 42 - #define ktime_get_real_ts64(ts) getnstimeofday64(ts) 43 - 44 - /* Clock BOOTTIME compatibility wrappers */ 45 - static inline void get_monotonic_boottime64(struct timespec64 *ts) 46 - { 47 - ktime_get_ts64(ts); 48 - } 41 + #define ktime_get_real_ts64(ts) getnstimeofday64(ts) 49 42 50 43 /* 51 44 * ktime_t based interfaces 52 45 */ 46 + 53 47 enum tk_offsets { 54 48 TK_OFFS_REAL, 49 + TK_OFFS_BOOT, 55 50 TK_OFFS_TAI, 56 51 TK_OFFS_MAX, 57 52 }; ··· 57 62 extern ktime_t ktime_get_raw(void); 58 63 extern u32 ktime_get_resolution_ns(void); 59 64 60 - /* Clock BOOTTIME compatibility wrappers */ 61 - static inline ktime_t ktime_get_boottime(void) { return ktime_get(); } 62 - static inline u64 ktime_get_boot_ns(void) { return ktime_get(); } 63 - 64 65 /** 65 66 * ktime_get_real - get the real (wall-) time in ktime_t format 66 67 */ 67 68 static inline ktime_t ktime_get_real(void) 68 69 { 69 70 return ktime_get_with_offset(TK_OFFS_REAL); 71 + } 72 + 73 + /** 74 + * ktime_get_boottime - Returns monotonic time since boot in ktime_t format 75 + * 76 + * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the 77 + * time spent in suspend. 
78 + */ 79 + static inline ktime_t ktime_get_boottime(void) 80 + { 81 + return ktime_get_with_offset(TK_OFFS_BOOT); 70 82 } 71 83 72 84 /** ··· 102 100 return ktime_to_ns(ktime_get_real()); 103 101 } 104 102 103 + static inline u64 ktime_get_boot_ns(void) 104 + { 105 + return ktime_to_ns(ktime_get_boottime()); 106 + } 107 + 105 108 static inline u64 ktime_get_tai_ns(void) 106 109 { 107 110 return ktime_to_ns(ktime_get_clocktai()); ··· 119 112 120 113 extern u64 ktime_get_mono_fast_ns(void); 121 114 extern u64 ktime_get_raw_fast_ns(void); 115 + extern u64 ktime_get_boot_fast_ns(void); 122 116 extern u64 ktime_get_real_fast_ns(void); 123 117 124 118 /* 125 119 * timespec64 interfaces utilizing the ktime based ones 126 120 */ 121 + static inline void get_monotonic_boottime64(struct timespec64 *ts) 122 + { 123 + *ts = ktime_to_timespec64(ktime_get_boottime()); 124 + } 125 + 127 126 static inline void timekeeping_clocktai64(struct timespec64 *ts) 128 127 { 129 128 *ts = ktime_to_timespec64(ktime_get_clocktai());
-1
include/uapi/linux/time.h
··· 73 73 */ 74 74 #define CLOCK_SGI_CYCLE 10 75 75 #define CLOCK_TAI 11 76 - #define CLOCK_MONOTONIC_ACTIVE 12 77 76 78 77 #define MAX_CLOCKS 16 79 78 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC)
+14 -2
kernel/time/hrtimer.c
··· 91 91 .get_time = &ktime_get_real, 92 92 }, 93 93 { 94 + .index = HRTIMER_BASE_BOOTTIME, 95 + .clockid = CLOCK_BOOTTIME, 96 + .get_time = &ktime_get_boottime, 97 + }, 98 + { 94 99 .index = HRTIMER_BASE_TAI, 95 100 .clockid = CLOCK_TAI, 96 101 .get_time = &ktime_get_clocktai, ··· 111 106 .get_time = &ktime_get_real, 112 107 }, 113 108 { 109 + .index = HRTIMER_BASE_BOOTTIME_SOFT, 110 + .clockid = CLOCK_BOOTTIME, 111 + .get_time = &ktime_get_boottime, 112 + }, 113 + { 114 114 .index = HRTIMER_BASE_TAI_SOFT, 115 115 .clockid = CLOCK_TAI, 116 116 .get_time = &ktime_get_clocktai, ··· 129 119 130 120 [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, 131 121 [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, 132 - [CLOCK_BOOTTIME] = HRTIMER_BASE_MONOTONIC, 122 + [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, 133 123 [CLOCK_TAI] = HRTIMER_BASE_TAI, 134 124 }; 135 125 ··· 581 571 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) 582 572 { 583 573 ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; 574 + ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; 584 575 ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; 585 576 586 577 ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, 587 - offs_real, offs_tai); 578 + offs_real, offs_boot, offs_tai); 588 579 589 580 base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; 581 + base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; 590 582 base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; 591 583 592 584 return now;
-2
kernel/time/posix-stubs.c
··· 83 83 case CLOCK_BOOTTIME: 84 84 get_monotonic_boottime64(tp); 85 85 break; 86 - case CLOCK_MONOTONIC_ACTIVE: 87 - ktime_get_active_ts64(tp); 88 86 default: 89 87 return -EINVAL; 90 88 }
+17 -9
kernel/time/posix-timers.c
··· 252 252 return 0; 253 253 } 254 254 255 - static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) 255 + static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp) 256 256 { 257 - timekeeping_clocktai64(tp); 257 + get_monotonic_boottime64(tp); 258 258 return 0; 259 259 } 260 260 261 - static int posix_get_monotonic_active(clockid_t which_clock, 262 - struct timespec64 *tp) 261 + static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) 263 262 { 264 - ktime_get_active_ts64(tp); 263 + timekeeping_clocktai64(tp); 265 264 return 0; 266 265 } 267 266 ··· 1316 1317 .timer_arm = common_hrtimer_arm, 1317 1318 }; 1318 1319 1319 - static const struct k_clock clock_monotonic_active = { 1320 + static const struct k_clock clock_boottime = { 1320 1321 .clock_getres = posix_get_hrtimer_res, 1321 - .clock_get = posix_get_monotonic_active, 1322 + .clock_get = posix_get_boottime, 1323 + .nsleep = common_nsleep, 1324 + .timer_create = common_timer_create, 1325 + .timer_set = common_timer_set, 1326 + .timer_get = common_timer_get, 1327 + .timer_del = common_timer_del, 1328 + .timer_rearm = common_hrtimer_rearm, 1329 + .timer_forward = common_hrtimer_forward, 1330 + .timer_remaining = common_hrtimer_remaining, 1331 + .timer_try_to_cancel = common_hrtimer_try_to_cancel, 1332 + .timer_arm = common_hrtimer_arm, 1322 1333 }; 1323 1334 1324 1335 static const struct k_clock * const posix_clocks[] = { ··· 1339 1330 [CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw, 1340 1331 [CLOCK_REALTIME_COARSE] = &clock_realtime_coarse, 1341 1332 [CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse, 1342 - [CLOCK_BOOTTIME] = &clock_monotonic, 1333 + [CLOCK_BOOTTIME] = &clock_boottime, 1343 1334 [CLOCK_REALTIME_ALARM] = &alarm_clock, 1344 1335 [CLOCK_BOOTTIME_ALARM] = &alarm_clock, 1345 1336 [CLOCK_TAI] = &clock_tai, 1346 - [CLOCK_MONOTONIC_ACTIVE] = &clock_monotonic_active, 1347 1337 }; 1348 1338 1349 1339 static const struct k_clock *clockid_to_kclock(const 
clockid_t id)
-15
kernel/time/tick-common.c
··· 419 419 clockevents_shutdown(td->evtdev); 420 420 } 421 421 422 - static void tick_forward_next_period(void) 423 - { 424 - ktime_t delta, now = ktime_get(); 425 - u64 n; 426 - 427 - delta = ktime_sub(now, tick_next_period); 428 - n = ktime_divns(delta, tick_period); 429 - tick_next_period += n * tick_period; 430 - if (tick_next_period < now) 431 - tick_next_period += tick_period; 432 - tick_sched_forward_next_period(); 433 - } 434 - 435 422 /** 436 423 * tick_resume_local - Resume the local tick device 437 424 * ··· 430 443 { 431 444 struct tick_device *td = this_cpu_ptr(&tick_cpu_device); 432 445 bool broadcast = tick_resume_check_broadcast(); 433 - 434 - tick_forward_next_period(); 435 446 436 447 clockevents_tick_resume(td->evtdev); 437 448 if (!broadcast) {
-6
kernel/time/tick-internal.h
··· 141 141 static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } 142 142 #endif /* !(BROADCAST && ONESHOT) */ 143 143 144 - #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) 145 - extern void tick_sched_forward_next_period(void); 146 - #else 147 - static inline void tick_sched_forward_next_period(void) { } 148 - #endif 149 - 150 144 /* NO_HZ_FULL internal */ 151 145 #ifdef CONFIG_NO_HZ_FULL 152 146 extern void tick_nohz_init(void);
+5 -14
kernel/time/tick-sched.c
··· 52 52 static ktime_t last_jiffies_update; 53 53 54 54 /* 55 - * Called after resume. Make sure that jiffies are not fast forwarded due to 56 - * clock monotonic being forwarded by the suspended time. 57 - */ 58 - void tick_sched_forward_next_period(void) 59 - { 60 - last_jiffies_update = tick_next_period; 61 - } 62 - 63 - /* 64 55 * Must be called with interrupts disabled ! 65 56 */ 66 57 static void tick_do_update_jiffies64(ktime_t now) ··· 795 804 return; 796 805 } 797 806 798 - hrtimer_set_expires(&ts->sched_timer, tick); 799 - 800 - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) 801 - hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); 802 - else 807 + if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 808 + hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED); 809 + } else { 810 + hrtimer_set_expires(&ts->sched_timer, tick); 803 811 tick_program_event(tick, 1); 812 + } 804 813 } 805 814 806 815 static void tick_nohz_retain_tick(struct tick_sched *ts)
+37 -41
kernel/time/timekeeping.c
··· 138 138 139 139 static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) 140 140 { 141 - /* Update both bases so mono and raw stay coupled. */ 142 - tk->tkr_mono.base += delta; 143 - tk->tkr_raw.base += delta; 144 - 145 - /* Accumulate time spent in suspend */ 146 - tk->time_suspended += delta; 141 + tk->offs_boot = ktime_add(tk->offs_boot, delta); 147 142 } 148 143 149 144 /* ··· 468 473 } 469 474 EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); 470 475 476 + /** 477 + * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. 478 + * 479 + * To keep it NMI safe since we're accessing from tracing, we're not using a 480 + * separate timekeeper with updates to monotonic clock and boot offset 481 + * protected with seqlocks. This has the following minor side effects: 482 + * 483 + * (1) Its possible that a timestamp be taken after the boot offset is updated 484 + * but before the timekeeper is updated. If this happens, the new boot offset 485 + * is added to the old timekeeping making the clock appear to update slightly 486 + * earlier: 487 + * CPU 0 CPU 1 488 + * timekeeping_inject_sleeptime64() 489 + * __timekeeping_inject_sleeptime(tk, delta); 490 + * timestamp(); 491 + * timekeeping_update(tk, TK_CLEAR_NTP...); 492 + * 493 + * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be 494 + * partially updated. Since the tk->offs_boot update is a rare event, this 495 + * should be a rare occurrence which postprocessing should be able to handle. 496 + */ 497 + u64 notrace ktime_get_boot_fast_ns(void) 498 + { 499 + struct timekeeper *tk = &tk_core.timekeeper; 500 + 501 + return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot)); 502 + } 503 + EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); 504 + 505 + 471 506 /* 472 507 * See comment for __ktime_get_fast_ns() vs. 
timestamp ordering 473 508 */ ··· 789 764 790 765 static ktime_t *offsets[TK_OFFS_MAX] = { 791 766 [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real, 767 + [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot, 792 768 [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai, 793 769 }; 794 770 ··· 885 859 timespec64_add_ns(ts, nsec + tomono.tv_nsec); 886 860 } 887 861 EXPORT_SYMBOL_GPL(ktime_get_ts64); 888 - 889 - /** 890 - * ktime_get_active_ts64 - Get the active non-suspended monotonic clock 891 - * @ts: pointer to timespec variable 892 - * 893 - * The function calculates the monotonic clock from the realtime clock and 894 - * the wall_to_monotonic offset, subtracts the accumulated suspend time and 895 - * stores the result in normalized timespec64 format in the variable 896 - * pointed to by @ts. 897 - */ 898 - void ktime_get_active_ts64(struct timespec64 *ts) 899 - { 900 - struct timekeeper *tk = &tk_core.timekeeper; 901 - struct timespec64 tomono, tsusp; 902 - u64 nsec, nssusp; 903 - unsigned int seq; 904 - 905 - WARN_ON(timekeeping_suspended); 906 - 907 - do { 908 - seq = read_seqcount_begin(&tk_core.seq); 909 - ts->tv_sec = tk->xtime_sec; 910 - nsec = timekeeping_get_ns(&tk->tkr_mono); 911 - tomono = tk->wall_to_monotonic; 912 - nssusp = tk->time_suspended; 913 - } while (read_seqcount_retry(&tk_core.seq, seq)); 914 - 915 - ts->tv_sec += tomono.tv_sec; 916 - ts->tv_nsec = 0; 917 - timespec64_add_ns(ts, nsec + tomono.tv_nsec); 918 - tsusp = ns_to_timespec64(nssusp); 919 - *ts = timespec64_sub(*ts, tsusp); 920 - } 921 862 922 863 /** 923 864 * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC ··· 1586 1593 return; 1587 1594 } 1588 1595 tk_xtime_add(tk, delta); 1596 + tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta)); 1589 1597 tk_update_sleep_time(tk, timespec64_to_ktime(*delta)); 1590 1598 tk_debug_account_sleep_time(delta); 1591 1599 } ··· 2119 2125 void getboottime64(struct timespec64 *ts) 2120 2126 { 2121 2127 struct timekeeper *tk = 
&tk_core.timekeeper; 2122 - ktime_t t = ktime_sub(tk->offs_real, tk->time_suspended); 2128 + ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot); 2123 2129 2124 2130 *ts = ktime_to_timespec64(t); 2125 2131 } ··· 2182 2188 * ktime_get_update_offsets_now - hrtimer helper 2183 2189 * @cwsseq: pointer to check and store the clock was set sequence number 2184 2190 * @offs_real: pointer to storage for monotonic -> realtime offset 2191 + * @offs_boot: pointer to storage for monotonic -> boottime offset 2185 2192 * @offs_tai: pointer to storage for monotonic -> clock tai offset 2186 2193 * 2187 2194 * Returns current monotonic time and updates the offsets if the ··· 2192 2197 * Called from hrtimer_interrupt() or retrigger_next_event() 2193 2198 */ 2194 2199 ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, 2195 - ktime_t *offs_tai) 2200 + ktime_t *offs_boot, ktime_t *offs_tai) 2196 2201 { 2197 2202 struct timekeeper *tk = &tk_core.timekeeper; 2198 2203 unsigned int seq; ··· 2209 2214 if (*cwsseq != tk->clock_was_set_seq) { 2210 2215 *cwsseq = tk->clock_was_set_seq; 2211 2216 *offs_real = tk->offs_real; 2217 + *offs_boot = tk->offs_boot; 2212 2218 *offs_tai = tk->offs_tai; 2213 2219 } 2214 2220
+1
kernel/time/timekeeping.h
··· 6 6 */ 7 7 extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, 8 8 ktime_t *offs_real, 9 + ktime_t *offs_boot, 9 10 ktime_t *offs_tai); 10 11 11 12 extern int timekeeping_valid_for_hres(void);
+1 -1
kernel/trace/trace.c
··· 1165 1165 { trace_clock, "perf", 1 }, 1166 1166 { ktime_get_mono_fast_ns, "mono", 1 }, 1167 1167 { ktime_get_raw_fast_ns, "mono_raw", 1 }, 1168 - { ktime_get_mono_fast_ns, "boot", 1 }, 1168 + { ktime_get_boot_fast_ns, "boot", 1 }, 1169 1169 ARCH_TRACE_CLOCKS 1170 1170 }; 1171 1171