Merge tag 'timers-urgent-2025-04-06' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer fix from Thomas Gleixner:
"A revert to fix a adjtimex() regression:

The recent change to prevent time from going backwards in the coarse
time getters due to immediate multiplier adjustments via adjtimex()
changed the way the timekeeping core handles such adjustments.

That change resulted in a user space visible regression on the
adjtimex() side:

1) The forwarding of the base time moves the update out of the
original period and establishes a new one. That changes the
behaviour of the [PF]LL control, which user space expects to be
applied periodically.

2) The clearing of the accumulated NTP error due to #1 changes the
behaviour as well.

An attempt to delay the multiplier/frequency update to the next tick
did not solve the problem either, as user space expects the multiplier
or frequency update to be in effect when the syscall returns.

A different solution to the coarse time problem is available, so
revert the offending commit to restore the existing adjtimex()
behaviour"

* tag 'timers-urgent-2025-04-06' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
Revert "timekeeping: Fix possible inconsistencies in _COARSE clockids"

 kernel/time/timekeeping.c | 25 insertions(+), 69 deletions(-)
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
···
 }
 
 /**
- * timekeeping_forward - update clock to given cycle now value
+ * timekeeping_forward_now - update clock to the current time
  * @tk: Pointer to the timekeeper to update
- * @cycle_now: Current clocksource read value
  *
  * Forward the current clock to update its state since the last call to
  * update_wall_time(). This is useful before significant clock changes,
  * as it avoids having to deal with this time offset explicitly.
  */
-static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now)
+static void timekeeping_forward_now(struct timekeeper *tk)
 {
-	u64 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
-				      tk->tkr_mono.clock->max_raw_delta);
+	u64 cycle_now, delta;
 
+	cycle_now = tk_clock_read(&tk->tkr_mono);
+	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
+				  tk->tkr_mono.clock->max_raw_delta);
 	tk->tkr_mono.cycle_last = cycle_now;
 	tk->tkr_raw.cycle_last = cycle_now;
 
···
 		tk_normalize_xtime(tk);
 		delta -= incr;
 	}
-}
-
-/**
- * timekeeping_forward_now - update clock to the current time
- * @tk: Pointer to the timekeeper to update
- *
- * Forward the current clock to update its state since the last call to
- * update_wall_time(). This is useful before significant clock changes,
- * as it avoids having to deal with this time offset explicitly.
- */
-static void timekeeping_forward_now(struct timekeeper *tk)
-{
-	u64 cycle_now = tk_clock_read(&tk->tkr_mono);
-
-	timekeeping_forward(tk, cycle_now);
 }
 
 /**
···
 	return offset;
 }
 
-static u64 timekeeping_accumulate(struct timekeeper *tk, u64 offset,
-				  enum timekeeping_adv_mode mode,
-				  unsigned int *clock_set)
-{
-	int shift = 0, maxshift;
-
-	/*
-	 * TK_ADV_FREQ indicates that adjtimex(2) directly set the
-	 * frequency or the tick length.
-	 *
-	 * Accumulate the offset, so that the new multiplier starts from
-	 * now. This is required as otherwise for offsets, which are
-	 * smaller than tk::cycle_interval, timekeeping_adjust() could set
-	 * xtime_nsec backwards, which subsequently causes time going
-	 * backwards in the coarse time getters. But even for the case
-	 * where offset is greater than tk::cycle_interval the periodic
-	 * accumulation does not have much value.
-	 *
-	 * Also reset tk::ntp_error as it does not make sense to keep the
-	 * old accumulated error around in this case.
-	 */
-	if (mode == TK_ADV_FREQ) {
-		timekeeping_forward(tk, tk->tkr_mono.cycle_last + offset);
-		tk->ntp_error = 0;
-		return 0;
-	}
-
-	/*
-	 * With NO_HZ we may have to accumulate many cycle_intervals
-	 * (think "ticks") worth of time at once. To do this efficiently,
-	 * we calculate the largest doubling multiple of cycle_intervals
-	 * that is smaller than the offset. We then accumulate that
-	 * chunk in one go, and then try to consume the next smaller
-	 * doubled multiple.
-	 */
-	shift = ilog2(offset) - ilog2(tk->cycle_interval);
-	shift = max(0, shift);
-	/* Bound shift to one less than what overflows tick_length */
-	maxshift = (64 - (ilog2(ntp_tick_length()) + 1)) - 1;
-	shift = min(shift, maxshift);
-	while (offset >= tk->cycle_interval) {
-		offset = logarithmic_accumulation(tk, offset, shift, clock_set);
-		if (offset < tk->cycle_interval << shift)
-			shift--;
-	}
-	return offset;
-}
-
 /*
  * timekeeping_advance - Updates the timekeeper to the current time and
  * current NTP tick length
···
 	struct timekeeper *tk = &tk_core.shadow_timekeeper;
 	struct timekeeper *real_tk = &tk_core.timekeeper;
 	unsigned int clock_set = 0;
+	int shift = 0, maxshift;
 	u64 offset;
 
 	guard(raw_spinlock_irqsave)(&tk_core.lock);
···
 	if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
 		return false;
 
-	offset = timekeeping_accumulate(tk, offset, mode, &clock_set);
+	/*
+	 * With NO_HZ we may have to accumulate many cycle_intervals
+	 * (think "ticks") worth of time at once. To do this efficiently,
+	 * we calculate the largest doubling multiple of cycle_intervals
+	 * that is smaller than the offset. We then accumulate that
+	 * chunk in one go, and then try to consume the next smaller
+	 * doubled multiple.
+	 */
+	shift = ilog2(offset) - ilog2(tk->cycle_interval);
+	shift = max(0, shift);
+	/* Bound shift to one less than what overflows tick_length */
+	maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
+	shift = min(shift, maxshift);
+	while (offset >= tk->cycle_interval) {
+		offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
+		if (offset < tk->cycle_interval<<shift)
+			shift--;
+	}
 
 	/* Adjust the multiplier to correct NTP error */
 	timekeeping_adjust(tk, offset);
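
For reference, the doubling accumulation restored above can also be read
in isolation. Below is a standalone sketch (plain user space C, not
kernel code; the interval and offset values are made up) of the same
idea: a large offset is consumed in power-of-two multiples of the base
interval, so a long NO_HZ idle period needs only a logarithmic number of
loop iterations rather than one per missed tick.

#include <stdint.h>
#include <stdio.h>

/* Integer log2, standing in for the kernel's ilog2() */
static unsigned int ilog2_u64(uint64_t v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	const uint64_t cycle_interval = 1000;	/* cycles per "tick" (illustrative) */
	uint64_t offset = 1234567;		/* cycles elapsed since the last update */
	int shift, steps = 0;

	/* Largest power-of-two multiple of cycle_interval near the offset */
	shift = (int)ilog2_u64(offset) - (int)ilog2_u64(cycle_interval);
	if (shift < 0)
		shift = 0;

	while (offset >= cycle_interval) {
		uint64_t chunk = cycle_interval << shift;

		if (offset >= chunk) {		/* accumulate 2^shift ticks in one go */
			offset -= chunk;
			steps++;
		}
		if (offset < chunk)		/* then try the next smaller doubling */
			shift--;
	}
	printf("consumed offset in %d steps, %llu cycles left over\n",
	       steps, (unsigned long long)offset);
	return 0;
}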