Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cpuidle: menu: Skip tick_nohz_get_sleep_length() call in some cases

Because the cost of calling tick_nohz_get_sleep_length() may increase
in the future, reorder the code in menu_select() so it first uses the
statistics to determine the expected idle duration. If that value is
higher than RESIDENCY_THRESHOLD_NS, tick_nohz_get_sleep_length() will
be called to obtain the time till the closest timer and refine the
idle duration prediction if necessary.

This causes the governor to always take the full overhead of
get_typical_interval() with the assumption that the cost will be
amortized by skipping the tick_nohz_get_sleep_length() call in the
cases when the predicted idle duration is very small.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Doug Smythies <dsmythies@telus.net>

+54 -34
+14
drivers/cpuidle/governors/gov.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + /* Common definitions for cpuidle governors. */ 4 + 5 + #ifndef __CPUIDLE_GOVERNOR_H 6 + #define __CPUIDLE_GOVERNOR_H 7 + 8 + /* 9 + * Idle state target residency threshold used for deciding whether or not to 10 + * check the time till the closest expected timer event. 11 + */ 12 + #define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) 13 + 14 + #endif /* __CPUIDLE_GOVERNOR_H */
+38 -27
drivers/cpuidle/governors/menu.c
··· 19 19 #include <linux/sched/stat.h> 20 20 #include <linux/math64.h> 21 21 22 + #include "gov.h" 23 + 22 24 #define BUCKETS 12 23 25 #define INTERVAL_SHIFT 3 24 26 #define INTERVALS (1UL << INTERVAL_SHIFT) ··· 168 166 * of points is below a threshold. If it is... then use the 169 167 * average of these 8 points as the estimated value. 170 168 */ 171 - static unsigned int get_typical_interval(struct menu_device *data, 172 - unsigned int predicted_us) 169 + static unsigned int get_typical_interval(struct menu_device *data) 173 170 { 174 171 int i, divisor; 175 172 unsigned int min, max, thresh, avg; ··· 196 195 } 197 196 } 198 197 199 - /* 200 - * If the result of the computation is going to be discarded anyway, 201 - * avoid the computation altogether. 202 - */ 203 - if (min >= predicted_us) 198 + if (!max) 204 199 return UINT_MAX; 205 200 206 201 if (divisor == INTERVALS) ··· 264 267 { 265 268 struct menu_device *data = this_cpu_ptr(&menu_devices); 266 269 s64 latency_req = cpuidle_governor_latency_req(dev->cpu); 267 - unsigned int predicted_us; 268 270 u64 predicted_ns; 269 271 u64 interactivity_req; 270 272 unsigned int nr_iowaiters; ··· 275 279 data->needs_update = 0; 276 280 } 277 281 278 - /* determine the expected residency time, round up */ 279 - delta = tick_nohz_get_sleep_length(&delta_tick); 280 - if (unlikely(delta < 0)) { 281 - delta = 0; 282 - delta_tick = 0; 283 - } 284 - data->next_timer_ns = delta; 285 - 286 282 nr_iowaiters = nr_iowait_cpu(dev->cpu); 287 - data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters); 283 + 284 + /* Find the shortest expected idle interval. */ 285 + predicted_ns = get_typical_interval(data) * NSEC_PER_USEC; 286 + if (predicted_ns > RESIDENCY_THRESHOLD_NS) { 287 + unsigned int timer_us; 288 + 289 + /* Determine the time till the closest timer. */ 290 + delta = tick_nohz_get_sleep_length(&delta_tick); 291 + if (unlikely(delta < 0)) { 292 + delta = 0; 293 + delta_tick = 0; 294 + } 295 + 296 + data->next_timer_ns = delta; 297 + data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters); 298 + 299 + /* Round up the result for half microseconds. */ 300 + timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 + 301 + data->next_timer_ns * 302 + data->correction_factor[data->bucket], 303 + RESOLUTION * DECAY * NSEC_PER_USEC); 304 + /* Use the lowest expected idle interval to pick the idle state. */ 305 + predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns); 306 + } else { 307 + /* 308 + * Because the next timer event is not going to be determined 309 + * in this case, assume that without the tick the closest timer 310 + * will be in distant future and that the closest tick will occur 311 + * after 1/2 of the tick period. 312 + */ 313 + data->next_timer_ns = KTIME_MAX; 314 + delta_tick = TICK_NSEC / 2; 315 + data->bucket = which_bucket(KTIME_MAX, nr_iowaiters); 316 + } 288 317 289 318 if (unlikely(drv->state_count <= 1 || latency_req == 0) || 290 319 ((data->next_timer_ns < drv->states[1].target_residency_ns || ··· 323 302 *stop_tick = !(drv->states[0].flags & CPUIDLE_FLAG_POLLING); 324 303 return 0; 325 304 } 326 - 327 - /* Round up the result for half microseconds. */ 328 - predicted_us = div_u64(data->next_timer_ns * 329 - data->correction_factor[data->bucket] + 330 - (RESOLUTION * DECAY * NSEC_PER_USEC) / 2, 331 - RESOLUTION * DECAY * NSEC_PER_USEC); 332 - /* Use the lowest expected idle interval to pick the idle state. */ 333 - predicted_ns = (u64)min(predicted_us, 334 - get_typical_interval(data, predicted_us)) * 335 - NSEC_PER_USEC; 336 305 337 306 if (tick_nohz_tick_stopped()) { 338 307 /*
+2 -7
drivers/cpuidle/governors/teo.c
··· 140 140 #include <linux/sched/topology.h> 141 141 #include <linux/tick.h> 142 142 143 + #include "gov.h" 144 + 143 145 /* 144 146 * The number of bits to shift the CPU's capacity by in order to determine 145 147 * the utilized threshold. ··· 153 151 * noise and low enough to react quickly when activity starts to ramp up. 154 152 */ 155 153 #define UTIL_THRESHOLD_SHIFT 6 156 - 157 154 158 155 /* 159 156 * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value ··· 166 165 * the detection of recent early wakeup patterns. 167 166 */ 168 167 #define NR_RECENT 9 169 - 170 - /* 171 - * Idle state target residency threshold used for deciding whether or not to 172 - * check the time till the closest expected timer event. 173 - */ 174 - #define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) 175 168 176 169 /** 177 170 * struct teo_bin - Metrics used by the TEO cpuidle governor.