Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cpuidle: Use nanoseconds as the unit of time

Currently, the cpuidle subsystem uses microseconds as the unit of
time which (among other things) causes the idle loop to incur some
integer division overhead for no clear benefit.

In order to allow cpuidle to measure time in nanoseconds, add two
new fields, exit_latency_ns and target_residency_ns, to represent the
exit latency and target residency of an idle state in nanoseconds,
respectively, to struct cpuidle_state and initialize them with the
help of the corresponding values in microseconds provided by drivers.
Additionally, change cpuidle_governor_latency_req() to return the
idle state exit latency constraint in nanoseconds.

Also measure idle state residency (last_residency_ns in struct
cpuidle_device and time_ns in struct cpuidle_driver) in nanoseconds
and update the cpuidle core and governors accordingly.

However, the menu governor still computes typical intervals in
microseconds to avoid integer overflows.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Doug Smythies <dsmythies@telus.net>
Tested-by: Doug Smythies <dsmythies@telus.net>

+174 -161
+17 -19
drivers/cpuidle/cpuidle.c
··· 75 75 76 76 static int find_deepest_state(struct cpuidle_driver *drv, 77 77 struct cpuidle_device *dev, 78 - unsigned int max_latency, 78 + u64 max_latency_ns, 79 79 unsigned int forbidden_flags, 80 80 bool s2idle) 81 81 { 82 - unsigned int latency_req = 0; 82 + u64 latency_req = 0; 83 83 int i, ret = 0; 84 84 85 85 for (i = 1; i < drv->state_count; i++) { 86 86 struct cpuidle_state *s = &drv->states[i]; 87 87 88 88 if (dev->states_usage[i].disable || 89 - s->exit_latency <= latency_req || 90 - s->exit_latency > max_latency || 89 + s->exit_latency_ns <= latency_req || 90 + s->exit_latency_ns > max_latency_ns || 91 91 (s->flags & forbidden_flags) || 92 92 (s2idle && !s->enter_s2idle)) 93 93 continue; 94 94 95 - latency_req = s->exit_latency; 95 + latency_req = s->exit_latency_ns; 96 96 ret = i; 97 97 } 98 98 return ret; ··· 124 124 int cpuidle_find_deepest_state(struct cpuidle_driver *drv, 125 125 struct cpuidle_device *dev) 126 126 { 127 - return find_deepest_state(drv, dev, UINT_MAX, 0, false); 127 + return find_deepest_state(drv, dev, U64_MAX, 0, false); 128 128 } 129 129 130 130 #ifdef CONFIG_SUSPEND ··· 180 180 * that interrupts won't be enabled when it exits and allows the tick to 181 181 * be frozen safely. 182 182 */ 183 - index = find_deepest_state(drv, dev, UINT_MAX, 0, true); 183 + index = find_deepest_state(drv, dev, U64_MAX, 0, true); 184 184 if (index > 0) 185 185 enter_s2idle_proper(drv, dev, index); 186 186 ··· 209 209 * CPU as a broadcast timer, this call may fail if it is not available. 
210 210 */ 211 211 if (broadcast && tick_broadcast_enter()) { 212 - index = find_deepest_state(drv, dev, target_state->exit_latency, 212 + index = find_deepest_state(drv, dev, target_state->exit_latency_ns, 213 213 CPUIDLE_FLAG_TIMER_STOP, false); 214 214 if (index < 0) { 215 215 default_idle_call(); ··· 247 247 local_irq_enable(); 248 248 249 249 if (entered_state >= 0) { 250 - s64 diff, delay = drv->states[entered_state].exit_latency; 250 + s64 diff, delay = drv->states[entered_state].exit_latency_ns; 251 251 int i; 252 252 253 253 /* ··· 255 255 * This can be moved to within driver enter routine, 256 256 * but that results in multiple copies of same code. 257 257 */ 258 - diff = ktime_us_delta(time_end, time_start); 259 - if (diff > INT_MAX) 260 - diff = INT_MAX; 258 + diff = ktime_sub(time_end, time_start); 261 259 262 - dev->last_residency = (int)diff; 263 - dev->states_usage[entered_state].time += dev->last_residency; 260 + dev->last_residency_ns = diff; 261 + dev->states_usage[entered_state].time_ns += diff; 264 262 dev->states_usage[entered_state].usage++; 265 263 266 - if (diff < drv->states[entered_state].target_residency) { 264 + if (diff < drv->states[entered_state].target_residency_ns) { 267 265 for (i = entered_state - 1; i >= 0; i--) { 268 266 if (dev->states_usage[i].disable) 269 267 continue; ··· 279 281 * Update if a deeper state would have been a 280 282 * better match for the observed idle duration. 
281 283 */ 282 - if (diff - delay >= drv->states[i].target_residency) 284 + if (diff - delay >= drv->states[i].target_residency_ns) 283 285 dev->states_usage[entered_state].below++; 284 286 285 287 break; 286 288 } 287 289 } 288 290 } else { 289 - dev->last_residency = 0; 291 + dev->last_residency_ns = 0; 290 292 } 291 293 292 294 return entered_state; ··· 379 381 if (dev->states_usage[i].disable) 380 382 continue; 381 383 382 - limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC; 384 + limit_ns = (u64)drv->states[i].target_residency_ns; 383 385 } 384 386 385 387 dev->poll_limit_ns = limit_ns; ··· 550 552 static void __cpuidle_device_init(struct cpuidle_device *dev) 551 553 { 552 554 memset(dev->states_usage, 0, sizeof(dev->states_usage)); 553 - dev->last_residency = 0; 555 + dev->last_residency_ns = 0; 554 556 dev->next_hrtimer = 0; 555 557 } 556 558
+20 -9
drivers/cpuidle/driver.c
··· 165 165 if (!drv->cpumask) 166 166 drv->cpumask = (struct cpumask *)cpu_possible_mask; 167 167 168 - /* 169 - * Look for the timer stop flag in the different states, so that we know 170 - * if the broadcast timer has to be set up. The loop is in the reverse 171 - * order, because usually one of the deeper states have this flag set. 172 - */ 173 - for (i = drv->state_count - 1; i >= 0 ; i--) { 174 - if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) { 168 + for (i = 0; i < drv->state_count; i++) { 169 + struct cpuidle_state *s = &drv->states[i]; 170 + 171 + /* 172 + * Look for the timer stop flag in the different states and if 173 + * it is found, indicate that the broadcast timer has to be set 174 + * up. 175 + */ 176 + if (s->flags & CPUIDLE_FLAG_TIMER_STOP) 175 177 drv->bctimer = 1; 176 - break; 177 - } 178 + 179 + /* 180 + * The core will use the target residency and exit latency 181 + * values in nanoseconds, but allow drivers to provide them in 182 + * microseconds too. 183 + */ 184 + if (s->target_residency > 0) 185 + s->target_residency_ns = s->target_residency * NSEC_PER_USEC; 186 + 187 + if (s->exit_latency > 0) 188 + s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC; 178 189 } 179 190 } 180 191
+5 -2
drivers/cpuidle/governor.c
··· 107 107 * cpuidle_governor_latency_req - Compute a latency constraint for CPU 108 108 * @cpu: Target CPU 109 109 */ 110 - int cpuidle_governor_latency_req(unsigned int cpu) 110 + s64 cpuidle_governor_latency_req(unsigned int cpu) 111 111 { 112 112 int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); 113 113 struct device *device = get_cpu_device(cpu); 114 114 int device_req = dev_pm_qos_raw_resume_latency(device); 115 115 116 - return device_req < global_req ? device_req : global_req; 116 + if (device_req > global_req) 117 + device_req = global_req; 118 + 119 + return (s64)device_req * NSEC_PER_USEC; 117 120 }
+3 -4
drivers/cpuidle/governors/haltpoll.c
··· 49 49 struct cpuidle_device *dev, 50 50 bool *stop_tick) 51 51 { 52 - int latency_req = cpuidle_governor_latency_req(dev->cpu); 52 + s64 latency_req = cpuidle_governor_latency_req(dev->cpu); 53 53 54 54 if (!drv->state_count || latency_req == 0) { 55 55 *stop_tick = false; ··· 75 75 return 0; 76 76 } 77 77 78 - static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us) 78 + static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns) 79 79 { 80 80 unsigned int val; 81 - u64 block_ns = block_us*NSEC_PER_USEC; 82 81 83 82 /* Grow cpu_halt_poll_us if 84 83 * cpu_halt_poll_us < block_ns < guest_halt_poll_us ··· 114 115 dev->last_state_idx = index; 115 116 116 117 if (index != 0) 117 - adjust_poll_limit(dev, dev->last_residency); 118 + adjust_poll_limit(dev, dev->last_residency_ns); 118 119 } 119 120 120 121 /**
+13 -12
drivers/cpuidle/governors/ladder.c
··· 27 27 struct { 28 28 u32 promotion_count; 29 29 u32 demotion_count; 30 - u32 promotion_time; 31 - u32 demotion_time; 30 + u64 promotion_time_ns; 31 + u64 demotion_time_ns; 32 32 } threshold; 33 33 struct { 34 34 int promotion_count; ··· 68 68 { 69 69 struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); 70 70 struct ladder_device_state *last_state; 71 - int last_residency, last_idx = dev->last_state_idx; 71 + int last_idx = dev->last_state_idx; 72 72 int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0; 73 - int latency_req = cpuidle_governor_latency_req(dev->cpu); 73 + s64 latency_req = cpuidle_governor_latency_req(dev->cpu); 74 + s64 last_residency; 74 75 75 76 /* Special case when user has set very strict latency requirement */ 76 77 if (unlikely(latency_req == 0)) { ··· 81 80 82 81 last_state = &ldev->states[last_idx]; 83 82 84 - last_residency = dev->last_residency - drv->states[last_idx].exit_latency; 83 + last_residency = dev->last_residency_ns - drv->states[last_idx].exit_latency_ns; 85 84 86 85 /* consider promotion */ 87 86 if (last_idx < drv->state_count - 1 && 88 87 !dev->states_usage[last_idx + 1].disable && 89 - last_residency > last_state->threshold.promotion_time && 90 - drv->states[last_idx + 1].exit_latency <= latency_req) { 88 + last_residency > last_state->threshold.promotion_time_ns && 89 + drv->states[last_idx + 1].exit_latency_ns <= latency_req) { 91 90 last_state->stats.promotion_count++; 92 91 last_state->stats.demotion_count = 0; 93 92 if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { ··· 99 98 /* consider demotion */ 100 99 if (last_idx > first_idx && 101 100 (dev->states_usage[last_idx].disable || 102 - drv->states[last_idx].exit_latency > latency_req)) { 101 + drv->states[last_idx].exit_latency_ns > latency_req)) { 103 102 int i; 104 103 105 104 for (i = last_idx - 1; i > first_idx; i--) { 106 - if (drv->states[i].exit_latency <= latency_req) 105 + if 
(drv->states[i].exit_latency_ns <= latency_req) 107 106 break; 108 107 } 109 108 ladder_do_selection(dev, ldev, last_idx, i); ··· 111 110 } 112 111 113 112 if (last_idx > first_idx && 114 - last_residency < last_state->threshold.demotion_time) { 113 + last_residency < last_state->threshold.demotion_time_ns) { 115 114 last_state->stats.demotion_count++; 116 115 last_state->stats.promotion_count = 0; 117 116 if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) { ··· 151 150 lstate->threshold.demotion_count = DEMOTION_COUNT; 152 151 153 152 if (i < drv->state_count - 1) 154 - lstate->threshold.promotion_time = state->exit_latency; 153 + lstate->threshold.promotion_time_ns = state->exit_latency_ns; 155 154 if (i > first_idx) 156 - lstate->threshold.demotion_time = state->exit_latency; 155 + lstate->threshold.demotion_time_ns = state->exit_latency_ns; 157 156 } 158 157 159 158 return 0;
+55 -68
drivers/cpuidle/governors/menu.c
··· 19 19 #include <linux/sched/stat.h> 20 20 #include <linux/math64.h> 21 21 22 - /* 23 - * Please note when changing the tuning values: 24 - * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of 25 - * a scaling operation multiplication may overflow on 32 bit platforms. 26 - * In that case, #define RESOLUTION as ULL to get 64 bit result: 27 - * #define RESOLUTION 1024ULL 28 - * 29 - * The default values do not overflow. 30 - */ 31 22 #define BUCKETS 12 32 23 #define INTERVAL_SHIFT 3 33 24 #define INTERVALS (1UL << INTERVAL_SHIFT) 34 25 #define RESOLUTION 1024 35 26 #define DECAY 8 36 - #define MAX_INTERESTING 50000 37 - 27 + #define MAX_INTERESTING (50000 * NSEC_PER_USEC) 38 28 39 29 /* 40 30 * Concepts and ideas behind the menu governor ··· 110 120 int needs_update; 111 121 int tick_wakeup; 112 122 113 - unsigned int next_timer_us; 123 + u64 next_timer_ns; 114 124 unsigned int bucket; 115 125 unsigned int correction_factor[BUCKETS]; 116 126 unsigned int intervals[INTERVALS]; 117 127 int interval_ptr; 118 128 }; 119 129 120 - static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) 130 + static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters) 121 131 { 122 132 int bucket = 0; 123 133 ··· 130 140 if (nr_iowaiters) 131 141 bucket = BUCKETS/2; 132 142 133 - if (duration < 10) 143 + if (duration_ns < 10ULL * NSEC_PER_USEC) 134 144 return bucket; 135 - if (duration < 100) 145 + if (duration_ns < 100ULL * NSEC_PER_USEC) 136 146 return bucket + 1; 137 - if (duration < 1000) 147 + if (duration_ns < 1000ULL * NSEC_PER_USEC) 138 148 return bucket + 2; 139 - if (duration < 10000) 149 + if (duration_ns < 10000ULL * NSEC_PER_USEC) 140 150 return bucket + 3; 141 - if (duration < 100000) 151 + if (duration_ns < 100000ULL * NSEC_PER_USEC) 142 152 return bucket + 4; 143 153 return bucket + 5; 144 154 } ··· 266 276 bool *stop_tick) 267 277 { 268 278 struct menu_device *data = this_cpu_ptr(&menu_devices); 269 - int latency_req 
= cpuidle_governor_latency_req(dev->cpu); 270 - int i; 271 - int idx; 272 - unsigned int interactivity_req; 279 + s64 latency_req = cpuidle_governor_latency_req(dev->cpu); 273 280 unsigned int predicted_us; 281 + u64 predicted_ns; 282 + u64 interactivity_req; 274 283 unsigned long nr_iowaiters; 275 284 ktime_t delta_next; 285 + int i, idx; 276 286 277 287 if (data->needs_update) { 278 288 menu_update(drv, dev); ··· 280 290 } 281 291 282 292 /* determine the expected residency time, round up */ 283 - data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next)); 293 + data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next); 284 294 285 295 nr_iowaiters = nr_iowait_cpu(dev->cpu); 286 - data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); 296 + data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters); 287 297 288 298 if (unlikely(drv->state_count <= 1 || latency_req == 0) || 289 - ((data->next_timer_us < drv->states[1].target_residency || 290 - latency_req < drv->states[1].exit_latency) && 299 + ((data->next_timer_ns < drv->states[1].target_residency_ns || 300 + latency_req < drv->states[1].exit_latency_ns) && 291 301 !dev->states_usage[0].disable)) { 292 302 /* 293 303 * In this case state[0] will be used no matter what, so return ··· 298 308 return 0; 299 309 } 300 310 301 - /* 302 - * Force the result of multiplication to be 64 bits even if both 303 - * operands are 32 bits. 304 - * Make sure to round up for half microseconds. 305 - */ 306 - predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us * 307 - data->correction_factor[data->bucket], 308 - RESOLUTION * DECAY); 309 - /* 310 - * Use the lowest expected idle interval to pick the idle state. 311 - */ 312 - predicted_us = min(predicted_us, get_typical_interval(data, predicted_us)); 311 + /* Round up the result for half microseconds. 
*/ 312 + predicted_us = div_u64(data->next_timer_ns * 313 + data->correction_factor[data->bucket] + 314 + (RESOLUTION * DECAY * NSEC_PER_USEC) / 2, 315 + RESOLUTION * DECAY * NSEC_PER_USEC); 316 + /* Use the lowest expected idle interval to pick the idle state. */ 317 + predicted_ns = (u64)min(predicted_us, 318 + get_typical_interval(data, predicted_us)) * 319 + NSEC_PER_USEC; 313 320 314 321 if (tick_nohz_tick_stopped()) { 315 322 /* ··· 317 330 * the known time till the closest timer event for the idle 318 331 * state selection. 319 332 */ 320 - if (predicted_us < TICK_USEC) 321 - predicted_us = ktime_to_us(delta_next); 333 + if (predicted_ns < TICK_NSEC) 334 + predicted_ns = delta_next; 322 335 } else { 323 336 /* 324 337 * Use the performance multiplier and the user-configurable 325 338 * latency_req to determine the maximum exit latency. 326 339 */ 327 - interactivity_req = predicted_us / performance_multiplier(nr_iowaiters); 340 + interactivity_req = div64_u64(predicted_ns, 341 + performance_multiplier(nr_iowaiters)); 328 342 if (latency_req > interactivity_req) 329 343 latency_req = interactivity_req; 330 344 } ··· 344 356 if (idx == -1) 345 357 idx = i; /* first enabled state */ 346 358 347 - if (s->target_residency > predicted_us) { 359 + if (s->target_residency_ns > predicted_ns) { 348 360 /* 349 361 * Use a physical idle state, not busy polling, unless 350 362 * a timer is going to trigger soon enough. 
351 363 */ 352 364 if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && 353 - s->exit_latency <= latency_req && 354 - s->target_residency <= data->next_timer_us) { 355 - predicted_us = s->target_residency; 365 + s->exit_latency_ns <= latency_req && 366 + s->target_residency_ns <= data->next_timer_ns) { 367 + predicted_ns = s->target_residency_ns; 356 368 idx = i; 357 369 break; 358 370 } 359 - if (predicted_us < TICK_USEC) 371 + if (predicted_ns < TICK_NSEC) 360 372 break; 361 373 362 374 if (!tick_nohz_tick_stopped()) { ··· 366 378 * tick in that case and let the governor run 367 379 * again in the next iteration of the loop. 368 380 */ 369 - predicted_us = drv->states[idx].target_residency; 381 + predicted_ns = drv->states[idx].target_residency_ns; 370 382 break; 371 383 } 372 384 ··· 376 388 * closest timer event, select this one to avoid getting 377 389 * stuck in the shallow one for too long. 378 390 */ 379 - if (drv->states[idx].target_residency < TICK_USEC && 380 - s->target_residency <= ktime_to_us(delta_next)) 391 + if (drv->states[idx].target_residency_ns < TICK_NSEC && 392 + s->target_residency_ns <= delta_next) 381 393 idx = i; 382 394 383 395 return idx; 384 396 } 385 - if (s->exit_latency > latency_req) 397 + if (s->exit_latency_ns > latency_req) 386 398 break; 387 399 388 400 idx = i; ··· 396 408 * expected idle duration is shorter than the tick period length. 
397 409 */ 398 410 if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) || 399 - predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) { 400 - unsigned int delta_next_us = ktime_to_us(delta_next); 401 - 411 + predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) { 402 412 *stop_tick = false; 403 413 404 - if (idx > 0 && drv->states[idx].target_residency > delta_next_us) { 414 + if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) { 405 415 /* 406 416 * The tick is not going to be stopped and the target 407 417 * residency of the state to be returned is not within ··· 411 425 continue; 412 426 413 427 idx = i; 414 - if (drv->states[i].target_residency <= delta_next_us) 428 + if (drv->states[i].target_residency_ns <= delta_next) 415 429 break; 416 430 } 417 431 } ··· 447 461 struct menu_device *data = this_cpu_ptr(&menu_devices); 448 462 int last_idx = dev->last_state_idx; 449 463 struct cpuidle_state *target = &drv->states[last_idx]; 450 - unsigned int measured_us; 464 + u64 measured_ns; 451 465 unsigned int new_factor; 452 466 453 467 /* ··· 465 479 * assume the state was never reached and the exit latency is 0. 466 480 */ 467 481 468 - if (data->tick_wakeup && data->next_timer_us > TICK_USEC) { 482 + if (data->tick_wakeup && data->next_timer_ns > TICK_NSEC) { 469 483 /* 470 484 * The nohz code said that there wouldn't be any events within 471 485 * the tick boundary (if the tick was stopped), but the idle ··· 475 489 * have been idle long (but not forever) to help the idle 476 490 * duration predictor do a better job next time. 477 491 */ 478 - measured_us = 9 * MAX_INTERESTING / 10; 492 + measured_ns = 9 * MAX_INTERESTING / 10; 479 493 } else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) && 480 494 dev->poll_time_limit) { 481 495 /* ··· 485 499 * the CPU might have been woken up from idle by the next timer. 486 500 * Assume that to be the case. 
487 501 */ 488 - measured_us = data->next_timer_us; 502 + measured_ns = data->next_timer_ns; 489 503 } else { 490 504 /* measured value */ 491 - measured_us = dev->last_residency; 505 + measured_ns = dev->last_residency_ns; 492 506 493 507 /* Deduct exit latency */ 494 - if (measured_us > 2 * target->exit_latency) 495 - measured_us -= target->exit_latency; 508 + if (measured_ns > 2 * target->exit_latency_ns) 509 + measured_ns -= target->exit_latency_ns; 496 510 else 497 - measured_us /= 2; 511 + measured_ns /= 2; 498 512 } 499 513 500 514 /* Make sure our coefficients do not exceed unity */ 501 - if (measured_us > data->next_timer_us) 502 - measured_us = data->next_timer_us; 515 + if (measured_ns > data->next_timer_ns) 516 + measured_ns = data->next_timer_ns; 503 517 504 518 /* Update our correction ratio */ 505 519 new_factor = data->correction_factor[data->bucket]; 506 520 new_factor -= new_factor / DECAY; 507 521 508 - if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING) 509 - new_factor += RESOLUTION * measured_us / data->next_timer_us; 522 + if (data->next_timer_ns > 0 && measured_ns < MAX_INTERESTING) 523 + new_factor += div64_u64(RESOLUTION * measured_ns, 524 + data->next_timer_ns); 510 525 else 511 526 /* 512 527 * we were idle so long that we count it as a perfect ··· 527 540 data->correction_factor[data->bucket] = new_factor; 528 541 529 542 /* update the repeating-pattern data */ 530 - data->intervals[data->interval_ptr++] = measured_us; 543 + data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns); 531 544 if (data->interval_ptr >= INTERVALS) 532 545 data->interval_ptr = 0; 533 546 }
+36 -40
drivers/cpuidle/governors/teo.c
··· 104 104 u64 sleep_length_ns; 105 105 struct teo_idle_state states[CPUIDLE_STATE_MAX]; 106 106 int interval_idx; 107 - unsigned int intervals[INTERVALS]; 107 + u64 intervals[INTERVALS]; 108 108 }; 109 109 110 110 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); ··· 117 117 static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) 118 118 { 119 119 struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); 120 - unsigned int sleep_length_us = ktime_to_us(cpu_data->sleep_length_ns); 121 120 int i, idx_hit = -1, idx_timer = -1; 122 - unsigned int measured_us; 121 + u64 measured_ns; 123 122 124 123 if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { 125 124 /* ··· 126 127 * enough to the closest timer event expected at the idle state 127 128 * selection time to be discarded. 128 129 */ 129 - measured_us = UINT_MAX; 130 + measured_ns = U64_MAX; 130 131 } else { 131 - unsigned int lat; 132 + u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns; 132 133 133 - lat = drv->states[dev->last_state_idx].exit_latency; 134 - 135 - measured_us = ktime_to_us(cpu_data->time_span_ns); 134 + measured_ns = cpu_data->time_span_ns; 136 135 /* 137 136 * The delay between the wakeup and the first instruction 138 137 * executed by the CPU is not likely to be worst-case every 139 138 * time, so take 1/2 of the exit latency as a very rough 140 139 * approximation of the average of it. 
141 140 */ 142 - if (measured_us >= lat) 143 - measured_us -= lat / 2; 141 + if (measured_ns >= lat_ns) 142 + measured_ns -= lat_ns / 2; 144 143 else 145 - measured_us /= 2; 144 + measured_ns /= 2; 146 145 } 147 146 148 147 /* ··· 152 155 153 156 cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT; 154 157 155 - if (drv->states[i].target_residency <= sleep_length_us) { 158 + if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) { 156 159 idx_timer = i; 157 - if (drv->states[i].target_residency <= measured_us) 160 + if (drv->states[i].target_residency_ns <= measured_ns) 158 161 idx_hit = i; 159 162 } 160 163 } ··· 190 193 * Save idle duration values corresponding to non-timer wakeups for 191 194 * pattern detection. 192 195 */ 193 - cpu_data->intervals[cpu_data->interval_idx++] = measured_us; 196 + cpu_data->intervals[cpu_data->interval_idx++] = measured_ns; 194 197 if (cpu_data->interval_idx > INTERVALS) 195 198 cpu_data->interval_idx = 0; 196 199 } ··· 200 203 * @drv: cpuidle driver containing state data. 201 204 * @dev: Target CPU. 202 205 * @state_idx: Index of the capping idle state. 203 - * @duration_us: Idle duration value to match. 206 + * @duration_ns: Idle duration value to match. 
204 207 */ 205 208 static int teo_find_shallower_state(struct cpuidle_driver *drv, 206 209 struct cpuidle_device *dev, int state_idx, 207 - unsigned int duration_us) 210 + u64 duration_ns) 208 211 { 209 212 int i; 210 213 ··· 213 216 continue; 214 217 215 218 state_idx = i; 216 - if (drv->states[i].target_residency <= duration_us) 219 + if (drv->states[i].target_residency_ns <= duration_ns) 217 220 break; 218 221 } 219 222 return state_idx; ··· 229 232 bool *stop_tick) 230 233 { 231 234 struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); 232 - int latency_req = cpuidle_governor_latency_req(dev->cpu); 233 - unsigned int duration_us, hits, misses, early_hits; 235 + s64 latency_req = cpuidle_governor_latency_req(dev->cpu); 236 + u64 duration_ns; 237 + unsigned int hits, misses, early_hits; 234 238 int max_early_idx, constraint_idx, idx, i; 235 239 ktime_t delta_tick; 236 240 ··· 242 244 243 245 cpu_data->time_span_ns = local_clock(); 244 246 245 - cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick); 246 - duration_us = ktime_to_us(cpu_data->sleep_length_ns); 247 + duration_ns = tick_nohz_get_sleep_length(&delta_tick); 248 + cpu_data->sleep_length_ns = duration_ns; 247 249 248 250 hits = 0; 249 251 misses = 0; ··· 260 262 * Ignore disabled states with target residencies beyond 261 263 * the anticipated idle duration. 262 264 */ 263 - if (s->target_residency > duration_us) 265 + if (s->target_residency_ns > duration_ns) 264 266 continue; 265 267 266 268 /* ··· 299 301 * shallow for that role. 
300 302 */ 301 303 if (!(tick_nohz_tick_stopped() && 302 - drv->states[idx].target_residency < TICK_USEC)) { 304 + drv->states[idx].target_residency_ns < TICK_NSEC)) { 303 305 early_hits = cpu_data->states[i].early_hits; 304 306 max_early_idx = idx; 305 307 } ··· 313 315 misses = cpu_data->states[i].misses; 314 316 } 315 317 316 - if (s->target_residency > duration_us) 318 + if (s->target_residency_ns > duration_ns) 317 319 break; 318 320 319 - if (s->exit_latency > latency_req && constraint_idx > i) 321 + if (s->exit_latency_ns > latency_req && constraint_idx > i) 320 322 constraint_idx = i; 321 323 322 324 idx = i; ··· 325 327 326 328 if (early_hits < cpu_data->states[i].early_hits && 327 329 !(tick_nohz_tick_stopped() && 328 - drv->states[i].target_residency < TICK_USEC)) { 330 + drv->states[i].target_residency_ns < TICK_NSEC)) { 329 331 early_hits = cpu_data->states[i].early_hits; 330 332 max_early_idx = i; 331 333 } ··· 341 343 */ 342 344 if (hits <= misses && max_early_idx >= 0) { 343 345 idx = max_early_idx; 344 - duration_us = drv->states[idx].target_residency; 346 + duration_ns = drv->states[idx].target_residency_ns; 345 347 } 346 348 347 349 /* ··· 362 364 * the current expected idle duration value. 363 365 */ 364 366 for (i = 0; i < INTERVALS; i++) { 365 - unsigned int val = cpu_data->intervals[i]; 367 + u64 val = cpu_data->intervals[i]; 366 368 367 - if (val >= duration_us) 369 + if (val >= duration_ns) 368 370 continue; 369 371 370 372 count++; ··· 376 378 * values are in the interesting range. 377 379 */ 378 380 if (count > INTERVALS / 2) { 379 - unsigned int avg_us = div64_u64(sum, count); 381 + u64 avg_ns = div64_u64(sum, count); 380 382 381 383 /* 382 384 * Avoid spending too much time in an idle state that 383 385 * would be too shallow. 
384 386 */ 385 - if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) { 386 - duration_us = avg_us; 387 - if (drv->states[idx].target_residency > avg_us) 387 + if (!(tick_nohz_tick_stopped() && avg_ns < TICK_NSEC)) { 388 + duration_ns = avg_ns; 389 + if (drv->states[idx].target_residency_ns > avg_ns) 388 390 idx = teo_find_shallower_state(drv, dev, 389 - idx, avg_us); 391 + idx, avg_ns); 390 392 } 391 393 } 392 394 } ··· 396 398 * expected idle duration is shorter than the tick period length. 397 399 */ 398 400 if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) || 399 - duration_us < TICK_USEC) && !tick_nohz_tick_stopped()) { 400 - unsigned int delta_tick_us = ktime_to_us(delta_tick); 401 - 401 + duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) { 402 402 *stop_tick = false; 403 403 404 404 /* ··· 405 409 * till the closest timer including the tick, try to correct 406 410 * that. 407 411 */ 408 - if (idx > 0 && drv->states[idx].target_residency > delta_tick_us) 409 - idx = teo_find_shallower_state(drv, dev, idx, delta_tick_us); 412 + if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick) 413 + idx = teo_find_shallower_state(drv, dev, idx, delta_tick); 410 414 } 411 415 412 416 return idx; ··· 450 454 memset(cpu_data, 0, sizeof(*cpu_data)); 451 455 452 456 for (i = 0; i < INTERVALS; i++) 453 - cpu_data->intervals[i] = UINT_MAX; 457 + cpu_data->intervals[i] = U64_MAX; 454 458 455 459 return 0; 456 460 }
+2
drivers/cpuidle/poll_state.c
··· 49 49 snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); 50 50 state->exit_latency = 0; 51 51 state->target_residency = 0; 52 + state->exit_latency_ns = 0; 53 + state->target_residency_ns = 0; 52 54 state->power_usage = -1; 53 55 state->enter = poll_idle; 54 56 state->disabled = false;
+17 -3
drivers/cpuidle/sysfs.c
··· 273 273 return sprintf(buf, "%s\n", state->_name);\ 274 274 } 275 275 276 - define_show_state_function(exit_latency) 277 - define_show_state_function(target_residency) 276 + #define define_show_state_time_function(_name) \ 277 + static ssize_t show_state_##_name(struct cpuidle_state *state, \ 278 + struct cpuidle_state_usage *state_usage, \ 279 + char *buf) \ 280 + { \ 281 + return sprintf(buf, "%llu\n", ktime_to_us(state->_name##_ns)); \ 282 + } 283 + 284 + define_show_state_time_function(exit_latency) 285 + define_show_state_time_function(target_residency) 278 286 define_show_state_function(power_usage) 279 287 define_show_state_ull_function(usage) 280 - define_show_state_ull_function(time) 281 288 define_show_state_str_function(name) 282 289 define_show_state_str_function(desc) 283 290 define_show_state_ull_function(above) 284 291 define_show_state_ull_function(below) 292 + 293 + static ssize_t show_state_time(struct cpuidle_state *state, 294 + struct cpuidle_state_usage *state_usage, 295 + char *buf) 296 + { 297 + return sprintf(buf, "%llu\n", ktime_to_us(state_usage->time_ns)); 298 + } 285 299 286 300 static ssize_t show_state_disable(struct cpuidle_state *state, 287 301 struct cpuidle_state_usage *state_usage,
+5 -3
include/linux/cpuidle.h
··· 35 35 struct cpuidle_state_usage { 36 36 unsigned long long disable; 37 37 unsigned long long usage; 38 - unsigned long long time; /* in US */ 38 + u64 time_ns; 39 39 unsigned long long above; /* Number of times it's been too deep */ 40 40 unsigned long long below; /* Number of times it's been too shallow */ 41 41 #ifdef CONFIG_SUSPEND ··· 48 48 char name[CPUIDLE_NAME_LEN]; 49 49 char desc[CPUIDLE_DESC_LEN]; 50 50 51 + u64 exit_latency_ns; 52 + u64 target_residency_ns; 51 53 unsigned int flags; 52 54 unsigned int exit_latency; /* in US */ 53 55 int power_usage; /* in mW */ ··· 91 89 ktime_t next_hrtimer; 92 90 93 91 int last_state_idx; 94 - int last_residency; 92 + u64 last_residency_ns; 95 93 u64 poll_limit_ns; 96 94 struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; 97 95 struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; ··· 265 263 266 264 #ifdef CONFIG_CPU_IDLE 267 265 extern int cpuidle_register_governor(struct cpuidle_governor *gov); 268 - extern int cpuidle_governor_latency_req(unsigned int cpu); 266 + extern s64 cpuidle_governor_latency_req(unsigned int cpu); 269 267 #else 270 268 static inline int cpuidle_register_governor(struct cpuidle_governor *gov) 271 269 {return 0;}
+1 -1
kernel/sched/idle.c
··· 104 104 * update no idle residency and return. 105 105 */ 106 106 if (current_clr_polling_and_test()) { 107 - dev->last_residency = 0; 107 + dev->last_residency_ns = 0; 108 108 local_irq_enable(); 109 109 return -EBUSY; 110 110 }