Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PM / sleep: Re-implement suspend-to-idle handling

In preparation for adding support for quiescing timers in the final
stage of suspend-to-idle transitions, rework the freeze_enter()
function making the system wait on a wakeup event, the freeze_wake()
function terminating the suspend-to-idle loop and the mechanism by
which deep idle states are entered during suspend-to-idle.

First of all, introduce a simple state machine for suspend-to-idle
and make the code in question use it.

Second, prevent freeze_enter() from losing wakeup events due to race
conditions and ensure that the number of online CPUs won't change
while it is being executed. In addition to that, make it force
all of the CPUs re-enter the idle loop in case they are in idle
states already (so they can enter deeper idle states if possible).

Next, drop cpuidle_use_deepest_state() and replace use_deepest_state
checks in cpuidle_select() and cpuidle_reflect() with a single
suspend-to-idle state check in cpuidle_idle_call().

Finally, introduce cpuidle_enter_freeze() that will simply find the
deepest idle state available to the given CPU and enter it using
cpuidle_enter().

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

+96 -32
+26 -23
drivers/cpuidle/cpuidle.c
··· 19 19 #include <linux/ktime.h> 20 20 #include <linux/hrtimer.h> 21 21 #include <linux/module.h> 22 + #include <linux/suspend.h> 22 23 #include <trace/events/power.h> 23 24 24 25 #include "cpuidle.h" ··· 33 32 static int enabled_devices; 34 33 static int off __read_mostly; 35 34 static int initialized __read_mostly; 36 - static bool use_deepest_state __read_mostly; 37 35 38 36 int cpuidle_disabled(void) 39 37 { ··· 66 66 } 67 67 68 68 /** 69 - * cpuidle_use_deepest_state - Enable/disable the "deepest idle" mode. 70 - * @enable: Whether enable or disable the feature. 71 - * 72 - * If the "deepest idle" mode is enabled, cpuidle will ignore the governor and 73 - * always use the state with the greatest exit latency (out of the states that 74 - * are not disabled). 75 - * 76 - * This function can only be called after cpuidle_pause() to avoid races. 77 - */ 78 - void cpuidle_use_deepest_state(bool enable) 79 - { 80 - use_deepest_state = enable; 81 - } 82 - 83 - /** 84 - * cpuidle_find_deepest_state - Find the state of the greatest exit latency. 85 - * @drv: cpuidle driver for a given CPU. 86 - * @dev: cpuidle device for a given CPU. 69 + * cpuidle_find_deepest_state - Find deepest state meeting specific conditions. 70 + * @drv: cpuidle driver for the given CPU. 71 + * @dev: cpuidle device for the given CPU. 87 72 */ 88 73 static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, 89 74 struct cpuidle_device *dev) ··· 87 102 ret = i; 88 103 } 89 104 return ret; 105 + } 106 + 107 + /** 108 + * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle. 109 + * 110 + * Find the deepest state available and enter it. 
111 + */ 112 + void cpuidle_enter_freeze(void) 113 + { 114 + struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); 115 + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); 116 + int index; 117 + 118 + index = cpuidle_find_deepest_state(drv, dev); 119 + if (index >= 0) 120 + cpuidle_enter(drv, dev, index); 121 + else 122 + arch_cpu_idle(); 123 + 124 + /* Interrupts are enabled again here. */ 125 + local_irq_disable(); 90 126 } 91 127 92 128 /** ··· 172 166 if (!drv || !dev || !dev->enabled) 173 167 return -EBUSY; 174 168 175 - if (unlikely(use_deepest_state)) 176 - return cpuidle_find_deepest_state(drv, dev); 177 - 178 169 return cpuidle_curr_governor->select(drv, dev); 179 170 } 180 171 ··· 203 200 */ 204 201 void cpuidle_reflect(struct cpuidle_device *dev, int index) 205 202 { 206 - if (cpuidle_curr_governor->reflect && !unlikely(use_deepest_state)) 203 + if (cpuidle_curr_governor->reflect) 207 204 cpuidle_curr_governor->reflect(dev, index); 208 205 } 209 206
+2 -2
include/linux/cpuidle.h
··· 141 141 extern int cpuidle_enable_device(struct cpuidle_device *dev); 142 142 extern void cpuidle_disable_device(struct cpuidle_device *dev); 143 143 extern int cpuidle_play_dead(void); 144 - extern void cpuidle_use_deepest_state(bool enable); 144 + extern void cpuidle_enter_freeze(void); 145 145 146 146 extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); 147 147 #else ··· 174 174 {return -ENODEV; } 175 175 static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } 176 176 static inline int cpuidle_play_dead(void) {return -ENODEV; } 177 - static inline void cpuidle_use_deepest_state(bool enable) {} 177 + static inline void cpuidle_enter_freeze(void) { } 178 178 static inline struct cpuidle_driver *cpuidle_get_cpu_driver( 179 179 struct cpuidle_device *dev) {return NULL; } 180 180 #endif
+16
include/linux/suspend.h
··· 201 201 */ 202 202 extern void suspend_set_ops(const struct platform_suspend_ops *ops); 203 203 extern int suspend_valid_only_mem(suspend_state_t state); 204 + 205 + /* Suspend-to-idle state machine. */ 206 + enum freeze_state { 207 + FREEZE_STATE_NONE, /* Not suspended/suspending. */ 208 + FREEZE_STATE_ENTER, /* Enter suspend-to-idle. */ 209 + FREEZE_STATE_WAKE, /* Wake up from suspend-to-idle. */ 210 + }; 211 + 212 + extern enum freeze_state __read_mostly suspend_freeze_state; 213 + 214 + static inline bool idle_should_freeze(void) 215 + { 216 + return unlikely(suspend_freeze_state == FREEZE_STATE_ENTER); 217 + } 218 + 204 219 extern void freeze_set_ops(const struct platform_freeze_ops *ops); 205 220 extern void freeze_wake(void); 206 221 ··· 243 228 244 229 static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} 245 230 static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } 231 + static inline bool idle_should_freeze(void) { return false; } 246 232 static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {} 247 233 static inline void freeze_wake(void) {} 248 234 #endif /* !CONFIG_SUSPEND */
+36 -7
kernel/power/suspend.c
··· 37 37 static const struct platform_suspend_ops *suspend_ops; 38 38 static const struct platform_freeze_ops *freeze_ops; 39 39 static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); 40 - static bool suspend_freeze_wake; 40 + 41 + enum freeze_state __read_mostly suspend_freeze_state; 42 + static DEFINE_SPINLOCK(suspend_freeze_lock); 41 43 42 44 void freeze_set_ops(const struct platform_freeze_ops *ops) 43 45 { ··· 50 48 51 49 static void freeze_begin(void) 52 50 { 53 - suspend_freeze_wake = false; 51 + suspend_freeze_state = FREEZE_STATE_NONE; 54 52 } 55 53 56 54 static void freeze_enter(void) 57 55 { 58 - cpuidle_use_deepest_state(true); 56 + spin_lock_irq(&suspend_freeze_lock); 57 + if (pm_wakeup_pending()) 58 + goto out; 59 + 60 + suspend_freeze_state = FREEZE_STATE_ENTER; 61 + spin_unlock_irq(&suspend_freeze_lock); 62 + 63 + get_online_cpus(); 59 64 cpuidle_resume(); 60 - wait_event(suspend_freeze_wait_head, suspend_freeze_wake); 65 + 66 + /* Push all the CPUs into the idle loop. */ 67 + wake_up_all_idle_cpus(); 68 + pr_debug("PM: suspend-to-idle\n"); 69 + /* Make the current CPU wait so it can enter the idle loop too. 
*/ 70 + wait_event(suspend_freeze_wait_head, 71 + suspend_freeze_state == FREEZE_STATE_WAKE); 72 + pr_debug("PM: resume from suspend-to-idle\n"); 73 + 61 74 cpuidle_pause(); 62 - cpuidle_use_deepest_state(false); 75 + put_online_cpus(); 76 + 77 + spin_lock_irq(&suspend_freeze_lock); 78 + 79 + out: 80 + suspend_freeze_state = FREEZE_STATE_NONE; 81 + spin_unlock_irq(&suspend_freeze_lock); 63 82 } 64 83 65 84 void freeze_wake(void) 66 85 { 67 - suspend_freeze_wake = true; 68 - wake_up(&suspend_freeze_wait_head); 86 + unsigned long flags; 87 + 88 + spin_lock_irqsave(&suspend_freeze_lock, flags); 89 + if (suspend_freeze_state > FREEZE_STATE_NONE) { 90 + suspend_freeze_state = FREEZE_STATE_WAKE; 91 + wake_up(&suspend_freeze_wait_head); 92 + } 93 + spin_unlock_irqrestore(&suspend_freeze_lock, flags); 69 94 } 70 95 EXPORT_SYMBOL_GPL(freeze_wake); 71 96
+16
kernel/sched/idle.c
··· 7 7 #include <linux/tick.h> 8 8 #include <linux/mm.h> 9 9 #include <linux/stackprotector.h> 10 + #include <linux/suspend.h> 10 11 11 12 #include <asm/tlb.h> 12 13 ··· 104 103 * step to the grace period 105 104 */ 106 105 rcu_idle_enter(); 106 + 107 + /* 108 + * Suspend-to-idle ("freeze") is a system state in which all user space 109 + * has been frozen, all I/O devices have been suspended and the only 110 + * activity happens here and in iterrupts (if any). In that case bypass 111 + * the cpuidle governor and go stratight for the deepest idle state 112 + * available. Possibly also suspend the local tick and the entire 113 + * timekeeping to prevent timer interrupts from kicking us out of idle 114 + * until a proper wakeup interrupt happens. 115 + */ 116 + if (idle_should_freeze()) { 117 + cpuidle_enter_freeze(); 118 + local_irq_enable(); 119 + goto exit_idle; 120 + } 107 121 108 122 /* 109 123 * Ask the cpuidle framework to choose a convenient idle state.