Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PM / sleep: Mechanism to avoid resuming runtime-suspended devices unnecessarily

Currently, some subsystems (e.g. PCI and the ACPI PM domain) have to
resume all runtime-suspended devices during system suspend, mostly
because those devices may need to be reprogrammed due to different
wakeup settings for system sleep and for runtime PM.

For some devices, though, it's OK to remain in runtime suspend
throughout a complete system suspend/resume cycle (if the device was in
runtime suspend at the start of the cycle). We would like to do this
whenever possible, to avoid the overhead of extra power-up and power-down
events.

However, problems may arise because the device's descendants may require
it to be at full power at various points during the cycle. Therefore the
most straightforward way to do this safely is if the device and all its
descendants can remain runtime suspended until the complete stage of
system resume.

To this end, introduce a new device PM flag, power.direct_complete
and modify the PM core to use that flag as follows.

If the ->prepare() callback of a device returns a positive number,
the PM core will regard that as an indication that it may leave the
device runtime-suspended. It will then check if the system power
transition in progress is a suspend (and not hibernation in particular)
and if the device is, indeed, runtime-suspended. In that case, the PM
core will set the device's power.direct_complete flag. Otherwise it
will clear power.direct_complete for the device and it also will later
clear it for the device's parent (if there's one).

Next, the PM core will not invoke the ->suspend(), ->suspend_late(),
->suspend_irq(), ->resume_irq(), ->resume_early(), or ->resume()
callbacks for all devices having power.direct_complete set. It
will invoke their ->complete() callbacks, however, and those
callbacks are then responsible for resuming the devices as
appropriate, if necessary. For example, in some cases they may
need to queue up runtime resume requests for the devices using
pm_request_resume().

Changelog partly based on a description of the idea by Alan Stern
(http://marc.info/?l=linux-pm&m=139940466625569&w=2).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>

+85 -23
+51 -15
drivers/base/power/main.c
··· 479 479 TRACE_DEVICE(dev); 480 480 TRACE_RESUME(0); 481 481 482 - if (dev->power.syscore) 482 + if (dev->power.syscore || dev->power.direct_complete) 483 483 goto Out; 484 484 485 485 if (!dev->power.is_noirq_suspended) ··· 605 605 TRACE_DEVICE(dev); 606 606 TRACE_RESUME(0); 607 607 608 - if (dev->power.syscore) 608 + if (dev->power.syscore || dev->power.direct_complete) 609 609 goto Out; 610 610 611 611 if (!dev->power.is_late_suspended) ··· 734 734 735 735 if (dev->power.syscore) 736 736 goto Complete; 737 + 738 + if (dev->power.direct_complete) { 739 + /* Match the pm_runtime_disable() in __device_suspend(). */ 740 + pm_runtime_enable(dev); 741 + goto Complete; 742 + } 737 743 738 744 dpm_wait(dev->parent, async); 739 745 dpm_watchdog_set(&wd, dev); ··· 1013 1007 goto Complete; 1014 1008 } 1015 1009 1016 - if (dev->power.syscore) 1010 + if (dev->power.syscore || dev->power.direct_complete) 1017 1011 goto Complete; 1018 1012 1019 1013 dpm_wait_for_children(dev, async); ··· 1152 1146 goto Complete; 1153 1147 } 1154 1148 1155 - if (dev->power.syscore) 1149 + if (dev->power.syscore || dev->power.direct_complete) 1156 1150 goto Complete; 1157 1151 1158 1152 dpm_wait_for_children(dev, async); ··· 1338 1332 if (dev->power.syscore) 1339 1333 goto Complete; 1340 1334 1335 + if (dev->power.direct_complete) { 1336 + if (pm_runtime_status_suspended(dev)) { 1337 + pm_runtime_disable(dev); 1338 + if (pm_runtime_suspended_if_enabled(dev)) 1339 + goto Complete; 1340 + 1341 + pm_runtime_enable(dev); 1342 + } 1343 + dev->power.direct_complete = false; 1344 + } 1345 + 1341 1346 dpm_watchdog_set(&wd, dev); 1342 1347 device_lock(dev); 1343 1348 ··· 1399 1382 1400 1383 End: 1401 1384 if (!error) { 1385 + struct device *parent = dev->parent; 1386 + 1402 1387 dev->power.is_suspended = true; 1403 - if (dev->power.wakeup_path 1404 - && dev->parent && !dev->parent->power.ignore_children) 1405 - dev->parent->power.wakeup_path = true; 1388 + if (parent) { 1389 + 
spin_lock_irq(&parent->power.lock); 1390 + 1391 + dev->parent->power.direct_complete = false; 1392 + if (dev->power.wakeup_path 1393 + && !dev->parent->power.ignore_children) 1394 + dev->parent->power.wakeup_path = true; 1395 + 1396 + spin_unlock_irq(&parent->power.lock); 1397 + } 1406 1398 } 1407 1399 1408 1400 device_unlock(dev); ··· 1513 1487 { 1514 1488 int (*callback)(struct device *) = NULL; 1515 1489 char *info = NULL; 1516 - int error = 0; 1490 + int ret = 0; 1517 1491 1518 1492 if (dev->power.syscore) 1519 1493 return 0; ··· 1549 1523 callback = dev->driver->pm->prepare; 1550 1524 } 1551 1525 1552 - if (callback) { 1553 - error = callback(dev); 1554 - suspend_report_result(callback, error); 1555 - } 1526 + if (callback) 1527 + ret = callback(dev); 1556 1528 1557 1529 device_unlock(dev); 1558 1530 1559 - if (error) 1531 + if (ret < 0) { 1532 + suspend_report_result(callback, ret); 1560 1533 pm_runtime_put(dev); 1561 - 1562 - return error; 1534 + return ret; 1535 + } 1536 + /* 1537 + * A positive return value from ->prepare() means "this device appears 1538 + * to be runtime-suspended and its state is fine, so if it really is 1539 + * runtime-suspended, you can leave it in that state provided that you 1540 + * will do the same thing with all of its descendants". This only 1541 + * applies to suspend transitions, however. 1542 + */ 1543 + spin_lock_irq(&dev->power.lock); 1544 + dev->power.direct_complete = ret > 0 && state.event == PM_EVENT_SUSPEND; 1545 + spin_unlock_irq(&dev->power.lock); 1546 + return 0; 1563 1547 } 1564 1548 1565 1549 /**
+28 -8
include/linux/pm.h
··· 93 93 * been registered) to recover from the race condition. 94 94 * This method is executed for all kinds of suspend transitions and is 95 95 * followed by one of the suspend callbacks: @suspend(), @freeze(), or 96 - * @poweroff(). The PM core executes subsystem-level @prepare() for all 97 - * devices before starting to invoke suspend callbacks for any of them, so 98 - * generally devices may be assumed to be functional or to respond to 99 - * runtime resume requests while @prepare() is being executed. However, 100 - * device drivers may NOT assume anything about the availability of user 101 - * space at that time and it is NOT valid to request firmware from within 102 - * @prepare() (it's too late to do that). It also is NOT valid to allocate 96 + * @poweroff(). If the transition is a suspend to memory or standby (that 97 + * is, not related to hibernation), the return value of @prepare() may be 98 + * used to indicate to the PM core to leave the device in runtime suspend 99 + * if applicable. Namely, if @prepare() returns a positive number, the PM 100 + * core will understand that as a declaration that the device appears to be 101 + * runtime-suspended and it may be left in that state during the entire 102 + * transition and during the subsequent resume if all of its descendants 103 + * are left in runtime suspend too. If that happens, @complete() will be 104 + * executed directly after @prepare() and it must ensure the proper 105 + * functioning of the device after the system resume. 106 + * The PM core executes subsystem-level @prepare() for all devices before 107 + * starting to invoke suspend callbacks for any of them, so generally 108 + * devices may be assumed to be functional or to respond to runtime resume 109 + * requests while @prepare() is being executed. 
However, device drivers 110 + * may NOT assume anything about the availability of user space at that 111 + * time and it is NOT valid to request firmware from within @prepare() 112 + * (it's too late to do that). It also is NOT valid to allocate 103 113 * substantial amounts of memory from @prepare() in the GFP_KERNEL mode. 104 114 * [To work around these limitations, drivers may register suspend and 105 115 * hibernation notifiers to be executed before the freezing of tasks.] ··· 122 112 * of the other devices that the PM core has unsuccessfully attempted to 123 113 * suspend earlier). 124 114 * The PM core executes subsystem-level @complete() after it has executed 125 - * the appropriate resume callbacks for all devices. 115 + * the appropriate resume callbacks for all devices. If the corresponding 116 + * @prepare() at the beginning of the suspend transition returned a 117 + * positive number and the device was left in runtime suspend (without 118 + * executing any suspend and resume callbacks for it), @complete() will be 119 + * the only callback executed for the device during resume. In that case, 120 + * @complete() must be prepared to do whatever is necessary to ensure the 121 + * proper functioning of the device after the system resume. To this end, 122 + * @complete() can check the power.direct_complete flag of the device to 123 + * learn whether (unset) or not (set) the previous suspend and resume 124 + * callbacks have been executed for it. 126 125 * 127 126 * @suspend: Executed before putting the system into a sleep state in which the 128 127 * contents of main memory are preserved. The exact action to perform ··· 565 546 bool is_late_suspended:1; 566 547 bool ignore_children:1; 567 548 bool early_init:1; /* Owned by the PM core */ 549 + bool direct_complete:1; /* Owned by the PM core */ 568 550 spinlock_t lock; 569 551 #ifdef CONFIG_PM_SLEEP 570 552 struct list_head entry;
+6
include/linux/pm_runtime.h
··· 101 101 return dev->power.runtime_status == RPM_SUSPENDED; 102 102 } 103 103 104 + static inline bool pm_runtime_suspended_if_enabled(struct device *dev) 105 + { 106 + return pm_runtime_status_suspended(dev) && dev->power.disable_depth == 1; 107 + } 108 + 104 109 static inline bool pm_runtime_enabled(struct device *dev) 105 110 { 106 111 return !dev->power.disable_depth; ··· 155 150 static inline bool pm_runtime_suspended(struct device *dev) { return false; } 156 151 static inline bool pm_runtime_active(struct device *dev) { return true; } 157 152 static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } 153 + static inline bool pm_runtime_suspended_if_enabled(struct device *dev) { return false; } 158 154 static inline bool pm_runtime_enabled(struct device *dev) { return false; } 159 155 160 156 static inline void pm_runtime_no_callbacks(struct device *dev) {}