Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nvme-pci: add support for ACPI StorageD3Enable property

This patch implements a solution for a BIOS hack used on some currently
shipping Intel systems to change driver power management policy for PCIe
NVMe drives. Some newer Intel platforms, like some Comet Lake systems,
require that PCIe devices use D3 when doing suspend-to-idle in order to
allow the platform to realize maximum power savings. This is particularly
needed to support ATX power supply shutdown on desktop systems. In order to
ensure this happens for root ports with storage devices, Microsoft
apparently created this ACPI _DSD property as a way to influence their
driver policy. To my knowledge this property has not been discussed with
the NVMe specification body.

Though the solution is not ideal, it addresses a problem that also affects
Linux since the NVMe driver's default policy of using NVMe APST during
suspend-to-idle prevents the PCI root port from going to D3 and leads to
higher power consumption for these platforms. The power consumption
difference may be negligible on laptop systems, but can be many watts on
desktop systems when the ATX power supply is blocked from powering down.

The patch creates a new nvme_acpi_storage_d3() function to check for the
StorageD3Enable property during probe and, if it is set, enables D3 by
setting the NVME_QUIRK_SIMPLE_SUSPEND quirk. It also provides a 'noacpi'
module parameter to allow skipping the quirk if needed.

Tested with:
- PM961 NVMe SED Samsung 512GB
- INTEL SSDPEKKF512G8

Link: https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/power-management-for-storage-hardware-devices-intro
Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>

authored by

David E. Box and committed by
Christoph Hellwig
df4f9bc4 b13c6393

+66
+3
drivers/acpi/property.c
··· 45 45 /* Thunderbolt GUID for WAKE_SUPPORTED: 6c501103-c189-4296-ba72-9bf5a26ebe5d */ 46 46 GUID_INIT(0x6c501103, 0xc189, 0x4296, 47 47 0xba, 0x72, 0x9b, 0xf5, 0xa2, 0x6e, 0xbe, 0x5d), 48 + /* Storage device needs D3 GUID: 5025030f-842f-4ab4-a561-99a5189762d0 */ 49 + GUID_INIT(0x5025030f, 0x842f, 0x4ab4, 50 + 0xa5, 0x61, 0x99, 0xa5, 0x18, 0x97, 0x62, 0xd0), 48 51 }; 49 52 50 53 /* ACPI _DSD data subnodes GUID: dbb8e3e6-5886-4ba6-8795-1319f52a966b */
+63
drivers/nvme/host/pci.c
··· 4 4 * Copyright (c) 2011-2014, Intel Corporation. 5 5 */ 6 6 7 + #include <linux/acpi.h> 7 8 #include <linux/aer.h> 8 9 #include <linux/async.h> 9 10 #include <linux/blkdev.h> ··· 94 93 static unsigned int poll_queues; 95 94 module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644); 96 95 MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); 96 + 97 + static bool noacpi; 98 + module_param(noacpi, bool, 0444); 99 + MODULE_PARM_DESC(noacpi, "disable acpi bios quirks"); 97 100 98 101 struct nvme_dev; 99 102 struct nvme_queue; ··· 2759 2754 return 0; 2760 2755 } 2761 2756 2757 + #ifdef CONFIG_ACPI 2758 + static bool nvme_acpi_storage_d3(struct pci_dev *dev) 2759 + { 2760 + struct acpi_device *adev; 2761 + struct pci_dev *root; 2762 + acpi_handle handle; 2763 + acpi_status status; 2764 + u8 val; 2765 + 2766 + /* 2767 + * Look for _DSD property specifying that the storage device on the port 2768 + * must use D3 to support deep platform power savings during 2769 + * suspend-to-idle. 2770 + */ 2771 + root = pcie_find_root_port(dev); 2772 + if (!root) 2773 + return false; 2774 + 2775 + adev = ACPI_COMPANION(&root->dev); 2776 + if (!adev) 2777 + return false; 2778 + 2779 + /* 2780 + * The property is defined in the PXSX device for South complex ports 2781 + * and in the PEGP device for North complex ports. 
2782 + */ 2783 + status = acpi_get_handle(adev->handle, "PXSX", &handle); 2784 + if (ACPI_FAILURE(status)) { 2785 + status = acpi_get_handle(adev->handle, "PEGP", &handle); 2786 + if (ACPI_FAILURE(status)) 2787 + return false; 2788 + } 2789 + 2790 + if (acpi_bus_get_device(handle, &adev)) 2791 + return false; 2792 + 2793 + if (fwnode_property_read_u8(acpi_fwnode_handle(adev), "StorageD3Enable", 2794 + &val)) 2795 + return false; 2796 + return val == 1; 2797 + } 2798 + #else 2799 + static inline bool nvme_acpi_storage_d3(struct pci_dev *dev) 2800 + { 2801 + return false; 2802 + } 2803 + #endif /* CONFIG_ACPI */ 2804 + 2762 2805 static void nvme_async_probe(void *data, async_cookie_t cookie) 2763 2806 { 2764 2807 struct nvme_dev *dev = data; ··· 2855 2802 goto unmap; 2856 2803 2857 2804 quirks |= check_vendor_combination_bug(pdev); 2805 + 2806 + if (!noacpi && nvme_acpi_storage_d3(pdev)) { 2807 + /* 2808 + * Some systems use a bios work around to ask for D3 on 2809 + * platforms that support kernel managed suspend. 2810 + */ 2811 + dev_info(&pdev->dev, 2812 + "platform quirk: setting simple suspend\n"); 2813 + quirks |= NVME_QUIRK_SIMPLE_SUSPEND; 2814 + } 2858 2815 2859 2816 /* 2860 2817 * Double check that our mempool alloc size will cover the biggest