Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe: Expose PCIe link downgrade attributes

Expose sysfs attributes for PCIe link downgrade capability and status.

v2: Move from debugfs to sysfs (Lucas, Rodrigo, Badal)
Rework macros and their naming (Rodrigo)
v3: Use sysfs_create_files() (Riana)
Fix checkpatch warning (Riana)
v4: s/downspeed/downgrade (Lucas, Rodrigo, Riana)
v5: Use PCIe Gen agnostic naming (Rodrigo)
v6: s/pcie_gen/auto_link (Lucas)

Signed-off-by: Raag Jadav <raag.jadav@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Link: https://lore.kernel.org/r/20250506054835.3395220-3-raag.jadav@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

authored by

Raag Jadav and committed by
Rodrigo Vivi
0e414bf7 f3e875b3

+96 -2
+91 -2
drivers/gpu/drm/xe/xe_device_sysfs.c
··· 3 3 * Copyright © 2023 Intel Corporation 4 4 */ 5 5 6 + #include <linux/device.h> 6 7 #include <linux/kobject.h> 7 8 #include <linux/pci.h> 8 9 #include <linux/sysfs.h> 9 10 10 - #include <drm/drm_managed.h> 11 - 12 11 #include "xe_device.h" 13 12 #include "xe_device_sysfs.h" 13 + #include "xe_mmio.h" 14 + #include "xe_pcode_api.h" 15 + #include "xe_pcode.h" 14 16 #include "xe_pm.h" 15 17 16 18 /** ··· 65 63 66 64 static DEVICE_ATTR_RW(vram_d3cold_threshold); 67 65 66 + /** 67 + * DOC: PCIe Gen5 Limitations 68 + * 69 + * Default link speed of discrete GPUs is determined by configuration parameters 70 + * stored in their flash memory, which are subject to override through user 71 + * initiated firmware updates. It has been observed that devices configured with 72 + * PCIe Gen5 as their default link speed can come across link quality issues due 73 + * to host or motherboard limitations and may have to auto-downgrade their link 74 + * to PCIe Gen4 speed when faced with unstable link at Gen5, which makes 75 + * firmware updates rather risky on such setups. It is required to ensure that 76 + * the device is capable of auto-downgrading its link to PCIe Gen4 speed before 77 + * pushing the firmware image with PCIe Gen5 as default configuration. This can 78 + * be done by reading ``auto_link_downgrade_capable`` sysfs entry, which will 79 + * denote if the device is capable of auto-downgrading its link to PCIe Gen4 80 + * speed with boolean output value of ``0`` or ``1``, meaning `incapable` or 81 + * `capable` respectively. 82 + * 83 + * .. 
code-block:: shell 84 + * 85 + * $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_capable 86 + * 87 + * Pushing the firmware image with PCIe Gen5 as default configuration on an auto 88 + * link downgrade incapable device and facing link instability due to host or 89 + * motherboard limitations can result in driver failing to bind to the device, 90 + * making further firmware updates impossible with RMA being the last 91 + * resort. 92 + * 93 + * Link downgrade status of auto link downgrade capable devices is available 94 + * through ``auto_link_downgrade_status`` sysfs entry with boolean output value 95 + * of ``0`` or ``1``, where ``0`` means no auto-downgrading was required during 96 + * link training (which is the optimal scenario) and ``1`` means the device has 97 + * auto-downgraded its link to PCIe Gen4 speed due to unstable Gen5 link. 98 + * 99 + * .. code-block:: shell 100 + * 101 + * $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_status 102 + */ 103 + 104 + static ssize_t 105 + auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *attr, char *buf) 106 + { 107 + struct pci_dev *pdev = to_pci_dev(dev); 108 + struct xe_device *xe = pdev_to_xe_device(pdev); 109 + u32 cap, val; 110 + 111 + xe_pm_runtime_get(xe); 112 + val = xe_mmio_read32(xe_root_tile_mmio(xe), BMG_PCIE_CAP); 113 + xe_pm_runtime_put(xe); 114 + 115 + cap = REG_FIELD_GET(LINK_DOWNGRADE, val); 116 + return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE ? 
true : false); 117 + } 118 + static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_capable); 119 + 120 + static ssize_t 121 + auto_link_downgrade_status_show(struct device *dev, struct device_attribute *attr, char *buf) 122 + { 123 + struct pci_dev *pdev = to_pci_dev(dev); 124 + struct xe_device *xe = pdev_to_xe_device(pdev); 125 + u32 val; 126 + int ret; 127 + 128 + xe_pm_runtime_get(xe); 129 + ret = xe_pcode_read(xe_device_get_root_tile(xe), 130 + PCODE_MBOX(DGFX_PCODE_STATUS, DGFX_GET_INIT_STATUS, 0), 131 + &val, NULL); 132 + xe_pm_runtime_put(xe); 133 + 134 + return ret ?: sysfs_emit(buf, "%u\n", REG_FIELD_GET(DGFX_LINK_DOWNGRADE_STATUS, val)); 135 + } 136 + static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status); 137 + 138 + static const struct attribute *auto_link_downgrade_attrs[] = { 139 + &dev_attr_auto_link_downgrade_capable.attr, 140 + &dev_attr_auto_link_downgrade_status.attr, 141 + NULL 142 + }; 143 + 68 144 static void xe_device_sysfs_fini(void *arg) 69 145 { 70 146 struct xe_device *xe = arg; 71 147 72 148 if (xe->d3cold.capable) 73 149 sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); 150 + 151 + if (xe->info.platform == XE_BATTLEMAGE) 152 + sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); 74 153 } 75 154 76 155 int xe_device_sysfs_init(struct xe_device *xe) ··· 161 78 162 79 if (xe->d3cold.capable) { 163 80 ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); 81 + if (ret) 82 + return ret; 83 + } 84 + 85 + if (xe->info.platform == XE_BATTLEMAGE) { 86 + ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); 164 87 if (ret) 165 88 return ret; 166 89 }
+5
drivers/gpu/drm/xe/xe_pcode_api.h
··· 34 34 #define DGFX_PCODE_STATUS 0x7E 35 35 #define DGFX_GET_INIT_STATUS 0x0 36 36 #define DGFX_INIT_STATUS_COMPLETE 0x1 37 + #define DGFX_LINK_DOWNGRADE_STATUS REG_BIT(31) 37 38 38 39 #define PCODE_POWER_SETUP 0x7C 39 40 #define POWER_SETUP_SUBCOMMAND_READ_I1 0x4 ··· 66 65 67 66 /* Auxiliary info bits */ 68 67 #define AUXINFO_HISTORY_OFFSET REG_GENMASK(31, 29) 68 + 69 + #define BMG_PCIE_CAP XE_REG(0x138340) 70 + #define LINK_DOWNGRADE REG_GENMASK(1, 0) 71 + #define DOWNGRADE_CAPABLE 2 69 72 70 73 struct pcode_err_decode { 71 74 int errno;