Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: fix device attribute node create failed with multi gpu

the origin design will use varible of "attr->states" to save node
supported states on current gpu device, but for multi gpu device, when
probe second gpu device, the driver will check attribute node states
from previous gpu device wthether to create attribute node.
it will cause other gpu device create attribute node faild.

1. add member attr_list into amdgpu_device to link supported device attribute node.
2. add new structure "struct amdgpu_device_attr_entry{}" to track device attribute state.
3. drop member "states" from amdgpu_device_attr.

v2:
1. move "attr_list" into amdgpu_pm and rename to "pm_attr_list".
2. refine create & remove device node functions parameter.

fix:
drm/amdgpu: optimize amdgpu device attribute code

Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Kevin Wang and committed by
Alex Deucher
ba02fd6b 90ca78de

+58 -41
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
··· 450 450 451 451 /* Used for I2C access to various EEPROMs on relevant ASICs */ 452 452 struct i2c_adapter smu_i2c; 453 + struct list_head pm_attr_list; 453 454 }; 454 455 455 456 #define R600_SSTU_DFLT 0
+49 -36
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
··· 1725 1725 }; 1726 1726 1727 1727 static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, 1728 - uint32_t mask) 1728 + uint32_t mask, enum amdgpu_device_attr_states *states) 1729 1729 { 1730 1730 struct device_attribute *dev_attr = &attr->dev_attr; 1731 1731 const char *attr_name = dev_attr->attr.name; ··· 1733 1733 enum amd_asic_type asic_type = adev->asic_type; 1734 1734 1735 1735 if (!(attr->flags & mask)) { 1736 - attr->states = ATTR_STATE_UNSUPPORTED; 1736 + *states = ATTR_STATE_UNSUPPORTED; 1737 1737 return 0; 1738 1738 } 1739 1739 ··· 1741 1741 1742 1742 if (DEVICE_ATTR_IS(pp_dpm_socclk)) { 1743 1743 if (asic_type < CHIP_VEGA10) 1744 - attr->states = ATTR_STATE_UNSUPPORTED; 1744 + *states = ATTR_STATE_UNSUPPORTED; 1745 1745 } else if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) { 1746 1746 if (asic_type < CHIP_VEGA10 || asic_type == CHIP_ARCTURUS) 1747 - attr->states = ATTR_STATE_UNSUPPORTED; 1747 + *states = ATTR_STATE_UNSUPPORTED; 1748 1748 } else if (DEVICE_ATTR_IS(pp_dpm_fclk)) { 1749 1749 if (asic_type < CHIP_VEGA20) 1750 - attr->states = ATTR_STATE_UNSUPPORTED; 1750 + *states = ATTR_STATE_UNSUPPORTED; 1751 1751 } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) { 1752 1752 if (asic_type == CHIP_ARCTURUS) 1753 - attr->states = ATTR_STATE_UNSUPPORTED; 1753 + *states = ATTR_STATE_UNSUPPORTED; 1754 1754 } else if (DEVICE_ATTR_IS(pp_od_clk_voltage)) { 1755 - attr->states = ATTR_STATE_UNSUPPORTED; 1755 + *states = ATTR_STATE_UNSUPPORTED; 1756 1756 if ((is_support_sw_smu(adev) && adev->smu.od_enabled) || 1757 1757 (!is_support_sw_smu(adev) && hwmgr->od_enabled)) 1758 - attr->states = ATTR_STATE_SUPPORTED; 1758 + *states = ATTR_STATE_SUPPORTED; 1759 1759 } else if (DEVICE_ATTR_IS(mem_busy_percent)) { 1760 1760 if (adev->flags & AMD_IS_APU || asic_type == CHIP_VEGA10) 1761 - attr->states = ATTR_STATE_UNSUPPORTED; 1761 + *states = ATTR_STATE_UNSUPPORTED; 1762 1762 } else if (DEVICE_ATTR_IS(pcie_bw)) { 1763 1763 /* PCIe Perf counters won't work on APU nodes */ 1764 1764 if (adev->flags & AMD_IS_APU) 1765 - attr->states = ATTR_STATE_UNSUPPORTED; 1765 + *states = ATTR_STATE_UNSUPPORTED; 1766 1766 } else if (DEVICE_ATTR_IS(unique_id)) { 1767 1767 if (!adev->unique_id) 1768 - attr->states = ATTR_STATE_UNSUPPORTED; 1768 + *states = ATTR_STATE_UNSUPPORTED; 1769 1769 } else if (DEVICE_ATTR_IS(pp_features)) { 1770 1770 if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10) 1771 - attr->states = ATTR_STATE_UNSUPPORTED; 1771 + *states = ATTR_STATE_UNSUPPORTED; 1772 1772 } 1773 1773 1774 1774 if (asic_type == CHIP_ARCTURUS) { ··· 1789 1789 1790 1790 static int amdgpu_device_attr_create(struct amdgpu_device *adev, 1791 1791 struct amdgpu_device_attr *attr, 1792 - uint32_t mask) 1792 + uint32_t mask, struct list_head *attr_list) 1793 1793 { 1794 1794 int ret = 0; 1795 1795 struct device_attribute *dev_attr = &attr->dev_attr; 1796 1796 const char *name = dev_attr->attr.name; 1797 + enum amdgpu_device_attr_states attr_states = ATTR_STATE_SUPPORTED; 1798 + struct amdgpu_device_attr_entry *attr_entry; 1799 + 1797 1800 int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, 1798 - uint32_t mask) = default_attr_update; 1801 + uint32_t mask, enum amdgpu_device_attr_states *states) = default_attr_update; 1799 1802 1800 1803 BUG_ON(!attr); 1801 1804 1802 1805 attr_update = attr->attr_update ? attr_update : default_attr_update; 1803 1806 1804 - ret = attr_update(adev, attr, mask); 1807 + ret = attr_update(adev, attr, mask, &attr_states); 1805 1808 if (ret) { 1806 1809 dev_err(adev->dev, "failed to update device file %s, ret = %d\n", 1807 1810 name, ret); 1808 1811 return ret; 1809 1812 } 1810 1813 1811 - /* the attr->states maybe changed after call attr->attr_update function */ 1812 - if (attr->states == ATTR_STATE_UNSUPPORTED) 1814 + if (attr_states == ATTR_STATE_UNSUPPORTED) 1813 1815 return 0; 1814 1816 1815 1817 ret = device_create_file(adev->dev, dev_attr); ··· 1820 1818 name, ret); 1821 1819 } 1822 1820 1823 - attr->states = ATTR_STATE_SUPPORTED; 1821 + attr_entry = kmalloc(sizeof(*attr_entry), GFP_KERNEL); 1822 + if (!attr_entry) 1823 + return -ENOMEM; 1824 + 1825 + attr_entry->attr = attr; 1826 + INIT_LIST_HEAD(&attr_entry->entry); 1827 + 1828 + list_add_tail(&attr_entry->entry, attr_list); 1824 1829 1825 1830 return ret; 1826 1831 } ··· 1836 1827 { 1837 1828 struct device_attribute *dev_attr = &attr->dev_attr; 1838 1829 1839 - if (attr->states == ATTR_STATE_UNSUPPORTED) 1840 - return; 1841 - 1842 1830 device_remove_file(adev->dev, dev_attr); 1843 - 1844 - attr->states = ATTR_STATE_UNSUPPORTED; 1845 1831 } 1832 + 1833 + static void amdgpu_device_attr_remove_groups(struct amdgpu_device *adev, 1834 + struct list_head *attr_list); 1846 1835 1847 1836 static int amdgpu_device_attr_create_groups(struct amdgpu_device *adev, 1848 1837 struct amdgpu_device_attr *attrs, 1849 1838 uint32_t counts, 1850 - uint32_t mask) 1839 + uint32_t mask, 1840 + struct list_head *attr_list) 1851 1841 { 1852 1842 int ret = 0; 1853 1843 uint32_t i = 0; 1854 1844 1855 1845 for (i = 0; i < counts; i++) { 1856 - ret = amdgpu_device_attr_create(adev, &attrs[i], mask); 1846 + ret = amdgpu_device_attr_create(adev, &attrs[i], mask, attr_list); 1857 1847 if (ret) 1858 1848 goto failed; 1859 1849 } ··· 1860 1852 return 0; 1861 1853 1862 1854 failed: 1863 - while (i--) 1864 - amdgpu_device_attr_remove(adev, &attrs[i]); 1855 + amdgpu_device_attr_remove_groups(adev, attr_list); 1865 1856 1866 1857 return ret; 1867 1858 } 1868 1859 1869 1860 static void amdgpu_device_attr_remove_groups(struct amdgpu_device *adev, 1870 - struct amdgpu_device_attr *attrs, 1871 - uint32_t counts) 1861 + struct list_head *attr_list) 1872 1862 { 1873 - uint32_t i = 0; 1863 + struct amdgpu_device_attr_entry *entry, *entry_tmp; 1874 1864 1875 - for (i = 0; i < counts; i++) 1876 - amdgpu_device_attr_remove(adev, &attrs[i]); 1865 + if (list_empty(attr_list)) 1866 + return ; 1867 + 1868 + list_for_each_entry_safe(entry, entry_tmp, attr_list, entry) { 1869 + amdgpu_device_attr_remove(adev, entry->attr); 1870 + list_del(&entry->entry); 1871 + kfree(entry); 1872 + } 1877 1873 } 1878 1874 1879 1875 static ssize_t amdgpu_hwmon_show_temp(struct device *dev, ··· 3288 3276 if (adev->pm.dpm_enabled == 0) 3289 3277 return 0; 3290 3278 3279 + INIT_LIST_HEAD(&adev->pm.pm_attr_list); 3280 + 3291 3281 adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, 3292 3282 DRIVER_NAME, adev, 3293 3283 hwmon_groups); ··· 3316 3302 ret = amdgpu_device_attr_create_groups(adev, 3317 3303 amdgpu_device_attrs, 3318 3304 ARRAY_SIZE(amdgpu_device_attrs), 3319 - mask); 3305 + mask, 3306 + &adev->pm.pm_attr_list); 3320 3307 if (ret) 3321 3308 return ret; 3322 3309 ··· 3334 3319 if (adev->pm.int_hwmon_dev) 3335 3320 hwmon_device_unregister(adev->pm.int_hwmon_dev); 3336 3321 3337 - amdgpu_device_attr_remove_groups(adev, 3338 - amdgpu_device_attrs, 3339 - ARRAY_SIZE(amdgpu_device_attrs)); 3322 + amdgpu_device_attr_remove_groups(adev, &adev->pm.pm_attr_list); 3340 3323 } 3341 3324 3342 3325 void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
+8 -5
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
··· 47 47 struct amdgpu_device_attr { 48 48 struct device_attribute dev_attr; 49 49 enum amdgpu_device_attr_flags flags; 50 - enum amdgpu_device_attr_states states; 51 - int (*attr_update)(struct amdgpu_device *adev, 52 - struct amdgpu_device_attr* attr, 53 - uint32_t mask); 50 + int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, 51 + uint32_t mask, enum amdgpu_device_attr_states *states); 52 + 53 + }; 54 + 55 + struct amdgpu_device_attr_entry { 56 + struct list_head entry; 57 + struct amdgpu_device_attr *attr; 54 58 }; 55 59 56 60 #define to_amdgpu_device_attr(_dev_attr) \ ··· 63 59 #define __AMDGPU_DEVICE_ATTR(_name, _mode, _show, _store, _flags, ...) \ 64 60 { .dev_attr = __ATTR(_name, _mode, _show, _store), \ 65 61 .flags = _flags, \ 66 - .states = ATTR_STATE_SUPPORTED, \ 67 62 ##__VA_ARGS__, } 68 63 69 64 #define AMDGPU_DEVICE_ATTR(_name, _mode, _flags, ...) \