Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Add sysfs interface for gc reset mask

Add two sysfs interfaces for gfx and compute:
gfx_reset_mask
compute_reset_mask

These interfaces are read-only and show the resets supported by the IP.
For example, full adapter reset (mode1/mode2/BACO/etc),
soft reset, queue reset, and pipe reset.

v2: make the sysfs node return a text string instead of raw flags (Christian)
v3: add a generic helper which takes the ring as parameter
and print the strings in the order they are applied (Christian)

check amdgpu_gpu_recovery before creating the sysfs files themselves,
and initialize the supported reset types in the IP version files (Lijo)
v4: Fixing uninitialized variables (Tim)

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Tim Huang <tim.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Jesse.zhang@amd.com and committed by Alex Deucher
(commit 6c8d1f4b, parent f4a3246a)

+172
+8
drivers/gpu/drm/amd/amdgpu/amdgpu.h
··· 299 299 #define AMDGPU_RESET_VCE (1 << 13) 300 300 #define AMDGPU_RESET_VCE1 (1 << 14) 301 301 302 + /* reset mask */ 303 + #define AMDGPU_RESET_TYPE_FULL (1 << 0) /* full adapter reset, mode1/mode2/BACO/etc. */ 304 + #define AMDGPU_RESET_TYPE_SOFT_RESET (1 << 1) /* IP level soft reset */ 305 + #define AMDGPU_RESET_TYPE_PER_QUEUE (1 << 2) /* per queue */ 306 + #define AMDGPU_RESET_TYPE_PER_PIPE (1 << 3) /* per pipe */ 307 + 302 308 /* max cursor sizes (in pixels) */ 303 309 #define CIK_CURSOR_WIDTH 128 304 310 #define CIK_CURSOR_HEIGHT 128 ··· 1470 1464 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, 1471 1465 struct dma_fence *gang); 1472 1466 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev); 1467 + ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring); 1468 + ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset); 1473 1469 1474 1470 /* atpx handler */ 1475 1471 #if defined(CONFIG_VGA_SWITCHEROO)
+44
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 6715 6715 } 6716 6716 return ret; 6717 6717 } 6718 + 6719 + ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring) 6720 + { 6721 + ssize_t size = 0; 6722 + 6723 + if (!ring || !ring->adev) 6724 + return size; 6725 + 6726 + if (amdgpu_device_should_recover_gpu(ring->adev)) 6727 + size |= AMDGPU_RESET_TYPE_FULL; 6728 + 6729 + if (unlikely(!ring->adev->debug_disable_soft_recovery) && 6730 + !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery) 6731 + size |= AMDGPU_RESET_TYPE_SOFT_RESET; 6732 + 6733 + return size; 6734 + } 6735 + 6736 + ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset) 6737 + { 6738 + ssize_t size = 0; 6739 + 6740 + if (supported_reset == 0) { 6741 + size += sysfs_emit_at(buf, size, "unsupported"); 6742 + size += sysfs_emit_at(buf, size, "\n"); 6743 + return size; 6744 + 6745 + } 6746 + 6747 + if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET) 6748 + size += sysfs_emit_at(buf, size, "soft "); 6749 + 6750 + if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) 6751 + size += sysfs_emit_at(buf, size, "queue "); 6752 + 6753 + if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE) 6754 + size += sysfs_emit_at(buf, size, "pipe "); 6755 + 6756 + if (supported_reset & AMDGPU_RESET_TYPE_FULL) 6757 + size += sysfs_emit_at(buf, size, "full "); 6758 + 6759 + size += sysfs_emit_at(buf, size, "\n"); 6760 + return size; 6761 + }
+70
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
··· 1588 1588 return count; 1589 1589 } 1590 1590 1591 + static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev, 1592 + struct device_attribute *attr, 1593 + char *buf) 1594 + { 1595 + struct drm_device *ddev = dev_get_drvdata(dev); 1596 + struct amdgpu_device *adev = drm_to_adev(ddev); 1597 + 1598 + if (!adev) 1599 + return -ENODEV; 1600 + 1601 + return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset); 1602 + } 1603 + 1604 + static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev, 1605 + struct device_attribute *attr, 1606 + char *buf) 1607 + { 1608 + struct drm_device *ddev = dev_get_drvdata(dev); 1609 + struct amdgpu_device *adev = drm_to_adev(ddev); 1610 + 1611 + if (!adev) 1612 + return -ENODEV; 1613 + 1614 + return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset); 1615 + } 1616 + 1591 1617 static DEVICE_ATTR(run_cleaner_shader, 0200, 1592 1618 NULL, amdgpu_gfx_set_run_cleaner_shader); 1593 1619 ··· 1627 1601 1628 1602 static DEVICE_ATTR(available_compute_partition, 0444, 1629 1603 amdgpu_gfx_get_available_compute_partition, NULL); 1604 + static DEVICE_ATTR(gfx_reset_mask, 0444, 1605 + amdgpu_gfx_get_gfx_reset_mask, NULL); 1606 + 1607 + static DEVICE_ATTR(compute_reset_mask, 0444, 1608 + amdgpu_gfx_get_compute_reset_mask, NULL); 1630 1609 1631 1610 static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev) 1632 1611 { ··· 1697 1666 device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); 1698 1667 } 1699 1668 1669 + static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev) 1670 + { 1671 + int r = 0; 1672 + 1673 + if (!amdgpu_gpu_recovery) 1674 + return r; 1675 + 1676 + if (adev->gfx.num_gfx_rings) { 1677 + r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask); 1678 + if (r) 1679 + return r; 1680 + } 1681 + 1682 + if (adev->gfx.num_compute_rings) { 1683 + r = device_create_file(adev->dev, &dev_attr_compute_reset_mask); 1684 + if (r) 1685 + return r; 1686 + } 1687 + 1688 + return r; 1689 
+ } 1690 + 1691 + static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev) 1692 + { 1693 + if (!amdgpu_gpu_recovery) 1694 + return; 1695 + 1696 + if (adev->gfx.num_gfx_rings) 1697 + device_remove_file(adev->dev, &dev_attr_gfx_reset_mask); 1698 + 1699 + if (adev->gfx.num_compute_rings) 1700 + device_remove_file(adev->dev, &dev_attr_compute_reset_mask); 1701 + } 1702 + 1700 1703 int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) 1701 1704 { 1702 1705 int r; ··· 1745 1680 if (r) 1746 1681 dev_err(adev->dev, "failed to create isolation sysfs files"); 1747 1682 1683 + r = amdgpu_gfx_sysfs_reset_mask_init(adev); 1684 + if (r) 1685 + dev_err(adev->dev, "failed to create reset mask sysfs files"); 1686 + 1748 1687 return r; 1749 1688 } 1750 1689 ··· 1756 1687 { 1757 1688 amdgpu_gfx_sysfs_xcp_fini(adev); 1758 1689 amdgpu_gfx_sysfs_isolation_shader_fini(adev); 1690 + amdgpu_gfx_sysfs_reset_mask_fini(adev); 1759 1691 } 1760 1692 1761 1693 int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
··· 424 424 /* reset mask */ 425 425 uint32_t grbm_soft_reset; 426 426 uint32_t srbm_soft_reset; 427 + uint32_t gfx_supported_reset; 428 + uint32_t compute_supported_reset; 427 429 428 430 /* gfx off */ 429 431 bool gfx_off_state; /* true: enabled, false: disabled */
+5
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
··· 4825 4825 } 4826 4826 } 4827 4827 } 4828 + /* TODO: Add queue reset mask when FW fully supports it */ 4829 + adev->gfx.gfx_supported_reset = 4830 + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 4831 + adev->gfx.compute_supported_reset = 4832 + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 4828 4833 4829 4834 r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0); 4830 4835 if (r) {
+18
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
··· 1691 1691 } 1692 1692 } 1693 1693 1694 + adev->gfx.gfx_supported_reset = 1695 + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1696 + adev->gfx.compute_supported_reset = 1697 + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1698 + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1699 + case IP_VERSION(11, 0, 0): 1700 + case IP_VERSION(11, 0, 2): 1701 + case IP_VERSION(11, 0, 3): 1702 + if ((adev->gfx.me_fw_version >= 2280) && 1703 + (adev->gfx.mec_fw_version >= 2410)) { 1704 + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1705 + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1706 + } 1707 + break; 1708 + default: 1709 + break; 1710 + } 1711 + 1694 1712 if (!adev->enable_mes_kiq) { 1695 1713 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1696 1714 if (r) {
+6
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
··· 1437 1437 } 1438 1438 } 1439 1439 1440 + /* TODO: Add queue reset mask when FW fully supports it */ 1441 + adev->gfx.gfx_supported_reset = 1442 + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1443 + adev->gfx.compute_supported_reset = 1444 + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1445 + 1440 1446 if (!adev->enable_mes_kiq) { 1441 1447 r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0); 1442 1448 if (r) {
+6
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
··· 2374 2374 } 2375 2375 } 2376 2376 2377 + /* TODO: Add queue reset mask when FW fully supports it */ 2378 + adev->gfx.gfx_supported_reset = 2379 + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 2380 + adev->gfx.compute_supported_reset = 2381 + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 2382 + 2377 2383 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); 2378 2384 if (r) { 2379 2385 DRM_ERROR("Failed to init KIQ BOs!\n");
+13
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
··· 1157 1157 return r; 1158 1158 } 1159 1159 1160 + adev->gfx.compute_supported_reset = 1161 + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1162 + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1163 + case IP_VERSION(9, 4, 3): 1164 + case IP_VERSION(9, 4, 4): 1165 + if (adev->gfx.mec_fw_version >= 155) { 1166 + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1167 + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; 1168 + } 1169 + break; 1170 + default: 1171 + break; 1172 + } 1160 1173 r = gfx_v9_4_3_gpu_early_init(adev); 1161 1174 if (r) 1162 1175 return r;