Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Modify mmhub block to fit for the unified ras block data and ops

1.Modify the mmhub block to fit the unified ras block data and ops.
2.Change amdgpu_mmhub_ras_funcs to amdgpu_mmhub_ras, and remove the _funcs suffix from the corresponding variable names.
3.Remove the const qualifier from the mmhub ras variables so that the mmhub ras block can be inserted into the amdgpu device ras block linked list.
4.Invoke the amdgpu_ras_register_ras_block function to register the mmhub ras block into the amdgpu device ras block linked list.
5.Remove the redundant code about mmhub in amdgpu_ras.c after using the unified ras block.
6.Fill the unified ras block .name, .block, .ras_late_init and .ras_fini fields for all mmhub versions. If .ras_late_init and .ras_fini are already defined by the selected mmhub version, the defined functions take effect; otherwise they default to amdgpu_mmhub_ras_late_init and amdgpu_mmhub_ras_fini.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

yipechai and committed by
Alex Deucher
5e67bba3 6d76e904

+73 -75
+6 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 3307 3307 if (adev->asic_reset_res) 3308 3308 goto fail; 3309 3309 3310 - if (adev->mmhub.ras_funcs && 3311 - adev->mmhub.ras_funcs->reset_ras_error_count) 3312 - adev->mmhub.ras_funcs->reset_ras_error_count(adev); 3310 + if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && 3311 + adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) 3312 + adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); 3313 3313 } else { 3314 3314 3315 3315 task_barrier_full(&hive->tb); ··· 4656 4656 4657 4657 if (!r && amdgpu_ras_intr_triggered()) { 4658 4658 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 4659 - if (tmp_adev->mmhub.ras_funcs && 4660 - tmp_adev->mmhub.ras_funcs->reset_ras_error_count) 4661 - tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev); 4659 + if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops && 4660 + tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) 4661 + tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev); 4662 4662 } 4663 4663 4664 4664 amdgpu_ras_intr_cleared();
+4 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
··· 447 447 return r; 448 448 } 449 449 450 - if (adev->mmhub.ras_funcs && 451 - adev->mmhub.ras_funcs->ras_late_init) { 452 - r = adev->mmhub.ras_funcs->ras_late_init(adev); 450 + if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) { 451 + r = adev->mmhub.ras->ras_block.ras_late_init(adev, NULL); 453 452 if (r) 454 453 return r; 455 454 } ··· 500 501 adev->umc.ras_funcs->ras_fini) 501 502 adev->umc.ras_funcs->ras_fini(adev); 502 503 503 - if (adev->mmhub.ras_funcs && 504 - adev->mmhub.ras_funcs->ras_fini) 505 - adev->mmhub.ras_funcs->ras_fini(adev); 504 + if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini) 505 + adev->mmhub.ras->ras_block.ras_fini(adev); 506 506 507 507 if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini) 508 508 adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
··· 24 24 #include "amdgpu.h" 25 25 #include "amdgpu_ras.h" 26 26 27 - int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev) 27 + int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info) 28 28 { 29 29 int r; 30 30 struct ras_ih_if ih_info = {
+4 -10
drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
··· 21 21 #ifndef __AMDGPU_MMHUB_H__ 22 22 #define __AMDGPU_MMHUB_H__ 23 23 24 - struct amdgpu_mmhub_ras_funcs { 25 - int (*ras_late_init)(struct amdgpu_device *adev); 26 - void (*ras_fini)(struct amdgpu_device *adev); 27 - void (*query_ras_error_count)(struct amdgpu_device *adev, 28 - void *ras_error_status); 29 - void (*query_ras_error_status)(struct amdgpu_device *adev); 30 - void (*reset_ras_error_count)(struct amdgpu_device *adev); 31 - void (*reset_ras_error_status)(struct amdgpu_device *adev); 24 + struct amdgpu_mmhub_ras { 25 + struct amdgpu_ras_block_object ras_block; 32 26 }; 33 27 34 28 struct amdgpu_mmhub_funcs { ··· 44 50 struct amdgpu_mmhub { 45 51 struct ras_common_if *ras_if; 46 52 const struct amdgpu_mmhub_funcs *funcs; 47 - const struct amdgpu_mmhub_ras_funcs *ras_funcs; 53 + struct amdgpu_mmhub_ras *ras; 48 54 }; 49 55 50 - int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev); 56 + int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info); 51 57 void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev); 52 58 #endif 53 59
+13 -34
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 986 986 } 987 987 break; 988 988 case AMDGPU_RAS_BLOCK__GFX: 989 + case AMDGPU_RAS_BLOCK__MMHUB: 989 990 if (!block_obj || !block_obj->hw_ops) { 990 991 dev_info(adev->dev, "%s doesn't config ras function \n", 991 992 get_ras_block_str(&info->head)); ··· 997 996 998 997 if (block_obj->hw_ops->query_ras_error_status) 999 998 block_obj->hw_ops->query_ras_error_status(adev); 1000 - break; 1001 - case AMDGPU_RAS_BLOCK__MMHUB: 1002 - if (adev->mmhub.ras_funcs && 1003 - adev->mmhub.ras_funcs->query_ras_error_count) 1004 - adev->mmhub.ras_funcs->query_ras_error_count(adev, &err_data); 1005 - 1006 - if (adev->mmhub.ras_funcs && 1007 - adev->mmhub.ras_funcs->query_ras_error_status) 1008 - adev->mmhub.ras_funcs->query_ras_error_status(adev); 1009 999 break; 1010 1000 case AMDGPU_RAS_BLOCK__PCIE_BIF: 1011 1001 if (adev->nbio.ras_funcs && ··· 1081 1089 1082 1090 switch (block) { 1083 1091 case AMDGPU_RAS_BLOCK__GFX: 1092 + case AMDGPU_RAS_BLOCK__MMHUB: 1084 1093 if (!block_obj || !block_obj->hw_ops) { 1085 1094 dev_info(adev->dev, "%s doesn't config ras function \n", ras_block_str(block)); 1086 1095 return -EINVAL; ··· 1092 1099 1093 1100 if (block_obj->hw_ops->reset_ras_error_status) 1094 1101 block_obj->hw_ops->reset_ras_error_status(adev); 1095 - break; 1096 - case AMDGPU_RAS_BLOCK__MMHUB: 1097 - if (adev->mmhub.ras_funcs && 1098 - adev->mmhub.ras_funcs->reset_ras_error_count) 1099 - adev->mmhub.ras_funcs->reset_ras_error_count(adev); 1100 - 1101 - if (adev->mmhub.ras_funcs && 1102 - adev->mmhub.ras_funcs->reset_ras_error_status) 1103 - adev->mmhub.ras_funcs->reset_ras_error_status(adev); 1104 1102 break; 1105 1103 case AMDGPU_RAS_BLOCK__SDMA: 1106 1104 if (adev->sdma.funcs->reset_ras_error_count) ··· 1809 1825 * Only two block need to query read/write 1810 1826 * RspStatus at current state 1811 1827 */ 1812 - switch (info->head.block) { 1813 - case AMDGPU_RAS_BLOCK__GFX: 1814 - if (!block_obj || !block_obj->hw_ops) { 1815 - dev_info(adev->dev, "%s doesn't config ras 
function \n", get_ras_block_str(&info->head)); 1816 - return ; 1817 - } 1828 + if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) && 1829 + (info->head.block != AMDGPU_RAS_BLOCK__MMHUB)) 1830 + return ; 1818 1831 1819 - if (block_obj->hw_ops->query_ras_error_status) 1820 - block_obj->hw_ops->query_ras_error_status(adev); 1821 - break; 1822 - case AMDGPU_RAS_BLOCK__MMHUB: 1823 - if (adev->mmhub.ras_funcs && 1824 - adev->mmhub.ras_funcs->query_ras_error_status) 1825 - adev->mmhub.ras_funcs->query_ras_error_status(adev); 1826 - break; 1827 - default: 1828 - break; 1832 + block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, info->head.sub_block_index); 1833 + if (!block_obj || !block_obj->hw_ops) { 1834 + dev_info(adev->dev, "%s doesn't config ras function \n", get_ras_block_str(&info->head)); 1835 + return ; 1829 1836 } 1837 + 1838 + if (block_obj->hw_ops->query_ras_error_status) 1839 + block_obj->hw_ops->query_ras_error_status(adev); 1840 + 1830 1841 } 1831 1842 1832 1843 static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
+21 -6
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 1248 1248 { 1249 1249 switch (adev->ip_versions[MMHUB_HWIP][0]) { 1250 1250 case IP_VERSION(9, 4, 0): 1251 - adev->mmhub.ras_funcs = &mmhub_v1_0_ras_funcs; 1251 + adev->mmhub.ras = &mmhub_v1_0_ras; 1252 1252 break; 1253 1253 case IP_VERSION(9, 4, 1): 1254 - adev->mmhub.ras_funcs = &mmhub_v9_4_ras_funcs; 1254 + adev->mmhub.ras = &mmhub_v9_4_ras; 1255 1255 break; 1256 1256 case IP_VERSION(9, 4, 2): 1257 - adev->mmhub.ras_funcs = &mmhub_v1_7_ras_funcs; 1257 + adev->mmhub.ras = &mmhub_v1_7_ras; 1258 1258 break; 1259 1259 default: 1260 1260 /* mmhub ras is not available */ 1261 1261 break; 1262 + } 1263 + 1264 + if (adev->mmhub.ras) { 1265 + amdgpu_ras_register_ras_block(adev, &adev->mmhub.ras->ras_block); 1266 + 1267 + strcpy(adev->mmhub.ras->ras_block.name,"mmhub"); 1268 + adev->mmhub.ras->ras_block.block = AMDGPU_RAS_BLOCK__MMHUB; 1269 + 1270 + /* If don't define special ras_late_init function, use default ras_late_init */ 1271 + if (!adev->mmhub.ras->ras_block.ras_late_init) 1272 + adev->mmhub.ras->ras_block.ras_late_init = amdgpu_mmhub_ras_late_init; 1273 + 1274 + /* If don't define special ras_fini function, use default ras_fini */ 1275 + if (!adev->mmhub.ras->ras_block.ras_fini) 1276 + adev->mmhub.ras->ras_block.ras_fini = amdgpu_mmhub_ras_fini; 1262 1277 } 1263 1278 } 1264 1279 ··· 1358 1343 } 1359 1344 1360 1345 if (!amdgpu_persistent_edc_harvesting_supported(adev)) { 1361 - if (adev->mmhub.ras_funcs && 1362 - adev->mmhub.ras_funcs->reset_ras_error_count) 1363 - adev->mmhub.ras_funcs->reset_ras_error_count(adev); 1346 + if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && 1347 + adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) 1348 + adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); 1364 1349 1365 1350 if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops && 1366 1351 adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count)
+7 -3
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
··· 774 774 } 775 775 } 776 776 777 - const struct amdgpu_mmhub_ras_funcs mmhub_v1_0_ras_funcs = { 778 - .ras_late_init = amdgpu_mmhub_ras_late_init, 779 - .ras_fini = amdgpu_mmhub_ras_fini, 777 + struct amdgpu_ras_block_hw_ops mmhub_v1_0_ras_hw_ops = { 780 778 .query_ras_error_count = mmhub_v1_0_query_ras_error_count, 781 779 .reset_ras_error_count = mmhub_v1_0_reset_ras_error_count, 780 + }; 781 + 782 + struct amdgpu_mmhub_ras mmhub_v1_0_ras = { 783 + .ras_block = { 784 + .hw_ops = &mmhub_v1_0_ras_hw_ops, 785 + }, 782 786 }; 783 787 784 788 const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
+1 -1
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
··· 24 24 #define __MMHUB_V1_0_H__ 25 25 26 26 extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs; 27 - extern const struct amdgpu_mmhub_ras_funcs mmhub_v1_0_ras_funcs; 27 + extern struct amdgpu_mmhub_ras mmhub_v1_0_ras; 28 28 29 29 #endif
+7 -3
drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
··· 1321 1321 } 1322 1322 } 1323 1323 1324 - const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = { 1325 - .ras_late_init = amdgpu_mmhub_ras_late_init, 1326 - .ras_fini = amdgpu_mmhub_ras_fini, 1324 + struct amdgpu_ras_block_hw_ops mmhub_v1_7_ras_hw_ops = { 1327 1325 .query_ras_error_count = mmhub_v1_7_query_ras_error_count, 1328 1326 .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count, 1329 1327 .query_ras_error_status = mmhub_v1_7_query_ras_error_status, 1330 1328 .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status, 1329 + }; 1330 + 1331 + struct amdgpu_mmhub_ras mmhub_v1_7_ras = { 1332 + .ras_block = { 1333 + .hw_ops = &mmhub_v1_7_ras_hw_ops, 1334 + }, 1331 1335 }; 1332 1336 1333 1337 const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
+1 -1
drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h
··· 24 24 #define __MMHUB_V1_7_H__ 25 25 26 26 extern const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs; 27 - extern const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs; 27 + extern struct amdgpu_mmhub_ras mmhub_v1_7_ras; 28 28 29 29 #endif
+7 -3
drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
··· 1655 1655 } 1656 1656 } 1657 1657 1658 - const struct amdgpu_mmhub_ras_funcs mmhub_v9_4_ras_funcs = { 1659 - .ras_late_init = amdgpu_mmhub_ras_late_init, 1660 - .ras_fini = amdgpu_mmhub_ras_fini, 1658 + const struct amdgpu_ras_block_hw_ops mmhub_v9_4_ras_hw_ops = { 1661 1659 .query_ras_error_count = mmhub_v9_4_query_ras_error_count, 1662 1660 .reset_ras_error_count = mmhub_v9_4_reset_ras_error_count, 1663 1661 .query_ras_error_status = mmhub_v9_4_query_ras_error_status, 1662 + }; 1663 + 1664 + struct amdgpu_mmhub_ras mmhub_v9_4_ras = { 1665 + .ras_block = { 1666 + .hw_ops = &mmhub_v9_4_ras_hw_ops, 1667 + }, 1664 1668 }; 1665 1669 1666 1670 const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
+1 -1
drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
··· 24 24 #define __MMHUB_V9_4_H__ 25 25 26 26 extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs; 27 - extern const struct amdgpu_mmhub_ras_funcs mmhub_v9_4_ras_funcs; 27 + extern struct amdgpu_mmhub_ras mmhub_v9_4_ras; 28 28 29 29 #endif