Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Rework mca ras sw_init

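Rework the mca ras sw_init so that it aligns with other IP blocks: the mp0, mp1 and mpio ras blocks are now registered through dedicated amdgpu_mca_*_ras_sw_init() helpers instead of the mca funcs->init callback.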

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Hawking Zhang and committed by Alex Deucher
7f544c54 22e3d934

+103 -54
+13
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
··· 466 466 if (r) 467 467 return r; 468 468 469 + /* mca.x ras block */ 470 + r = amdgpu_mca_mp0_ras_sw_init(adev); 471 + if (r) 472 + return r; 473 + 474 + r = amdgpu_mca_mp1_ras_sw_init(adev); 475 + if (r) 476 + return r; 477 + 478 + r = amdgpu_mca_mpio_ras_sw_init(adev); 479 + if (r) 480 + return r; 481 + 469 482 if (!adev->gmc.xgmi.connected_to_cpu) { 470 483 adev->gmc.xgmi.ras = &xgmi_ras; 471 484 amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);
+72
drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
··· 70 70 71 71 amdgpu_mca_reset_error_count(adev, mc_status_addr); 72 72 } 73 + 74 + int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev) 75 + { 76 + int err; 77 + struct amdgpu_mca_ras_block *ras; 78 + 79 + if (!adev->mca.mp0.ras) 80 + return 0; 81 + 82 + ras = adev->mca.mp0.ras; 83 + 84 + err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); 85 + if (err) { 86 + dev_err(adev->dev, "Failed to register mca.mp0 ras block!\n"); 87 + return err; 88 + } 89 + 90 + strcpy(ras->ras_block.ras_comm.name, "mca.mp0"); 91 + ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA; 92 + ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 93 + adev->mca.mp0.ras_if = &ras->ras_block.ras_comm; 94 + 95 + return 0; 96 + } 97 + 98 + int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev) 99 + { 100 + int err; 101 + struct amdgpu_mca_ras_block *ras; 102 + 103 + if (!adev->mca.mp1.ras) 104 + return 0; 105 + 106 + ras = adev->mca.mp1.ras; 107 + 108 + err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); 109 + if (err) { 110 + dev_err(adev->dev, "Failed to register mca.mp1 ras block!\n"); 111 + return err; 112 + } 113 + 114 + strcpy(ras->ras_block.ras_comm.name, "mca.mp1"); 115 + ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA; 116 + ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 117 + adev->mca.mp1.ras_if = &ras->ras_block.ras_comm; 118 + 119 + return 0; 120 + } 121 + 122 + int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev) 123 + { 124 + int err; 125 + struct amdgpu_mca_ras_block *ras; 126 + 127 + if (!adev->mca.mpio.ras) 128 + return 0; 129 + 130 + ras = adev->mca.mpio.ras; 131 + 132 + err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); 133 + if (err) { 134 + dev_err(adev->dev, "Failed to register mca.mpio ras block!\n"); 135 + return err; 136 + } 137 + 138 + strcpy(ras->ras_block.ras_comm.name, "mca.mpio"); 139 + ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA; 140 + ras->ras_block.ras_comm.type = 
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 141 + adev->mca.mpio.ras_if = &ras->ras_block.ras_comm; 142 + 143 + return 0; 144 + }
+3 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
··· 30 30 struct amdgpu_mca_ras_block *ras; 31 31 }; 32 32 33 - struct amdgpu_mca_funcs { 34 - void (*init)(struct amdgpu_device *adev); 35 - }; 36 - 37 33 struct amdgpu_mca { 38 - const struct amdgpu_mca_funcs *funcs; 39 34 struct amdgpu_mca_ras mp0; 40 35 struct amdgpu_mca_ras mp1; 41 36 struct amdgpu_mca_ras mpio; ··· 50 55 void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, 51 56 uint64_t mc_status_addr, 52 57 void *ras_error_status); 53 - 58 + int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev); 59 + int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev); 60 + int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev); 54 61 #endif
+9 -6
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 1363 1363 adev->hdp.ras = &hdp_v4_0_ras; 1364 1364 } 1365 1365 1366 - static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev) 1366 + static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev) 1367 1367 { 1368 + struct amdgpu_mca *mca = &adev->mca; 1369 + 1368 1370 /* is UMC the right IP to check for MCA? Maybe DF? */ 1369 1371 switch (adev->ip_versions[UMC_HWIP][0]) { 1370 1372 case IP_VERSION(6, 7, 0): 1371 - if (!adev->gmc.xgmi.connected_to_cpu) 1372 - adev->mca.funcs = &mca_v3_0_funcs; 1373 + if (!adev->gmc.xgmi.connected_to_cpu) { 1374 + mca->mp0.ras = &mca_v3_0_mp0_ras; 1375 + mca->mp1.ras = &mca_v3_0_mp1_ras; 1376 + mca->mpio.ras = &mca_v3_0_mpio_ras; 1377 + } 1373 1378 break; 1374 1379 default: 1375 1380 break; ··· 1403 1398 gmc_v9_0_set_mmhub_ras_funcs(adev); 1404 1399 gmc_v9_0_set_gfxhub_funcs(adev); 1405 1400 gmc_v9_0_set_hdp_ras_funcs(adev); 1406 - gmc_v9_0_set_mca_funcs(adev); 1401 + gmc_v9_0_set_mca_ras_funcs(adev); 1407 1402 1408 1403 adev->gmc.shared_aperture_start = 0x2000000000000000ULL; 1409 1404 adev->gmc.shared_aperture_end = ··· 1616 1611 adev->gfxhub.funcs->init(adev); 1617 1612 1618 1613 adev->mmhub.funcs->init(adev); 1619 - if (adev->mca.funcs) 1620 - adev->mca.funcs->init(adev); 1621 1614 1622 1615 spin_lock_init(&adev->gmc.invalidate_lock); 1623 1616
+3 -41
drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
··· 51 51 return -EINVAL; 52 52 } 53 53 54 - const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = { 54 + static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = { 55 55 .query_ras_error_count = mca_v3_0_mp0_query_ras_error_count, 56 56 .query_ras_error_address = NULL, 57 57 }; 58 58 59 59 struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = { 60 60 .ras_block = { 61 - .ras_comm = { 62 - .block = AMDGPU_RAS_BLOCK__MCA, 63 - .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP0, 64 - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 65 - .name = "mp0", 66 - }, 67 61 .hw_ops = &mca_v3_0_mp0_hw_ops, 68 62 .ras_block_match = mca_v3_0_ras_block_match, 69 63 }, ··· 71 77 ras_error_status); 72 78 } 73 79 74 - const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = { 80 + static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = { 75 81 .query_ras_error_count = mca_v3_0_mp1_query_ras_error_count, 76 82 .query_ras_error_address = NULL, 77 83 }; 78 84 79 85 struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = { 80 86 .ras_block = { 81 - .ras_comm = { 82 - .block = AMDGPU_RAS_BLOCK__MCA, 83 - .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP1, 84 - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 85 - .name = "mp1", 86 - }, 87 87 .hw_ops = &mca_v3_0_mp1_hw_ops, 88 88 .ras_block_match = mca_v3_0_ras_block_match, 89 89 }, ··· 91 103 ras_error_status); 92 104 } 93 105 94 - const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = { 106 + static const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = { 95 107 .query_ras_error_count = mca_v3_0_mpio_query_ras_error_count, 96 108 .query_ras_error_address = NULL, 97 109 }; 98 110 99 111 struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = { 100 112 .ras_block = { 101 - .ras_comm = { 102 - .block = AMDGPU_RAS_BLOCK__MCA, 103 - .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MPIO, 104 - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 105 - .name = "mpio", 106 - }, 107 113 .hw_ops = &mca_v3_0_mpio_hw_ops, 108 114 .ras_block_match = 
mca_v3_0_ras_block_match, 109 115 }, 110 - }; 111 - 112 - 113 - static void mca_v3_0_init(struct amdgpu_device *adev) 114 - { 115 - struct amdgpu_mca *mca = &adev->mca; 116 - 117 - mca->mp0.ras = &mca_v3_0_mp0_ras; 118 - mca->mp1.ras = &mca_v3_0_mp1_ras; 119 - mca->mpio.ras = &mca_v3_0_mpio_ras; 120 - amdgpu_ras_register_ras_block(adev, &mca->mp0.ras->ras_block); 121 - amdgpu_ras_register_ras_block(adev, &mca->mp1.ras->ras_block); 122 - amdgpu_ras_register_ras_block(adev, &mca->mpio.ras->ras_block); 123 - mca->mp0.ras_if = &mca->mp0.ras->ras_block.ras_comm; 124 - mca->mp1.ras_if = &mca->mp1.ras->ras_block.ras_comm; 125 - mca->mpio.ras_if = &mca->mpio.ras->ras_block.ras_comm; 126 - } 127 - 128 - const struct amdgpu_mca_funcs mca_v3_0_funcs = { 129 - .init = mca_v3_0_init, 130 116 };
+3 -1
drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
··· 21 21 #ifndef __MCA_V3_0_H__ 22 22 #define __MCA_V3_0_H__ 23 23 24 - extern const struct amdgpu_mca_funcs mca_v3_0_funcs; 24 + extern struct amdgpu_mca_ras_block mca_v3_0_mp0_ras; 25 + extern struct amdgpu_mca_ras_block mca_v3_0_mp1_ras; 26 + extern struct amdgpu_mca_ras_block mca_v3_0_mpio_ras; 25 27 26 28 #endif