Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: split umc callbacks to ras and non-ras ones

UMC RAS is not managed by the GPU driver when the GPU is
connected to the CPU through xGMI. Split the UMC callbacks
into RAS and non-RAS ones so the GPU driver only
initializes the UMC RAS callbacks when it manages
UMC RAS.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Hawking Zhang and committed by Alex Deucher
49070c4e 52137ca8

+51 -32
+8 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
··· 391 391 { 392 392 int r; 393 393 394 - if (adev->umc.funcs && adev->umc.funcs->ras_late_init) { 395 - r = adev->umc.funcs->ras_late_init(adev); 394 + if (adev->umc.ras_funcs && 395 + adev->umc.ras_funcs->ras_late_init) { 396 + r = adev->umc.ras_funcs->ras_late_init(adev); 396 397 if (r) 397 398 return r; 398 399 } ··· 419 418 420 419 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) 421 420 { 422 - amdgpu_umc_ras_fini(adev); 421 + if (adev->umc.ras_funcs && 422 + adev->umc.ras_funcs->ras_fini) 423 + adev->umc.ras_funcs->ras_fini(adev); 424 + 423 425 amdgpu_mmhub_ras_fini(adev); 426 + 424 427 if (adev->gmc.xgmi.ras_funcs && 425 428 adev->gmc.xgmi.ras_funcs->ras_fini) 426 429 adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+6 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 774 774 775 775 switch (info->head.block) { 776 776 case AMDGPU_RAS_BLOCK__UMC: 777 - if (adev->umc.funcs->query_ras_error_count) 778 - adev->umc.funcs->query_ras_error_count(adev, &err_data); 777 + if (adev->umc.ras_funcs && 778 + adev->umc.ras_funcs->query_ras_error_count) 779 + adev->umc.ras_funcs->query_ras_error_count(adev, &err_data); 779 780 /* umc query_ras_error_address is also responsible for clearing 780 781 * error status 781 782 */ 782 - if (adev->umc.funcs->query_ras_error_address) 783 - adev->umc.funcs->query_ras_error_address(adev, &err_data); 783 + if (adev->umc.ras_funcs && 784 + adev->umc.ras_funcs->query_ras_error_address) 785 + adev->umc.ras_funcs->query_ras_error_address(adev, &err_data); 784 786 break; 785 787 case AMDGPU_RAS_BLOCK__SDMA: 786 788 if (adev->sdma.funcs->query_ras_error_count) {
+9 -8
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
··· 60 60 } 61 61 62 62 /* ras init of specific umc version */ 63 - if (adev->umc.funcs && adev->umc.funcs->err_cnt_init) 64 - adev->umc.funcs->err_cnt_init(adev); 63 + if (adev->umc.ras_funcs && 64 + adev->umc.ras_funcs->err_cnt_init) 65 + adev->umc.ras_funcs->err_cnt_init(adev); 65 66 66 67 return 0; 67 68 ··· 96 95 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 97 96 98 97 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); 99 - if (adev->umc.funcs && 100 - adev->umc.funcs->query_ras_error_count) 101 - adev->umc.funcs->query_ras_error_count(adev, ras_error_status); 98 + if (adev->umc.ras_funcs && 99 + adev->umc.ras_funcs->query_ras_error_count) 100 + adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); 102 101 103 - if (adev->umc.funcs && 104 - adev->umc.funcs->query_ras_error_address && 102 + if (adev->umc.ras_funcs && 103 + adev->umc.ras_funcs->query_ras_error_address && 105 104 adev->umc.max_ras_err_cnt_per_query) { 106 105 err_data->err_addr = 107 106 kcalloc(adev->umc.max_ras_err_cnt_per_query, ··· 117 116 /* umc query_ras_error_address is also responsible for clearing 118 117 * error status 119 118 */ 120 - adev->umc.funcs->query_ras_error_address(adev, ras_error_status); 119 + adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); 121 120 } 122 121 123 122 /* only uncorrectable error needs gpu reset */
+7 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
··· 35 35 #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) 36 36 #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) 37 37 38 - struct amdgpu_umc_funcs { 38 + struct amdgpu_umc_ras_funcs { 39 39 void (*err_cnt_init)(struct amdgpu_device *adev); 40 40 int (*ras_late_init)(struct amdgpu_device *adev); 41 + void (*ras_fini)(struct amdgpu_device *adev); 41 42 void (*query_ras_error_count)(struct amdgpu_device *adev, 42 - void *ras_error_status); 43 + void *ras_error_status); 43 44 void (*query_ras_error_address)(struct amdgpu_device *adev, 44 45 void *ras_error_status); 46 + }; 47 + 48 + struct amdgpu_umc_funcs { 45 49 void (*init_registers)(struct amdgpu_device *adev); 46 50 }; 47 51 ··· 63 59 struct ras_common_if *ras_if; 64 60 65 61 const struct amdgpu_umc_funcs *funcs; 62 + const struct amdgpu_umc_ras_funcs *ras_funcs; 66 63 }; 67 64 68 65 int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
+1 -1
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
··· 655 655 adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM; 656 656 adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA; 657 657 adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0]; 658 - adev->umc.funcs = &umc_v8_7_funcs; 658 + adev->umc.ras_funcs = &umc_v8_7_ras_funcs; 659 659 break; 660 660 default: 661 661 break;
+8 -8
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 1155 1155 adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; 1156 1156 adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20; 1157 1157 adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; 1158 - adev->umc.funcs = &umc_v6_1_funcs; 1158 + adev->umc.ras_funcs = &umc_v6_1_ras_funcs; 1159 1159 break; 1160 1160 case CHIP_ARCTURUS: 1161 1161 adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; ··· 1163 1163 adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; 1164 1164 adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT; 1165 1165 adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; 1166 - adev->umc.funcs = &umc_v6_1_funcs; 1166 + adev->umc.ras_funcs = &umc_v6_1_ras_funcs; 1167 1167 break; 1168 1168 default: 1169 1169 break; ··· 1194 1194 { 1195 1195 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1196 1196 1197 - gmc_v9_0_set_gmc_funcs(adev); 1198 - gmc_v9_0_set_irq_funcs(adev); 1199 - gmc_v9_0_set_umc_funcs(adev); 1200 - gmc_v9_0_set_mmhub_funcs(adev); 1201 - gmc_v9_0_set_gfxhub_funcs(adev); 1202 - 1203 1197 if (adev->asic_type == CHIP_VEGA20 || 1204 1198 adev->asic_type == CHIP_ARCTURUS) 1205 1199 adev->gmc.xgmi.supported = true; ··· 1203 1209 adev->gmc.xgmi.connected_to_cpu = 1204 1210 adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); 1205 1211 } 1212 + 1213 + gmc_v9_0_set_gmc_funcs(adev); 1214 + gmc_v9_0_set_irq_funcs(adev); 1215 + gmc_v9_0_set_umc_funcs(adev); 1216 + gmc_v9_0_set_mmhub_funcs(adev); 1217 + gmc_v9_0_set_gfxhub_funcs(adev); 1206 1218 1207 1219 adev->gmc.shared_aperture_start = 0x2000000000000000ULL; 1208 1220 adev->gmc.shared_aperture_end =
+3 -1
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
··· 22 22 */ 23 23 #include "umc_v6_1.h" 24 24 #include "amdgpu_ras.h" 25 + #include "amdgpu_umc.h" 25 26 #include "amdgpu.h" 26 27 27 28 #include "rsmu/rsmu_0_0_2_offset.h" ··· 465 464 umc_v6_1_enable_umc_index_mode(adev); 466 465 } 467 466 468 - const struct amdgpu_umc_funcs umc_v6_1_funcs = { 467 + const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = { 469 468 .err_cnt_init = umc_v6_1_err_cnt_init, 470 469 .ras_late_init = amdgpu_umc_ras_late_init, 470 + .ras_fini = amdgpu_umc_ras_fini, 471 471 .query_ras_error_count = umc_v6_1_query_ras_error_count, 472 472 .query_ras_error_address = umc_v6_1_query_ras_error_address, 473 473 };
+1 -1
drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
··· 45 45 /* umc ce count initial value */ 46 46 #define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) 47 47 48 - extern const struct amdgpu_umc_funcs umc_v6_1_funcs; 48 + extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs; 49 49 extern const uint32_t 50 50 umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; 51 51
+3 -1
drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
··· 22 22 */ 23 23 #include "umc_v6_7.h" 24 24 #include "amdgpu_ras.h" 25 + #include "amdgpu_umc.h" 25 26 #include "amdgpu.h" 26 27 27 28 #include "umc/umc_6_7_0_offset.h" ··· 273 272 } 274 273 } 275 274 276 - const struct amdgpu_umc_funcs umc_v6_7_funcs = { 275 + const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = { 277 276 .ras_late_init = amdgpu_umc_ras_late_init, 277 + .ras_fini = amdgpu_umc_ras_fini, 278 278 .query_ras_error_count = umc_v6_7_query_ras_error_count, 279 279 .query_ras_error_address = umc_v6_7_query_ras_error_address, 280 280 };
+1 -1
drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
··· 32 32 33 33 #define UMC_V6_7_INST_DIST 0x40000 34 34 35 - extern const struct amdgpu_umc_funcs umc_v6_7_funcs; 35 + extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs; 36 36 37 37 #endif
+3 -1
drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
··· 22 22 */ 23 23 #include "umc_v8_7.h" 24 24 #include "amdgpu_ras.h" 25 + #include "amdgpu_umc.h" 25 26 #include "amdgpu.h" 26 27 27 28 #include "rsmu/rsmu_0_0_2_offset.h" ··· 324 323 } 325 324 } 326 325 327 - const struct amdgpu_umc_funcs umc_v8_7_funcs = { 326 + const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = { 328 327 .err_cnt_init = umc_v8_7_err_cnt_init, 329 328 .ras_late_init = amdgpu_umc_ras_late_init, 329 + .ras_fini = amdgpu_umc_ras_fini, 330 330 .query_ras_error_count = umc_v8_7_query_ras_error_count, 331 331 .query_ras_error_address = umc_v8_7_query_ras_error_address, 332 332 };
+1 -1
drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
··· 44 44 /* umc ce count initial value */ 45 45 #define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD) 46 46 47 - extern const struct amdgpu_umc_funcs umc_v8_7_funcs; 47 + extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs; 48 48 extern const uint32_t 49 49 umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM]; 50 50