Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Modify umc block to fit for the unified ras block data and ops

1. Modify the umc block to fit the unified ras block data and ops.
2. Rename amdgpu_umc_ras_funcs to amdgpu_umc_ras, and drop the _funcs suffix from the corresponding variable name.
3. Remove the const qualifier from the umc ras variable so that the umc ras block can be inserted into the amdgpu device ras block linked list.
4. Invoke the amdgpu_ras_register_ras_block function to register the umc ras block into the amdgpu device ras block linked list.
5. Remove the redundant umc code in amdgpu_ras.c now that the unified ras block is used.
6. Fill the unified ras block's .name, .block, .ras_late_init and .ras_fini for all umc versions. If .ras_late_init and .ras_fini are already defined by the selected umc version, those definitions take effect; if not, they are filled in by default with amdgpu_umc_ras_late_init and amdgpu_umc_ras_fini.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

yipechai and committed by
Alex Deucher
efe17d5a 2e54fe5d

+99 -64
+4 -6
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
··· 440 440 { 441 441 int r; 442 442 443 - if (adev->umc.ras_funcs && 444 - adev->umc.ras_funcs->ras_late_init) { 445 - r = adev->umc.ras_funcs->ras_late_init(adev); 443 + if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) { 444 + r = adev->umc.ras->ras_block.ras_late_init(adev, NULL); 446 445 if (r) 447 446 return r; 448 447 } ··· 495 496 496 497 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) 497 498 { 498 - if (adev->umc.ras_funcs && 499 - adev->umc.ras_funcs->ras_fini) 500 - adev->umc.ras_funcs->ras_fini(adev); 499 + if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini) 500 + adev->umc.ras->ras_block.ras_fini(adev); 501 501 502 502 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini) 503 503 adev->mmhub.ras->ras_block.ras_fini(adev);
+15 -15
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 939 939 */ 940 940 ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc)); 941 941 if (ret == -EOPNOTSUPP) { 942 - if (adev->umc.ras_funcs && 943 - adev->umc.ras_funcs->query_ras_error_count) 944 - adev->umc.ras_funcs->query_ras_error_count(adev, err_data); 942 + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && 943 + adev->umc.ras->ras_block.hw_ops->query_ras_error_count) 944 + adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data); 945 945 946 946 /* umc query_ras_error_address is also responsible for clearing 947 947 * error status 948 948 */ 949 - if (adev->umc.ras_funcs && 950 - adev->umc.ras_funcs->query_ras_error_address) 951 - adev->umc.ras_funcs->query_ras_error_address(adev, err_data); 949 + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && 950 + adev->umc.ras->ras_block.hw_ops->query_ras_error_address) 951 + adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data); 952 952 } else if (!ret) { 953 - if (adev->umc.ras_funcs && 954 - adev->umc.ras_funcs->ecc_info_query_ras_error_count) 955 - adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, err_data); 953 + if (adev->umc.ras && 954 + adev->umc.ras->ecc_info_query_ras_error_count) 955 + adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data); 956 956 957 - if (adev->umc.ras_funcs && 958 - adev->umc.ras_funcs->ecc_info_query_ras_error_address) 959 - adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, err_data); 957 + if (adev->umc.ras && 958 + adev->umc.ras->ecc_info_query_ras_error_address) 959 + adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data); 960 960 } 961 961 } 962 962 ··· 2412 2412 } 2413 2413 else if (adev->df.funcs && 2414 2414 adev->df.funcs->query_ras_poison_mode && 2415 - adev->umc.ras_funcs && 2416 - adev->umc.ras_funcs->query_ras_poison_mode) { 2415 + adev->umc.ras && 2416 + adev->umc.ras->query_ras_poison_mode) { 2417 2417 df_poison = 2418 2418 adev->df.funcs->query_ras_poison_mode(adev); 2419 2419 
umc_poison = 2420 - adev->umc.ras_funcs->query_ras_poison_mode(adev); 2420 + adev->umc.ras->query_ras_poison_mode(adev); 2421 2421 /* Only poison is set in both DF and UMC, we can support it */ 2422 2422 if (df_poison && umc_poison) 2423 2423 con->poison_supported = true;
+16 -16
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
··· 35 35 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); 36 36 ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc)); 37 37 if (ret == -EOPNOTSUPP) { 38 - if (adev->umc.ras_funcs && 39 - adev->umc.ras_funcs->query_ras_error_count) 40 - adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); 38 + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && 39 + adev->umc.ras->ras_block.hw_ops->query_ras_error_count) 40 + adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status); 41 41 42 - if (adev->umc.ras_funcs && 43 - adev->umc.ras_funcs->query_ras_error_address && 42 + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && 43 + adev->umc.ras->ras_block.hw_ops->query_ras_error_address && 44 44 adev->umc.max_ras_err_cnt_per_query) { 45 45 err_data->err_addr = 46 46 kcalloc(adev->umc.max_ras_err_cnt_per_query, ··· 56 56 /* umc query_ras_error_address is also responsible for clearing 57 57 * error status 58 58 */ 59 - adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); 59 + adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status); 60 60 } 61 61 } else if (!ret) { 62 - if (adev->umc.ras_funcs && 63 - adev->umc.ras_funcs->ecc_info_query_ras_error_count) 64 - adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, ras_error_status); 62 + if (adev->umc.ras && 63 + adev->umc.ras->ecc_info_query_ras_error_count) 64 + adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status); 65 65 66 - if (adev->umc.ras_funcs && 67 - adev->umc.ras_funcs->ecc_info_query_ras_error_address && 66 + if (adev->umc.ras && 67 + adev->umc.ras->ecc_info_query_ras_error_address && 68 68 adev->umc.max_ras_err_cnt_per_query) { 69 69 err_data->err_addr = 70 70 kcalloc(adev->umc.max_ras_err_cnt_per_query, ··· 80 80 /* umc query_ras_error_address is also responsible for clearing 81 81 * error status 82 82 */ 83 - adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, ras_error_status); 83 + 
adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status); 84 84 } 85 85 } 86 86 ··· 136 136 return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); 137 137 } 138 138 139 - int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) 139 + int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info) 140 140 { 141 141 int r; 142 142 struct ras_fs_if fs_info = { ··· 172 172 } 173 173 174 174 /* ras init of specific umc version */ 175 - if (adev->umc.ras_funcs && 176 - adev->umc.ras_funcs->err_cnt_init) 177 - adev->umc.ras_funcs->err_cnt_init(adev); 175 + if (adev->umc.ras && 176 + adev->umc.ras->err_cnt_init) 177 + adev->umc.ras->err_cnt_init(adev); 178 178 179 179 return 0; 180 180
+5 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
··· 20 20 */ 21 21 #ifndef __AMDGPU_UMC_H__ 22 22 #define __AMDGPU_UMC_H__ 23 + #include "amdgpu_ras.h" 23 24 24 25 /* 25 26 * (addr / 256) * 4096, the higher 26 bits in ErrorAddr ··· 41 40 #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) 42 41 #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) 43 42 44 - struct amdgpu_umc_ras_funcs { 43 + struct amdgpu_umc_ras { 44 + struct amdgpu_ras_block_object ras_block; 45 45 void (*err_cnt_init)(struct amdgpu_device *adev); 46 - int (*ras_late_init)(struct amdgpu_device *adev); 47 - void (*ras_fini)(struct amdgpu_device *adev); 48 - void (*query_ras_error_count)(struct amdgpu_device *adev, 49 - void *ras_error_status); 50 - void (*query_ras_error_address)(struct amdgpu_device *adev, 51 - void *ras_error_status); 52 46 bool (*query_ras_poison_mode)(struct amdgpu_device *adev); 53 47 void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, 54 48 void *ras_error_status); ··· 69 73 struct ras_common_if *ras_if; 70 74 71 75 const struct amdgpu_umc_funcs *funcs; 72 - const struct amdgpu_umc_ras_funcs *ras_funcs; 76 + struct amdgpu_umc_ras *ras; 73 77 }; 74 78 75 - int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); 79 + int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info); 76 80 void amdgpu_umc_ras_fini(struct amdgpu_device *adev); 77 81 int amdgpu_umc_poison_handler(struct amdgpu_device *adev, 78 82 void *ras_error_status,
+15 -1
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
··· 664 664 adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM; 665 665 adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA; 666 666 adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0]; 667 - adev->umc.ras_funcs = &umc_v8_7_ras_funcs; 667 + adev->umc.ras = &umc_v8_7_ras; 668 668 break; 669 669 default: 670 670 break; 671 + } 672 + if (adev->umc.ras) { 673 + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); 674 + 675 + strcpy(adev->umc.ras->ras_block.name,"umc"); 676 + adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC; 677 + 678 + /* If don't define special ras_late_init function, use default ras_late_init */ 679 + if (!adev->umc.ras->ras_block.ras_late_init) 680 + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; 681 + 682 + /* If don't define special ras_fini function, use default ras_fini */ 683 + if (!adev->umc.ras->ras_block.ras_fini) 684 + adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; 671 685 } 672 686 } 673 687
+18 -3
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 1202 1202 adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; 1203 1203 adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20; 1204 1204 adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; 1205 - adev->umc.ras_funcs = &umc_v6_1_ras_funcs; 1205 + adev->umc.ras = &umc_v6_1_ras; 1206 1206 break; 1207 1207 case IP_VERSION(6, 1, 2): 1208 1208 adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; ··· 1210 1210 adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; 1211 1211 adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT; 1212 1212 adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; 1213 - adev->umc.ras_funcs = &umc_v6_1_ras_funcs; 1213 + adev->umc.ras = &umc_v6_1_ras; 1214 1214 break; 1215 1215 case IP_VERSION(6, 7, 0): 1216 1216 adev->umc.max_ras_err_cnt_per_query = UMC_V6_7_TOTAL_CHANNEL_NUM; ··· 1218 1218 adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM; 1219 1219 adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET; 1220 1220 if (!adev->gmc.xgmi.connected_to_cpu) 1221 - adev->umc.ras_funcs = &umc_v6_7_ras_funcs; 1221 + adev->umc.ras = &umc_v6_7_ras; 1222 1222 if (1 & adev->smuio.funcs->get_die_id(adev)) 1223 1223 adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0]; 1224 1224 else ··· 1226 1226 break; 1227 1227 default: 1228 1228 break; 1229 + } 1230 + 1231 + if (adev->umc.ras) { 1232 + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); 1233 + 1234 + strcpy(adev->umc.ras->ras_block.name,"umc"); 1235 + adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC; 1236 + 1237 + /* If don't define special ras_late_init function, use default ras_late_init */ 1238 + if (!adev->umc.ras->ras_block.ras_late_init) 1239 + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; 1240 + 1241 + /* If don't define special ras_fini function, use default ras_fini */ 1242 + if (!adev->umc.ras->ras_block.ras_fini) 1243 + adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; 1229 1244 } 1230 1245 } 1231 
1246
+8 -4
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
··· 465 465 umc_v6_1_enable_umc_index_mode(adev); 466 466 } 467 467 468 - const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = { 469 - .err_cnt_init = umc_v6_1_err_cnt_init, 470 - .ras_late_init = amdgpu_umc_ras_late_init, 471 - .ras_fini = amdgpu_umc_ras_fini, 468 + const struct amdgpu_ras_block_hw_ops umc_v6_1_ras_hw_ops = { 472 469 .query_ras_error_count = umc_v6_1_query_ras_error_count, 473 470 .query_ras_error_address = umc_v6_1_query_ras_error_address, 471 + }; 472 + 473 + struct amdgpu_umc_ras umc_v6_1_ras = { 474 + .ras_block = { 475 + .hw_ops = &umc_v6_1_ras_hw_ops, 476 + }, 477 + .err_cnt_init = umc_v6_1_err_cnt_init, 474 478 };
+1 -1
drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
··· 45 45 /* umc ce count initial value */ 46 46 #define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) 47 47 48 - extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs; 48 + extern struct amdgpu_umc_ras umc_v6_1_ras; 49 49 extern const uint32_t 50 50 umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; 51 51
+7 -3
drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
··· 480 480 return true; 481 481 } 482 482 483 - const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = { 484 - .ras_late_init = amdgpu_umc_ras_late_init, 485 - .ras_fini = amdgpu_umc_ras_fini, 483 + const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = { 486 484 .query_ras_error_count = umc_v6_7_query_ras_error_count, 487 485 .query_ras_error_address = umc_v6_7_query_ras_error_address, 486 + }; 487 + 488 + struct amdgpu_umc_ras umc_v6_7_ras = { 489 + .ras_block = { 490 + .hw_ops = &umc_v6_7_ras_hw_ops, 491 + }, 488 492 .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, 489 493 .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, 490 494 .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
+1 -1
drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
··· 43 43 #define UMC_V6_7_TOTAL_CHANNEL_NUM (UMC_V6_7_CHANNEL_INSTANCE_NUM * UMC_V6_7_UMC_INSTANCE_NUM) 44 44 /* UMC regiser per channel offset */ 45 45 #define UMC_V6_7_PER_CHANNEL_OFFSET 0x400 46 - extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs; 46 + extern struct amdgpu_umc_ras umc_v6_7_ras; 47 47 extern const uint32_t 48 48 umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; 49 49 extern const uint32_t
+8 -4
drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
··· 324 324 } 325 325 } 326 326 327 - const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = { 328 - .err_cnt_init = umc_v8_7_err_cnt_init, 329 - .ras_late_init = amdgpu_umc_ras_late_init, 330 - .ras_fini = amdgpu_umc_ras_fini, 327 + const struct amdgpu_ras_block_hw_ops umc_v8_7_ras_hw_ops = { 331 328 .query_ras_error_count = umc_v8_7_query_ras_error_count, 332 329 .query_ras_error_address = umc_v8_7_query_ras_error_address, 330 + }; 331 + 332 + struct amdgpu_umc_ras umc_v8_7_ras = { 333 + .ras_block = { 334 + .hw_ops = &umc_v8_7_ras_hw_ops, 335 + }, 336 + .err_cnt_init = umc_v8_7_err_cnt_init, 333 337 };
+1 -1
drivers/gpu/drm/amd/amdgpu/umc_v8_7.h
··· 44 44 /* umc ce count initial value */ 45 45 #define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD) 46 46 47 - extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs; 47 + extern struct amdgpu_umc_ras umc_v8_7_ras; 48 48 extern const uint32_t 49 49 umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM]; 50 50