Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: move convert_error_address out of umc_ras

The RAS error address translation algorithm is common
across dGPU and A + A platforms as long as the SOC
integrates the same generation of UMC IP.

UMC RAS is managed by x86 MCA on the A + A platform;
umc_ras in the GPU driver is not initialized at all on
the A + A platform. In such a case, any umc_ras callback
implemented for the dGPU config shouldn't be invoked
from an A + A specific callback.

The change moves convert_error_address out of the dGPU
umc_ras structure and makes it shared between the A + A
and dGPU configs.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Hawking Zhang and committed by
Alex Deucher
6c0ca748 027bf0ce

+18 -12
+12 -4
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 36 36 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" 37 37 #include "atom.h" 38 38 #include "amdgpu_reset.h" 39 + #include "umc_v6_7.h" 39 40 40 41 #ifdef CONFIG_X86_MCE_AMD 41 42 #include <asm/mce.h> ··· 2900 2899 /* 2901 2900 * Translate UMC channel address to Physical address 2902 2901 */ 2903 - if (adev->umc.ras && 2904 - adev->umc.ras->convert_ras_error_address) 2905 - adev->umc.ras->convert_ras_error_address(adev, 2906 - &err_data, m->addr, ch_inst, umc_inst); 2902 + switch (adev->ip_versions[UMC_HWIP][0]) { 2903 + case IP_VERSION(6, 7, 0): 2904 + umc_v6_7_convert_error_address(adev, 2905 + &err_data, m->addr, ch_inst, umc_inst); 2906 + break; 2907 + default: 2908 + dev_warn(adev->dev, 2909 + "UMC address to Physical address translation is not supported\n"); 2910 + kfree(err_data.err_addr); 2911 + return NOTIFY_DONE; 2912 + } 2907 2913 2908 2914 if (amdgpu_bad_page_threshold != 0) { 2909 2915 amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
-3
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
··· 51 51 struct amdgpu_ras_block_object ras_block; 52 52 void (*err_cnt_init)(struct amdgpu_device *adev); 53 53 bool (*query_ras_poison_mode)(struct amdgpu_device *adev); 54 - void (*convert_ras_error_address)(struct amdgpu_device *adev, 55 - struct ras_err_data *err_data, uint64_t err_addr, 56 - uint32_t ch_inst, uint32_t umc_inst); 57 54 void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, 58 55 void *ras_error_status); 59 56 void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
+3 -4
drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
··· 187 187 } 188 188 } 189 189 190 - static void umc_v6_7_convert_error_address(struct amdgpu_device *adev, 191 - struct ras_err_data *err_data, uint64_t err_addr, 192 - uint32_t ch_inst, uint32_t umc_inst) 190 + void umc_v6_7_convert_error_address(struct amdgpu_device *adev, 191 + struct ras_err_data *err_data, uint64_t err_addr, 192 + uint32_t ch_inst, uint32_t umc_inst) 193 193 { 194 194 uint32_t channel_index; 195 195 uint64_t soc_pa, retired_page, column; ··· 553 553 .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, 554 554 .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, 555 555 .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, 556 - .convert_ras_error_address = umc_v6_7_convert_error_address, 557 556 };
+3 -1
drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
··· 71 71 umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; 72 72 extern const uint32_t 73 73 umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; 74 - 74 + void umc_v6_7_convert_error_address(struct amdgpu_device *adev, 75 + struct ras_err_data *err_data, uint64_t err_addr, 76 + uint32_t ch_inst, uint32_t umc_inst); 75 77 #endif