Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: add reset_ras_error_count function for MMHUB

MMHUB RAS error counters are dirty after a cold reboot.
A read operation is needed to reset them to 0.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Hawking Zhang and committed by
Alex Deucher
fe5211f1 86153f1b

+28
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
··· 26 26 int (*ras_late_init)(struct amdgpu_device *adev); 27 27 void (*query_ras_error_count)(struct amdgpu_device *adev, 28 28 void *ras_error_status); 29 + void (*reset_ras_error_count)(struct amdgpu_device *adev); 29 30 }; 30 31 31 32 struct amdgpu_mmhub {
+3
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
··· 948 948 } 949 949 } 950 950 951 + if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count) 952 + adev->mmhub.funcs->reset_ras_error_count(adev); 953 + 951 954 r = amdgpu_gmc_ras_late_init(adev); 952 955 if (r) 953 956 return r;
+12
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
··· 747 747 err_data->ue_count += ded_count; 748 748 } 749 749 750 + static void mmhub_v1_0_reset_ras_error_count(struct amdgpu_device *adev) 751 + { 752 + uint32_t i; 753 + 754 + /* read back edc counter registers to reset the counters to 0 */ 755 + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { 756 + for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_edc_cnt_regs); i++) 757 + RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i])); 758 + } 759 + } 760 + 750 761 const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = { 751 762 .ras_late_init = amdgpu_mmhub_ras_late_init, 752 763 .query_ras_error_count = mmhub_v1_0_query_ras_error_count, 764 + .reset_ras_error_count = mmhub_v1_0_reset_ras_error_count, 753 765 };
+12
drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
··· 1596 1596 err_data->ue_count += ded_count; 1597 1597 } 1598 1598 1599 + static void mmhub_v9_4_reset_ras_error_count(struct amdgpu_device *adev) 1600 + { 1601 + uint32_t i; 1602 + 1603 + /* read back edc counter registers to reset the counters to 0 */ 1604 + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { 1605 + for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); i++) 1606 + RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i])); 1607 + } 1608 + } 1609 + 1599 1610 const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = { 1600 1611 .ras_late_init = amdgpu_mmhub_ras_late_init, 1601 1612 .query_ras_error_count = mmhub_v9_4_query_ras_error_count, 1613 + .reset_ras_error_count = mmhub_v9_4_reset_ras_error_count, 1602 1614 };