Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Modify sdma block to fit for the unified ras block data and ops

1.Modify sdma block to fit for the unified ras block data and ops.
2.Change amdgpu_sdma_ras_funcs to amdgpu_sdma_ras, and remove the _funcs suffix from the corresponding variable names.
3.Remove the const flag from the sdma ras variable so that the sdma ras block can be inserted into the amdgpu device ras block link list.
4.Invoke amdgpu_ras_register_ras_block function to register sdma ras block into amdgpu device ras block link list.
5.Remove the redundant code about sdma in amdgpu_ras.c after using the unified ras block.
6.Fill the unified ras block's .name, .block, .ras_late_init and .ras_fini for all sdma versions. If .ras_late_init and .ras_fini have been defined by the selected sdma version, the defined functions take effect; if not defined, fill them by default with amdgpu_sdma_ras_late_init and amdgpu_sdma_ras_fini.

v2: squash in warning fix (Alex)

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

yipechai and committed by
Alex Deucher
bdc4292b efe17d5a

+71 -36
-10
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 967 967 struct amdgpu_ras_block_object* block_obj = NULL; 968 968 struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); 969 969 struct ras_err_data err_data = {0, 0, 0, NULL}; 970 - int i; 971 970 972 971 if (!obj) 973 972 return -EINVAL; ··· 978 979 amdgpu_ras_get_ecc_info(adev, &err_data); 979 980 break; 980 981 case AMDGPU_RAS_BLOCK__SDMA: 981 - if (adev->sdma.funcs->query_ras_error_count) { 982 - for (i = 0; i < adev->sdma.num_instances; i++) 983 - adev->sdma.funcs->query_ras_error_count(adev, i, 984 - &err_data); 985 - } 986 - break; 987 982 case AMDGPU_RAS_BLOCK__GFX: 988 983 case AMDGPU_RAS_BLOCK__MMHUB: 989 984 if (!block_obj || !block_obj->hw_ops) { ··· 1083 1090 block_obj->hw_ops->reset_ras_error_status(adev); 1084 1091 break; 1085 1092 case AMDGPU_RAS_BLOCK__SDMA: 1086 - if (adev->sdma.funcs->reset_ras_error_count) 1087 - adev->sdma.funcs->reset_ras_error_count(adev); 1088 - break; 1089 1093 case AMDGPU_RAS_BLOCK__HDP: 1090 1094 if (!block_obj || !block_obj->hw_ops) { 1091 1095 dev_info(adev->dev, "%s doesn't config ras function \n", ras_block_str(block));
+4 -8
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
··· 23 23 24 24 #ifndef __AMDGPU_SDMA_H__ 25 25 #define __AMDGPU_SDMA_H__ 26 + #include "amdgpu_ras.h" 26 27 27 28 /* max number of IP instances */ 28 29 #define AMDGPU_MAX_SDMA_INSTANCES 8 ··· 51 50 bool burst_nop; 52 51 }; 53 52 54 - struct amdgpu_sdma_ras_funcs { 55 - int (*ras_late_init)(struct amdgpu_device *adev, 56 - void *ras_ih_info); 57 - void (*ras_fini)(struct amdgpu_device *adev); 58 - int (*query_ras_error_count)(struct amdgpu_device *adev, 59 - uint32_t instance, void *ras_error_status); 60 - void (*reset_ras_error_count)(struct amdgpu_device *adev); 53 + struct amdgpu_sdma_ras { 54 + struct amdgpu_ras_block_object ras_block; 61 55 }; 62 56 63 57 struct amdgpu_sdma { ··· 69 73 uint32_t srbm_soft_reset; 70 74 bool has_page_queue; 71 75 struct ras_common_if *ras_if; 72 - const struct amdgpu_sdma_ras_funcs *funcs; 76 + struct amdgpu_sdma_ras *ras; 73 77 }; 74 78 75 79 /*
+45 -13
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
··· 1892 1892 sdma_v4_0_setup_ulv(adev); 1893 1893 1894 1894 if (!amdgpu_persistent_edc_harvesting_supported(adev)) { 1895 - if (adev->sdma.funcs && 1896 - adev->sdma.funcs->reset_ras_error_count) 1897 - adev->sdma.funcs->reset_ras_error_count(adev); 1895 + if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && 1896 + adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) 1897 + adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); 1898 1898 } 1899 1899 1900 - if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) 1901 - return adev->sdma.funcs->ras_late_init(adev, &ih_info); 1900 + if (adev->sdma.ras && adev->sdma.ras->ras_block.ras_late_init) 1901 + return adev->sdma.ras->ras_block.ras_late_init(adev, &ih_info); 1902 1902 else 1903 1903 return 0; 1904 1904 } ··· 2001 2001 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2002 2002 int i; 2003 2003 2004 - if (adev->sdma.funcs && adev->sdma.funcs->ras_fini) 2005 - adev->sdma.funcs->ras_fini(adev); 2004 + if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && 2005 + adev->sdma.ras->ras_block.ras_fini) 2006 + adev->sdma.ras->ras_block.ras_fini(adev); 2006 2007 2007 2008 for (i = 0; i < adev->sdma.num_instances; i++) { 2008 2009 amdgpu_ring_fini(&adev->sdma.instance[i].ring); ··· 2741 2740 } 2742 2741 } 2743 2742 2744 - static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, 2743 + static int sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *adev, 2745 2744 uint32_t instance, void *ras_error_status) 2746 2745 { 2747 2746 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; ··· 2763 2762 return 0; 2764 2763 }; 2765 2764 2765 + static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) 2766 + { 2767 + int i = 0; 2768 + for (i = 0; i < adev->sdma.num_instances; i++) { 2769 + if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, ras_error_status)) 2770 + { 2771 + dev_err(adev->dev, "Query ras error count failed in SDMA%d \n", i);
2772 + return; 2773 + } 2774 + } 2775 + } 2776 + 2766 2777 static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev) 2767 2778 { 2768 2779 int i; ··· 2786 2773 } 2787 2774 } 2788 2775 2789 - static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { 2790 - .ras_late_init = amdgpu_sdma_ras_late_init, 2791 - .ras_fini = amdgpu_sdma_ras_fini, 2776 + const struct amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops = { 2792 2777 .query_ras_error_count = sdma_v4_0_query_ras_error_count, 2793 2778 .reset_ras_error_count = sdma_v4_0_reset_ras_error_count, 2779 + }; 2780 + 2781 + static struct amdgpu_sdma_ras sdma_v4_0_ras = { 2782 + .ras_block = { 2783 + .hw_ops = &sdma_v4_0_ras_hw_ops, 2784 + }, 2794 2785 }; 2795 2786 2796 2787 static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) ··· 2802 2785 switch (adev->ip_versions[SDMA0_HWIP][0]) { 2803 2786 case IP_VERSION(4, 2, 0): 2804 2787 case IP_VERSION(4, 2, 2): 2805 - adev->sdma.funcs = &sdma_v4_0_ras_funcs; 2788 + adev->sdma.ras = &sdma_v4_0_ras; 2806 2789 break; 2807 2790 case IP_VERSION(4, 4, 0): 2808 - adev->sdma.funcs = &sdma_v4_4_ras_funcs; 2791 + adev->sdma.ras = &sdma_v4_4_ras; 2809 2792 break; 2810 2793 default: 2811 2794 break; 2795 + } 2796 + 2797 + if (adev->sdma.ras) { 2798 + amdgpu_ras_register_ras_block(adev, &adev->sdma.ras->ras_block); 2799 + 2800 + strcpy(adev->sdma.ras->ras_block.name,"sdma"); 2801 + adev->sdma.ras->ras_block.block = AMDGPU_RAS_BLOCK__SDMA; 2802 + 2803 + /* If don't define special ras_late_init function, use default ras_late_init */ 2804 + if (!adev->sdma.ras->ras_block.ras_late_init) 2805 + adev->sdma.ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init; 2806 + 2807 + /* If don't define special ras_fini function, use default ras_fini */ 2808 + if (!adev->sdma.ras->ras_block.ras_fini) 2809 + adev->sdma.ras->ras_block.ras_fini = amdgpu_sdma_ras_fini; 2812 2810 } 2813 2811 } 2814 2812
+21 -4
drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
··· 188 188 } 189 189 } 190 190 191 - static int sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, 191 + static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev, 192 192 uint32_t instance, 193 193 void *ras_error_status) 194 194 { ··· 245 245 } 246 246 } 247 247 248 - const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs = { 249 - .ras_late_init = amdgpu_sdma_ras_late_init, 250 - .ras_fini = amdgpu_sdma_ras_fini, 248 + static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) 249 + { 250 + int i = 0; 251 + for (i = 0; i < adev->sdma.num_instances; i++) { 252 + if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status)) 253 + { 254 + dev_err(adev->dev, "Query ras error count failed in SDMA%d \n", i); 255 + return; 256 + } 257 + } 258 + 259 + } 260 + 261 + const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = { 251 262 .query_ras_error_count = sdma_v4_4_query_ras_error_count, 252 263 .reset_ras_error_count = sdma_v4_4_reset_ras_error_count, 264 + }; 265 + 266 + struct amdgpu_sdma_ras sdma_v4_4_ras = { 267 + .ras_block = { 268 + .hw_ops = &sdma_v4_4_ras_hw_ops, 269 + }, 253 270 };
+1 -1
drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
··· 23 23 #ifndef __SDMA_V4_4_H__ 24 24 #define __SDMA_V4_4_H__ 25 25 26 - extern const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs; 26 + extern struct amdgpu_sdma_ras sdma_v4_4_ras; 27 27 28 28 #endif