Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Add umc v8_14 ras functions

Add umc v8_14 ras functions.

Signed-off-by: Candice Li <candice.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Candice Li and committed by
Alex Deucher
33f1aa21 334a8158

+229 -2
+1 -1
drivers/gpu/drm/amd/amdgpu/Makefile
··· 105 105 106 106 # add UMC block 107 107 amdgpu-y += \ 108 - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o 108 + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o 109 109 110 110 # add IH block 111 111 amdgpu-y += \
+17 -1
drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
··· 40 40 #include "gfxhub_v12_0.h" 41 41 #include "mmhub_v4_1_0.h" 42 42 #include "athub_v4_1_0.h" 43 - 43 + #include "umc_v8_14.h" 44 44 45 45 static int gmc_v12_0_ecc_interrupt_state(struct amdgpu_device *adev, 46 46 struct amdgpu_irq_src *src, ··· 581 581 582 582 static void gmc_v12_0_set_umc_funcs(struct amdgpu_device *adev) 583 583 { 584 + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { 585 + case IP_VERSION(8, 14, 0): 586 + adev->umc.channel_inst_num = UMC_V8_14_CHANNEL_INSTANCE_NUM; 587 + adev->umc.umc_inst_num = UMC_V8_14_UMC_INSTANCE_NUM(adev); 588 + adev->umc.node_inst_num = 0; 589 + adev->umc.max_ras_err_cnt_per_query = UMC_V8_14_TOTAL_CHANNEL_NUM(adev); 590 + adev->umc.channel_offs = UMC_V8_14_PER_CHANNEL_OFFSET; 591 + adev->umc.ras = &umc_v8_14_ras; 592 + break; 593 + default: 594 + break; 595 + } 584 596 } 585 597 586 598 ··· 840 828 adev->vm_manager.first_kfd_vmid = 8; 841 829 842 830 amdgpu_vm_manager_init(adev); 831 + 832 + r = amdgpu_gmc_ras_sw_init(adev); 833 + if (r) 834 + return r; 843 835 844 836 return 0; 845 837 }
+160
drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
··· 1 + /* 2 + * Copyright 2024 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + */ 23 + #include "umc_v8_14.h" 24 + #include "amdgpu_ras.h" 25 + #include "amdgpu_umc.h" 26 + #include "amdgpu.h" 27 + #include "umc/umc_8_14_0_offset.h" 28 + #include "umc/umc_8_14_0_sh_mask.h" 29 + 30 + static inline uint32_t get_umc_v8_14_reg_offset(struct amdgpu_device *adev, 31 + uint32_t umc_inst, 32 + uint32_t ch_inst) 33 + { 34 + return adev->umc.channel_offs * ch_inst + UMC_V8_14_INST_DIST * umc_inst; 35 + } 36 + 37 + static int umc_v8_14_clear_error_count_per_channel(struct amdgpu_device *adev, 38 + uint32_t node_inst, uint32_t umc_inst, 39 + uint32_t ch_inst, void *data) 40 + { 41 + uint32_t ecc_err_cnt_addr; 42 + uint32_t umc_reg_offset = 43 + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); 44 + 45 + ecc_err_cnt_addr = 46 + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); 47 + 48 + /* clear error count */ 49 + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, 50 + UMC_V8_14_CE_CNT_INIT); 51 + 52 + return 0; 53 + } 54 + 55 + static void umc_v8_14_clear_error_count(struct amdgpu_device *adev) 56 + { 57 + amdgpu_umc_loop_channels(adev, 58 + umc_v8_14_clear_error_count_per_channel, NULL); 59 + } 60 + 61 + static void umc_v8_14_query_correctable_error_count(struct amdgpu_device *adev, 62 + uint32_t umc_reg_offset, 63 + unsigned long *error_count) 64 + { 65 + uint32_t ecc_err_cnt, ecc_err_cnt_addr; 66 + 67 + /* UMC 8_14 registers */ 68 + ecc_err_cnt_addr = 69 + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); 70 + 71 + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); 72 + *error_count += 73 + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccErrCnt) - 74 + UMC_V8_14_CE_CNT_INIT); 75 + } 76 + 77 + static void umc_v8_14_query_uncorrectable_error_count(struct amdgpu_device *adev, 78 + uint32_t umc_reg_offset, 79 + unsigned long *error_count) 80 + { 81 + uint32_t ecc_err_cnt, ecc_err_cnt_addr; 82 + /* UMC 8_14 registers */ 83 + ecc_err_cnt_addr = 84 + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); 85 + 86 + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); 87 + *error_count += 88 + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccUnCorrErrCnt) - 89 + UMC_V8_14_CE_CNT_INIT); 90 + } 91 + 92 + static int umc_v8_14_query_error_count_per_channel(struct amdgpu_device *adev, 93 + uint32_t node_inst, uint32_t umc_inst, 94 + uint32_t ch_inst, void *data) 95 + { 96 + struct ras_err_data *err_data = (struct ras_err_data *)data; 97 + uint32_t umc_reg_offset = 98 + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); 99 + 100 + umc_v8_14_query_correctable_error_count(adev, 101 + umc_reg_offset, 102 + &(err_data->ce_count)); 103 + umc_v8_14_query_uncorrectable_error_count(adev, 104 + umc_reg_offset, 105 + &(err_data->ue_count)); 106 + 107 + return 0; 108 + } 109 + 110 + static void umc_v8_14_query_ras_error_count(struct amdgpu_device *adev, 111 + void *ras_error_status) 112 + { 113 + amdgpu_umc_loop_channels(adev, 114 + umc_v8_14_query_error_count_per_channel, ras_error_status); 115 + 116 + umc_v8_14_clear_error_count(adev); 117 + } 118 + 119 + static int umc_v8_14_err_cnt_init_per_channel(struct amdgpu_device *adev, 120 + uint32_t node_inst, uint32_t umc_inst, 121 + uint32_t ch_inst, void *data) 122 + { 123 + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; 124 + uint32_t ecc_err_cnt_addr; 125 + uint32_t umc_reg_offset = 126 + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); 127 + 128 + ecc_err_cnt_sel_addr = 129 + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCntSel); 130 + ecc_err_cnt_addr = 131 + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); 132 + 133 + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); 134 + 135 + /* set ce error interrupt type to APIC based interrupt */ 136 + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_GeccErrCntSel, 137 + GeccErrInt, 0x1); 138 + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); 139 + /* set error count to initial value */ 140 + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_14_CE_CNT_INIT); 141 + 142 + return 0; 143 + } 144 + 145 + static void umc_v8_14_err_cnt_init(struct amdgpu_device *adev) 146 + { 147 + amdgpu_umc_loop_channels(adev, 148 + umc_v8_14_err_cnt_init_per_channel, NULL); 149 + } 150 + 151 + const struct amdgpu_ras_block_hw_ops umc_v8_14_ras_hw_ops = { 152 + .query_ras_error_count = umc_v8_14_query_ras_error_count, 153 + }; 154 + 155 + struct amdgpu_umc_ras umc_v8_14_ras = { 156 + .ras_block = { 157 + .hw_ops = &umc_v8_14_ras_hw_ops, 158 + }, 159 + .err_cnt_init = umc_v8_14_err_cnt_init, 160 + };
+51
drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
··· 1 + /* 2 + * Copyright 2024 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + * 22 + */ 23 + #ifndef __UMC_V8_14_H__ 24 + #define __UMC_V8_14_H__ 25 + 26 + #include "soc15_common.h" 27 + #include "amdgpu.h" 28 + 29 + /* number of umc channel instance with memory map register access */ 30 + #define UMC_V8_14_CHANNEL_INSTANCE_NUM 2 31 + /* number of umc instance with memory map register access */ 32 + #define UMC_V8_14_UMC_INSTANCE_NUM(adev) ((adev)->umc.node_inst_num) 33 + 34 + /* Total channel instances for all available umc nodes */ 35 + #define UMC_V8_14_TOTAL_CHANNEL_NUM(adev) \ 36 + (UMC_V8_14_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc) 37 + 38 + /* UMC register per channel offset */ 39 + #define UMC_V8_14_PER_CHANNEL_OFFSET 0x400 40 + 41 + #define UMC_V8_14_INST_DIST 0x40000 42 + 43 + /* EccErrCnt max value */ 44 + #define UMC_V8_14_CE_CNT_MAX 0xffff 45 + /* umc ce interrupt threshold */ 46 + #define UMC_V8_14_CE_INT_THRESHOLD 0xffff 47 + /* umc ce count initial value */ 48 + #define UMC_V8_14_CE_CNT_INIT (UMC_V8_14_CE_CNT_MAX - UMC_V8_14_CE_INT_THRESHOLD) 49 + 50 + extern struct amdgpu_umc_ras umc_v8_14_ras; 51 + #endif