Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Add gfx ras poison consumption irq handling on gfx v11_0_3

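Add GFX RAS poison consumption IRQ handling for GFX v11_0_3: register the
RLC_GC_FED_INTERRUPT source and, when it fires, read RLC_RLCS_FED_STATUS_0/1
to dispatch the event to the SDMA or GFX RAS block.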

V2:
Move the RAS poison consumption IRQ handling code for GFX
v11_0_3 to gfx_v11_0_3.c.

V5:
Create a dedicated IRQ handler for RLC_GC_FED_INTERRUPT.

V6:
Remove an invalid function call.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by YiPeng Chai and committed by Alex Deucher.
ae6f2db4 79d949a2

+79 -1
+4
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
··· 210 210 struct amdgpu_ras_block_object ras_block; 211 211 void (*enable_watchdog_timer)(struct amdgpu_device *adev); 212 212 bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); 213 + int (*rlc_gc_fed_irq)(struct amdgpu_device *adev, 214 + struct amdgpu_irq_src *source, 215 + struct amdgpu_iv_entry *entry); 213 216 }; 214 217 215 218 struct amdgpu_gfx_funcs { ··· 326 323 struct amdgpu_irq_src priv_inst_irq; 327 324 struct amdgpu_irq_src cp_ecc_error_irq; 328 325 struct amdgpu_irq_src sq_irq; 326 + struct amdgpu_irq_src rlc_gc_fed_irq; 329 327 struct sq_work sq_work; 330 328 331 329 /* gfx status */
+24
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
··· 1301 1301 if (r) 1302 1302 return r; 1303 1303 1304 + /* FED error */ 1305 + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1306 + GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1307 + &adev->gfx.rlc_gc_fed_irq); 1308 + if (r) 1309 + return r; 1310 + 1304 1311 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1305 1312 1306 1313 if (adev->gfx.imu.funcs) { ··· 5999 5992 return 0; 6000 5993 } 6001 5994 5995 + static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 5996 + struct amdgpu_irq_src *source, 5997 + struct amdgpu_iv_entry *entry) 5998 + { 5999 + if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6000 + return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6001 + 6002 + return 0; 6003 + } 6004 + 6002 6005 #if 0 6003 6006 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6004 6007 struct amdgpu_irq_src *src, ··· 6239 6222 .process = gfx_v11_0_priv_inst_irq, 6240 6223 }; 6241 6224 6225 + static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = { 6226 + .process = gfx_v11_0_rlc_gc_fed_irq, 6227 + }; 6228 + 6242 6229 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) 6243 6230 { 6244 6231 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; ··· 6253 6232 6254 6233 adev->gfx.priv_inst_irq.num_types = 1; 6255 6234 adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; 6235 + 6236 + adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */ 6237 + adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs; 6256 6238 } 6257 6239 6258 6240 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
+49 -1
drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
··· 22 22 */ 23 23 24 24 #include "amdgpu.h" 25 + #include "soc21.h" 26 + #include "gc/gc_11_0_3_offset.h" 27 + #include "gc/gc_11_0_3_sh_mask.h" 28 + #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" 29 + #include "soc15.h" 30 + #include "soc15d.h" 31 + #include "gfx_v11_0.h" 25 32 26 33 27 - struct amdgpu_gfx_ras gfx_v11_0_3_ras; 34 + static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev, 35 + struct amdgpu_irq_src *source, 36 + struct amdgpu_iv_entry *entry) 37 + { 38 + uint32_t rlc_status0 = 0, rlc_status1 = 0; 39 + struct ras_common_if *ras_if = NULL; 40 + struct ras_dispatch_if ih_data = { 41 + .entry = entry, 42 + }; 43 + 44 + rlc_status0 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_0)); 45 + rlc_status1 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_1)); 46 + 47 + if (!rlc_status0 && !rlc_status1) { 48 + dev_warn(adev->dev, "RLC_GC_FED irq is generated, but rlc_status0 and rlc_status1 are empty!\n"); 49 + return 0; 50 + } 51 + 52 + /* Use RLC_RLCS_FED_STATUS_0/1 to distinguish FED error block. */ 53 + if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) || 54 + REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR)) 55 + ras_if = adev->sdma.ras_if; 56 + else 57 + ras_if = adev->gfx.ras_if; 58 + 59 + if (!ras_if) { 60 + dev_err(adev->dev, "Gfx or sdma ras block not initialized, rlc_status0:0x%x.\n", 61 + rlc_status0); 62 + return -EINVAL; 63 + } 64 + 65 + ih_data.head = *ras_if; 66 + 67 + dev_warn(adev->dev, "RLC %s FED IRQ\n", ras_if->name); 68 + amdgpu_ras_interrupt_dispatch(adev, &ih_data); 69 + 70 + return 0; 71 + } 72 + 73 + struct amdgpu_gfx_ras gfx_v11_0_3_ras = { 74 + .rlc_gc_fed_irq = gfx_v11_0_3_rlc_gc_fed_irq, 75 + };
+2
drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h
··· 49 49 #define GFX_11_0_0__SRCID__SDMA_SEM_INCOMPLETE_TIMEOUT 65 // 0x41 GPF(Sem incomplete timeout) 50 50 #define GFX_11_0_0__SRCID__SDMA_SEM_WAIT_FAIL_TIMEOUT 66 // 0x42 Semaphore wait fail timeout 51 51 52 + #define GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT 128 // 0x80 FED Interrupt (for data poisoning) 53 + 52 54 #define GFX_11_0_0__SRCID__CP_GENERIC_INT 177 // 0xB1 CP_GENERIC int 53 55 #define GFX_11_0_0__SRCID__CP_PM4_PKT_RSVD_BIT_ERROR 180 // 0xB4 PM4 Pkt Rsvd Bits Error 54 56 #define GFX_11_0_0__SRCID__CP_EOP_INTERRUPT 181 // 0xB5 End-of-Pipe Interrupt