Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: add UTCL2 RAS poison query for Aldebaran (v2)

Add helper functions to query and reset the RAS UTCL2 poison status.

v2: implement it on the amdgpu side; kfd only calls it.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Tao Zhou and committed by
Alex Deucher
6475ae2b 9d8a8d78

+24
+8
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
··· 724 724 else if (reset) 725 725 amdgpu_amdkfd_gpu_reset(adev); 726 726 } 727 + 728 + bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) 729 + { 730 + if (adev->gfx.ras->query_utcl2_poison_status) 731 + return adev->gfx.ras->query_utcl2_poison_status(adev); 732 + else 733 + return false; 734 + }
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
··· 301 301 bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); 302 302 void amdgpu_amdkfd_block_mmu_notifications(void *p); 303 303 int amdgpu_amdkfd_criu_resume(void *p); 304 + bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); 304 305 305 306 #if IS_ENABLED(CONFIG_HSA_AMD) 306 307 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
··· 202 202 struct amdgpu_gfx_ras { 203 203 struct amdgpu_ras_block_object ras_block; 204 204 void (*enable_watchdog_timer)(struct amdgpu_device *adev); 205 + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); 205 206 }; 206 207 207 208 struct amdgpu_gfx_funcs {
+14
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
··· 1930 1930 mutex_unlock(&adev->grbm_idx_mutex); 1931 1931 } 1932 1932 1933 + static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) 1934 + { 1935 + u32 status = 0; 1936 + struct amdgpu_vmhub *hub; 1937 + 1938 + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; 1939 + status = RREG32(hub->vm_l2_pro_fault_status); 1940 + /* reset page fault status */ 1941 + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); 1942 + 1943 + return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); 1944 + } 1945 + 1933 1946 struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = { 1934 1947 .ras_error_inject = &gfx_v9_4_2_ras_error_inject, 1935 1948 .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, ··· 1956 1943 .hw_ops = &gfx_v9_4_2_ras_ops, 1957 1944 }, 1958 1945 .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer, 1946 + .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status, 1959 1947 };