Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: set completion status as preempted for the resubmission

The driver's CSA buffer is shared by all IBs. When a high-priority IB is
submitted after a preempted IB, the CP overwrites ib_completion_status in
the CSA buffer with the completed status. When the preempted IB is then
resubmitted, the CP reads the completed status and clears some locals stored
for IB resume, which causes a GPU hang in some cases.

Always set the status to preempted for resubmitted IBs instead of reading
everything from the CSA buffer.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2535
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2717
Signed-off-by: Jiadong Zhu <Jiadong.Zhu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Jiadong Zhu and committed by
Alex Deucher
8cbbd115 db996e64

+12
+9
drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
··· 56 56 AMDGPU_MUX_OFFSET_TYPE_CE, 57 57 }; 58 58 59 + enum ib_complete_status { 60 + /* IB not started/reset value, default value. */ 61 + IB_COMPLETION_STATUS_DEFAULT = 0, 62 + /* IB preempted, started but not completed. */ 63 + IB_COMPLETION_STATUS_PREEMPTED = 1, 64 + /* IB completed. */ 65 + IB_COMPLETION_STATUS_COMPLETED = 2, 66 + }; 67 + 59 68 struct amdgpu_ring_mux { 60 69 struct amdgpu_ring *real_ring; 61 70
+3
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
··· 5226 5226 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5227 5227 } 5228 5228 5229 + ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = 5230 + IB_COMPLETION_STATUS_PREEMPTED; 5231 + 5229 5232 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5230 5233 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); 5231 5234 } else {