Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: invalidate L2 before SDMA IBs (v2)

This fixes GPU hangs due to cache coherency issues.

v2: Split the version bump to a separate patch

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org

authored by Marek Olšák and committed by Alex Deucher
fdf83646 c938628c

+29 -1
+16
drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
··· 73 73 #define SDMA_OP_AQL_COPY 0 74 74 #define SDMA_OP_AQL_BARRIER_OR 0 75 75 76 + #define SDMA_GCR_RANGE_IS_PA (1 << 18) 77 + #define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16) 78 + #define SDMA_GCR_GL2_WB (1 << 15) 79 + #define SDMA_GCR_GL2_INV (1 << 14) 80 + #define SDMA_GCR_GL2_DISCARD (1 << 13) 81 + #define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11) 82 + #define SDMA_GCR_GL2_US (1 << 10) 83 + #define SDMA_GCR_GL1_INV (1 << 9) 84 + #define SDMA_GCR_GLV_INV (1 << 8) 85 + #define SDMA_GCR_GLK_INV (1 << 7) 86 + #define SDMA_GCR_GLK_WB (1 << 6) 87 + #define SDMA_GCR_GLM_INV (1 << 5) 88 + #define SDMA_GCR_GLM_WB (1 << 4) 89 + #define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2) 90 + #define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0) 91 + 76 92 /*define for op field*/ 77 93 #define SDMA_PKT_HEADER_op_offset 0 78 94 #define SDMA_PKT_HEADER_op_mask 0x000000FF
+13 -1
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
··· 382 382 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 383 383 uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); 384 384 385 + /* Invalidate L2, because if we don't do it, we might get stale cache 386 + * lines from previous IBs. 387 + */ 388 + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ)); 389 + amdgpu_ring_write(ring, 0); 390 + amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV | 391 + SDMA_GCR_GL2_WB | 392 + SDMA_GCR_GLM_INV | 393 + SDMA_GCR_GLM_WB) << 16); 394 + amdgpu_ring_write(ring, 0xffffff80); 395 + amdgpu_ring_write(ring, 0xffff); 396 + 385 397 /* An IB packet must end on a 8 DW boundary--the next dword 386 398 * must be on a 8-dword boundary. Our IB packet below is 6 387 399 * dwords long, thus add x number of NOPs, such that, in ··· 1607 1595 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 1608 1596 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + 1609 1597 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ 1610 - .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ 1598 + .emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */ 1611 1599 .emit_ib = sdma_v5_0_ring_emit_ib, 1612 1600 .emit_fence = sdma_v5_0_ring_emit_fence, 1613 1601 .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,