Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: apply AMDGPU_IB_FLAG_EMIT_MEM_SYNC to compute IBs too (v3)

Compute IBs need this too.

v2: split out version bump
v3: squash in emit frame count fixes

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Marek Olšák and committed by Alex Deucher
d35745bb 2f9ce2a3

+46 -7
+1 -1
drivers/gpu/drm/amd/amdgpu/cikd.h
··· 450 450 # define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 451 451 # define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 452 452 # define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 453 - #define PACKET3_AQUIRE_MEM 0x58 453 + #define PACKET3_ACQUIRE_MEM 0x58 454 454 #define PACKET3_REWIND 0x59 455 455 #define PACKET3_LOAD_UCONFIG_REG 0x5E 456 456 #define PACKET3_LOAD_SH_REG 0x5F
+3 -1
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
··· 8133 8133 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 8134 8134 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 8135 8135 2 + /* gfx_v10_0_ring_emit_vm_flush */ 8136 - 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ 8136 + 8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ 8137 + 8, /* gfx_v10_0_emit_mem_sync */ 8137 8138 .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ 8138 8139 .emit_ib = gfx_v10_0_ring_emit_ib_compute, 8139 8140 .emit_fence = gfx_v10_0_ring_emit_fence, ··· 8149 8148 .emit_wreg = gfx_v10_0_ring_emit_wreg, 8150 8149 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, 8151 8150 .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, 8151 + .emit_mem_sync = gfx_v10_0_emit_mem_sync, 8152 8152 }; 8153 8153 8154 8154 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
+3 -1
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
··· 3533 3533 5 + 5 + /* hdp flush / invalidate */ 3534 3534 7 + /* gfx_v6_0_ring_emit_pipeline_sync */ 3535 3535 SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */ 3536 - 14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ 3536 + 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ 3537 + 5, /* SURFACE_SYNC */ 3537 3538 .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */ 3538 3539 .emit_ib = gfx_v6_0_ring_emit_ib, 3539 3540 .emit_fence = gfx_v6_0_ring_emit_fence, ··· 3544 3543 .test_ib = gfx_v6_0_ring_test_ib, 3545 3544 .insert_nop = amdgpu_ring_insert_nop, 3546 3545 .emit_wreg = gfx_v6_0_ring_emit_wreg, 3546 + .emit_mem_sync = gfx_v6_0_emit_mem_sync, 3547 3547 }; 3548 3548 3549 3549 static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
+17 -1
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
··· 5010 5010 amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ 5011 5011 } 5012 5012 5013 + static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) 5014 + { 5015 + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); 5016 + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | 5017 + PACKET3_TC_ACTION_ENA | 5018 + PACKET3_SH_KCACHE_ACTION_ENA | 5019 + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ 5020 + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 5021 + amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */ 5022 + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 5023 + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 5024 + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ 5025 + } 5026 + 5013 5027 static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { 5014 5028 .name = "gfx_v7_0", 5015 5029 .early_init = gfx_v7_0_early_init, ··· 5089 5075 5 + /* hdp invalidate */ 5090 5076 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ 5091 5077 CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ 5092 - 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ 5078 + 7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ 5079 + 7, /* gfx_v7_0_emit_mem_sync_compute */ 5093 5080 .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */ 5094 5081 .emit_ib = gfx_v7_0_ring_emit_ib_compute, 5095 5082 .emit_fence = gfx_v7_0_ring_emit_fence_compute, ··· 5103 5088 .insert_nop = amdgpu_ring_insert_nop, 5104 5089 .pad_ib = amdgpu_ring_generic_pad_ib, 5105 5090 .emit_wreg = gfx_v7_0_ring_emit_wreg, 5091 + .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute, 5106 5092 }; 5107 5093 5108 5094 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
+18 -1
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
··· 6830 6830 amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ 6831 6831 } 6832 6832 6833 + static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring) 6834 + { 6835 + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); 6836 + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | 6837 + PACKET3_TC_ACTION_ENA | 6838 + PACKET3_SH_KCACHE_ACTION_ENA | 6839 + PACKET3_SH_ICACHE_ACTION_ENA | 6840 + PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */ 6841 + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6842 + amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */ 6843 + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6844 + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6845 + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ 6846 + } 6847 + 6833 6848 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 6834 6849 .name = "gfx_v8_0", 6835 6850 .early_init = gfx_v8_0_early_init, ··· 6927 6912 5 + /* hdp_invalidate */ 6928 6913 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 6929 6914 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 6930 - 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 6915 + 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 6916 + 7, /* gfx_v8_0_emit_mem_sync_compute */ 6931 6917 .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ 6932 6918 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 6933 6919 .emit_fence = gfx_v8_0_ring_emit_fence_compute, ··· 6941 6925 .insert_nop = amdgpu_ring_insert_nop, 6942 6926 .pad_ib = amdgpu_ring_generic_pad_ib, 6943 6927 .emit_wreg = gfx_v8_0_ring_emit_wreg, 6928 + .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute, 6944 6929 }; 6945 6930 6946 6931 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
+3 -1
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
··· 6741 6741 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6742 6742 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6743 6743 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6744 - 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6744 + 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6745 + 7, /* gfx_v9_0_emit_mem_sync */ 6745 6746 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6746 6747 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 6747 6748 .emit_fence = gfx_v9_0_ring_emit_fence, ··· 6757 6756 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6758 6757 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6759 6758 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6759 + .emit_mem_sync = gfx_v9_0_emit_mem_sync, 6760 6760 }; 6761 6761 6762 6762 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
+1 -1
drivers/gpu/drm/amd/amdgpu/vid.h
··· 332 332 # define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 333 333 # define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 334 334 # define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 335 - #define PACKET3_AQUIRE_MEM 0x58 335 + #define PACKET3_ACQUIRE_MEM 0x58 336 336 #define PACKET3_REWIND 0x59 337 337 #define PACKET3_LOAD_UCONFIG_REG 0x5E 338 338 #define PACKET3_LOAD_SH_REG 0x5F