Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdgpu: Add software ring callbacks for gfx9 (v8)

Set ring functions with software ring callbacks on gfx9.

The software ring can be tested by the debugfs_test_ib test case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.
v6: Remove unnecessary checks and enable software ring on gfx9 by default.
v7: Use static array for software ring names and priorities.
v8: Stop creating software rings if no gfx ring existed.

Cc: Christian Koenig <Christian.Koenig@amd.com>
Cc: Luben Tuikov <Luben.Tuikov@amd.com>
Cc: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
Cc: Michel Dänzer <michel@daenzer.net>
Cc: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
Acked-by: Luben Tuikov <luben.tuikov@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Jiadong.Zhu and committed by
Alex Deucher
0c97a19a ded946f3

+136 -1
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
··· 354 354 355 355 bool is_poweron; 356 356 357 + struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; 357 358 struct amdgpu_ring_mux muxer; 358 359 }; 359 360
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
··· 39 39 #define AMDGPU_MAX_RINGS 28 40 40 #define AMDGPU_MAX_HWIP_RINGS 8 41 41 #define AMDGPU_MAX_GFX_RINGS 2 42 + #define AMDGPU_MAX_SW_GFX_RINGS 2 42 43 #define AMDGPU_MAX_COMPUTE_RINGS 8 43 44 #define AMDGPU_MAX_VCE_RINGS 3 44 45 #define AMDGPU_MAX_UVD_ENC_RINGS 2
+20
drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
··· 29 29 30 30 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2) 31 31 32 + static const struct ring_info { 33 + unsigned int hw_pio; 34 + const char *ring_name; 35 + } sw_ring_info[] = { 36 + { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"}, 37 + { AMDGPU_RING_PRIO_2, "gfx_high"}, 38 + }; 39 + 32 40 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, 33 41 unsigned int entry_size) 34 42 { ··· 226 218 void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 227 219 { 228 220 WARN_ON(!ring->is_sw_ring); 221 + } 222 + 223 + const char *amdgpu_sw_ring_name(int idx) 224 + { 225 + return idx < ARRAY_SIZE(sw_ring_info) ? 226 + sw_ring_info[idx].ring_name : NULL; 227 + } 228 + 229 + unsigned int amdgpu_sw_ring_priority(int idx) 230 + { 231 + return idx < ARRAY_SIZE(sw_ring_info) ? 232 + sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT; 229 233 }
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
··· 73 73 void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); 74 74 void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring); 75 75 76 + const char *amdgpu_sw_ring_name(int idx); 77 + unsigned int amdgpu_sw_ring_priority(int idx); 76 78 #endif
+112 -1
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
··· 47 47 48 48 #include "amdgpu_ras.h" 49 49 50 + #include "amdgpu_ring_mux.h" 50 51 #include "gfx_v9_4.h" 51 52 #include "gfx_v9_0.h" 52 53 #include "gfx_v9_4_2.h" ··· 57 56 #include "asic_reg/gc/gc_9_0_default.h" 58 57 59 58 #define GFX9_NUM_GFX_RINGS 1 59 + #define GFX9_NUM_SW_GFX_RINGS 2 60 60 #define GFX9_MEC_HPD_SIZE 4096 61 61 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 62 62 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L ··· 2105 2103 struct amdgpu_ring *ring; 2106 2104 struct amdgpu_kiq *kiq; 2107 2105 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2106 + unsigned int hw_prio; 2108 2107 2109 2108 switch (adev->ip_versions[GC_HWIP][0]) { 2110 2109 case IP_VERSION(9, 0, 1): ··· 2189 2186 sprintf(ring->name, "gfx_%d", i); 2190 2187 ring->use_doorbell = true; 2191 2188 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2189 + 2190 + /* disable scheduler on the real ring */ 2191 + ring->no_scheduler = true; 2192 2192 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2193 2193 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2194 2194 AMDGPU_RING_PRIO_DEFAULT, NULL); 2195 2195 if (r) 2196 2196 return r; 2197 + } 2198 + 2199 + /* set up the software rings */ 2200 + if (adev->gfx.num_gfx_rings) { 2201 + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2202 + ring = &adev->gfx.sw_gfx_ring[i]; 2203 + ring->ring_obj = NULL; 2204 + sprintf(ring->name, amdgpu_sw_ring_name(i)); 2205 + ring->use_doorbell = true; 2206 + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2207 + ring->is_sw_ring = true; 2208 + hw_prio = amdgpu_sw_ring_priority(i); 2209 + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2210 + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, 2211 + NULL); 2212 + if (r) 2213 + return r; 2214 + ring->wptr = 0; 2215 + } 2216 + 2217 + /* init the muxer and add software rings */ 2218 + r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], 2219 + GFX9_NUM_SW_GFX_RINGS); 2220 + if (r) { 2221 + 
DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); 2222 + return r; 2223 + } 2224 + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2225 + r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, 2226 + &adev->gfx.sw_gfx_ring[i]); 2227 + if (r) { 2228 + DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); 2229 + return r; 2230 + } 2231 + } 2197 2232 } 2198 2233 2199 2234 /* set up the compute queues - allocate horizontally across pipes */ ··· 2283 2242 { 2284 2243 int i; 2285 2244 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2245 + 2246 + if (adev->gfx.num_gfx_rings) { 2247 + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 2248 + amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); 2249 + amdgpu_ring_mux_fini(&adev->gfx.muxer); 2250 + } 2286 2251 2287 2252 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2288 2253 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); ··· 5759 5712 5760 5713 switch (me_id) { 5761 5714 case 0: 5762 - amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5715 + /* Fence signals are handled on the software rings*/ 5716 + if (adev->gfx.num_gfx_rings) { 5717 + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 5718 + amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); 5719 + } 5763 5720 break; 5764 5721 case 1: 5765 5722 case 2: ··· 6768 6717 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 6769 6718 }; 6770 6719 6720 + static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 6721 + .type = AMDGPU_RING_TYPE_GFX, 6722 + .align_mask = 0xff, 6723 + .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6724 + .support_64bit_ptrs = true, 6725 + .secure_submission_supported = true, 6726 + .vmhub = AMDGPU_GFXHUB_0, 6727 + .get_rptr = amdgpu_sw_ring_get_rptr_gfx, 6728 + .get_wptr = amdgpu_sw_ring_get_wptr_gfx, 6729 + .set_wptr = amdgpu_sw_ring_set_wptr_gfx, 6730 + .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6731 + 5 + /* COND_EXEC */ 6732 + 7 + /* PIPELINE_SYNC */ 6733 + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6734 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6735 + 2 + /* VM_FLUSH */ 
6736 + 8 + /* FENCE for VM_FLUSH */ 6737 + 20 + /* GDS switch */ 6738 + 4 + /* double SWITCH_BUFFER, 6739 + * the first COND_EXEC jump to the place just 6740 + * prior to this double SWITCH_BUFFER 6741 + */ 6742 + 5 + /* COND_EXEC */ 6743 + 7 + /* HDP_flush */ 6744 + 4 + /* VGT_flush */ 6745 + 14 + /* CE_META */ 6746 + 31 + /* DE_META */ 6747 + 3 + /* CNTX_CTRL */ 6748 + 5 + /* HDP_INVL */ 6749 + 8 + 8 + /* FENCE x2 */ 6750 + 2 + /* SWITCH_BUFFER */ 6751 + 7, /* gfx_v9_0_emit_mem_sync */ 6752 + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6753 + .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6754 + .emit_fence = gfx_v9_0_ring_emit_fence, 6755 + .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6756 + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6757 + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6758 + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6759 + .test_ring = gfx_v9_0_ring_test_ring, 6760 + .test_ib = gfx_v9_0_ring_test_ib, 6761 + .insert_nop = amdgpu_sw_ring_insert_nop, 6762 + .pad_ib = amdgpu_ring_generic_pad_ib, 6763 + .emit_switch_buffer = gfx_v9_ring_emit_sb, 6764 + .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6765 + .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6766 + .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6767 + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 6768 + .emit_wreg = gfx_v9_0_ring_emit_wreg, 6769 + .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6770 + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6771 + .soft_recovery = gfx_v9_0_ring_soft_recovery, 6772 + .emit_mem_sync = gfx_v9_0_emit_mem_sync, 6773 + }; 6774 + 6771 6775 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 6772 6776 .type = AMDGPU_RING_TYPE_COMPUTE, 6773 6777 .align_mask = 0xff, ··· 6899 6793 6900 6794 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6901 6795 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 6796 + 6797 + if (adev->gfx.num_gfx_rings) { 6798 + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; 
i++) 6799 + adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; 6800 + } 6902 6801 6903 6802 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6904 6803 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;