drm/amdgpu: Enable tunneling on high-priority compute queues

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Enabling tunneling on these queues improves the latency of their work
when the GPU is already busy with other work.
This is useful for VR compositors, which submit highly latency-sensitive
compositing work on high-priority compute queues while the GPU is busy
rendering the next frame.

Userspace merge request:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462

v2: bump driver version (Alex)

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Friedrich Vock <friedrich.vock@gmx.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Friedrich Vock and committed by

Alex Deucher 2 years ago 91963397 94b1e028

+13 -7

5 changed files

expand all

drivers

gpu

drm

amd

amdgpu

amdgpu.h

amdgpu_drv.c

amdgpu_ring.c

gfx_v10_0.c

gfx_v11_0.c

drivers/gpu/drm/amd/amdgpu/amdgpu.h

··· 791 791 uint64_t eop_gpu_addr; 792 792 uint32_t hqd_pipe_priority; 793 793 uint32_t hqd_queue_priority; 794 + bool allow_tunneling; 794 795 bool hqd_active; 795 796 }; 796 797

+2 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

··· 115 115 * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support 116 116 * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query 117 117 * - 3.56.0 - Update IB start address and size alignment for decode and encode 118 + * - 3.57.0 - Compute tunneling on GFX10+ 118 119 */ 119 120 #define KMS_DRIVER_MAJOR 3 120 - #define KMS_DRIVER_MINOR 56 121 + #define KMS_DRIVER_MINOR 57 121 122 #define KMS_DRIVER_PATCHLEVEL 0 122 123 123 124 /*

+6 -4

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

··· 642 642 struct amdgpu_mqd_prop *prop) 643 643 { 644 644 struct amdgpu_device *adev = ring->adev; 645 + bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && 646 + amdgpu_gfx_is_high_priority_compute_queue(adev, ring); 647 + bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX && 648 + amdgpu_gfx_is_high_priority_graphics_queue(adev, ring); 645 649 646 650 memset(prop, 0, sizeof(*prop)); 647 651 ··· 663 659 */ 664 660 prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ; 665 661 666 - if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && 667 - amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) || 668 - (ring->funcs->type == AMDGPU_RING_TYPE_GFX && 669 - amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) { 662 + prop->allow_tunneling = is_high_prio_compute; 663 + if (is_high_prio_compute || is_high_prio_gfx) { 670 664 prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 671 665 prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 672 666 }

+2 -1

drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c

··· 6593 6593 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 6594 6594 #endif 6595 6595 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 6596 - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 6596 + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 6597 + prop->allow_tunneling); 6597 6598 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 6598 6599 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 6599 6600 mqd->cp_hqd_pq_control = tmp;

+2 -1

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

··· 3847 3847 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3848 3848 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 3849 3849 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3850 - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 3850 + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 3851 + prop->allow_tunneling); 3851 3852 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3852 3853 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3853 3854 mqd->cp_hqd_pq_control = tmp;