Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: move explicit sync check into the CS

This moves the memory allocation out of the critical code path.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221014084641.128280-8-christian.koenig@amd.com

+17 -12
+12 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -449,7 +449,18 @@
 	}
 
 	r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
-	dma_fence_put(fence);
+	if (r)
+		goto error;
 
+	/*
+	 * When we have an explicit dependency it might be necessary to insert a
+	 * pipeline sync to make sure that all caches etc are flushed and the
+	 * next job actually sees the results from the previous one.
+	 */
+	if (fence->context == p->gang_leader->base.entity->fence_context)
+		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+
+error:
+	dma_fence_put(fence);
 	return r;
 }
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -182,7 +182,7 @@
 
 	need_ctx_switch = ring->current_ctx != fence_ctx;
 	if (ring->funcs->emit_pipeline_sync && job &&
-	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
+	    ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
 	     (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
 	     amdgpu_vm_need_pipeline_sync(ring, job))) {
 		need_pipe_sync = true;
+3 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -108,7 +108,7 @@
 	(*job)->vm = vm;
 
 	amdgpu_sync_create(&(*job)->sync);
-	amdgpu_sync_create(&(*job)->sched_sync);
+	amdgpu_sync_create(&(*job)->explicit_sync);
 	(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 	(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
 
@@ -176,7 +176,7 @@
 	drm_sched_job_cleanup(s_job);
 
 	amdgpu_sync_free(&job->sync);
-	amdgpu_sync_free(&job->sched_sync);
+	amdgpu_sync_free(&job->explicit_sync);
 
 	dma_fence_put(&job->hw_fence);
 }
@@ -204,7 +204,7 @@
 
 	amdgpu_job_free_resources(job);
 	amdgpu_sync_free(&job->sync);
-	amdgpu_sync_free(&job->sched_sync);
+	amdgpu_sync_free(&job->explicit_sync);
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
@@ -251,12 +251,6 @@
 	int r;
 
 	fence = amdgpu_sync_get_fence(&job->sync);
-	if (fence && drm_sched_dependency_optimized(fence, s_entity)) {
-		r = amdgpu_sync_fence(&job->sched_sync, fence);
-		if (r)
-			DRM_ERROR("Error adding fence (%d)\n", r);
-	}
-
 	while (!fence && job->vm && !job->vmid) {
 		r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
 		if (r)
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -48,7 +48,7 @@
 	struct drm_sched_job base;
 	struct amdgpu_vm *vm;
 	struct amdgpu_sync sync;
-	struct amdgpu_sync sched_sync;
+	struct amdgpu_sync explicit_sync;
 	struct dma_fence hw_fence;
 	struct dma_fence *gang_submit;
 	uint32_t preamble_status;