Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

drm/sched: Rework HW fence processing.

Expedite job deletion from the ring mirror list by doing it in the HW
fence signal callback instead of in finish_work. Together with waiting
for all such fences to signal in drm_sched_stop, this guarantees that an
already signaled job will not be processed twice.
Remove the scheduler's finish-fence callback and just schedule
finish_work directly from the HW fence callback.
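
For reference, the dma_fence callback idiom the reworked path relies on
looks like this. A minimal sketch, not the patched code itself: the
example_* names are hypothetical, but dma_fence_add_callback() really
does return -ENOENT when the fence has already signaled, in which case
the caller must invoke the callback by hand (the diff below does exactly
that in drm_sched_start() and drm_sched_main()):

#include <linux/workqueue.h>
#include <linux/dma-fence.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>

/* The callback is now embedded in the job itself (see the
 * gpu_scheduler.h hunk below), so the handler recovers the job with
 * container_of() and can unlink it from the ring mirror list at once. */
static void example_hw_fence_cb(struct dma_fence *f, struct dma_fence_cb *cb)
{
        struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);

        /* ... list_del_init(&s_job->node) under job_list_lock ... */
        schedule_work(&s_job->finish_work);
}

static void example_arm(struct drm_sched_job *s_job, struct dma_fence *hw_fence)
{
        int r = dma_fence_add_callback(hw_fence, &s_job->cb, example_hw_fence_cb);

        if (r == -ENOENT)       /* already signaled, run the callback inline */
                example_hw_fence_cb(hw_fence, &s_job->cb);
        else if (r)
                DRM_ERROR("fence add callback failed (%d)\n", r);
}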

v2: Fix comments.
v3: Attach hw fence cb to sched_job
v5: Rebase

Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Andrey Grodzovsky, committed by Alex Deucher

3741540e 222b5f04

+29 -32 total

drivers/gpu/drm/scheduler/sched_main.c (+27 -28)
···
        cancel_delayed_work_sync(&sched->work_tdr);

        spin_lock_irqsave(&sched->job_list_lock, flags);
-       /* remove job from ring_mirror_list */
-       list_del_init(&s_job->node);
        /* queue TDR for next job */
        drm_sched_start_timeout(sched);
        spin_unlock_irqrestore(&sched->job_list_lock, flags);
···
        sched->ops->free_job(s_job);
 }

-static void drm_sched_job_finish_cb(struct dma_fence *f,
-                                   struct dma_fence_cb *cb)
-{
-       struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
-                                                finish_cb);
-       schedule_work(&job->finish_work);
-}
-
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
        struct drm_gpu_scheduler *sched = s_job->sched;
        unsigned long flags;

-       dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
-                              drm_sched_job_finish_cb);
-
        spin_lock_irqsave(&sched->job_list_lock, flags);
        list_add_tail(&s_job->node, &sched->ring_mirror_list);
···
        list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
                if (s_job->s_fence->parent &&
                    dma_fence_remove_callback(s_job->s_fence->parent,
-                                             &s_job->s_fence->cb)) {
+                                             &s_job->cb)) {
                        dma_fence_put(s_job->s_fence->parent);
                        s_job->s_fence->parent = NULL;
                        atomic_dec(&sched->hw_rq_count);
···
 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 {
        struct drm_sched_job *s_job, *tmp;
-       unsigned long flags;
        int r;

        if (!full_recovery)
                goto unpark;

-       spin_lock_irqsave(&sched->job_list_lock, flags);
+       /*
+        * Locking the list is not required here as the sched thread is parked
+        * so no new jobs are being pushed into HW, and in drm_sched_stop we
+        * flushed all the jobs that were still in the mirror list but had
+        * already signaled and removed themselves from the list. Also,
+        * concurrent GPU recoveries can't run in parallel.
+        */
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
-               struct drm_sched_fence *s_fence = s_job->s_fence;
                struct dma_fence *fence = s_job->s_fence->parent;

                if (fence) {
-                       r = dma_fence_add_callback(fence, &s_fence->cb,
+                       r = dma_fence_add_callback(fence, &s_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
-                               drm_sched_process_job(fence, &s_fence->cb);
+                               drm_sched_process_job(fence, &s_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                } else
-                       drm_sched_process_job(NULL, &s_fence->cb);
+                       drm_sched_process_job(NULL, &s_job->cb);
        }

        drm_sched_start_timeout(sched);
-       spin_unlock_irqrestore(&sched->job_list_lock, flags);

 unpark:
        kthread_unpark(sched->thread);
···
  */
 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 {
-       struct drm_sched_fence *s_fence =
-               container_of(cb, struct drm_sched_fence, cb);
+       struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
+       struct drm_sched_fence *s_fence = s_job->s_fence;
        struct drm_gpu_scheduler *sched = s_fence->sched;
+       unsigned long flags;

-       dma_fence_get(&s_fence->finished);
+       cancel_delayed_work(&sched->work_tdr);
+
        atomic_dec(&sched->hw_rq_count);
        atomic_dec(&sched->num_jobs);
+
+       spin_lock_irqsave(&sched->job_list_lock, flags);
+       /* remove job from ring_mirror_list */
+       list_del_init(&s_job->node);
+       spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
        drm_sched_fence_finished(s_fence);

        trace_drm_sched_process_job(s_fence);
-       dma_fence_put(&s_fence->finished);
        wake_up_interruptible(&sched->wake_up_worker);
+
+       schedule_work(&s_job->finish_work);
 }
···

                if (fence) {
                        s_fence->parent = dma_fence_get(fence);
-                       r = dma_fence_add_callback(fence, &s_fence->cb,
+                       r = dma_fence_add_callback(fence, &sched_job->cb,
                                                   drm_sched_process_job);
                        if (r == -ENOENT)
-                               drm_sched_process_job(fence, &s_fence->cb);
+                               drm_sched_process_job(fence, &sched_job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                        dma_fence_put(fence);
                } else
-                       drm_sched_process_job(NULL, &s_fence->cb);
+                       drm_sched_process_job(NULL, &sched_job->cb);

                wake_up(&sched->job_scheduled);
        }
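
The drm_sched_stop() hunk above leans on a detail of the dma_fence API
that is easy to miss: dma_fence_remove_callback() returns true only if
the callback was removed before it ran. Each job therefore takes exactly
one of two paths: either the callback is removed here and the job is
still in flight, or drm_sched_process_job() has already run and the job
unlinked itself from ring_mirror_list. A hedged sketch of that decision,
with a hypothetical helper name:

#include <linux/dma-fence.h>
#include <drm/gpu_scheduler.h>

/* Hypothetical helper modeled on the drm_sched_stop() hunk above. */
static bool example_detach_hw_fence(struct drm_gpu_scheduler *sched,
                                    struct drm_sched_job *s_job)
{
        struct dma_fence *parent = s_job->s_fence->parent;

        if (parent && dma_fence_remove_callback(parent, &s_job->cb)) {
                /* Callback removed before it ran: the job is still in
                 * flight, so drop the HW fence and the in-flight count. */
                dma_fence_put(parent);
                s_job->s_fence->parent = NULL;
                atomic_dec(&sched->hw_rq_count);
                return true;
        }

        /* Otherwise drm_sched_process_job() already ran and the job has
         * removed itself from ring_mirror_list; nothing to undo here. */
        return false;
}

Note also that drm_sched_process_job() can now be called directly from
the HW fence's signaling path, which may be interrupt context; that is
why the new code uses the non-blocking cancel_delayed_work() rather than
cancel_delayed_work_sync(), takes job_list_lock with spin_lock_irqsave(),
and defers the heavier cleanup to finish_work.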
include/drm/gpu_scheduler.h (+2 -4)
···
        struct dma_fence                finished;

        /**
-        * @cb: the callback for the parent fence below.
-        */
-       struct dma_fence_cb             cb;
-       /**
         * @parent: the fence returned by &drm_sched_backend_ops.run_job
         * when scheduling the job on hardware. We signal the
         * &drm_sched_fence.finished fence once parent is signalled.
···
  *         be scheduled further.
  * @s_priority: the priority of the job.
  * @entity: the entity to which this job belongs.
+ * @cb: the callback for the parent fence in s_fence.
  *
  * A job is created by the driver using drm_sched_job_init(), and
  * should call drm_sched_entity_push_job() once it wants the scheduler
···
        atomic_t                        karma;
        enum drm_sched_priority         s_priority;
        struct drm_sched_entity         *entity;
+       struct dma_fence_cb             cb;
 };

 static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,