Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

drm/sched: Refactor ring mirror list handling.

Decouple stopping and starting of the scheduler threads and the
ring mirror list handling from the policy of what to do about the
guilty jobs.
When stopping the sched thread and detaching sched fences
from non-signaled HW fences, wait for all signaled HW fences
to complete before rerunning the jobs.

v2: Fix resubmission of guilty job into HW after refactoring.

v4:
Full restart for all the jobs, not only from guilty ring.
Extract karma increase into standalone function.

v5:
Rework waiting for signaled jobs without relying on the job
struct itself as those might already be freed for non 'guilty'
job's schedulers.
Expose karma increase to drivers.

v6:
Use list_for_each_entry_safe_continue and drm_sched_process_job
in case fence already signaled.
Call drm_sched_increase_karma only once for amdgpu and add documentation.

v7:
Wait only for the latest job's fence.

Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
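
The practical effect of the split shows up in the driver diffs below: instead of
parking the scheduler kthread and calling drm_sched_hw_job_reset() /
drm_sched_job_recovery() directly, a driver's timeout handler now stops the
scheduler, applies its own guilty-job policy, resets the hardware, resubmits the
mirror list and restarts the scheduler. A minimal sketch of that sequence,
modeled on the etnaviv and v3d hunks; the mygpu_* names are hypothetical
placeholders and only the drm_sched_* calls come from this patch:

#include <drm/gpu_scheduler.h>

/* Hypothetical driver state; only the embedded scheduler is real DRM API. */
struct mygpu_device {
	struct drm_gpu_scheduler sched;
};

/* Hypothetical HW reset helper, driver specific. */
static void mygpu_hw_reset(struct mygpu_device *gpu);

static void mygpu_timedout_job(struct drm_sched_job *sched_job)
{
	struct mygpu_device *gpu =
		container_of(sched_job->sched, struct mygpu_device, sched);

	/* Park the scheduler thread and wait for in-flight jobs to settle. */
	drm_sched_stop(&gpu->sched);

	/* Guilty-job policy is now the driver's call, separate from stop/start. */
	if (sched_job)
		drm_sched_increase_karma(sched_job);

	/* Bring the hardware back to a known good state (driver specific). */
	mygpu_hw_reset(gpu);

	/* Re-run everything still on the ring mirror list... */
	drm_sched_resubmit_jobs(&gpu->sched);

	/* ...and unpark the thread; pass false instead if the reset failed. */
	drm_sched_start(&gpu->sched, true);
}

The amdgpu hunk below shows the failure path: when the ASIC reset did not
succeed it skips drm_sched_resubmit_jobs() and calls drm_sched_start() with
full_recovery = false, which only unparks the scheduler thread.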

Authored by Andrey Grodzovsky and committed by Alex Deucher
222b5f04 713b64a5

5 files changed, 139 insertions(+), 98 deletions(-)
+7 -13
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
···
		if (!ring || !ring->sched.thread)
			continue;

-		kthread_park(ring->sched.thread);
-
-		if (job && job->base.sched != &ring->sched)
-			continue;
-
-		drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
+		drm_sched_stop(&ring->sched);

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}
+
+	if(job)
+		drm_sched_increase_karma(&job->base);


···
		if (!ring || !ring->sched.thread)
			continue;

-		/* only need recovery sched of the given job's ring
-		 * or all rings (in the case @job is NULL)
-		 * after above amdgpu_reset accomplished
-		 */
-		if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
-			drm_sched_job_recovery(&ring->sched);
+		if (!adev->asic_reset_res)
+			drm_sched_resubmit_jobs(&ring->sched);

-		kthread_unpark(ring->sched.thread);
+		drm_sched_start(&ring->sched, !adev->asic_reset_res);
	}

	if (!amdgpu_device_has_dc_support(adev)) {
+7 -4
drivers/gpu/drm/etnaviv/etnaviv_sched.c
···
	}

	/* block scheduler */
-	kthread_park(gpu->sched.thread);
-	drm_sched_hw_job_reset(&gpu->sched, sched_job);
+	drm_sched_stop(&gpu->sched);
+
+	if(sched_job)
+		drm_sched_increase_karma(sched_job);

	/* get the GPU back into the init state */
	etnaviv_core_dump(gpu);
	etnaviv_gpu_recover_hang(gpu);

+	drm_sched_resubmit_jobs(&gpu->sched);
+
	/* restart scheduler after GPU is usable again */
-	drm_sched_job_recovery(&gpu->sched);
-	kthread_unpark(gpu->sched.thread);
+	drm_sched_start(&gpu->sched, true);
}

static void etnaviv_sched_free_job(struct drm_sched_job *sched_job)
+113 -73
drivers/gpu/drm/scheduler/sched_main.c
···

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

-static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job);
-
/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
···
	spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

-/**
- * drm_sched_hw_job_reset - stop the scheduler if it contains the bad job
- *
- * @sched: scheduler instance
- * @bad: bad scheduler job
- *
- */
-void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
+/**
+ * drm_sched_increase_karma - Update sched_entity guilty flag
+ *
+ * @bad: The job guilty of time out
+ *
+ * Increment on every hang caused by the 'bad' job. If this exceeds the hang
+ * limit of the scheduler then the respective sched entity is marked guilty and
+ * jobs from it will not be scheduled further
+ */
+void drm_sched_increase_karma(struct drm_sched_job *bad)
{
-	struct drm_sched_job *s_job;
-	struct drm_sched_entity *entity, *tmp;
-	unsigned long flags;
	int i;
+	struct drm_sched_entity *tmp;
+	struct drm_sched_entity *entity;
+	struct drm_gpu_scheduler *sched = bad->sched;

-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
-		if (s_job->s_fence->parent &&
-		    dma_fence_remove_callback(s_job->s_fence->parent,
-					      &s_job->s_fence->cb)) {
-			dma_fence_put(s_job->s_fence->parent);
-			s_job->s_fence->parent = NULL;
-			atomic_dec(&sched->hw_rq_count);
-		}
-	}
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
-
-	if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
+	/* don't increase @bad's karma if it's from KERNEL RQ,
+	 * because sometimes GPU hang would cause kernel jobs (like VM updating jobs)
+	 * corrupt but keep in mind that kernel jobs always considered good.
+	 */
+	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
		atomic_inc(&bad->karma);
-		/* don't increase @bad's karma if it's from KERNEL RQ,
-		 * becuase sometimes GPU hang would cause kernel jobs (like VM updating jobs)
-		 * corrupt but keep in mind that kernel jobs always considered good.
-		 */
-		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; i++ ) {
+		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
+		     i++) {
			struct drm_sched_rq *rq = &sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
-				if (bad->s_fence->scheduled.context == entity->fence_context) {
-					if (atomic_read(&bad->karma) > bad->sched->hang_limit)
+				if (bad->s_fence->scheduled.context ==
+				    entity->fence_context) {
+					if (atomic_read(&bad->karma) >
+					    bad->sched->hang_limit)
						if (entity->guilty)
							atomic_set(entity->guilty, 1);
					break;
···
		}
	}
}
-EXPORT_SYMBOL(drm_sched_hw_job_reset);
+EXPORT_SYMBOL(drm_sched_increase_karma);
+
+/**
+ * drm_sched_hw_job_reset - stop the scheduler if it contains the bad job
+ *
+ * @sched: scheduler instance
+ * @bad: bad scheduler job
+ *
+ */
+void drm_sched_stop(struct drm_gpu_scheduler *sched)
+{
+	struct drm_sched_job *s_job;
+	unsigned long flags;
+	struct dma_fence *last_fence = NULL;
+
+	kthread_park(sched->thread);
+
+	/*
+	 * Verify all the signaled jobs in mirror list are removed from the ring
+	 * by waiting for the latest job to enter the list. This should insure that
+	 * also all the previous jobs that were in flight also already singaled
+	 * and removed from the list.
+	 */
+	spin_lock_irqsave(&sched->job_list_lock, flags);
+	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
+		if (s_job->s_fence->parent &&
+		    dma_fence_remove_callback(s_job->s_fence->parent,
+					      &s_job->s_fence->cb)) {
+			dma_fence_put(s_job->s_fence->parent);
+			s_job->s_fence->parent = NULL;
+			atomic_dec(&sched->hw_rq_count);
+		} else {
+			last_fence = dma_fence_get(&s_job->s_fence->finished);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
+	if (last_fence) {
+		dma_fence_wait(last_fence, false);
+		dma_fence_put(last_fence);
+	}
+}
+
+EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_job_recovery - recover jobs after a reset
···
 * @sched: scheduler instance
 *
 */
-void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
+void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
	struct drm_sched_job *s_job, *tmp;
-	bool found_guilty = false;
	unsigned long flags;
	int r;
+
+	if (!full_recovery)
+		goto unpark;

	spin_lock_irqsave(&sched->job_list_lock, flags);
	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
		struct drm_sched_fence *s_fence = s_job->s_fence;
-		struct dma_fence *fence;
-		uint64_t guilty_context;
+		struct dma_fence *fence = s_job->s_fence->parent;
+
+		if (fence) {
+			r = dma_fence_add_callback(fence, &s_fence->cb,
+						   drm_sched_process_job);
+			if (r == -ENOENT)
+				drm_sched_process_job(fence, &s_fence->cb);
+			else if (r)
+				DRM_ERROR("fence add callback failed (%d)\n",
+					  r);
+		} else
+			drm_sched_process_job(NULL, &s_fence->cb);
+	}
+
+	drm_sched_start_timeout(sched);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
+unpark:
+	kthread_unpark(sched->thread);
+}
+EXPORT_SYMBOL(drm_sched_start);
+
+/**
+ * drm_sched_resubmit_jobs - helper to relunch job from mirror ring list
+ *
+ * @sched: scheduler instance
+ *
+ */
+void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
+{
+	struct drm_sched_job *s_job, *tmp;
+	uint64_t guilty_context;
+	bool found_guilty = false;
+
+	/*TODO DO we need spinlock here ? */
+	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+		struct drm_sched_fence *s_fence = s_job->s_fence;

		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
			found_guilty = true;
···
		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
			dma_fence_set_error(&s_fence->finished, -ECANCELED);

-		spin_unlock_irqrestore(&sched->job_list_lock, flags);
-		fence = sched->ops->run_job(s_job);
+		s_job->s_fence->parent = sched->ops->run_job(s_job);
		atomic_inc(&sched->hw_rq_count);
-
-		if (fence) {
-			s_fence->parent = dma_fence_get(fence);
-			r = dma_fence_add_callback(fence, &s_fence->cb,
-						   drm_sched_process_job);
-			if (r == -ENOENT)
-				drm_sched_process_job(fence, &s_fence->cb);
-			else if (r)
-				DRM_ERROR("fence add callback failed (%d)\n",
-					  r);
-			dma_fence_put(fence);
-		} else {
-			if (s_fence->finished.error < 0)
-				drm_sched_expel_job_unlocked(s_job);
-			drm_sched_process_job(NULL, &s_fence->cb);
-		}
-		spin_lock_irqsave(&sched->job_list_lock, flags);
	}
-	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
}
-EXPORT_SYMBOL(drm_sched_job_recovery);
+EXPORT_SYMBOL(drm_sched_resubmit_jobs);

/**
 * drm_sched_job_init - init a scheduler job
···
			DRM_ERROR("fence add callback failed (%d)\n",
				  r);
			dma_fence_put(fence);
-		} else {
-			if (s_fence->finished.error < 0)
-				drm_sched_expel_job_unlocked(sched_job);
+		} else
			drm_sched_process_job(NULL, &s_fence->cb);
-		}

		wake_up(&sched->job_scheduled);
	}
	return 0;
-}
-
-static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job)
-{
-	struct drm_gpu_scheduler *sched = s_job->sched;
-
-	spin_lock(&sched->job_list_lock);
-	list_del_init(&s_job->node);
-	spin_unlock(&sched->job_list_lock);
}

/**
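
For context on where the karma accounting ends up: drm_sched_increase_karma()
sets the entity's guilty atomic once karma exceeds sched->hang_limit, and that
atomic is the one a driver hands to drm_sched_entity_init() per context. A
rough sketch of that wiring, assuming the drm_sched_entity_init() signature in
this kernel (rq list plus guilty pointer); the myctx structure and helpers are
hypothetical and not part of this patch:

#include <linux/atomic.h>
#include <drm/gpu_scheduler.h>

/* Hypothetical per-context state; ctx->guilty is the atomic that
 * drm_sched_increase_karma() flips once karma > sched->hang_limit. */
struct myctx {
	struct drm_sched_entity entity;
	atomic_t guilty;
};

static int myctx_init_entity(struct myctx *ctx, struct drm_gpu_scheduler *sched)
{
	struct drm_sched_rq *rq = &sched->sched_rq[DRM_SCHED_PRIORITY_NORMAL];

	/* The entity keeps a pointer to ctx->guilty (last argument). */
	return drm_sched_entity_init(&ctx->entity, &rq, 1, &ctx->guilty);
}

static bool myctx_is_guilty(struct myctx *ctx)
{
	/* Drivers can report this back to userspace, e.g. on a context query. */
	return atomic_read(&ctx->guilty) != 0;
}
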
+8 -5
drivers/gpu/drm/v3d/v3d_sched.c
···
	for (q = 0; q < V3D_MAX_QUEUES; q++) {
		struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;

-		kthread_park(sched->thread);
-		drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
-					       sched_job : NULL));
+		drm_sched_stop(sched);
+
+		if(sched_job)
+			drm_sched_increase_karma(sched_job);
	}

	/* get the GPU back into the init state */
	v3d_reset(v3d);

+	for (q = 0; q < V3D_MAX_QUEUES; q++)
+		drm_sched_resubmit_jobs(sched_job->sched);
+
	/* Unblock schedulers and restart their jobs. */
	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		drm_sched_job_recovery(&v3d->queue[q].sched);
-		kthread_unpark(v3d->queue[q].sched.thread);
+		drm_sched_start(&v3d->queue[q].sched, true);
	}

	mutex_unlock(&v3d->reset_lock);
+4 -3
include/drm/gpu_scheduler.h
···
		       void *owner);
void drm_sched_job_cleanup(struct drm_sched_job *job);
void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
-void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
-			    struct drm_sched_job *job);
-void drm_sched_job_recovery(struct drm_gpu_scheduler *sched);
+void drm_sched_stop(struct drm_gpu_scheduler *sched);
+void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
+void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
+void drm_sched_increase_karma(struct drm_sched_job *bad);
bool drm_sched_dependency_optimized(struct dma_fence* fence,
				    struct drm_sched_entity *entity);
void drm_sched_fault(struct drm_gpu_scheduler *sched);