drm/sched: implement dynamic job-flow control

+6

Documentation/gpu/drm-mm.rst

··· 552 552 .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c 553 553 :doc: Overview 554 554 555 + Flow Control 556 + ------------ 557 + 558 + .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c 559 + :doc: Flow Control 560 + 555 561 Scheduler Function References 556 562 ----------------------------- 557 563

+1 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

··· 115 115 if (!entity) 116 116 return 0; 117 117 118 - return drm_sched_job_init(&(*job)->base, entity, owner); 118 + return drm_sched_job_init(&(*job)->base, entity, 1, owner); 119 119 } 120 120 121 121 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,

+1 -1

drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c

··· 535 535 536 536 ret = drm_sched_job_init(&submit->sched_job, 537 537 &ctx->sched_entity[args->pipe], 538 - submit->ctx); 538 + 1, submit->ctx); 539 539 if (ret) 540 540 goto err_submit_put; 541 541

+1 -1

drivers/gpu/drm/etnaviv/etnaviv_gpu.c

··· 1917 1917 u32 idle, mask; 1918 1918 1919 1919 /* If there are any jobs in the HW queue, we're not idle */ 1920 - if (atomic_read(&gpu->sched.hw_rq_count)) 1920 + if (atomic_read(&gpu->sched.credit_count)) 1921 1921 return -EBUSY; 1922 1922 1923 1923 /* Check whether the hardware (except FE and MC) is idle */

+1 -1

drivers/gpu/drm/lima/lima_device.c

··· 514 514 515 515 /* check any task running */ 516 516 for (i = 0; i < lima_pipe_num; i++) { 517 - if (atomic_read(&ldev->pipe[i].base.hw_rq_count)) 517 + if (atomic_read(&ldev->pipe[i].base.credit_count)) 518 518 return -EBUSY; 519 519 } 520 520

+1 -1

drivers/gpu/drm/lima/lima_sched.c

··· 123 123 for (i = 0; i < num_bos; i++) 124 124 drm_gem_object_get(&bos[i]->base.base); 125 125 126 - err = drm_sched_job_init(&task->base, &context->base, vm); 126 + err = drm_sched_job_init(&task->base, &context->base, 1, vm); 127 127 if (err) { 128 128 kfree(task->bos); 129 129 return err;

+1 -1

drivers/gpu/drm/msm/msm_gem_submit.c

··· 48 48 return ERR_PTR(ret); 49 49 } 50 50 51 - ret = drm_sched_job_init(&submit->base, queue->entity, queue); 51 + ret = drm_sched_job_init(&submit->base, queue->entity, 1, queue); 52 52 if (ret) { 53 53 kfree(submit->hw_fence); 54 54 kfree(submit);

+1 -1

drivers/gpu/drm/nouveau/nouveau_sched.c

··· 89 89 90 90 } 91 91 92 - ret = drm_sched_job_init(&job->base, &entity->base, NULL); 92 + ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL); 93 93 if (ret) 94 94 goto err_free_chains; 95 95

+1 -1

drivers/gpu/drm/panfrost/panfrost_drv.c

··· 274 274 275 275 ret = drm_sched_job_init(&job->base, 276 276 &file_priv->sched_entity[slot], 277 - NULL); 277 + 1, NULL); 278 278 if (ret) 279 279 goto out_put_job; 280 280

+1 -1

drivers/gpu/drm/panfrost/panfrost_job.c

··· 963 963 964 964 for (i = 0; i < NUM_JOB_SLOTS; i++) { 965 965 /* If there are any jobs in the HW queue, we're not idle */ 966 - if (atomic_read(&js->queue[i].sched.hw_rq_count)) 966 + if (atomic_read(&js->queue[i].sched.credit_count)) 967 967 return false; 968 968 } 969 969

+1 -1

drivers/gpu/drm/scheduler/gpu_scheduler_trace.h

··· 51 51 __assign_str(name, sched_job->sched->name); 52 52 __entry->job_count = spsc_queue_count(&entity->job_queue); 53 53 __entry->hw_job_count = atomic_read( 54 - &sched_job->sched->hw_rq_count); 54 + &sched_job->sched->credit_count); 55 55 ), 56 56 TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", 57 57 __entry->entity, __entry->id,

+136 -34

drivers/gpu/drm/scheduler/sched_main.c

··· 48 48 * through the jobs entity pointer. 49 49 */ 50 50 51 + /** 52 + * DOC: Flow Control 53 + * 54 + * The DRM GPU scheduler provides a flow control mechanism to regulate the rate 55 + * at which the jobs fetched from scheduler entities are executed. 56 + * 57 + * In this context, the &drm_gpu_scheduler keeps track of a driver-specified 58 + * credit limit representing the capacity of this scheduler and a credit count; 59 + * every &drm_sched_job carries a driver-specified number of credits. 60 + * 61 + * Once a job is executed (but not yet finished), the job's credits contribute 62 + * to the scheduler's credit count until the job is finished. If by executing 63 + * one more job the scheduler's credit count would exceed the scheduler's 64 + * credit limit, the job won't be executed. Instead, the scheduler will wait 65 + * until the credit count has decreased enough to not overflow its credit limit. 66 + * This implies waiting for previously executed jobs. 67 + * 68 + * Optionally, drivers may register a callback (update_job_credits) provided by 69 + * struct drm_sched_backend_ops to update the job's credits dynamically. The 70 + * scheduler executes this callback every time the scheduler considers a job for 71 + * execution and subsequently checks whether the job fits the scheduler's credit 72 + * limit. 
73 + */ 74 + 51 75 #include <linux/wait.h> 52 76 #include <linux/sched.h> 53 77 #include <linux/completion.h> ··· 98 74 */ 99 75 MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default)."); 100 76 module_param_named(sched_policy, drm_sched_policy, int, 0444); 77 + 78 + static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) 79 + { 80 + u32 credits; 81 + 82 + drm_WARN_ON(sched, check_sub_overflow(sched->credit_limit, 83 + atomic_read(&sched->credit_count), 84 + &credits)); 85 + 86 + return credits; 87 + } 88 + 89 + /** 90 + * drm_sched_can_queue -- Can we queue more to the hardware? 91 + * @sched: scheduler instance 92 + * @entity: the scheduler entity 93 + * 94 + * Return true if we can push at least one more job from @entity, false 95 + * otherwise. 96 + */ 97 + static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, 98 + struct drm_sched_entity *entity) 99 + { 100 + struct drm_sched_job *s_job; 101 + 102 + s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); 103 + if (!s_job) 104 + return false; 105 + 106 + if (sched->ops->update_job_credits) { 107 + s_job->credits = sched->ops->update_job_credits(s_job); 108 + 109 + drm_WARN(sched, !s_job->credits, 110 + "Jobs with zero credits bypass job-flow control.\n"); 111 + } 112 + 113 + /* If a job exceeds the credit limit, truncate it to the credit limit 114 + * itself to guarantee forward progress. 
115 + */ 116 + if (drm_WARN(sched, s_job->credits > sched->credit_limit, 117 + "Jobs may not exceed the credit limit, truncate.\n")) 118 + s_job->credits = sched->credit_limit; 119 + 120 + return drm_sched_available_credits(sched) >= s_job->credits; 121 + } 101 122 102 123 static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, 103 124 const struct rb_node *b) ··· 255 186 /** 256 187 * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run 257 188 * 189 + * @sched: the gpu scheduler 258 190 * @rq: scheduler run queue to check. 259 191 * 260 - * Try to find a ready entity, returns NULL if none found. 192 + * Try to find the next ready entity. 193 + * 194 + * Return an entity if one is found; return an error-pointer (!NULL) if an 195 + * entity was ready, but the scheduler had insufficient credits to accommodate 196 + * its job; return NULL, if no ready entity was found. 261 197 */ 262 198 static struct drm_sched_entity * 263 - drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) 199 + drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, 200 + struct drm_sched_rq *rq) 264 201 { 265 202 struct drm_sched_entity *entity; 266 203 ··· 276 201 if (entity) { 277 202 list_for_each_entry_continue(entity, &rq->entities, list) { 278 203 if (drm_sched_entity_is_ready(entity)) { 204 + /* If we can't queue yet, preserve the current 205 + * entity in terms of fairness. 206 + */ 207 + if (!drm_sched_can_queue(sched, entity)) { 208 + spin_unlock(&rq->lock); 209 + return ERR_PTR(-ENOSPC); 210 + } 211 + 279 212 rq->current_entity = entity; 280 213 reinit_completion(&entity->entity_idle); 281 214 spin_unlock(&rq->lock); ··· 293 210 } 294 211 295 212 list_for_each_entry(entity, &rq->entities, list) { 296 - 297 213 if (drm_sched_entity_is_ready(entity)) { 214 + /* If we can't queue yet, preserve the current entity in 215 + * terms of fairness. 
216 + */ 217 + if (!drm_sched_can_queue(sched, entity)) { 218 + spin_unlock(&rq->lock); 219 + return ERR_PTR(-ENOSPC); 220 + } 221 + 298 222 rq->current_entity = entity; 299 223 reinit_completion(&entity->entity_idle); 300 224 spin_unlock(&rq->lock); ··· 320 230 /** 321 231 * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run 322 232 * 233 + * @sched: the gpu scheduler 323 234 * @rq: scheduler run queue to check. 324 235 * 325 - * Find oldest waiting ready entity, returns NULL if none found. 236 + * Find oldest waiting ready entity. 237 + * 238 + * Return an entity if one is found; return an error-pointer (!NULL) if an 239 + * entity was ready, but the scheduler had insufficient credits to accommodate 240 + * its job; return NULL, if no ready entity was found. 326 241 */ 327 242 static struct drm_sched_entity * 328 - drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) 243 + drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched, 244 + struct drm_sched_rq *rq) 329 245 { 330 246 struct rb_node *rb; 331 247 ··· 341 245 342 246 entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); 343 247 if (drm_sched_entity_is_ready(entity)) { 248 + /* If we can't queue yet, preserve the current entity in 249 + * terms of fairness. 
250 + */ 251 + if (!drm_sched_can_queue(sched, entity)) { 252 + spin_unlock(&rq->lock); 253 + return ERR_PTR(-ENOSPC); 254 + } 255 + 344 256 rq->current_entity = entity; 345 257 reinit_completion(&entity->entity_idle); 346 258 break; ··· 406 302 struct drm_sched_fence *s_fence = s_job->s_fence; 407 303 struct drm_gpu_scheduler *sched = s_fence->sched; 408 304 409 - atomic_dec(&sched->hw_rq_count); 305 + atomic_sub(s_job->credits, &sched->credit_count); 410 306 atomic_dec(sched->score); 411 307 412 308 trace_drm_sched_process_job(s_fence); ··· 629 525 &s_job->cb)) { 630 526 dma_fence_put(s_job->s_fence->parent); 631 527 s_job->s_fence->parent = NULL; 632 - atomic_dec(&sched->hw_rq_count); 528 + atomic_sub(s_job->credits, &sched->credit_count); 633 529 } else { 634 530 /* 635 531 * remove job from pending_list. ··· 690 586 list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { 691 587 struct dma_fence *fence = s_job->s_fence->parent; 692 588 693 - atomic_inc(&sched->hw_rq_count); 589 + atomic_add(s_job->credits, &sched->credit_count); 694 590 695 591 if (!full_recovery) 696 592 continue; ··· 771 667 * drm_sched_job_init - init a scheduler job 772 668 * @job: scheduler job to init 773 669 * @entity: scheduler entity to use 670 + * @credits: the number of credits this job contributes to the schedulers 671 + * credit limit 774 672 * @owner: job owner for debugging 775 673 * 776 674 * Refer to drm_sched_entity_push_job() documentation ··· 790 684 */ 791 685 int drm_sched_job_init(struct drm_sched_job *job, 792 686 struct drm_sched_entity *entity, 793 - void *owner) 687 + u32 credits, void *owner) 794 688 { 795 689 if (!entity->rq) { 796 690 /* This will most likely be followed by missing frames ··· 801 695 return -ENOENT; 802 696 } 803 697 698 + if (unlikely(!credits)) { 699 + pr_err("*ERROR* %s: credits cannot be 0!\n", __func__); 700 + return -EINVAL; 701 + } 702 + 804 703 job->entity = entity; 704 + job->credits = credits; 805 705 job->s_fence = 
drm_sched_fence_alloc(entity, owner); 806 706 if (!job->s_fence) 807 707 return -ENOMEM; ··· 1020 908 EXPORT_SYMBOL(drm_sched_job_cleanup); 1021 909 1022 910 /** 1023 - * drm_sched_can_queue -- Can we queue more to the hardware? 1024 - * @sched: scheduler instance 1025 - * 1026 - * Return true if we can push more jobs to the hw, otherwise false. 1027 - */ 1028 - static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched) 1029 - { 1030 - return atomic_read(&sched->hw_rq_count) < 1031 - sched->hw_submission_limit; 1032 - } 1033 - 1034 - /** 1035 911 * drm_sched_wakeup - Wake up the scheduler if it is ready to queue 1036 912 * @sched: scheduler instance 913 + * @entity: the scheduler entity 1037 914 * 1038 915 * Wake up the scheduler if we can queue jobs. 1039 916 */ ··· 1030 929 struct drm_sched_entity *entity) 1031 930 { 1032 931 if (drm_sched_entity_is_ready(entity)) 1033 - if (drm_sched_can_queue(sched)) 932 + if (drm_sched_can_queue(sched, entity)) 1034 933 drm_sched_run_job_queue(sched); 1035 934 } 1036 935 ··· 1039 938 * 1040 939 * @sched: scheduler instance 1041 940 * 1042 - * Returns the entity to process or NULL if none are found. 941 + * Return an entity to process or NULL if none are found. 942 + * 943 + * Note, that we break out of the for-loop when "entity" is non-null, which can 944 + * also be an error-pointer--this assures we don't process lower priority 945 + * run-queues. See comments in the respectively called functions. 1043 946 */ 1044 947 static struct drm_sched_entity * 1045 948 drm_sched_select_entity(struct drm_gpu_scheduler *sched) ··· 1051 946 struct drm_sched_entity *entity; 1052 947 int i; 1053 948 1054 - if (!drm_sched_can_queue(sched)) 1055 - return NULL; 1056 - 1057 949 /* Kernel run queue has higher priority than normal run queue*/ 1058 950 for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { 1059 951 entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? 
1060 - drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) : 1061 - drm_sched_rq_select_entity_rr(sched->sched_rq[i]); 952 + drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : 953 + drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); 1062 954 if (entity) 1063 955 break; 1064 956 } 1065 957 1066 - return entity; 958 + return IS_ERR(entity) ? NULL : entity; 1067 959 } 1068 960 1069 961 /** ··· 1196 1094 1197 1095 s_fence = sched_job->s_fence; 1198 1096 1199 - atomic_inc(&sched->hw_rq_count); 1097 + atomic_add(sched_job->credits, &sched->credit_count); 1200 1098 drm_sched_job_begin(sched_job); 1201 1099 1202 1100 trace_drm_run_job(sched_job, entity); ··· 1231 1129 * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is 1232 1130 * allocated and used 1233 1131 * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT 1234 - * @hw_submission: number of hw submissions that can be in flight 1132 + * @credit_limit: the number of credits this scheduler can hold from all jobs 1235 1133 * @hang_limit: number of times to allow a job to hang before dropping it 1236 1134 * @timeout: timeout value in jiffies for the scheduler 1237 1135 * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is ··· 1245 1143 int drm_sched_init(struct drm_gpu_scheduler *sched, 1246 1144 const struct drm_sched_backend_ops *ops, 1247 1145 struct workqueue_struct *submit_wq, 1248 - u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, 1146 + u32 num_rqs, u32 credit_limit, unsigned int hang_limit, 1249 1147 long timeout, struct workqueue_struct *timeout_wq, 1250 1148 atomic_t *score, const char *name, struct device *dev) 1251 1149 { 1252 1150 int i, ret; 1253 1151 1254 1152 sched->ops = ops; 1255 - sched->hw_submission_limit = hw_submission; 1153 + sched->credit_limit = credit_limit; 1256 1154 sched->name = name; 1257 1155 sched->timeout = timeout; 1258 1156 sched->timeout_wq = timeout_wq ? 
: system_wq; ··· 1301 1199 init_waitqueue_head(&sched->job_scheduled); 1302 1200 INIT_LIST_HEAD(&sched->pending_list); 1303 1201 spin_lock_init(&sched->job_list_lock); 1304 - atomic_set(&sched->hw_rq_count, 0); 1202 + atomic_set(&sched->credit_count, 0); 1305 1203 INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); 1306 1204 INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); 1307 1205 INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);

+1 -1

drivers/gpu/drm/v3d/v3d_gem.c

··· 418 418 job->file = file_priv; 419 419 420 420 ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], 421 - v3d_priv); 421 + 1, v3d_priv); 422 422 if (ret) 423 423 goto fail; 424 424

+22 -6

include/drm/gpu_scheduler.h

··· 321 321 * @sched: the scheduler instance on which this job is scheduled. 322 322 * @s_fence: contains the fences for the scheduling of job. 323 323 * @finish_cb: the callback for the finished fence. 324 + * @credits: the number of credits this job contributes to the scheduler 324 325 * @work: Helper to reschdeule job kill to different context. 325 326 * @id: a unique id assigned to each job scheduled on the scheduler. 326 327 * @karma: increment on every hang caused by this job. If this exceeds the hang ··· 340 339 struct list_head list; 341 340 struct drm_gpu_scheduler *sched; 342 341 struct drm_sched_fence *s_fence; 342 + 343 + u32 credits; 343 344 344 345 /* 345 346 * work is used only after finish_cb has been used and will not be ··· 466 463 * and it's time to clean it up. 467 464 */ 468 465 void (*free_job)(struct drm_sched_job *sched_job); 466 + 467 + /** 468 + * @update_job_credits: Called when the scheduler is considering this 469 + * job for execution. 470 + * 471 + * This callback returns the number of credits the job would take if 472 + * pushed to the hardware. Drivers may use this to dynamically update 473 + * the job's credit count. For instance, deduct the number of credits 474 + * for already signalled native fences. 475 + * 476 + * This callback is optional. 477 + */ 478 + u32 (*update_job_credits)(struct drm_sched_job *sched_job); 469 479 }; 470 480 471 481 /** 472 482 * struct drm_gpu_scheduler - scheduler instance-specific data 473 483 * 474 484 * @ops: backend operations provided by the driver. 475 - * @hw_submission_limit: the max size of the hardware queue. 485 + * @credit_limit: the credit limit of this scheduler 486 + * @credit_count: the current credit count of this scheduler 476 487 * @timeout: the time after which a job is removed from the scheduler. 477 488 * @name: name of the ring for which this scheduler is being used. 478 489 * @num_rqs: Number of run-queues. 
This is at most DRM_SCHED_PRIORITY_COUNT, ··· 495 478 * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler 496 479 * waits on this wait queue until all the scheduled jobs are 497 480 * finished. 498 - * @hw_rq_count: the number of jobs currently in the hardware queue. 499 481 * @job_id_count: used to assign unique id to the each job. 500 482 * @submit_wq: workqueue used to queue @work_run_job and @work_free_job 501 483 * @timeout_wq: workqueue used to queue @work_tdr ··· 518 502 */ 519 503 struct drm_gpu_scheduler { 520 504 const struct drm_sched_backend_ops *ops; 521 - uint32_t hw_submission_limit; 505 + u32 credit_limit; 506 + atomic_t credit_count; 522 507 long timeout; 523 508 const char *name; 524 509 u32 num_rqs; 525 510 struct drm_sched_rq **sched_rq; 526 511 wait_queue_head_t job_scheduled; 527 - atomic_t hw_rq_count; 528 512 atomic64_t job_id_count; 529 513 struct workqueue_struct *submit_wq; 530 514 struct workqueue_struct *timeout_wq; ··· 546 530 int drm_sched_init(struct drm_gpu_scheduler *sched, 547 531 const struct drm_sched_backend_ops *ops, 548 532 struct workqueue_struct *submit_wq, 549 - u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, 533 + u32 num_rqs, u32 credit_limit, unsigned int hang_limit, 550 534 long timeout, struct workqueue_struct *timeout_wq, 551 535 atomic_t *score, const char *name, struct device *dev); 552 536 553 537 void drm_sched_fini(struct drm_gpu_scheduler *sched); 554 538 int drm_sched_job_init(struct drm_sched_job *job, 555 539 struct drm_sched_entity *entity, 556 - void *owner); 540 + u32 credits, void *owner); 557 541 void drm_sched_job_arm(struct drm_sched_job *job); 558 542 int drm_sched_job_add_dependency(struct drm_sched_job *job, 559 543 struct dma_fence *fence);