drm/v3d: Refactor job management. · tjh.dev/kernel@a783a09

+47 -46

drivers/gpu/drm/v3d/v3d_drv.h

··· 67 67 68 68 struct work_struct overflow_mem_work; 69 69 70 - struct v3d_exec_info *bin_job; 71 - struct v3d_exec_info *render_job; 70 + struct v3d_bin_job *bin_job; 71 + struct v3d_render_job *render_job; 72 72 struct v3d_tfu_job *tfu_job; 73 73 74 74 struct v3d_queue_state queue[V3D_MAX_QUEUES]; ··· 117 117 struct drm_mm_node node; 118 118 119 119 /* List entry for the BO's position in 120 - * v3d_exec_info->unref_list 120 + * v3d_render_job->unref_list 121 121 */ 122 122 struct list_head unref_head; 123 123 }; ··· 157 157 struct v3d_job { 158 158 struct drm_sched_job base; 159 159 160 - struct v3d_exec_info *exec; 160 + struct kref refcount; 161 + 162 + struct v3d_dev *v3d; 163 + 164 + /* This is the array of BOs that were looked up at the start 165 + * of submission. 166 + */ 167 + struct drm_gem_object **bo; 168 + u32 bo_count; 161 169 162 170 /* An optional fence userspace can pass in for the job to depend on. */ 163 171 struct dma_fence *in_fence; 164 172 165 173 /* v3d fence to be signaled by IRQ handler when the job is complete. */ 166 174 struct dma_fence *irq_fence; 175 + 176 + /* scheduler fence for when the job is considered complete and 177 + * the BO reservations can be released. 178 + */ 179 + struct dma_fence *done_fence; 180 + 181 + /* Callback for the freeing of the job on refcount going to 0. */ 182 + void (*free)(struct kref *ref); 183 + }; 184 + 185 + struct v3d_bin_job { 186 + struct v3d_job base; 167 187 168 188 /* GPU virtual addresses of the start/end of the CL job. */ 169 189 u32 start, end; 170 190 171 191 u32 timedout_ctca, timedout_ctra; 172 - }; 173 192 174 - struct v3d_exec_info { 175 - struct v3d_dev *v3d; 176 - 177 - struct v3d_job bin, render; 178 - 179 - /* Fence for when the scheduler considers the binner to be 180 - * done, for render to depend on. 181 - */ 182 - struct dma_fence *bin_done_fence; 183 - 184 - /* Fence for when the scheduler considers the render to be 185 - * done, for when the BOs reservations should be complete. 186 - */ 187 - struct dma_fence *render_done_fence; 188 - 189 - struct kref refcount; 190 - 191 - /* This is the array of BOs that were looked up at the start of exec. */ 192 - struct drm_gem_object **bo; 193 - u32 bo_count; 194 - 195 - /* List of overflow BOs used in the job that need to be 196 - * released once the job is complete. 197 - */ 198 - struct list_head unref_list; 193 + /* Corresponding render job, for attaching our overflow memory. */ 194 + struct v3d_render_job *render; 199 195 200 196 /* Submitted tile memory allocation start/size, tile state. */ 201 197 u32 qma, qms, qts; 202 198 }; 203 199 200 + struct v3d_render_job { 201 + struct v3d_job base; 202 + 203 + /* Optional fence for the binner, to depend on before starting 204 + * our job. 205 + */ 206 + struct dma_fence *bin_done_fence; 207 + 208 + /* GPU virtual addresses of the start/end of the CL job. */ 209 + u32 start, end; 210 + 211 + u32 timedout_ctca, timedout_ctra; 212 + 213 + /* List of overflow BOs used in the job that need to be 214 + * released once the job is complete. 215 + */ 216 + struct list_head unref_list; 217 + }; 218 + 204 219 struct v3d_tfu_job { 205 - struct drm_sched_job base; 220 + struct v3d_job base; 206 221 207 222 struct drm_v3d_submit_tfu args; 208 - 209 - /* An optional fence userspace can pass in for the job to depend on. */ 210 - struct dma_fence *in_fence; 211 - 212 - /* v3d fence to be signaled by IRQ handler when the job is complete. */ 213 - struct dma_fence *irq_fence; 214 - 215 - struct v3d_dev *v3d; 216 - 217 - struct kref refcount; 218 - 219 - /* This is the array of BOs that were looked up at the start of exec. */ 220 - struct drm_gem_object *bo[4]; 221 223 }; 222 224 223 225 /** ··· 285 283 struct drm_file *file_priv); 286 284 int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, 287 285 struct drm_file *file_priv); 288 - void v3d_exec_put(struct v3d_exec_info *exec); 289 - void v3d_tfu_job_put(struct v3d_tfu_job *exec); 286 + void v3d_job_put(struct v3d_job *job); 290 287 void v3d_reset(struct v3d_dev *v3d); 291 288 void v3d_invalidate_caches(struct v3d_dev *v3d); 292 289

+191 -184

drivers/gpu/drm/v3d/v3d_gem.c

··· 193 193 v3d_invalidate_slices(v3d, 0); 194 194 } 195 195 196 - static void 197 - v3d_attach_object_fences(struct drm_gem_object **bos, int bo_count, 198 - struct dma_fence *fence) 199 - { 200 - int i; 201 - 202 - for (i = 0; i < bo_count; i++) { 203 - /* XXX: Use shared fences for read-only objects. */ 204 - reservation_object_add_excl_fence(bos[i]->resv, fence); 205 - } 206 - } 207 - 208 196 /* Takes the reservation lock on all the BOs being referenced, so that 209 197 * at queue submit time we can update the reservations. 210 198 * ··· 227 239 } 228 240 229 241 /** 230 - * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects 242 + * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects 231 243 * referenced by the job. 232 244 * @dev: DRM device 233 245 * @file_priv: DRM file for this fd 234 - * @exec: V3D job being set up 246 + * @job: V3D job being set up 235 247 * 236 248 * The command validator needs to reference BOs by their index within 237 249 * the submitted job's BO list. This does the validation of the job's ··· 241 253 * failure, because that will happen at v3d_exec_cleanup() time. 242 254 */ 243 255 static int 244 - v3d_cl_lookup_bos(struct drm_device *dev, 245 - struct drm_file *file_priv, 246 - struct drm_v3d_submit_cl *args, 247 - struct v3d_exec_info *exec) 256 + v3d_lookup_bos(struct drm_device *dev, 257 + struct drm_file *file_priv, 258 + struct v3d_job *job, 259 + u64 bo_handles, 260 + u32 bo_count) 248 261 { 249 262 u32 *handles; 250 263 int ret = 0; 251 264 int i; 252 265 253 - exec->bo_count = args->bo_handle_count; 266 + job->bo_count = bo_count; 254 267 255 - if (!exec->bo_count) { 268 + if (!job->bo_count) { 256 269 /* See comment on bo_index for why we have to check 257 270 * this. 258 271 */ ··· 261 272 return -EINVAL; 262 273 } 263 274 264 - exec->bo = kvmalloc_array(exec->bo_count, 265 - sizeof(struct drm_gem_cma_object *), 266 - GFP_KERNEL | __GFP_ZERO); 267 - if (!exec->bo) { 275 + job->bo = kvmalloc_array(job->bo_count, 276 + sizeof(struct drm_gem_cma_object *), 277 + GFP_KERNEL | __GFP_ZERO); 278 + if (!job->bo) { 268 279 DRM_DEBUG("Failed to allocate validated BO pointers\n"); 269 280 return -ENOMEM; 270 281 } 271 282 272 - handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL); 283 + handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL); 273 284 if (!handles) { 274 285 ret = -ENOMEM; 275 286 DRM_DEBUG("Failed to allocate incoming GEM handles\n"); ··· 277 288 } 278 289 279 290 if (copy_from_user(handles, 280 - (void __user *)(uintptr_t)args->bo_handles, 281 - exec->bo_count * sizeof(u32))) { 291 + (void __user *)(uintptr_t)bo_handles, 292 + job->bo_count * sizeof(u32))) { 282 293 ret = -EFAULT; 283 294 DRM_DEBUG("Failed to copy in GEM handles\n"); 284 295 goto fail; 285 296 } 286 297 287 298 spin_lock(&file_priv->table_lock); 288 - for (i = 0; i < exec->bo_count; i++) { 299 + for (i = 0; i < job->bo_count; i++) { 289 300 struct drm_gem_object *bo = idr_find(&file_priv->object_idr, 290 301 handles[i]); 291 302 if (!bo) { ··· 296 307 goto fail; 297 308 } 298 309 drm_gem_object_get(bo); 299 - exec->bo[i] = bo; 310 + job->bo[i] = bo; 300 311 } 301 312 spin_unlock(&file_priv->table_lock); 302 313 ··· 306 317 } 307 318 308 319 static void 309 - v3d_exec_cleanup(struct kref *ref) 320 + v3d_job_free(struct kref *ref) 310 321 { 311 - struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info, 312 - refcount); 313 - struct v3d_dev *v3d = exec->v3d; 314 - unsigned int i; 315 - struct v3d_bo *bo, *save; 322 + struct v3d_job *job = container_of(ref, struct v3d_job, refcount); 323 + int i; 316 324 317 - dma_fence_put(exec->bin.in_fence); 318 - dma_fence_put(exec->render.in_fence); 319 - 320 - dma_fence_put(exec->bin.irq_fence); 321 - dma_fence_put(exec->render.irq_fence); 322 - 323 - dma_fence_put(exec->bin_done_fence); 324 - dma_fence_put(exec->render_done_fence); 325 - 326 - for (i = 0; i < exec->bo_count; i++) 327 - drm_gem_object_put_unlocked(exec->bo[i]); 328 - kvfree(exec->bo); 329 - 330 - list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) { 331 - drm_gem_object_put_unlocked(&bo->base.base); 332 - } 333 - 334 - pm_runtime_mark_last_busy(v3d->dev); 335 - pm_runtime_put_autosuspend(v3d->dev); 336 - 337 - kfree(exec); 338 - } 339 - 340 - void v3d_exec_put(struct v3d_exec_info *exec) 341 - { 342 - kref_put(&exec->refcount, v3d_exec_cleanup); 343 - } 344 - 345 - static void 346 - v3d_tfu_job_cleanup(struct kref *ref) 347 - { 348 - struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job, 349 - refcount); 350 - struct v3d_dev *v3d = job->v3d; 351 - unsigned int i; 352 - 353 - dma_fence_put(job->in_fence); 354 - dma_fence_put(job->irq_fence); 355 - 356 - for (i = 0; i < ARRAY_SIZE(job->bo); i++) { 325 + for (i = 0; i < job->bo_count; i++) { 357 326 if (job->bo[i]) 358 327 drm_gem_object_put_unlocked(job->bo[i]); 359 328 } 329 + kvfree(job->bo); 360 330 361 - pm_runtime_mark_last_busy(v3d->dev); 362 - pm_runtime_put_autosuspend(v3d->dev); 331 + dma_fence_put(job->in_fence); 332 + dma_fence_put(job->irq_fence); 333 + dma_fence_put(job->done_fence); 334 + 335 + pm_runtime_mark_last_busy(job->v3d->dev); 336 + pm_runtime_put_autosuspend(job->v3d->dev); 363 337 364 338 kfree(job); 365 339 } 366 340 367 - void v3d_tfu_job_put(struct v3d_tfu_job *job) 341 + static void 342 + v3d_render_job_free(struct kref *ref) 368 343 { 369 - kref_put(&job->refcount, v3d_tfu_job_cleanup); 344 + struct v3d_render_job *job = container_of(ref, struct v3d_render_job, 345 + base.refcount); 346 + struct v3d_bo *bo, *save; 347 + 348 + list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) { 349 + drm_gem_object_put_unlocked(&bo->base.base); 350 + } 351 + 352 + v3d_job_free(ref); 353 + } 354 + 355 + void v3d_job_put(struct v3d_job *job) 356 + { 357 + kref_put(&job->refcount, job->free); 370 358 } 371 359 372 360 int ··· 379 413 return ret; 380 414 } 381 415 416 + static int 417 + v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, 418 + struct v3d_job *job, void (*free)(struct kref *ref), 419 + u32 in_sync) 420 + { 421 + int ret; 422 + 423 + job->v3d = v3d; 424 + job->free = free; 425 + 426 + ret = pm_runtime_get_sync(v3d->dev); 427 + if (ret < 0) 428 + return ret; 429 + 430 + ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, &job->in_fence); 431 + if (ret == -EINVAL) { 432 + pm_runtime_put_autosuspend(v3d->dev); 433 + return ret; 434 + } 435 + 436 + kref_init(&job->refcount); 437 + 438 + return 0; 439 + } 440 + 441 + static int 442 + v3d_push_job(struct v3d_file_priv *v3d_priv, 443 + struct v3d_job *job, enum v3d_queue queue) 444 + { 445 + int ret; 446 + 447 + ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], 448 + v3d_priv); 449 + if (ret) 450 + return ret; 451 + 452 + job->done_fence = dma_fence_get(&job->base.s_fence->finished); 453 + 454 + /* put by scheduler job completion */ 455 + kref_get(&job->refcount); 456 + 457 + drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[queue]); 458 + 459 + return 0; 460 + } 461 + 462 + static void 463 + v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, 464 + struct v3d_job *job, 465 + struct ww_acquire_ctx *acquire_ctx, 466 + u32 out_sync) 467 + { 468 + struct drm_syncobj *sync_out; 469 + int i; 470 + 471 + for (i = 0; i < job->bo_count; i++) { 472 + /* XXX: Use shared fences for read-only objects. */ 473 + reservation_object_add_excl_fence(job->bo[i]->resv, 474 + job->done_fence); 475 + } 476 + 477 + drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); 478 + 479 + /* Update the return sync object for the job */ 480 + sync_out = drm_syncobj_find(file_priv, out_sync); 481 + if (sync_out) { 482 + drm_syncobj_replace_fence(sync_out, job->done_fence); 483 + drm_syncobj_put(sync_out); 484 + } 485 + } 486 + 382 487 /** 383 488 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. 384 489 * @dev: DRM device ··· 469 432 struct v3d_dev *v3d = to_v3d_dev(dev); 470 433 struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 471 434 struct drm_v3d_submit_cl *args = data; 472 - struct v3d_exec_info *exec; 435 + struct v3d_bin_job *bin = NULL; 436 + struct v3d_render_job *render; 473 437 struct ww_acquire_ctx acquire_ctx; 474 - struct drm_syncobj *sync_out; 475 438 int ret = 0; 476 439 477 440 trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); ··· 481 444 return -EINVAL; 482 445 } 483 446 484 - exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); 485 - if (!exec) 447 + render = kcalloc(1, sizeof(*render), GFP_KERNEL); 448 + if (!render) 486 449 return -ENOMEM; 487 450 488 - ret = pm_runtime_get_sync(v3d->dev); 489 - if (ret < 0) { 490 - kfree(exec); 451 + render->start = args->rcl_start; 452 + render->end = args->rcl_end; 453 + INIT_LIST_HEAD(&render->unref_list); 454 + 455 + ret = v3d_job_init(v3d, file_priv, &render->base, 456 + v3d_render_job_free, args->in_sync_rcl); 457 + if (ret) { 458 + kfree(render); 491 459 return ret; 492 460 } 493 461 494 - kref_init(&exec->refcount); 462 + if (args->bcl_start != args->bcl_end) { 463 + bin = kcalloc(1, sizeof(*bin), GFP_KERNEL); 464 + if (!bin) 465 + return -ENOMEM; 495 466 496 - ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl, 497 - 0, 0, &exec->bin.in_fence); 498 - if (ret == -EINVAL) 499 - goto fail; 467 + ret = v3d_job_init(v3d, file_priv, &bin->base, 468 + v3d_job_free, args->in_sync_bcl); 469 + if (ret) { 470 + v3d_job_put(&render->base); 471 + return ret; 472 + } 500 473 501 - ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl, 502 - 0, 0, &exec->render.in_fence); 503 - if (ret == -EINVAL) 504 - goto fail; 474 + bin->start = args->bcl_start; 475 + bin->end = args->bcl_end; 476 + bin->qma = args->qma; 477 + bin->qms = args->qms; 478 + bin->qts = args->qts; 479 + bin->render = render; 480 + } 505 481 506 - exec->qma = args->qma; 507 - exec->qms = args->qms; 508 - exec->qts = args->qts; 509 - exec->bin.exec = exec; 510 - exec->bin.start = args->bcl_start; 511 - exec->bin.end = args->bcl_end; 512 - exec->render.exec = exec; 513 - exec->render.start = args->rcl_start; 514 - exec->render.end = args->rcl_end; 515 - exec->v3d = v3d; 516 - INIT_LIST_HEAD(&exec->unref_list); 517 - 518 - ret = v3d_cl_lookup_bos(dev, file_priv, args, exec); 482 + ret = v3d_lookup_bos(dev, file_priv, &render->base, 483 + args->bo_handles, args->bo_handle_count); 519 484 if (ret) 520 485 goto fail; 521 486 522 - ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count, 487 + ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count, 523 488 &acquire_ctx); 524 489 if (ret) 525 490 goto fail; 526 491 527 492 mutex_lock(&v3d->sched_lock); 528 - if (exec->bin.start != exec->bin.end) { 529 - ret = drm_sched_job_init(&exec->bin.base, 530 - &v3d_priv->sched_entity[V3D_BIN], 531 - v3d_priv); 493 + if (bin) { 494 + ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN); 532 495 if (ret) 533 496 goto fail_unreserve; 534 497 535 - exec->bin_done_fence = 536 - dma_fence_get(&exec->bin.base.s_fence->finished); 537 - 538 - kref_get(&exec->refcount); /* put by scheduler job completion */ 539 - drm_sched_entity_push_job(&exec->bin.base, 540 - &v3d_priv->sched_entity[V3D_BIN]); 498 + render->bin_done_fence = dma_fence_get(bin->base.done_fence); 541 499 } 542 500 543 - ret = drm_sched_job_init(&exec->render.base, 544 - &v3d_priv->sched_entity[V3D_RENDER], 545 - v3d_priv); 501 + ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER); 546 502 if (ret) 547 503 goto fail_unreserve; 548 - 549 - exec->render_done_fence = 550 - dma_fence_get(&exec->render.base.s_fence->finished); 551 - 552 - kref_get(&exec->refcount); /* put by scheduler job completion */ 553 - drm_sched_entity_push_job(&exec->render.base, 554 - &v3d_priv->sched_entity[V3D_RENDER]); 555 504 mutex_unlock(&v3d->sched_lock); 556 505 557 - v3d_attach_object_fences(exec->bo, exec->bo_count, 558 - exec->render_done_fence); 506 + v3d_attach_fences_and_unlock_reservation(file_priv, 507 + &render->base, &acquire_ctx, 508 + args->out_sync); 559 509 560 - drm_gem_unlock_reservations(exec->bo, exec->bo_count, &acquire_ctx); 561 - 562 - /* Update the return sync object for the */ 563 - sync_out = drm_syncobj_find(file_priv, args->out_sync); 564 - if (sync_out) { 565 - drm_syncobj_replace_fence(sync_out, exec->render_done_fence); 566 - drm_syncobj_put(sync_out); 567 - } 568 - 569 - v3d_exec_put(exec); 510 + if (bin) 511 + v3d_job_put(&bin->base); 512 + v3d_job_put(&render->base); 570 513 571 514 return 0; 572 515 573 516 fail_unreserve: 574 517 mutex_unlock(&v3d->sched_lock); 575 - drm_gem_unlock_reservations(exec->bo, exec->bo_count, &acquire_ctx); 518 + drm_gem_unlock_reservations(render->base.bo, 519 + render->base.bo_count, &acquire_ctx); 576 520 fail: 577 - v3d_exec_put(exec); 521 + if (bin) 522 + v3d_job_put(&bin->base); 523 + v3d_job_put(&render->base); 578 524 579 525 return ret; 580 526 } ··· 580 560 struct drm_v3d_submit_tfu *args = data; 581 561 struct v3d_tfu_job *job; 582 562 struct ww_acquire_ctx acquire_ctx; 583 - struct drm_syncobj *sync_out; 584 - struct dma_fence *sched_done_fence; 585 563 int ret = 0; 586 - int bo_count; 587 564 588 565 trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia); 589 566 ··· 588 571 if (!job) 589 572 return -ENOMEM; 590 573 591 - ret = pm_runtime_get_sync(v3d->dev); 592 - if (ret < 0) { 574 + ret = v3d_job_init(v3d, file_priv, &job->base, 575 + v3d_job_free, args->in_sync); 576 + if (ret) { 593 577 kfree(job); 594 578 return ret; 595 579 } 596 580 597 - kref_init(&job->refcount); 598 - 599 - ret = drm_syncobj_find_fence(file_priv, args->in_sync, 600 - 0, 0, &job->in_fence); 601 - if (ret == -EINVAL) 602 - goto fail; 581 + job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles), 582 + sizeof(*job->base.bo), GFP_KERNEL); 583 + if (!job->base.bo) { 584 + v3d_job_put(&job->base); 585 + return -ENOMEM; 586 + } 603 587 604 588 job->args = *args; 605 - job->v3d = v3d; 606 589 607 590 spin_lock(&file_priv->table_lock); 608 - for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) { 591 + for (job->base.bo_count = 0; 592 + job->base.bo_count < ARRAY_SIZE(args->bo_handles); 593 + job->base.bo_count++) { 609 594 struct drm_gem_object *bo; 610 595 611 - if (!args->bo_handles[bo_count]) 596 + if (!args->bo_handles[job->base.bo_count]) 612 597 break; 613 598 614 599 bo = idr_find(&file_priv->object_idr, 615 - args->bo_handles[bo_count]); 600 + args->bo_handles[job->base.bo_count]); 616 601 if (!bo) { 617 602 DRM_DEBUG("Failed to look up GEM BO %d: %d\n", 618 - bo_count, args->bo_handles[bo_count]); 603 + job->base.bo_count, 604 + args->bo_handles[job->base.bo_count]); 619 605 ret = -ENOENT; 620 606 spin_unlock(&file_priv->table_lock); 621 607 goto fail; 622 608 } 623 609 drm_gem_object_get(bo); 624 - job->bo[bo_count] = bo; 610 + job->base.bo[job->base.bo_count] = bo; 625 611 } 626 612 spin_unlock(&file_priv->table_lock); 627 613 628 - ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx); 614 + ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count, 615 + &acquire_ctx); 629 616 if (ret) 630 617 goto fail; 631 618 632 619 mutex_lock(&v3d->sched_lock); 633 - ret = drm_sched_job_init(&job->base, 634 - &v3d_priv->sched_entity[V3D_TFU], 635 - v3d_priv); 620 + ret = v3d_push_job(v3d_priv, &job->base, V3D_TFU); 636 621 if (ret) 637 622 goto fail_unreserve; 638 - 639 - sched_done_fence = dma_fence_get(&job->base.s_fence->finished); 640 - 641 - kref_get(&job->refcount); /* put by scheduler job completion */ 642 - drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]); 643 623 mutex_unlock(&v3d->sched_lock); 644 624 645 - v3d_attach_object_fences(job->bo, bo_count, sched_done_fence); 625 + v3d_attach_fences_and_unlock_reservation(file_priv, 626 + &job->base, &acquire_ctx, 627 + args->out_sync); 646 628 647 - drm_gem_unlock_reservations(job->bo, bo_count, &acquire_ctx); 648 - 649 - /* Update the return sync object */ 650 - sync_out = drm_syncobj_find(file_priv, args->out_sync); 651 - if (sync_out) { 652 - drm_syncobj_replace_fence(sync_out, sched_done_fence); 653 - drm_syncobj_put(sync_out); 654 - } 655 - dma_fence_put(sched_done_fence); 656 - 657 - v3d_tfu_job_put(job); 629 + v3d_job_put(&job->base); 658 630 659 631 return 0; 660 632 661 633 fail_unreserve: 662 634 mutex_unlock(&v3d->sched_lock); 663 - drm_gem_unlock_reservations(job->bo, bo_count, &acquire_ctx); 635 + drm_gem_unlock_reservations(job->base.bo, job->base.bo_count, 636 + &acquire_ctx); 664 637 fail: 665 - v3d_tfu_job_put(job); 638 + v3d_job_put(&job->base); 666 639 667 640 return ret; 668 641 } ··· 710 703 711 704 v3d_sched_fini(v3d); 712 705 713 - /* Waiting for exec to finish would need to be done before 706 + /* Waiting for jobs to finish would need to be done before 714 707 * unregistering V3D. 715 708 */ 716 709 WARN_ON(v3d->bin_job);

+4 -4

drivers/gpu/drm/v3d/v3d_irq.c

··· 62 62 } 63 63 64 64 drm_gem_object_get(obj); 65 - list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list); 65 + list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list); 66 66 spin_unlock_irqrestore(&v3d->job_lock, irqflags); 67 67 68 68 V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT); ··· 96 96 97 97 if (intsts & V3D_INT_FLDONE) { 98 98 struct v3d_fence *fence = 99 - to_v3d_fence(v3d->bin_job->bin.irq_fence); 99 + to_v3d_fence(v3d->bin_job->base.irq_fence); 100 100 101 101 trace_v3d_bcl_irq(&v3d->drm, fence->seqno); 102 102 dma_fence_signal(&fence->base); ··· 105 105 106 106 if (intsts & V3D_INT_FRDONE) { 107 107 struct v3d_fence *fence = 108 - to_v3d_fence(v3d->render_job->render.irq_fence); 108 + to_v3d_fence(v3d->render_job->base.irq_fence); 109 109 110 110 trace_v3d_rcl_irq(&v3d->drm, fence->seqno); 111 111 dma_fence_signal(&fence->base); ··· 141 141 142 142 if (intsts & V3D_HUB_INT_TFUC) { 143 143 struct v3d_fence *fence = 144 - to_v3d_fence(v3d->tfu_job->irq_fence); 144 + to_v3d_fence(v3d->tfu_job->base.irq_fence); 145 145 146 146 trace_v3d_tfu_irq(&v3d->drm, fence->seqno); 147 147 dma_fence_signal(&fence->base);

+164 -121

drivers/gpu/drm/v3d/v3d_sched.c

··· 30 30 return container_of(sched_job, struct v3d_job, base); 31 31 } 32 32 33 + static struct v3d_bin_job * 34 + to_bin_job(struct drm_sched_job *sched_job) 35 + { 36 + return container_of(sched_job, struct v3d_bin_job, base.base); 37 + } 38 + 39 + static struct v3d_render_job * 40 + to_render_job(struct drm_sched_job *sched_job) 41 + { 42 + return container_of(sched_job, struct v3d_render_job, base.base); 43 + } 44 + 33 45 static struct v3d_tfu_job * 34 46 to_tfu_job(struct drm_sched_job *sched_job) 35 47 { 36 - return container_of(sched_job, struct v3d_tfu_job, base); 48 + return container_of(sched_job, struct v3d_tfu_job, base.base); 37 49 } 38 50 39 51 static void ··· 54 42 struct v3d_job *job = to_v3d_job(sched_job); 55 43 56 44 drm_sched_job_cleanup(sched_job); 57 - 58 - v3d_exec_put(job->exec); 59 - } 60 - 61 - static void 62 - v3d_tfu_job_free(struct drm_sched_job *sched_job) 63 - { 64 - struct v3d_tfu_job *job = to_tfu_job(sched_job); 65 - 66 - drm_sched_job_cleanup(sched_job); 67 - 68 - v3d_tfu_job_put(job); 45 + v3d_job_put(job); 69 46 } 70 47 71 48 /** 72 - * Returns the fences that the bin or render job depends on, one by one. 73 - * v3d_job_run() won't be called until all of them have been signaled. 49 + * Returns the fences that the job depends on, one by one. 50 + * 51 + * If placed in the scheduler's .dependency method, the corresponding 52 + * .run_job won't be called until all of them have been signaled. 74 53 */ 75 54 static struct dma_fence * 76 55 v3d_job_dependency(struct drm_sched_job *sched_job, 77 56 struct drm_sched_entity *s_entity) 78 57 { 79 58 struct v3d_job *job = to_v3d_job(sched_job); 80 - struct v3d_exec_info *exec = job->exec; 81 - enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; 82 59 struct dma_fence *fence; 83 60 84 61 fence = job->in_fence; 85 62 if (fence) { 86 63 job->in_fence = NULL; 87 64 return fence; 88 - } 89 - 90 - if (q == V3D_RENDER) { 91 - /* If we had a bin job, the render job definitely depends on 92 - * it. We first have to wait for bin to be scheduled, so that 93 - * its done_fence is created. 94 - */ 95 - fence = exec->bin_done_fence; 96 - if (fence) { 97 - exec->bin_done_fence = NULL; 98 - return fence; 99 - } 100 65 } 101 66 102 67 /* XXX: Wait on a fence for switching the GMP if necessary, 103 68 * and then do so. 104 69 */ 105 70 106 - return fence; 107 - } 108 - 109 - /** 110 - * Returns the fences that the TFU job depends on, one by one. 111 - * v3d_tfu_job_run() won't be called until all of them have been 112 - * signaled. 113 - */ 114 - static struct dma_fence * 115 - v3d_tfu_job_dependency(struct drm_sched_job *sched_job, 116 - struct drm_sched_entity *s_entity) 117 - { 118 - struct v3d_tfu_job *job = to_tfu_job(sched_job); 119 - struct dma_fence *fence; 120 - 121 - fence = job->in_fence; 122 - if (fence) { 123 - job->in_fence = NULL; 124 - return fence; 125 - } 126 - 127 71 return NULL; 128 72 } 129 73 130 - static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job) 74 + /** 75 + * Returns the fences that the render job depends on, one by one. 76 + * v3d_job_run() won't be called until all of them have been signaled. 77 + */ 78 + static struct dma_fence * 79 + v3d_render_job_dependency(struct drm_sched_job *sched_job, 80 + struct drm_sched_entity *s_entity) 131 81 { 132 - struct v3d_job *job = to_v3d_job(sched_job); 133 - struct v3d_exec_info *exec = job->exec; 134 - enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; 135 - struct v3d_dev *v3d = exec->v3d; 82 + struct v3d_render_job *job = to_render_job(sched_job); 83 + struct dma_fence *fence; 84 + 85 + fence = v3d_job_dependency(sched_job, s_entity); 86 + if (fence) 87 + return fence; 88 + 89 + /* If we had a bin job, the render job definitely depends on 90 + * it. We first have to wait for bin to be scheduled, so that 91 + * its done_fence is created. 92 + */ 93 + fence = job->bin_done_fence; 94 + if (fence) { 95 + job->bin_done_fence = NULL; 96 + return fence; 97 + } 98 + 99 + return fence; 100 + } 101 + 102 + static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) 103 + { 104 + struct v3d_bin_job *job = to_bin_job(sched_job); 105 + struct v3d_dev *v3d = job->base.v3d; 136 106 struct drm_device *dev = &v3d->drm; 137 107 struct dma_fence *fence; 138 108 unsigned long irqflags; 139 109 140 - if (unlikely(job->base.s_fence->finished.error)) 110 + if (unlikely(job->base.base.s_fence->finished.error)) 141 111 return NULL; 142 112 143 113 /* Lock required around bin_job update vs 144 114 * v3d_overflow_mem_work(). 145 115 */ 146 116 spin_lock_irqsave(&v3d->job_lock, irqflags); 147 - if (q == V3D_BIN) { 148 - v3d->bin_job = job->exec; 149 - 150 - /* Clear out the overflow allocation, so we don't 151 - * reuse the overflow attached to a previous job. 152 - */ 153 - V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); 154 - } else { 155 - v3d->render_job = job->exec; 156 - } 117 + v3d->bin_job = job; 118 + /* Clear out the overflow allocation, so we don't 119 + * reuse the overflow attached to a previous job. 120 + */ 121 + V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); 157 122 spin_unlock_irqrestore(&v3d->job_lock, irqflags); 158 123 159 - /* Can we avoid this flush when q==RENDER? We need to be 160 - * careful of scheduling, though -- imagine job0 rendering to 161 - * texture and job1 reading, and them being executed as bin0, 162 - * bin1, render0, render1, so that render1's flush at bin time 163 - * wasn't enough. 164 - */ 165 124 v3d_invalidate_caches(v3d); 166 125 167 - fence = v3d_fence_create(v3d, q); 126 + fence = v3d_fence_create(v3d, V3D_BIN); 168 127 if (IS_ERR(fence)) 169 128 return NULL; 170 129 171 - if (job->irq_fence) 172 - dma_fence_put(job->irq_fence); 173 - job->irq_fence = dma_fence_get(fence); 130 + if (job->base.irq_fence) 131 + dma_fence_put(job->base.irq_fence); 132 + job->base.irq_fence = dma_fence_get(fence); 174 133 175 - trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno, 134 + trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno, 176 135 job->start, job->end); 177 - 178 - if (q == V3D_BIN) { 179 - if (exec->qma) { 180 - V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma); 181 - V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms); 182 - } 183 - if (exec->qts) { 184 - V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, 185 - V3D_CLE_CT0QTS_ENABLE | 186 - exec->qts); 187 - } 188 - } else { 189 - /* XXX: Set the QCFG */ 190 - } 191 136 192 137 /* Set the current and end address of the control list. 193 138 * Writing the end register is what starts the job. 194 139 */ 195 - V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start); 196 - V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end); 140 + if (job->qma) { 141 + V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma); 142 + V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms); 143 + } 144 + if (job->qts) { 145 + V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, 146 + V3D_CLE_CT0QTS_ENABLE | 147 + job->qts); 148 + } 149 + V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start); 150 + V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end); 151 + 152 + return fence; 153 + } 154 + 155 + static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) 156 + { 157 + struct v3d_render_job *job = to_render_job(sched_job); 158 + struct v3d_dev *v3d = job->base.v3d; 159 + struct drm_device *dev = &v3d->drm; 160 + struct dma_fence *fence; 161 + 162 + if (unlikely(job->base.base.s_fence->finished.error)) 163 + return NULL; 164 + 165 + v3d->render_job = job; 166 + 167 + /* Can we avoid this flush? We need to be careful of 168 + * scheduling, though -- imagine job0 rendering to texture and 169 + * job1 reading, and them being executed as bin0, bin1, 170 + * render0, render1, so that render1's flush at bin time 171 + * wasn't enough. 172 + */ 173 + v3d_invalidate_caches(v3d); 174 + 175 + fence = v3d_fence_create(v3d, V3D_RENDER); 176 + if (IS_ERR(fence)) 177 + return NULL; 178 + 179 + if (job->base.irq_fence) 180 + dma_fence_put(job->base.irq_fence); 181 + job->base.irq_fence = dma_fence_get(fence); 182 + 183 + trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno, 184 + job->start, job->end); 185 + 186 + /* XXX: Set the QCFG */ 187 + 188 + /* Set the current and end address of the control list. 189 + * Writing the end register is what starts the job. 190 + */ 191 + V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start); 192 + V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end); 197 193 198 194 return fence; 199 195 } ··· 210 190 v3d_tfu_job_run(struct drm_sched_job *sched_job) 211 191 { 212 192 struct v3d_tfu_job *job = to_tfu_job(sched_job); 213 - struct v3d_dev *v3d = job->v3d; 193 + struct v3d_dev *v3d = job->base.v3d; 214 194 struct drm_device *dev = &v3d->drm; 215 195 struct dma_fence *fence; 216 196 ··· 219 199 return NULL; 220 200 221 201 v3d->tfu_job = job; 222 - if (job->irq_fence) 223 - dma_fence_put(job->irq_fence); 224 - job->irq_fence = dma_fence_get(fence); 202 + if (job->base.irq_fence) 203 + dma_fence_put(job->base.irq_fence); 204 + job->base.irq_fence = dma_fence_get(fence); 225 205 226 206 trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno); 227 207 ··· 271 251 mutex_unlock(&v3d->reset_lock); 272 252 } 273 253 254 + /* If the current address or return address have changed, then the GPU 255 + * has probably made progress and we should delay the reset. This 256 + * could fail if the GPU got in an infinite loop in the CL, but that 257 + * is pretty unlikely outside of an i-g-t testcase. 258 + */ 274 259 static void 275 - v3d_job_timedout(struct drm_sched_job *sched_job) 260 + v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, 261 + u32 *timedout_ctca, u32 *timedout_ctra) 276 262 { 277 263 struct v3d_job *job = to_v3d_job(sched_job); 278 - struct v3d_exec_info *exec = job->exec; 279 - struct v3d_dev *v3d = exec->v3d; 280 - enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER; 281 - u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q)); 282 - u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q)); 264 + struct v3d_dev *v3d = job->v3d; 265 + u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); 266 + u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); 283 267 284 - /* If the current address or return address have changed, then 285 - * the GPU has probably made progress and we should delay the 286 - * reset. This could fail if the GPU got in an infinite loop 287 - * in the CL, but that is pretty unlikely outside of an i-g-t 288 - * testcase. 289 - */ 290 - if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) { 291 - job->timedout_ctca = ctca; 292 - job->timedout_ctra = ctra; 268 + if (*timedout_ctca != ctca || *timedout_ctra != ctra) { 269 + *timedout_ctca = ctca; 270 + *timedout_ctra = ctra; 293 271 return; 294 272 } 295 273 ··· 295 277 } 296 278 297 279 static void 280 + v3d_bin_job_timedout(struct drm_sched_job *sched_job) 281 + { 282 + struct v3d_bin_job *job = to_bin_job(sched_job); 283 + 284 + v3d_cl_job_timedout(sched_job, V3D_BIN, 285 + &job->timedout_ctca, &job->timedout_ctra); 286 + } 287 + 288 + static void 289 + v3d_render_job_timedout(struct drm_sched_job *sched_job) 290 + { 291 + struct v3d_render_job *job = to_render_job(sched_job); 292 + 293 + v3d_cl_job_timedout(sched_job, V3D_RENDER, 294 + &job->timedout_ctca, &job->timedout_ctra); 295 + } 296 + 297 + static void 298 298 v3d_tfu_job_timedout(struct drm_sched_job *sched_job) 299 299 { 300 - struct v3d_tfu_job *job = to_tfu_job(sched_job); 300 + struct v3d_job *job = to_v3d_job(sched_job); 301 301 302 302 v3d_gpu_reset_for_timeout(job->v3d, sched_job); 303 303 } 304 304 305 - static const struct drm_sched_backend_ops v3d_sched_ops = { 305 + static const struct drm_sched_backend_ops v3d_bin_sched_ops = { 306 306 .dependency = v3d_job_dependency, 307 - .run_job = v3d_job_run, 308 - .timedout_job = v3d_job_timedout, 309 - .free_job = v3d_job_free 307 + .run_job = v3d_bin_job_run, 308 + .timedout_job = v3d_bin_job_timedout, 309 + .free_job = v3d_job_free, 310 + }; 311 + 312 + static const struct drm_sched_backend_ops v3d_render_sched_ops = { 313 + .dependency = v3d_render_job_dependency, 314 + .run_job = v3d_render_job_run, 315 + .timedout_job = v3d_render_job_timedout, 316 + .free_job = v3d_job_free, 310 317 }; 311 318 312 319 static const struct drm_sched_backend_ops v3d_tfu_sched_ops = { 313 - .dependency = v3d_tfu_job_dependency, 320 + .dependency = v3d_job_dependency, 314 321 .run_job = v3d_tfu_job_run, 315 322 .timedout_job = v3d_tfu_job_timedout, 316 - .free_job = v3d_tfu_job_free 323 + .free_job = v3d_job_free, 317 324 }; 318 325 319 326 int ··· 350 307 int ret; 351 308 352 309 ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, 353 - &v3d_sched_ops, 310 + &v3d_bin_sched_ops, 354 311 hw_jobs_limit, job_hang_limit, 355 312 msecs_to_jiffies(hang_limit_ms), 356 313 "v3d_bin"); ··· 360 317 } 361 318 362 319 ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, 363 - &v3d_sched_ops, 320 + &v3d_render_sched_ops, 364 321 hw_jobs_limit, job_hang_limit, 365 322 msecs_to_jiffies(hang_limit_ms), 366 323 "v3d_render");