Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/vc4: Expose dma-buf fences for V3D rendering.

This is needed for proper synchronization with display on another DRM
device (pl111 or tinydrm) with buffers produced by vc4 V3D. Fixes the
new igt vc4_dmabuf_poll testcase, and rendering of one of the glmark2
desktop tests on pl111+vc4.

This doesn't yet introduce waits on another device's fences before
vc4's rendering/display, because I don't have testcases for them.

v2: Reuse dma_fence_free(), retitle commit message to clarify that
it's not a full dma-buf fencing implementation yet.

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: http://patchwork.freedesktop.org/patch/msgid/20170412191202.22740-6-eric@anholt.net
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

+262 -5
+1
drivers/gpu/drm/vc4/Makefile
··· 9 9 vc4_drv.o \ 10 10 vc4_dpi.o \ 11 11 vc4_dsi.o \ 12 + vc4_fence.o \ 12 13 vc4_kms.o \ 13 14 vc4_gem.o \ 14 15 vc4_hdmi.o \
+36 -1
drivers/gpu/drm/vc4/vc4_bo.c
··· 19 19 * rendering can return quickly. 20 20 */ 21 21 22 + #include <linux/dma-buf.h> 23 + 22 24 #include "vc4_drv.h" 23 25 #include "uapi/drm/vc4_drm.h" 24 26 ··· 90 88 91 89 vc4->bo_stats.num_allocated--; 92 90 vc4->bo_stats.size_allocated -= obj->size; 91 + 92 + if (bo->resv == &bo->_resv) 93 + reservation_object_fini(bo->resv); 94 + 93 95 drm_gem_cma_free_object(obj); 94 96 } 95 97 ··· 250 244 return ERR_PTR(-ENOMEM); 251 245 } 252 246 } 247 + bo = to_vc4_bo(&cma_obj->base); 253 248 254 - return to_vc4_bo(&cma_obj->base); 249 + bo->resv = &bo->_resv; 250 + reservation_object_init(bo->resv); 251 + 252 + return bo; 255 253 } 256 254 257 255 int vc4_dumb_create(struct drm_file *file_priv, ··· 379 369 schedule_work(&vc4->bo_cache.time_work); 380 370 } 381 371 372 + struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj) 373 + { 374 + struct vc4_bo *bo = to_vc4_bo(obj); 375 + 376 + return bo->resv; 377 + } 378 + 382 379 struct dma_buf * 383 380 vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) 384 381 { ··· 455 438 } 456 439 457 440 return drm_gem_cma_prime_vmap(obj); 441 + } 442 + 443 + struct drm_gem_object * 444 + vc4_prime_import_sg_table(struct drm_device *dev, 445 + struct dma_buf_attachment *attach, 446 + struct sg_table *sgt) 447 + { 448 + struct drm_gem_object *obj; 449 + struct vc4_bo *bo; 450 + 451 + obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt); 452 + if (IS_ERR(obj)) 453 + return obj; 454 + 455 + bo = to_vc4_bo(obj); 456 + bo->resv = attach->dmabuf->resv; 457 + 458 + return obj; 458 459 } 459 460 460 461 int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
+2 -1
drivers/gpu/drm/vc4/vc4_drv.c
··· 168 168 .prime_fd_to_handle = drm_gem_prime_fd_to_handle, 169 169 .gem_prime_import = drm_gem_prime_import, 170 170 .gem_prime_export = vc4_prime_export, 171 + .gem_prime_res_obj = vc4_prime_res_obj, 171 172 .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, 172 - .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, 173 + .gem_prime_import_sg_table = vc4_prime_import_sg_table, 173 174 .gem_prime_vmap = vc4_prime_vmap, 174 175 .gem_prime_vunmap = drm_gem_cma_prime_vunmap, 175 176 .gem_prime_mmap = vc4_prime_mmap,
+30
drivers/gpu/drm/vc4/vc4_drv.h
··· 8 8 9 9 #include "drmP.h" 10 10 #include "drm_gem_cma_helper.h" 11 + #include "drm_gem_cma_helper.h" 11 12 13 + #include <linux/reservation.h> 12 14 #include <drm/drm_encoder.h> 13 15 14 16 struct vc4_dev { ··· 57 55 58 56 /* Protects bo_cache and the BO stats. */ 59 57 struct mutex bo_lock; 58 + 59 + uint64_t dma_fence_context; 60 60 61 61 /* Sequence number for the last job queued in bin_job_list. 62 62 * Starts at 0 (no jobs emitted). ··· 154 150 * DRM_IOCTL_VC4_CREATE_SHADER_BO. 155 151 */ 156 152 struct vc4_validated_shader_info *validated_shader; 153 + 154 + /* normally (resv == &_resv) except for imported bo's */ 155 + struct reservation_object *resv; 156 + struct reservation_object _resv; 157 157 }; 158 158 159 159 static inline struct vc4_bo * 160 160 to_vc4_bo(struct drm_gem_object *bo) 161 161 { 162 162 return (struct vc4_bo *)bo; 163 + } 164 + 165 + struct vc4_fence { 166 + struct dma_fence base; 167 + struct drm_device *dev; 168 + /* vc4 seqno for signaled() test */ 169 + uint64_t seqno; 170 + }; 171 + 172 + static inline struct vc4_fence * 173 + to_vc4_fence(struct dma_fence *fence) 174 + { 175 + return (struct vc4_fence *)fence; 163 176 } 164 177 165 178 struct vc4_seqno_cb { ··· 250 229 251 230 /* Latest write_seqno of any BO that binning depends on. */ 252 231 uint64_t bin_dep_seqno; 232 + 233 + struct dma_fence *fence; 253 234 254 235 /* Last current addresses the hardware was processing when the 255 236 * hangcheck timer checked on us. 
··· 459 436 int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, 460 437 struct drm_file *file_priv); 461 438 int vc4_mmap(struct file *filp, struct vm_area_struct *vma); 439 + struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); 462 440 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); 441 + struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev, 442 + struct dma_buf_attachment *attach, 443 + struct sg_table *sgt); 463 444 void *vc4_prime_vmap(struct drm_gem_object *obj); 464 445 void vc4_bo_cache_init(struct drm_device *dev); 465 446 void vc4_bo_cache_destroy(struct drm_device *dev); ··· 494 467 /* vc4_dsi.c */ 495 468 extern struct platform_driver vc4_dsi_driver; 496 469 int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused); 470 + 471 + /* vc4_fence.c */ 472 + extern const struct dma_fence_ops vc4_fence_ops; 497 473 498 474 /* vc4_gem.c */ 499 475 void vc4_gem_init(struct drm_device *dev);
+56
drivers/gpu/drm/vc4/vc4_fence.c
··· 1 + /* 2 + * Copyright © 2017 Broadcom 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 
22 + */ 23 + 24 + #include "vc4_drv.h" 25 + 26 + static const char *vc4_fence_get_driver_name(struct dma_fence *fence) 27 + { 28 + return "vc4"; 29 + } 30 + 31 + static const char *vc4_fence_get_timeline_name(struct dma_fence *fence) 32 + { 33 + return "vc4-v3d"; 34 + } 35 + 36 + static bool vc4_fence_enable_signaling(struct dma_fence *fence) 37 + { 38 + return true; 39 + } 40 + 41 + static bool vc4_fence_signaled(struct dma_fence *fence) 42 + { 43 + struct vc4_fence *f = to_vc4_fence(fence); 44 + struct vc4_dev *vc4 = to_vc4_dev(f->dev); 45 + 46 + return vc4->finished_seqno >= f->seqno; 47 + } 48 + 49 + const struct dma_fence_ops vc4_fence_ops = { 50 + .get_driver_name = vc4_fence_get_driver_name, 51 + .get_timeline_name = vc4_fence_get_timeline_name, 52 + .enable_signaling = vc4_fence_enable_signaling, 53 + .signaled = vc4_fence_signaled, 54 + .wait = dma_fence_default_wait, 55 + .release = dma_fence_free, 56 + };
+133 -3
drivers/gpu/drm/vc4/vc4_gem.c
··· 463 463 for (i = 0; i < exec->bo_count; i++) { 464 464 bo = to_vc4_bo(&exec->bo[i]->base); 465 465 bo->seqno = seqno; 466 + 467 + reservation_object_add_shared_fence(bo->resv, exec->fence); 466 468 } 467 469 468 470 list_for_each_entry(bo, &exec->unref_list, unref_head) { ··· 474 472 for (i = 0; i < exec->rcl_write_bo_count; i++) { 475 473 bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); 476 474 bo->write_seqno = seqno; 475 + 476 + reservation_object_add_excl_fence(bo->resv, exec->fence); 477 477 } 478 + } 479 + 480 + static void 481 + vc4_unlock_bo_reservations(struct drm_device *dev, 482 + struct vc4_exec_info *exec, 483 + struct ww_acquire_ctx *acquire_ctx) 484 + { 485 + int i; 486 + 487 + for (i = 0; i < exec->bo_count; i++) { 488 + struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base); 489 + 490 + ww_mutex_unlock(&bo->resv->lock); 491 + } 492 + 493 + ww_acquire_fini(acquire_ctx); 494 + } 495 + 496 + /* Takes the reservation lock on all the BOs being referenced, so that 497 + * at queue submit time we can update the reservations. 498 + * 499 + * We don't lock the RCL, the tile alloc/state BOs, or overflow memory 500 + * (all of which are on exec->unref_list). They're entirely private 501 + * to vc4, so we don't attach dma-buf fences to them. 
502 + */ 503 + static int 504 + vc4_lock_bo_reservations(struct drm_device *dev, 505 + struct vc4_exec_info *exec, 506 + struct ww_acquire_ctx *acquire_ctx) 507 + { 508 + int contended_lock = -1; 509 + int i, ret; 510 + struct vc4_bo *bo; 511 + 512 + ww_acquire_init(acquire_ctx, &reservation_ww_class); 513 + 514 + retry: 515 + if (contended_lock != -1) { 516 + bo = to_vc4_bo(&exec->bo[contended_lock]->base); 517 + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, 518 + acquire_ctx); 519 + if (ret) { 520 + ww_acquire_done(acquire_ctx); 521 + return ret; 522 + } 523 + } 524 + 525 + for (i = 0; i < exec->bo_count; i++) { 526 + if (i == contended_lock) 527 + continue; 528 + 529 + bo = to_vc4_bo(&exec->bo[i]->base); 530 + 531 + ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx); 532 + if (ret) { 533 + int j; 534 + 535 + for (j = 0; j < i; j++) { 536 + bo = to_vc4_bo(&exec->bo[j]->base); 537 + ww_mutex_unlock(&bo->resv->lock); 538 + } 539 + 540 + if (contended_lock != -1 && contended_lock >= i) { 541 + bo = to_vc4_bo(&exec->bo[contended_lock]->base); 542 + 543 + ww_mutex_unlock(&bo->resv->lock); 544 + } 545 + 546 + if (ret == -EDEADLK) { 547 + contended_lock = i; 548 + goto retry; 549 + } 550 + 551 + ww_acquire_done(acquire_ctx); 552 + return ret; 553 + } 554 + } 555 + 556 + ww_acquire_done(acquire_ctx); 557 + 558 + /* Reserve space for our shared (read-only) fence references, 559 + * before we commit the CL to the hardware. 560 + */ 561 + for (i = 0; i < exec->bo_count; i++) { 562 + bo = to_vc4_bo(&exec->bo[i]->base); 563 + 564 + ret = reservation_object_reserve_shared(bo->resv); 565 + if (ret) { 566 + vc4_unlock_bo_reservations(dev, exec, acquire_ctx); 567 + return ret; 568 + } 569 + } 570 + 571 + return 0; 478 572 } 479 573 480 574 /* Queues a struct vc4_exec_info for execution. If no job is ··· 582 484 * then bump the end address. That's a change for a later date, 583 485 * though. 
584 486 */ 585 - static void 586 - vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) 487 + static int 488 + vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, 489 + struct ww_acquire_ctx *acquire_ctx) 587 490 { 588 491 struct vc4_dev *vc4 = to_vc4_dev(dev); 589 492 uint64_t seqno; 590 493 unsigned long irqflags; 494 + struct vc4_fence *fence; 495 + 496 + fence = kzalloc(sizeof(*fence), GFP_KERNEL); 497 + if (!fence) 498 + return -ENOMEM; 499 + fence->dev = dev; 591 500 592 501 spin_lock_irqsave(&vc4->job_lock, irqflags); 593 502 594 503 seqno = ++vc4->emit_seqno; 595 504 exec->seqno = seqno; 505 + 506 + dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock, 507 + vc4->dma_fence_context, exec->seqno); 508 + fence->seqno = exec->seqno; 509 + exec->fence = &fence->base; 510 + 596 511 vc4_update_bo_seqnos(exec, seqno); 512 + 513 + vc4_unlock_bo_reservations(dev, exec, acquire_ctx); 597 514 598 515 list_add_tail(&exec->head, &vc4->bin_job_list); 599 516 ··· 622 509 } 623 510 624 511 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 512 + 513 + return 0; 625 514 } 626 515 627 516 /** ··· 822 707 struct vc4_dev *vc4 = to_vc4_dev(dev); 823 708 unsigned i; 824 709 710 + /* If we got force-completed because of GPU reset rather than 711 + * through our IRQ handler, signal the fence now. 
712 + */ 713 + if (exec->fence) 714 + dma_fence_signal(exec->fence); 715 + 825 716 if (exec->bo) { 826 717 for (i = 0; i < exec->bo_count; i++) 827 718 drm_gem_object_unreference_unlocked(&exec->bo[i]->base); ··· 995 874 struct vc4_dev *vc4 = to_vc4_dev(dev); 996 875 struct drm_vc4_submit_cl *args = data; 997 876 struct vc4_exec_info *exec; 877 + struct ww_acquire_ctx acquire_ctx; 998 878 int ret = 0; 999 879 1000 880 if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { ··· 1038 916 if (ret) 1039 917 goto fail; 1040 918 919 + ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); 920 + if (ret) 921 + goto fail; 922 + 1041 923 /* Clear this out of the struct we'll be putting in the queue, 1042 924 * since it's part of our stack. 1043 925 */ 1044 926 exec->args = NULL; 1045 927 1046 - vc4_queue_submit(dev, exec); 928 + ret = vc4_queue_submit(dev, exec, &acquire_ctx); 929 + if (ret) 930 + goto fail; 1047 931 1048 932 /* Return the seqno for our job. */ 1049 933 args->seqno = vc4->emit_seqno; ··· 1066 938 vc4_gem_init(struct drm_device *dev) 1067 939 { 1068 940 struct vc4_dev *vc4 = to_vc4_dev(dev); 941 + 942 + vc4->dma_fence_context = dma_fence_context_alloc(1); 1069 943 1070 944 INIT_LIST_HEAD(&vc4->bin_job_list); 1071 945 INIT_LIST_HEAD(&vc4->render_job_list);
+4
drivers/gpu/drm/vc4/vc4_irq.c
··· 142 142 143 143 vc4->finished_seqno++; 144 144 list_move_tail(&exec->head, &vc4->job_done_list); 145 + if (exec->fence) { 146 + dma_fence_signal_locked(exec->fence); 147 + exec->fence = NULL; 148 + } 145 149 vc4_submit_next_render_job(dev); 146 150 147 151 wake_up_all(&vc4->job_wait_queue);