drm/vc4: Expose dma-buf fences for V3D rendering.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

This is needed for proper synchronization when buffers produced by
vc4 V3D are displayed on another DRM device (pl111 or tinydrm). Fixes
the new igt vc4_dmabuf_poll testcase, and rendering of one of the
glmark2 desktop tests on pl111+vc4.

This doesn't yet introduce waits on another device's fences before
vc4's rendering/display, because I don't have testcases for them.

v2: Reuse dma_fence_free(), retitle commit message to clarify that
it's not a full dma-buf fencing implementation yet.

Signed-off-by: Eric Anholt <eric@anholt.net>
Link: http://patchwork.freedesktop.org/patch/msgid/20170412191202.22740-6-eric@anholt.net
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Eric Anholt 9 years ago cdec4d36 ce9971de

+262 -5

7 changed files

expand all

drivers

gpu

drm

vc4

Makefile

vc4_bo.c

vc4_drv.c

vc4_drv.h

vc4_fence.c

vc4_gem.c

vc4_irq.c

drivers/gpu/drm/vc4/Makefile

··· 9 9 vc4_drv.o \ 10 10 vc4_dpi.o \ 11 11 vc4_dsi.o \ 12 + vc4_fence.o \ 12 13 vc4_kms.o \ 13 14 vc4_gem.o \ 14 15 vc4_hdmi.o \

+36 -1

drivers/gpu/drm/vc4/vc4_bo.c

··· 19 19 * rendering can return quickly. 20 20 */ 21 21 22 + #include <linux/dma-buf.h> 23 + 22 24 #include "vc4_drv.h" 23 25 #include "uapi/drm/vc4_drm.h" 24 26 ··· 90 88 91 89 vc4->bo_stats.num_allocated--; 92 90 vc4->bo_stats.size_allocated -= obj->size; 91 + 92 + if (bo->resv == &bo->_resv) 93 + reservation_object_fini(bo->resv); 94 + 93 95 drm_gem_cma_free_object(obj); 94 96 } 95 97 ··· 250 244 return ERR_PTR(-ENOMEM); 251 245 } 252 246 } 247 + bo = to_vc4_bo(&cma_obj->base); 253 248 254 - return to_vc4_bo(&cma_obj->base); 249 + bo->resv = &bo->_resv; 250 + reservation_object_init(bo->resv); 251 + 252 + return bo; 255 253 } 256 254 257 255 int vc4_dumb_create(struct drm_file *file_priv, ··· 379 369 schedule_work(&vc4->bo_cache.time_work); 380 370 } 381 371 372 + struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj) 373 + { 374 + struct vc4_bo *bo = to_vc4_bo(obj); 375 + 376 + return bo->resv; 377 + } 378 + 382 379 struct dma_buf * 383 380 vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) 384 381 { ··· 455 438 } 456 439 457 440 return drm_gem_cma_prime_vmap(obj); 441 + } 442 + 443 + struct drm_gem_object * 444 + vc4_prime_import_sg_table(struct drm_device *dev, 445 + struct dma_buf_attachment *attach, 446 + struct sg_table *sgt) 447 + { 448 + struct drm_gem_object *obj; 449 + struct vc4_bo *bo; 450 + 451 + obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt); 452 + if (IS_ERR(obj)) 453 + return obj; 454 + 455 + bo = to_vc4_bo(obj); 456 + bo->resv = attach->dmabuf->resv; 457 + 458 + return obj; 458 459 } 459 460 460 461 int vc4_create_bo_ioctl(struct drm_device *dev, void *data,

+2 -1

drivers/gpu/drm/vc4/vc4_drv.c

··· 168 168 .prime_fd_to_handle = drm_gem_prime_fd_to_handle, 169 169 .gem_prime_import = drm_gem_prime_import, 170 170 .gem_prime_export = vc4_prime_export, 171 + .gem_prime_res_obj = vc4_prime_res_obj, 171 172 .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, 172 - .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, 173 + .gem_prime_import_sg_table = vc4_prime_import_sg_table, 173 174 .gem_prime_vmap = vc4_prime_vmap, 174 175 .gem_prime_vunmap = drm_gem_cma_prime_vunmap, 175 176 .gem_prime_mmap = vc4_prime_mmap,

+30

drivers/gpu/drm/vc4/vc4_drv.h

··· 8 8 9 9 #include "drmP.h" 10 10 #include "drm_gem_cma_helper.h" 11 + #include "drm_gem_cma_helper.h" 11 12 13 + #include <linux/reservation.h> 12 14 #include <drm/drm_encoder.h> 13 15 14 16 struct vc4_dev { ··· 57 55 58 56 /* Protects bo_cache and the BO stats. */ 59 57 struct mutex bo_lock; 58 + 59 + uint64_t dma_fence_context; 60 60 61 61 /* Sequence number for the last job queued in bin_job_list. 62 62 * Starts at 0 (no jobs emitted). ··· 154 150 * DRM_IOCTL_VC4_CREATE_SHADER_BO. 155 151 */ 156 152 struct vc4_validated_shader_info *validated_shader; 153 + 154 + /* normally (resv == &_resv) except for imported bo's */ 155 + struct reservation_object *resv; 156 + struct reservation_object _resv; 157 157 }; 158 158 159 159 static inline struct vc4_bo * 160 160 to_vc4_bo(struct drm_gem_object *bo) 161 161 { 162 162 return (struct vc4_bo *)bo; 163 + } 164 + 165 + struct vc4_fence { 166 + struct dma_fence base; 167 + struct drm_device *dev; 168 + /* vc4 seqno for signaled() test */ 169 + uint64_t seqno; 170 + }; 171 + 172 + static inline struct vc4_fence * 173 + to_vc4_fence(struct dma_fence *fence) 174 + { 175 + return (struct vc4_fence *)fence; 163 176 } 164 177 165 178 struct vc4_seqno_cb { ··· 250 229 251 230 /* Latest write_seqno of any BO that binning depends on. */ 252 231 uint64_t bin_dep_seqno; 232 + 233 + struct dma_fence *fence; 253 234 254 235 /* Last current addresses the hardware was processing when the 255 236 * hangcheck timer checked on us. 
··· 459 436 int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, 460 437 struct drm_file *file_priv); 461 438 int vc4_mmap(struct file *filp, struct vm_area_struct *vma); 439 + struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); 462 440 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); 441 + struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev, 442 + struct dma_buf_attachment *attach, 443 + struct sg_table *sgt); 463 444 void *vc4_prime_vmap(struct drm_gem_object *obj); 464 445 void vc4_bo_cache_init(struct drm_device *dev); 465 446 void vc4_bo_cache_destroy(struct drm_device *dev); ··· 494 467 /* vc4_dsi.c */ 495 468 extern struct platform_driver vc4_dsi_driver; 496 469 int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused); 470 + 471 + /* vc4_fence.c */ 472 + extern const struct dma_fence_ops vc4_fence_ops; 497 473 498 474 /* vc4_gem.c */ 499 475 void vc4_gem_init(struct drm_device *dev);

+56

drivers/gpu/drm/vc4/vc4_fence.c

··· 1 + /* 2 + * Copyright © 2017 Broadcom 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 
22 + */ 23 + 24 + #include "vc4_drv.h" 25 + 26 + static const char *vc4_fence_get_driver_name(struct dma_fence *fence) 27 + { 28 + return "vc4"; 29 + } 30 + 31 + static const char *vc4_fence_get_timeline_name(struct dma_fence *fence) 32 + { 33 + return "vc4-v3d"; 34 + } 35 + 36 + static bool vc4_fence_enable_signaling(struct dma_fence *fence) 37 + { 38 + return true; 39 + } 40 + 41 + static bool vc4_fence_signaled(struct dma_fence *fence) 42 + { 43 + struct vc4_fence *f = to_vc4_fence(fence); 44 + struct vc4_dev *vc4 = to_vc4_dev(f->dev); 45 + 46 + return vc4->finished_seqno >= f->seqno; 47 + } 48 + 49 + const struct dma_fence_ops vc4_fence_ops = { 50 + .get_driver_name = vc4_fence_get_driver_name, 51 + .get_timeline_name = vc4_fence_get_timeline_name, 52 + .enable_signaling = vc4_fence_enable_signaling, 53 + .signaled = vc4_fence_signaled, 54 + .wait = dma_fence_default_wait, 55 + .release = dma_fence_free, 56 + };

+133 -3

drivers/gpu/drm/vc4/vc4_gem.c

··· 463 463 for (i = 0; i < exec->bo_count; i++) { 464 464 bo = to_vc4_bo(&exec->bo[i]->base); 465 465 bo->seqno = seqno; 466 + 467 + reservation_object_add_shared_fence(bo->resv, exec->fence); 466 468 } 467 469 468 470 list_for_each_entry(bo, &exec->unref_list, unref_head) { ··· 474 472 for (i = 0; i < exec->rcl_write_bo_count; i++) { 475 473 bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); 476 474 bo->write_seqno = seqno; 475 + 476 + reservation_object_add_excl_fence(bo->resv, exec->fence); 477 477 } 478 + } 479 + 480 + static void 481 + vc4_unlock_bo_reservations(struct drm_device *dev, 482 + struct vc4_exec_info *exec, 483 + struct ww_acquire_ctx *acquire_ctx) 484 + { 485 + int i; 486 + 487 + for (i = 0; i < exec->bo_count; i++) { 488 + struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base); 489 + 490 + ww_mutex_unlock(&bo->resv->lock); 491 + } 492 + 493 + ww_acquire_fini(acquire_ctx); 494 + } 495 + 496 + /* Takes the reservation lock on all the BOs being referenced, so that 497 + * at queue submit time we can update the reservations. 498 + * 499 + * We don't lock the RCL the tile alloc/state BOs, or overflow memory 500 + * (all of which are on exec->unref_list). They're entirely private 501 + * to vc4, so we don't attach dma-buf fences to them. 
502 + */ 503 + static int 504 + vc4_lock_bo_reservations(struct drm_device *dev, 505 + struct vc4_exec_info *exec, 506 + struct ww_acquire_ctx *acquire_ctx) 507 + { 508 + int contended_lock = -1; 509 + int i, ret; 510 + struct vc4_bo *bo; 511 + 512 + ww_acquire_init(acquire_ctx, &reservation_ww_class); 513 + 514 + retry: 515 + if (contended_lock != -1) { 516 + bo = to_vc4_bo(&exec->bo[contended_lock]->base); 517 + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, 518 + acquire_ctx); 519 + if (ret) { 520 + ww_acquire_done(acquire_ctx); 521 + return ret; 522 + } 523 + } 524 + 525 + for (i = 0; i < exec->bo_count; i++) { 526 + if (i == contended_lock) 527 + continue; 528 + 529 + bo = to_vc4_bo(&exec->bo[i]->base); 530 + 531 + ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx); 532 + if (ret) { 533 + int j; 534 + 535 + for (j = 0; j < i; j++) { 536 + bo = to_vc4_bo(&exec->bo[j]->base); 537 + ww_mutex_unlock(&bo->resv->lock); 538 + } 539 + 540 + if (contended_lock != -1 && contended_lock >= i) { 541 + bo = to_vc4_bo(&exec->bo[contended_lock]->base); 542 + 543 + ww_mutex_unlock(&bo->resv->lock); 544 + } 545 + 546 + if (ret == -EDEADLK) { 547 + contended_lock = i; 548 + goto retry; 549 + } 550 + 551 + ww_acquire_done(acquire_ctx); 552 + return ret; 553 + } 554 + } 555 + 556 + ww_acquire_done(acquire_ctx); 557 + 558 + /* Reserve space for our shared (read-only) fence references, 559 + * before we commit the CL to the hardware. 560 + */ 561 + for (i = 0; i < exec->bo_count; i++) { 562 + bo = to_vc4_bo(&exec->bo[i]->base); 563 + 564 + ret = reservation_object_reserve_shared(bo->resv); 565 + if (ret) { 566 + vc4_unlock_bo_reservations(dev, exec, acquire_ctx); 567 + return ret; 568 + } 569 + } 570 + 571 + return 0; 478 572 } 479 573 480 574 /* Queues a struct vc4_exec_info for execution. If no job is ··· 582 484 * then bump the end address. That's a change for a later date, 583 485 * though. 
584 486 */ 585 - static void 586 - vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) 487 + static int 488 + vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, 489 + struct ww_acquire_ctx *acquire_ctx) 587 490 { 588 491 struct vc4_dev *vc4 = to_vc4_dev(dev); 589 492 uint64_t seqno; 590 493 unsigned long irqflags; 494 + struct vc4_fence *fence; 495 + 496 + fence = kzalloc(sizeof(*fence), GFP_KERNEL); 497 + if (!fence) 498 + return -ENOMEM; 499 + fence->dev = dev; 591 500 592 501 spin_lock_irqsave(&vc4->job_lock, irqflags); 593 502 594 503 seqno = ++vc4->emit_seqno; 595 504 exec->seqno = seqno; 505 + 506 + dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock, 507 + vc4->dma_fence_context, exec->seqno); 508 + fence->seqno = exec->seqno; 509 + exec->fence = &fence->base; 510 + 596 511 vc4_update_bo_seqnos(exec, seqno); 512 + 513 + vc4_unlock_bo_reservations(dev, exec, acquire_ctx); 597 514 598 515 list_add_tail(&exec->head, &vc4->bin_job_list); 599 516 ··· 622 509 } 623 510 624 511 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 512 + 513 + return 0; 625 514 } 626 515 627 516 /** ··· 822 707 struct vc4_dev *vc4 = to_vc4_dev(dev); 823 708 unsigned i; 824 709 710 + /* If we got force-completed because of GPU reset rather than 711 + * through our IRQ handler, signal the fence now. 
712 + */ 713 + if (exec->fence) 714 + dma_fence_signal(exec->fence); 715 + 825 716 if (exec->bo) { 826 717 for (i = 0; i < exec->bo_count; i++) 827 718 drm_gem_object_unreference_unlocked(&exec->bo[i]->base); ··· 995 874 struct vc4_dev *vc4 = to_vc4_dev(dev); 996 875 struct drm_vc4_submit_cl *args = data; 997 876 struct vc4_exec_info *exec; 877 + struct ww_acquire_ctx acquire_ctx; 998 878 int ret = 0; 999 879 1000 880 if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { ··· 1038 916 if (ret) 1039 917 goto fail; 1040 918 919 + ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); 920 + if (ret) 921 + goto fail; 922 + 1041 923 /* Clear this out of the struct we'll be putting in the queue, 1042 924 * since it's part of our stack. 1043 925 */ 1044 926 exec->args = NULL; 1045 927 1046 - vc4_queue_submit(dev, exec); 928 + ret = vc4_queue_submit(dev, exec, &acquire_ctx); 929 + if (ret) 930 + goto fail; 1047 931 1048 932 /* Return the seqno for our job. */ 1049 933 args->seqno = vc4->emit_seqno; ··· 1066 938 vc4_gem_init(struct drm_device *dev) 1067 939 { 1068 940 struct vc4_dev *vc4 = to_vc4_dev(dev); 941 + 942 + vc4->dma_fence_context = dma_fence_context_alloc(1); 1069 943 1070 944 INIT_LIST_HEAD(&vc4->bin_job_list); 1071 945 INIT_LIST_HEAD(&vc4->render_job_list);

drivers/gpu/drm/vc4/vc4_irq.c

··· 142 142 143 143 vc4->finished_seqno++; 144 144 list_move_tail(&exec->head, &vc4->job_done_list); 145 + if (exec->fence) { 146 + dma_fence_signal_locked(exec->fence); 147 + exec->fence = NULL; 148 + } 145 149 vc4_submit_next_render_job(dev); 146 150 147 151 wake_up_all(&vc4->job_wait_queue);