Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/v3d: add multiple syncobjs support

Using the generic extension from the previous patch, a specific multisync
extension enables more than one in/out binary syncobj per job submission.
Arrays of syncobjs are set in struct drm_v3d_multi_sync, which also takes
care of determining the stage for sync (wait deps) according to the job
queue.

v2:
- subclass the generic extension struct (Daniel)
- simplify the conditions for adding dependencies to make them easier to understand (Iago)

v3:
- fix conditions to consider single or multiple in/out_syncs (Iago)
- remove irrelevant comment (Iago)

Signed-off-by: Melissa Wen <mwen@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Signed-off-by: Melissa Wen <melissa.srw@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ffd8b2e3dd2e0c686db441a0c0a4a0181ff85328.1633016479.git.mwen@igalia.com

authored by

Melissa Wen and committed by
Melissa Wen
e4165ae8 bb3425ef

+232 -32
+4 -2
drivers/gpu/drm/v3d/v3d_drv.c
··· 96 96 case DRM_V3D_PARAM_SUPPORTS_PERFMON: 97 97 args->value = (v3d->ver >= 40); 98 98 return 0; 99 + case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: 100 + args->value = 1; 101 + return 0; 99 102 default: 100 103 DRM_DEBUG("Unknown parameter %d\n", args->param); 101 104 return -EINVAL; ··· 138 135 struct v3d_file_priv *v3d_priv = file->driver_priv; 139 136 enum v3d_queue q; 140 137 141 - for (q = 0; q < V3D_MAX_QUEUES; q++) { 138 + for (q = 0; q < V3D_MAX_QUEUES; q++) 142 139 drm_sched_entity_destroy(&v3d_priv->sched_entity[q]); 143 - } 144 140 145 141 v3d_perfmon_close_file(v3d_priv); 146 142 kfree(v3d_priv);
+15 -9
drivers/gpu/drm/v3d/v3d_drv.h
··· 19 19 20 20 #define GMP_GRANULARITY (128 * 1024) 21 21 22 - /* Enum for each of the V3D queues. */ 23 - enum v3d_queue { 24 - V3D_BIN, 25 - V3D_RENDER, 26 - V3D_TFU, 27 - V3D_CSD, 28 - V3D_CACHE_CLEAN, 29 - }; 30 - 31 22 #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) 32 23 33 24 struct v3d_queue_state { ··· 283 292 u32 timedout_batches; 284 293 285 294 struct drm_v3d_submit_csd args; 295 + }; 296 + 297 + struct v3d_submit_outsync { 298 + struct drm_syncobj *syncobj; 299 + }; 300 + 301 + struct v3d_submit_ext { 302 + u32 flags; 303 + u32 wait_stage; 304 + 305 + u32 in_sync_count; 306 + u64 in_syncs; 307 + 308 + u32 out_sync_count; 309 + struct v3d_submit_outsync *out_syncs; 286 310 }; 287 311 288 312 /**
+165 -20
drivers/gpu/drm/v3d/v3d_gem.c
··· 454 454 static int 455 455 v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, 456 456 void **container, size_t size, void (*free)(struct kref *ref), 457 - u32 in_sync, enum v3d_queue queue) 457 + u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue) 458 458 { 459 459 struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 460 460 struct v3d_job *job; 461 - int ret; 461 + bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC); 462 + int ret, i; 462 463 463 464 *container = kcalloc(1, size, GFP_KERNEL); 464 465 if (!*container) { ··· 480 479 if (ret) 481 480 goto fail_job; 482 481 483 - ret = v3d_job_add_deps(file_priv, job, in_sync, 0); 484 - if (ret) 485 - goto fail_deps; 482 + if (has_multisync) { 483 + if (se->in_sync_count && se->wait_stage == queue) { 484 + struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs); 485 + 486 + for (i = 0; i < se->in_sync_count; i++) { 487 + struct drm_v3d_sem in; 488 + 489 + ret = copy_from_user(&in, handle++, sizeof(in)); 490 + if (ret) { 491 + DRM_DEBUG("Failed to copy wait dep handle.\n"); 492 + goto fail_deps; 493 + } 494 + ret = v3d_job_add_deps(file_priv, job, in.handle, 0); 495 + if (ret) 496 + goto fail_deps; 497 + } 498 + } 499 + } else { 500 + ret = v3d_job_add_deps(file_priv, job, in_sync, 0); 501 + if (ret) 502 + goto fail_deps; 503 + } 486 504 487 505 kref_init(&job->refcount); 488 506 ··· 536 516 struct v3d_job *job, 537 517 struct ww_acquire_ctx *acquire_ctx, 538 518 u32 out_sync, 519 + struct v3d_submit_ext *se, 539 520 struct dma_fence *done_fence) 540 521 { 541 522 struct drm_syncobj *sync_out; 523 + bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC); 542 524 int i; 543 525 544 526 for (i = 0; i < job->bo_count; i++) { ··· 552 530 drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); 553 531 554 532 /* Update the return sync object for the job */ 555 - sync_out = drm_syncobj_find(file_priv, out_sync); 556 - if (sync_out) { 557 - 
drm_syncobj_replace_fence(sync_out, done_fence); 558 - drm_syncobj_put(sync_out); 533 + /* If it only supports a single signal semaphore*/ 534 + if (!has_multisync) { 535 + sync_out = drm_syncobj_find(file_priv, out_sync); 536 + if (sync_out) { 537 + drm_syncobj_replace_fence(sync_out, done_fence); 538 + drm_syncobj_put(sync_out); 539 + } 540 + return; 559 541 } 542 + 543 + /* If multiple semaphores extension is supported */ 544 + if (se->out_sync_count) { 545 + for (i = 0; i < se->out_sync_count; i++) { 546 + drm_syncobj_replace_fence(se->out_syncs[i].syncobj, 547 + done_fence); 548 + drm_syncobj_put(se->out_syncs[i].syncobj); 549 + } 550 + kvfree(se->out_syncs); 551 + } 552 + } 553 + 554 + static void 555 + v3d_put_multisync_post_deps(struct v3d_submit_ext *se) 556 + { 557 + unsigned int i; 558 + 559 + if (!(se && se->out_sync_count)) 560 + return; 561 + 562 + for (i = 0; i < se->out_sync_count; i++) 563 + drm_syncobj_put(se->out_syncs[i].syncobj); 564 + kvfree(se->out_syncs); 565 + } 566 + 567 + static int 568 + v3d_get_multisync_post_deps(struct drm_file *file_priv, 569 + struct v3d_submit_ext *se, 570 + u32 count, u64 handles) 571 + { 572 + struct drm_v3d_sem __user *post_deps; 573 + int i, ret; 574 + 575 + if (!count) 576 + return 0; 577 + 578 + se->out_syncs = (struct v3d_submit_outsync *) 579 + kvmalloc_array(count, 580 + sizeof(struct v3d_submit_outsync), 581 + GFP_KERNEL); 582 + if (!se->out_syncs) 583 + return -ENOMEM; 584 + 585 + post_deps = u64_to_user_ptr(handles); 586 + 587 + for (i = 0; i < count; i++) { 588 + struct drm_v3d_sem out; 589 + 590 + ret = copy_from_user(&out, post_deps++, sizeof(out)); 591 + if (ret) { 592 + DRM_DEBUG("Failed to copy post dep handles\n"); 593 + goto fail; 594 + } 595 + 596 + se->out_syncs[i].syncobj = drm_syncobj_find(file_priv, 597 + out.handle); 598 + if (!se->out_syncs[i].syncobj) { 599 + ret = -EINVAL; 600 + goto fail; 601 + } 602 + } 603 + se->out_sync_count = count; 604 + 605 + return 0; 606 + 607 + fail: 608 + 
for (i--; i >= 0; i--) 609 + drm_syncobj_put(se->out_syncs[i].syncobj); 610 + kvfree(se->out_syncs); 611 + 612 + return ret; 613 + } 614 + 615 + /* Get data for multiple binary semaphores synchronization. Parse syncobj 616 + * to be signaled when job completes (out_sync). 617 + */ 618 + static int 619 + v3d_get_multisync_submit_deps(struct drm_file *file_priv, 620 + struct drm_v3d_extension __user *ext, 621 + void *data) 622 + { 623 + struct drm_v3d_multi_sync multisync; 624 + struct v3d_submit_ext *se = data; 625 + int ret; 626 + 627 + ret = copy_from_user(&multisync, ext, sizeof(multisync)); 628 + if (ret) 629 + return ret; 630 + 631 + if (multisync.pad) 632 + return -EINVAL; 633 + 634 + ret = v3d_get_multisync_post_deps(file_priv, data, multisync.out_sync_count, 635 + multisync.out_syncs); 636 + if (ret) 637 + return ret; 638 + 639 + se->in_sync_count = multisync.in_sync_count; 640 + se->in_syncs = multisync.in_syncs; 641 + se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC; 642 + se->wait_stage = multisync.wait_stage; 643 + 644 + return 0; 560 645 } 561 646 562 647 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data 563 648 * according to the extension id (name). 
564 649 */ 565 650 static int 566 - v3d_get_extensions(struct drm_file *file_priv, u64 ext_handles) 651 + v3d_get_extensions(struct drm_file *file_priv, 652 + u64 ext_handles, 653 + void *data) 567 654 { 568 655 struct drm_v3d_extension __user *user_ext; 656 + int ret; 569 657 570 658 user_ext = u64_to_user_ptr(ext_handles); 571 659 while (user_ext) { ··· 687 555 } 688 556 689 557 switch (ext.id) { 690 - case 0: 558 + case DRM_V3D_EXT_ID_MULTI_SYNC: 559 + ret = v3d_get_multisync_submit_deps(file_priv, user_ext, data); 560 + if (ret) 561 + return ret; 562 + break; 691 563 default: 692 564 DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id); 693 565 return -EINVAL; ··· 722 586 struct v3d_dev *v3d = to_v3d_dev(dev); 723 587 struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 724 588 struct drm_v3d_submit_cl *args = data; 589 + struct v3d_submit_ext se = {0}; 725 590 struct v3d_bin_job *bin = NULL; 726 591 struct v3d_render_job *render = NULL; 727 592 struct v3d_job *clean_job = NULL; ··· 743 606 } 744 607 745 608 if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { 746 - ret = v3d_get_extensions(file_priv, args->extensions); 609 + ret = v3d_get_extensions(file_priv, args->extensions, &se); 747 610 if (ret) { 748 611 DRM_DEBUG("Failed to get extensions.\n"); 749 612 return ret; ··· 751 614 } 752 615 753 616 ret = v3d_job_init(v3d, file_priv, (void *)&render, sizeof(*render), 754 - v3d_render_job_free, args->in_sync_rcl, V3D_RENDER); 617 + v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER); 755 618 if (ret) 756 619 goto fail; 757 620 ··· 761 624 762 625 if (args->bcl_start != args->bcl_end) { 763 626 ret = v3d_job_init(v3d, file_priv, (void *)&bin, sizeof(*bin), 764 - v3d_job_free, args->in_sync_bcl, V3D_BIN); 627 + v3d_job_free, args->in_sync_bcl, &se, V3D_BIN); 765 628 if (ret) 766 629 goto fail; 767 630 ··· 775 638 776 639 if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) { 777 640 ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job), 778 - 
v3d_job_free, 0, V3D_CACHE_CLEAN); 641 + v3d_job_free, 0, 0, V3D_CACHE_CLEAN); 779 642 if (ret) 780 643 goto fail; 781 644 ··· 835 698 last_job, 836 699 &acquire_ctx, 837 700 args->out_sync, 701 + &se, 838 702 last_job->done_fence); 839 703 840 704 if (bin) ··· 854 716 v3d_job_cleanup((void *)bin); 855 717 v3d_job_cleanup((void *)render); 856 718 v3d_job_cleanup(clean_job); 719 + v3d_put_multisync_post_deps(&se); 857 720 858 721 return ret; 859 722 } ··· 874 735 { 875 736 struct v3d_dev *v3d = to_v3d_dev(dev); 876 737 struct drm_v3d_submit_tfu *args = data; 738 + struct v3d_submit_ext se = {0}; 877 739 struct v3d_tfu_job *job = NULL; 878 740 struct ww_acquire_ctx acquire_ctx; 879 741 int ret = 0; ··· 887 747 } 888 748 889 749 if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { 890 - ret = v3d_get_extensions(file_priv, args->extensions); 750 + ret = v3d_get_extensions(file_priv, args->extensions, &se); 891 751 if (ret) { 892 752 DRM_DEBUG("Failed to get extensions.\n"); 893 753 return ret; ··· 895 755 } 896 756 897 757 ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job), 898 - v3d_job_free, args->in_sync, V3D_TFU); 758 + v3d_job_free, args->in_sync, &se, V3D_TFU); 899 759 if (ret) 900 760 goto fail; 901 761 ··· 943 803 v3d_attach_fences_and_unlock_reservation(file_priv, 944 804 &job->base, &acquire_ctx, 945 805 args->out_sync, 806 + &se, 946 807 job->base.done_fence); 947 808 948 809 v3d_job_put(&job->base); ··· 952 811 953 812 fail: 954 813 v3d_job_cleanup((void *)job); 814 + v3d_put_multisync_post_deps(&se); 955 815 956 816 return ret; 957 817 } ··· 973 831 struct v3d_dev *v3d = to_v3d_dev(dev); 974 832 struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 975 833 struct drm_v3d_submit_csd *args = data; 834 + struct v3d_submit_ext se = {0}; 976 835 struct v3d_csd_job *job = NULL; 977 836 struct v3d_job *clean_job = NULL; 978 837 struct ww_acquire_ctx acquire_ctx; ··· 995 852 } 996 853 997 854 if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { 998 - ret = 
v3d_get_extensions(file_priv, args->extensions); 855 + ret = v3d_get_extensions(file_priv, args->extensions, &se); 999 856 if (ret) { 1000 857 DRM_DEBUG("Failed to get extensions.\n"); 1001 858 return ret; ··· 1003 860 } 1004 861 1005 862 ret = v3d_job_init(v3d, file_priv, (void *)&job, sizeof(*job), 1006 - v3d_job_free, args->in_sync, V3D_CSD); 863 + v3d_job_free, args->in_sync, &se, V3D_CSD); 1007 864 if (ret) 1008 865 goto fail; 1009 866 1010 867 ret = v3d_job_init(v3d, file_priv, (void *)&clean_job, sizeof(*clean_job), 1011 - v3d_job_free, 0, V3D_CACHE_CLEAN); 868 + v3d_job_free, 0, 0, V3D_CACHE_CLEAN); 1012 869 if (ret) 1013 870 goto fail; 1014 871 ··· 1047 904 clean_job, 1048 905 &acquire_ctx, 1049 906 args->out_sync, 907 + &se, 1050 908 clean_job->done_fence); 1051 909 1052 910 v3d_job_put(&job->base); ··· 1062 918 fail: 1063 919 v3d_job_cleanup((void *)job); 1064 920 v3d_job_cleanup(clean_job); 921 + v3d_put_multisync_post_deps(&se); 1065 922 1066 923 return ret; 1067 924 }
+48 -1
include/uapi/drm/v3d_drm.h
··· 73 73 __u32 flags; /* mbz */ 74 74 }; 75 75 76 + /* struct drm_v3d_sem - wait/signal semaphore 77 + * 78 + * If binary semaphore, it only takes syncobj handle and ignores flags and 79 + * point fields. Point is defined for timeline syncobj feature. 80 + */ 81 + struct drm_v3d_sem { 82 + __u32 handle; /* syncobj */ 83 + /* rsv below, for future uses */ 84 + __u32 flags; 85 + __u64 point; /* for timeline sem support */ 86 + __u64 mbz[2]; /* must be zero, rsv */ 87 + }; 88 + 89 + /* Enum for each of the V3D queues. */ 90 + enum v3d_queue { 91 + V3D_BIN, 92 + V3D_RENDER, 93 + V3D_TFU, 94 + V3D_CSD, 95 + V3D_CACHE_CLEAN, 96 + }; 97 + 98 + /** 99 + * struct drm_v3d_multi_sync - ioctl extension to add support multiples 100 + * syncobjs for commands submission. 101 + * 102 + * When an extension of DRM_V3D_EXT_ID_MULTI_SYNC id is defined, it points to 103 + * this extension to define wait and signal dependencies, instead of single 104 + * in/out sync entries on submitting commands. The field flags is used to 105 + * determine the stage to set wait dependencies. 106 + */ 107 + struct drm_v3d_multi_sync { 108 + struct drm_v3d_extension base; 109 + /* Array of wait and signal semaphores */ 110 + __u64 in_syncs; 111 + __u64 out_syncs; 112 + 113 + /* Number of entries */ 114 + __u32 in_sync_count; 115 + __u32 out_sync_count; 116 + 117 + /* set the stage (v3d_queue) to sync */ 118 + __u32 wait_stage; 119 + 120 + __u32 pad; /* mbz */ 121 + }; 122 + 76 123 /** 77 124 * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D 78 125 * engine. ··· 275 228 DRM_V3D_PARAM_SUPPORTS_CSD, 276 229 DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH, 277 230 DRM_V3D_PARAM_SUPPORTS_PERFMON, 231 + DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, 278 232 }; 279 233 280 234 struct drm_v3d_get_param { ··· 319 271 320 272 /* Pointer to an array of ioctl extensions*/ 321 273 __u64 extensions; 322 - 323 274 }; 324 275 325 276 /* Submits a compute shader for dispatch. This job will block on any