Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/guc: Connect UAPI to GuC multi-lrc interface

Introduce 'set parallel submit' extension to connect UAPI to GuC
multi-lrc interface. Kernel doc in new uAPI should explain it all.

IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071&rev=1
media UMD: https://github.com/intel/media-driver/pull/1252

v2:
(Daniel Vetter)
- Add IGT link and placeholder for media UMD link
v3:
(Kernel test robot)
- Fix warning in unpin engines call
(John Harrison)
- Reword a bunch of the kernel doc
v4:
(John Harrison)
- Add comment why perma-pin is done after setting gem context
- Update some comments / docs for proto contexts
v5:
(John Harrison)
- Rework perma-pin comment
- Add BUG_ON if context is pinned when setting gem context

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211014172005.27155-17-matthew.brost@intel.com

authored by

Matthew Brost and committed by
John Harrison
e5e32171 d38a9294

+505 -31
+228 -2
drivers/gpu/drm/i915/gem/i915_gem_context.c
··· 556 556 return 0; 557 557 } 558 558 559 + static int 560 + set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, 561 + void *data) 562 + { 563 + struct i915_context_engines_parallel_submit __user *ext = 564 + container_of_user(base, typeof(*ext), base); 565 + const struct set_proto_ctx_engines *set = data; 566 + struct drm_i915_private *i915 = set->i915; 567 + u64 flags; 568 + int err = 0, n, i, j; 569 + u16 slot, width, num_siblings; 570 + struct intel_engine_cs **siblings = NULL; 571 + intel_engine_mask_t prev_mask; 572 + 573 + /* Disabling for now */ 574 + return -ENODEV; 575 + 576 + /* FIXME: This is NIY for execlists */ 577 + if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) 578 + return -ENODEV; 579 + 580 + if (get_user(slot, &ext->engine_index)) 581 + return -EFAULT; 582 + 583 + if (get_user(width, &ext->width)) 584 + return -EFAULT; 585 + 586 + if (get_user(num_siblings, &ext->num_siblings)) 587 + return -EFAULT; 588 + 589 + if (slot >= set->num_engines) { 590 + drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", 591 + slot, set->num_engines); 592 + return -EINVAL; 593 + } 594 + 595 + if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) { 596 + drm_dbg(&i915->drm, 597 + "Invalid placement[%d], already occupied\n", slot); 598 + return -EINVAL; 599 + } 600 + 601 + if (get_user(flags, &ext->flags)) 602 + return -EFAULT; 603 + 604 + if (flags) { 605 + drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags); 606 + return -EINVAL; 607 + } 608 + 609 + for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { 610 + err = check_user_mbz(&ext->mbz64[n]); 611 + if (err) 612 + return err; 613 + } 614 + 615 + if (width < 2) { 616 + drm_dbg(&i915->drm, "Width (%d) < 2\n", width); 617 + return -EINVAL; 618 + } 619 + 620 + if (num_siblings < 1) { 621 + drm_dbg(&i915->drm, "Number siblings (%d) < 1\n", 622 + num_siblings); 623 + return -EINVAL; 624 + } 625 + 626 + siblings = kmalloc_array(num_siblings * width, 627 + sizeof(*siblings), 628 + 
GFP_KERNEL); 629 + if (!siblings) 630 + return -ENOMEM; 631 + 632 + /* Create contexts / engines */ 633 + for (i = 0; i < width; ++i) { 634 + intel_engine_mask_t current_mask = 0; 635 + struct i915_engine_class_instance prev_engine; 636 + 637 + for (j = 0; j < num_siblings; ++j) { 638 + struct i915_engine_class_instance ci; 639 + 640 + n = i * num_siblings + j; 641 + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { 642 + err = -EFAULT; 643 + goto out_err; 644 + } 645 + 646 + siblings[n] = 647 + intel_engine_lookup_user(i915, ci.engine_class, 648 + ci.engine_instance); 649 + if (!siblings[n]) { 650 + drm_dbg(&i915->drm, 651 + "Invalid sibling[%d]: { class:%d, inst:%d }\n", 652 + n, ci.engine_class, ci.engine_instance); 653 + err = -EINVAL; 654 + goto out_err; 655 + } 656 + 657 + if (n) { 658 + if (prev_engine.engine_class != 659 + ci.engine_class) { 660 + drm_dbg(&i915->drm, 661 + "Mismatched class %d, %d\n", 662 + prev_engine.engine_class, 663 + ci.engine_class); 664 + err = -EINVAL; 665 + goto out_err; 666 + } 667 + } 668 + 669 + prev_engine = ci; 670 + current_mask |= siblings[n]->logical_mask; 671 + } 672 + 673 + if (i > 0) { 674 + if (current_mask != prev_mask << 1) { 675 + drm_dbg(&i915->drm, 676 + "Non contiguous logical mask 0x%x, 0x%x\n", 677 + prev_mask, current_mask); 678 + err = -EINVAL; 679 + goto out_err; 680 + } 681 + } 682 + prev_mask = current_mask; 683 + } 684 + 685 + set->engines[slot].type = I915_GEM_ENGINE_TYPE_PARALLEL; 686 + set->engines[slot].num_siblings = num_siblings; 687 + set->engines[slot].width = width; 688 + set->engines[slot].siblings = siblings; 689 + 690 + return 0; 691 + 692 + out_err: 693 + kfree(siblings); 694 + 695 + return err; 696 + } 697 + 559 698 static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = { 560 699 [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance, 561 700 [I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond, 701 + [I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT] 
= 702 + set_proto_ctx_engines_parallel_submit, 562 703 }; 563 704 564 705 static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv, ··· 935 794 GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); 936 795 RCU_INIT_POINTER(ce->gem_context, ctx); 937 796 797 + GEM_BUG_ON(intel_context_is_pinned(ce)); 938 798 ce->ring_size = SZ_16K; 939 799 940 800 i915_vm_put(ce->vm); ··· 958 816 ret = intel_context_reconfigure_sseu(ce, sseu); 959 817 960 818 return ret; 819 + } 820 + 821 + static void __unpin_engines(struct i915_gem_engines *e, unsigned int count) 822 + { 823 + while (count--) { 824 + struct intel_context *ce = e->engines[count], *child; 825 + 826 + if (!ce || !test_bit(CONTEXT_PERMA_PIN, &ce->flags)) 827 + continue; 828 + 829 + for_each_child(ce, child) 830 + intel_context_unpin(child); 831 + intel_context_unpin(ce); 832 + } 833 + } 834 + 835 + static void unpin_engines(struct i915_gem_engines *e) 836 + { 837 + __unpin_engines(e, e->num_engines); 961 838 } 962 839 963 840 static void __free_engines(struct i915_gem_engines *e, unsigned int count) ··· 1094 933 return err; 1095 934 } 1096 935 936 + static int perma_pin_contexts(struct intel_context *ce) 937 + { 938 + struct intel_context *child; 939 + int i = 0, j = 0, ret; 940 + 941 + GEM_BUG_ON(!intel_context_is_parent(ce)); 942 + 943 + ret = intel_context_pin(ce); 944 + if (unlikely(ret)) 945 + return ret; 946 + 947 + for_each_child(ce, child) { 948 + ret = intel_context_pin(child); 949 + if (unlikely(ret)) 950 + goto unwind; 951 + ++i; 952 + } 953 + 954 + set_bit(CONTEXT_PERMA_PIN, &ce->flags); 955 + 956 + return 0; 957 + 958 + unwind: 959 + intel_context_unpin(ce); 960 + for_each_child(ce, child) { 961 + if (j++ < i) 962 + intel_context_unpin(child); 963 + else 964 + break; 965 + } 966 + 967 + return ret; 968 + } 969 + 1097 970 static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, 1098 971 unsigned int num_engines, 1099 972 struct i915_gem_proto_engine *pe) ··· 1141 946 e->num_engines 
= num_engines; 1142 947 1143 948 for (n = 0; n < num_engines; n++) { 1144 - struct intel_context *ce; 949 + struct intel_context *ce, *child; 1145 950 int ret; 1146 951 1147 952 switch (pe[n].type) { ··· 1151 956 1152 957 case I915_GEM_ENGINE_TYPE_BALANCED: 1153 958 ce = intel_engine_create_virtual(pe[n].siblings, 1154 - pe[n].num_siblings); 959 + pe[n].num_siblings, 0); 960 + break; 961 + 962 + case I915_GEM_ENGINE_TYPE_PARALLEL: 963 + ce = intel_engine_create_parallel(pe[n].siblings, 964 + pe[n].num_siblings, 965 + pe[n].width); 1155 966 break; 1156 967 1157 968 case I915_GEM_ENGINE_TYPE_INVALID: ··· 1177 976 if (ret) { 1178 977 err = ERR_PTR(ret); 1179 978 goto free_engines; 979 + } 980 + for_each_child(ce, child) { 981 + ret = intel_context_set_gem(child, ctx, pe->sseu); 982 + if (ret) { 983 + err = ERR_PTR(ret); 984 + goto free_engines; 985 + } 986 + } 987 + 988 + /* 989 + * XXX: Must be done after calling intel_context_set_gem as that 990 + * function changes the ring size. The ring is allocated when 991 + * the context is pinned. If the ring size is changed after 992 + * allocation we have a mismatch of the ring size and will cause 993 + * the context to hang. Presumably with a bit of reordering we 994 + * could move the perma-pin step to the backend function 995 + * intel_engine_create_parallel. 996 + */ 997 + if (pe[n].type == I915_GEM_ENGINE_TYPE_PARALLEL) { 998 + ret = perma_pin_contexts(ce); 999 + if (ret) { 1000 + err = ERR_PTR(ret); 1001 + goto free_engines; 1002 + } 1180 1003 } 1181 1004 } 1182 1005 ··· 1444 1219 1445 1220 /* Flush any concurrent set_engines() */ 1446 1221 mutex_lock(&ctx->engines_mutex); 1222 + unpin_engines(__context_engines_static(ctx)); 1447 1223 engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1)); 1448 1224 i915_gem_context_set_closed(ctx); 1449 1225 mutex_unlock(&ctx->engines_mutex);
+13 -3
drivers/gpu/drm/i915/gem/i915_gem_context_types.h
··· 78 78 79 79 /** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */ 80 80 I915_GEM_ENGINE_TYPE_BALANCED, 81 + 82 + /** @I915_GEM_ENGINE_TYPE_PARALLEL: A parallel engine set */ 83 + I915_GEM_ENGINE_TYPE_PARALLEL, 81 84 }; 82 85 83 86 /** 84 87 * struct i915_gem_proto_engine - prototype engine 85 88 * 86 89 * This struct describes an engine that a context may contain. Engines 87 - * have three types: 90 + * have four types: 88 91 * 89 92 * - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they 90 93 * show up as a NULL in i915_gem_engines::engines[i] and any attempt to ··· 100 97 * 101 98 * - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described 102 99 * i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings. 100 + * 101 + * - I915_GEM_ENGINE_TYPE_PARALLEL: A parallel submission engine set, described 102 + * i915_gem_proto_engine::width, i915_gem_proto_engine::num_siblings, and 103 + * i915_gem_proto_engine::siblings. 103 104 */ 104 105 struct i915_gem_proto_engine { 105 106 /** @type: Type of this engine */ ··· 112 105 /** @engine: Engine, for physical */ 113 106 struct intel_engine_cs *engine; 114 107 115 - /** @num_siblings: Number of balanced siblings */ 108 + /** @num_siblings: Number of balanced or parallel siblings */ 116 109 unsigned int num_siblings; 117 110 118 - /** @siblings: Balanced siblings */ 111 + /** @width: Width of each sibling */ 112 + unsigned int width; 113 + 114 + /** @siblings: Balanced siblings or num_siblings * width for parallel */ 119 115 struct intel_engine_cs **siblings; 120 116 121 117 /** @sseu: Client-set SSEU parameters */
+7 -2
drivers/gpu/drm/i915/gt/intel_context_types.h
··· 55 55 void (*reset)(struct intel_context *ce); 56 56 void (*destroy)(struct kref *kref); 57 57 58 - /* virtual engine/context interface */ 58 + /* virtual/parallel engine/context interface */ 59 59 struct intel_context *(*create_virtual)(struct intel_engine_cs **engine, 60 - unsigned int count); 60 + unsigned int count, 61 + unsigned long flags); 62 + struct intel_context *(*create_parallel)(struct intel_engine_cs **engines, 63 + unsigned int num_siblings, 64 + unsigned int width); 61 65 struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine, 62 66 unsigned int sibling); 63 67 }; ··· 117 113 #define CONTEXT_NOPREEMPT 8 118 114 #define CONTEXT_LRCA_DIRTY 9 119 115 #define CONTEXT_GUC_INIT 10 116 + #define CONTEXT_PERMA_PIN 11 120 117 121 118 struct { 122 119 u64 timeout_us;
+11 -1
drivers/gpu/drm/i915/gt/intel_engine.h
··· 282 282 return intel_engine_has_preemption(engine); 283 283 } 284 284 285 + #define FORCE_VIRTUAL BIT(0) 285 286 struct intel_context * 286 287 intel_engine_create_virtual(struct intel_engine_cs **siblings, 287 - unsigned int count); 288 + unsigned int count, unsigned long flags); 289 + 290 + static inline struct intel_context * 291 + intel_engine_create_parallel(struct intel_engine_cs **engines, 292 + unsigned int num_engines, 293 + unsigned int width) 294 + { 295 + GEM_BUG_ON(!engines[0]->cops->create_parallel); 296 + return engines[0]->cops->create_parallel(engines, num_engines, width); 297 + } 288 298 289 299 static inline bool 290 300 intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
+3 -3
drivers/gpu/drm/i915/gt/intel_engine_cs.c
··· 1953 1953 1954 1954 struct intel_context * 1955 1955 intel_engine_create_virtual(struct intel_engine_cs **siblings, 1956 - unsigned int count) 1956 + unsigned int count, unsigned long flags) 1957 1957 { 1958 1958 if (count == 0) 1959 1959 return ERR_PTR(-EINVAL); 1960 1960 1961 - if (count == 1) 1961 + if (count == 1 && !(flags & FORCE_VIRTUAL)) 1962 1962 return intel_context_create(siblings[0]); 1963 1963 1964 1964 GEM_BUG_ON(!siblings[0]->cops->create_virtual); 1965 - return siblings[0]->cops->create_virtual(siblings, count); 1965 + return siblings[0]->cops->create_virtual(siblings, count, flags); 1966 1966 } 1967 1967 1968 1968 struct i915_request *
+4 -2
drivers/gpu/drm/i915/gt/intel_execlists_submission.c
··· 201 201 } 202 202 203 203 static struct intel_context * 204 - execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); 204 + execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 205 + unsigned long flags); 205 206 206 207 static struct i915_request * 207 208 __active_request(const struct intel_timeline * const tl, ··· 3785 3784 } 3786 3785 3787 3786 static struct intel_context * 3788 - execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) 3787 + execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 3788 + unsigned long flags) 3789 3789 { 3790 3790 struct virtual_engine *ve; 3791 3791 unsigned int n;
+6 -6
drivers/gpu/drm/i915/gt/selftest_execlists.c
··· 3733 3733 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3734 3734 3735 3735 for (n = 0; n < nctx; n++) { 3736 - ve[n] = intel_engine_create_virtual(siblings, nsibling); 3736 + ve[n] = intel_engine_create_virtual(siblings, nsibling, 0); 3737 3737 if (IS_ERR(ve[n])) { 3738 3738 err = PTR_ERR(ve[n]); 3739 3739 nctx = n; ··· 3929 3929 * restrict it to our desired engine within the virtual engine. 3930 3930 */ 3931 3931 3932 - ve = intel_engine_create_virtual(siblings, nsibling); 3932 + ve = intel_engine_create_virtual(siblings, nsibling, 0); 3933 3933 if (IS_ERR(ve)) { 3934 3934 err = PTR_ERR(ve); 3935 3935 goto out_close; ··· 4060 4060 i915_request_add(rq); 4061 4061 } 4062 4062 4063 - ce = intel_engine_create_virtual(siblings, nsibling); 4063 + ce = intel_engine_create_virtual(siblings, nsibling, 0); 4064 4064 if (IS_ERR(ce)) { 4065 4065 err = PTR_ERR(ce); 4066 4066 goto out; ··· 4112 4112 4113 4113 /* XXX We do not handle oversubscription and fairness with normal rq */ 4114 4114 for (n = 0; n < nsibling; n++) { 4115 - ce = intel_engine_create_virtual(siblings, nsibling); 4115 + ce = intel_engine_create_virtual(siblings, nsibling, 0); 4116 4116 if (IS_ERR(ce)) { 4117 4117 err = PTR_ERR(ce); 4118 4118 goto out; ··· 4214 4214 if (err) 4215 4215 goto out_scratch; 4216 4216 4217 - ve = intel_engine_create_virtual(siblings, nsibling); 4217 + ve = intel_engine_create_virtual(siblings, nsibling, 0); 4218 4218 if (IS_ERR(ve)) { 4219 4219 err = PTR_ERR(ve); 4220 4220 goto out_scratch; ··· 4354 4354 if (igt_spinner_init(&spin, gt)) 4355 4355 return -ENOMEM; 4356 4356 4357 - ve = intel_engine_create_virtual(siblings, nsibling); 4357 + ve = intel_engine_create_virtual(siblings, nsibling, 0); 4358 4358 if (IS_ERR(ve)) { 4359 4359 err = PTR_ERR(ve); 4360 4360 goto out_spin;
+102 -12
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
··· 124 124 }; 125 125 126 126 static struct intel_context * 127 - guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); 127 + guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 128 + unsigned long flags); 129 + 130 + static struct intel_context * 131 + guc_create_parallel(struct intel_engine_cs **engines, 132 + unsigned int num_siblings, 133 + unsigned int width); 128 134 129 135 #define GUC_REQUEST_SIZE 64 /* bytes */ 130 136 ··· 2615 2609 .destroy = guc_context_destroy, 2616 2610 2617 2611 .create_virtual = guc_create_virtual, 2612 + .create_parallel = guc_create_parallel, 2618 2613 }; 2619 2614 2620 2615 static void submit_work_cb(struct irq_work *wrk) ··· 2865 2858 .get_sibling = guc_virtual_get_sibling, 2866 2859 }; 2867 2860 2868 - /* Future patches will use this function */ 2869 - __maybe_unused 2870 2861 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 2871 2862 { 2872 2863 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); ··· 2881 2876 return __guc_context_pin(ce, engine, vaddr); 2882 2877 } 2883 2878 2884 - /* Future patches will use this function */ 2885 - __maybe_unused 2886 2879 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 2887 2880 { 2888 2881 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); ··· 2892 2889 return __guc_context_pin(ce, engine, vaddr); 2893 2890 } 2894 2891 2895 - /* Future patches will use this function */ 2896 - __maybe_unused 2897 2892 static void guc_parent_context_unpin(struct intel_context *ce) 2898 2893 { 2899 2894 struct intel_guc *guc = ce_to_guc(ce); ··· 2907 2906 lrc_unpin(ce); 2908 2907 } 2909 2908 2910 - /* Future patches will use this function */ 2911 - __maybe_unused 2912 2909 static void guc_child_context_unpin(struct intel_context *ce) 2913 2910 { 2914 2911 GEM_BUG_ON(context_enabled(ce)); ··· 2917 2918 lrc_unpin(ce); 2918 2919 } 2919 2920 2920 - /* Future patches will use this 
function */ 2921 - __maybe_unused 2922 2921 static void guc_child_context_post_unpin(struct intel_context *ce) 2923 2922 { 2924 2923 GEM_BUG_ON(!intel_context_is_child(ce)); ··· 2925 2928 2926 2929 lrc_post_unpin(ce); 2927 2930 intel_context_unpin(ce->parallel.parent); 2931 + } 2932 + 2933 + static void guc_child_context_destroy(struct kref *kref) 2934 + { 2935 + struct intel_context *ce = container_of(kref, typeof(*ce), ref); 2936 + 2937 + __guc_context_destroy(ce); 2938 + } 2939 + 2940 + static const struct intel_context_ops virtual_parent_context_ops = { 2941 + .alloc = guc_virtual_context_alloc, 2942 + 2943 + .pre_pin = guc_context_pre_pin, 2944 + .pin = guc_parent_context_pin, 2945 + .unpin = guc_parent_context_unpin, 2946 + .post_unpin = guc_context_post_unpin, 2947 + 2948 + .ban = guc_context_ban, 2949 + 2950 + .cancel_request = guc_context_cancel_request, 2951 + 2952 + .enter = guc_virtual_context_enter, 2953 + .exit = guc_virtual_context_exit, 2954 + 2955 + .sched_disable = guc_context_sched_disable, 2956 + 2957 + .destroy = guc_context_destroy, 2958 + 2959 + .get_sibling = guc_virtual_get_sibling, 2960 + }; 2961 + 2962 + static const struct intel_context_ops virtual_child_context_ops = { 2963 + .alloc = guc_virtual_context_alloc, 2964 + 2965 + .pre_pin = guc_context_pre_pin, 2966 + .pin = guc_child_context_pin, 2967 + .unpin = guc_child_context_unpin, 2968 + .post_unpin = guc_child_context_post_unpin, 2969 + 2970 + .cancel_request = guc_context_cancel_request, 2971 + 2972 + .enter = guc_virtual_context_enter, 2973 + .exit = guc_virtual_context_exit, 2974 + 2975 + .destroy = guc_child_context_destroy, 2976 + 2977 + .get_sibling = guc_virtual_get_sibling, 2978 + }; 2979 + 2980 + static struct intel_context * 2981 + guc_create_parallel(struct intel_engine_cs **engines, 2982 + unsigned int num_siblings, 2983 + unsigned int width) 2984 + { 2985 + struct intel_engine_cs **siblings = NULL; 2986 + struct intel_context *parent = NULL, *ce, *err; 2987 + int i, j; 
2988 + 2989 + siblings = kmalloc_array(num_siblings, 2990 + sizeof(*siblings), 2991 + GFP_KERNEL); 2992 + if (!siblings) 2993 + return ERR_PTR(-ENOMEM); 2994 + 2995 + for (i = 0; i < width; ++i) { 2996 + for (j = 0; j < num_siblings; ++j) 2997 + siblings[j] = engines[i * num_siblings + j]; 2998 + 2999 + ce = intel_engine_create_virtual(siblings, num_siblings, 3000 + FORCE_VIRTUAL); 3001 + if (!ce) { 3002 + err = ERR_PTR(-ENOMEM); 3003 + goto unwind; 3004 + } 3005 + 3006 + if (i == 0) { 3007 + parent = ce; 3008 + parent->ops = &virtual_parent_context_ops; 3009 + } else { 3010 + ce->ops = &virtual_child_context_ops; 3011 + intel_context_bind_parent_child(parent, ce); 3012 + } 3013 + } 3014 + 3015 + kfree(siblings); 3016 + return parent; 3017 + 3018 + unwind: 3019 + if (parent) 3020 + intel_context_put(parent); 3021 + kfree(siblings); 3022 + return err; 2928 3023 } 2929 3024 2930 3025 static bool ··· 3845 3756 } 3846 3757 3847 3758 static struct intel_context * 3848 - guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) 3759 + guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 3760 + unsigned long flags) 3849 3761 { 3850 3762 struct guc_virtual_engine *ve; 3851 3763 struct intel_guc *guc;
+131
include/uapi/drm/i915_drm.h
··· 1824 1824 * Extensions: 1825 1825 * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) 1826 1826 * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) 1827 + * i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT) 1827 1828 */ 1828 1829 #define I915_CONTEXT_PARAM_ENGINES 0xa 1829 1830 ··· 2100 2099 } __attribute__((packed)) name__ 2101 2100 2102 2101 /** 2102 + * struct i915_context_engines_parallel_submit - Configure engine for 2103 + * parallel submission. 2104 + * 2105 + * Setup a slot in the context engine map to allow multiple BBs to be submitted 2106 + * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU 2107 + * in parallel. Multiple hardware contexts are created internally in the i915 to 2108 + * run these BBs. Once a slot is configured for N BBs only N BBs can be 2109 + * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user 2110 + * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how 2111 + * many BBs there are based on the slot's configuration. The N BBs are the last 2112 + * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. 2113 + * 2114 + * The default placement behavior is to create implicit bonds between each 2115 + * context if each context maps to more than 1 physical engine (e.g. context is 2116 + * a virtual engine). Also we only allow contexts of same engine class and these 2117 + * contexts must be in logically contiguous order. Examples of the placement 2118 + * behavior are described below. Lastly, the default is to not allow BBs to be 2119 + * preempted mid-batch. Rather insert coordinated preemption points on all 2120 + * hardware contexts between each set of BBs. Flags could be added in the future 2121 + * to change both of these default behaviors. 
2122 + * 2123 + * Returns -EINVAL if hardware context placement configuration is invalid or if 2124 + * the placement configuration isn't supported on the platform / submission 2125 + * interface. 2126 + * Returns -ENODEV if extension isn't supported on the platform / submission 2127 + * interface. 2128 + * 2129 + * .. code-block:: none 2130 + * 2131 + * Examples syntax: 2132 + * CS[X] = generic engine of same class, logical instance X 2133 + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE 2134 + * 2135 + * Example 1 pseudo code: 2136 + * set_engines(INVALID) 2137 + * set_parallel(engine_index=0, width=2, num_siblings=1, 2138 + * engines=CS[0],CS[1]) 2139 + * 2140 + * Results in the following valid placement: 2141 + * CS[0], CS[1] 2142 + * 2143 + * Example 2 pseudo code: 2144 + * set_engines(INVALID) 2145 + * set_parallel(engine_index=0, width=2, num_siblings=2, 2146 + * engines=CS[0],CS[2],CS[1],CS[3]) 2147 + * 2148 + * Results in the following valid placements: 2149 + * CS[0], CS[1] 2150 + * CS[2], CS[3] 2151 + * 2152 + * This can be thought of as two virtual engines, each containing two 2153 + * engines thereby making a 2D array. However, there are bonds tying the 2154 + * entries together and placing restrictions on how they can be scheduled. 2155 + * Specifically, the scheduler can choose only vertical columns from the 2D 2156 + * array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the 2157 + * scheduler wants to submit to CS[0], it must also choose CS[1] and vice 2158 + * versa. Same for CS[2] requires also using CS[3]. 
2159 + * VE[0] = CS[0], CS[2] 2160 + * VE[1] = CS[1], CS[3] 2161 + * 2162 + * Example 3 pseudo code: 2163 + * set_engines(INVALID) 2164 + * set_parallel(engine_index=0, width=2, num_siblings=2, 2165 + * engines=CS[0],CS[1],CS[1],CS[3]) 2166 + * 2167 + * Results in the following valid and invalid placements: 2168 + * CS[0], CS[1] 2169 + * CS[1], CS[3] - Not logically contiguous, return -EINVAL 2170 + */ 2171 + struct i915_context_engines_parallel_submit { 2172 + /** 2173 + * @base: base user extension. 2174 + */ 2175 + struct i915_user_extension base; 2176 + 2177 + /** 2178 + * @engine_index: slot for parallel engine 2179 + */ 2180 + __u16 engine_index; 2181 + 2182 + /** 2183 + * @width: number of contexts per parallel engine or in other words the 2184 + * number of batches in each submission 2185 + */ 2186 + __u16 width; 2187 + 2188 + /** 2189 + * @num_siblings: number of siblings per context or in other words the 2190 + * number of possible placements for each submission 2191 + */ 2192 + __u16 num_siblings; 2193 + 2194 + /** 2195 + * @mbz16: reserved for future use; must be zero 2196 + */ 2197 + __u16 mbz16; 2198 + 2199 + /** 2200 + * @flags: all undefined flags must be zero, currently not defined flags 2201 + */ 2202 + __u64 flags; 2203 + 2204 + /** 2205 + * @mbz64: reserved for future use; must be zero 2206 + */ 2207 + __u64 mbz64[3]; 2208 + 2209 + /** 2210 + * @engines: 2-d array of engine instances to configure parallel engine 2211 + * 2212 + * length = width (i) * num_siblings (j) 2213 + * index = j + i * num_siblings 2214 + */ 2215 + struct i915_engine_class_instance engines[0]; 2216 + 2217 + } __packed; 2218 + 2219 + #define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \ 2220 + struct i915_user_extension base; \ 2221 + __u16 engine_index; \ 2222 + __u16 width; \ 2223 + __u16 num_siblings; \ 2224 + __u16 mbz16; \ 2225 + __u64 flags; \ 2226 + __u64 mbz64[3]; \ 2227 + struct i915_engine_class_instance engines[N__]; \ 2228 + } 
__attribute__((packed)) name__ 2229 + 2230 + /** 2103 2231 * DOC: Context Engine Map uAPI 2104 2232 * 2105 2233 * Context engine map is a new way of addressing engines when submitting batch- ··· 2287 2157 __u64 extensions; /* linked chain of extension blocks, 0 terminates */ 2288 2158 #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ 2289 2159 #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ 2160 + #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ 2290 2161 struct i915_engine_class_instance engines[0]; 2291 2162 } __attribute__((packed)); 2292 2163