Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/guc: Implement GuC context operations for new interface

Implement GuC context operations which includes GuC specific operations
alloc, pin, unpin, and destroy.

v2:
(Daniel Vetter)
- Use msleep_interruptible rather than cond_resched in busy loop
(Michal)
- Remove C++ style comment
v3:
(Matthew Brost)
- Drop GUC_ID_START
(John Harrison)
- Fix a bunch of typos
- Use drm_err rather than drm_dbg for G2H errors
(Daniele)
- Fix ;; typo
- Clean up sched state functions
- Add lockdep for guc_id functions
- Don't call __release_guc_id when guc_id is invalid
- Use MISSING_CASE
- Add comment in guc_context_pin
- Use shorter path to rpm
(Daniele / CI)
- Don't call release_guc_id on an invalid guc_id in destroy
v4:
(Daniel Vetter)
- Add FIXME comment

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-7-matthew.brost@intel.com

authored by

Matthew Brost and committed by
John Harrison
3a4cdf19 2330923e

+708 -67
+5
drivers/gpu/drm/i915/gt/intel_context.c
··· 384 384 385 385 mutex_init(&ce->pin_mutex); 386 386 387 + spin_lock_init(&ce->guc_state.lock); 388 + 389 + ce->guc_id = GUC_INVALID_LRC_ID; 390 + INIT_LIST_HEAD(&ce->guc_id_link); 391 + 387 392 i915_active_init(&ce->active, 388 393 __intel_context_active, __intel_context_retire, 0); 389 394 }
+20 -4
drivers/gpu/drm/i915/gt/intel_context_types.h
··· 96 96 #define CONTEXT_BANNED 6 97 97 #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 98 98 #define CONTEXT_NOPREEMPT 8 99 + #define CONTEXT_LRCA_DIRTY 9 99 100 100 101 struct { 101 102 u64 timeout_us; ··· 139 138 140 139 u8 wa_bb_page; /* if set, page num reserved for context workarounds */ 141 140 141 + struct { 142 + /** lock: protects everything in guc_state */ 143 + spinlock_t lock; 144 + /** 145 + * sched_state: scheduling state of this context using GuC 146 + * submission 147 + */ 148 + u8 sched_state; 149 + } guc_state; 150 + 142 151 /* GuC scheduling state flags that do not require a lock. */ 143 152 atomic_t guc_sched_state_no_lock; 144 153 145 - /* 146 - * GuC LRC descriptor ID - Not assigned in this patch but future patches 147 - * in the series will. 148 - */ 154 + /* GuC LRC descriptor ID */ 149 155 u16 guc_id; 156 + 157 + /* GuC LRC descriptor reference count */ 158 + atomic_t guc_id_ref; 159 + 160 + /* 161 + * GuC ID link - in list when unpinned but guc_id still valid in GuC 162 + */ 163 + struct list_head guc_id_link; 150 164 }; 151 165 152 166 #endif /* __INTEL_CONTEXT_TYPES__ */
-1
drivers/gpu/drm/i915/gt/intel_lrc_reg.h
··· 87 87 #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) 88 88 89 89 #define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */ 90 - #define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ 91 90 #define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */ 92 91 /* in Gen12 ID 0x7FF is reserved to indicate idle */ 93 92 #define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1)
+47
drivers/gpu/drm/i915/gt/uc/intel_guc.h
··· 7 7 #define _INTEL_GUC_H_ 8 8 9 9 #include <linux/xarray.h> 10 + #include <linux/delay.h> 10 11 11 12 #include "intel_uncore.h" 12 13 #include "intel_guc_fw.h" ··· 44 43 void (*enable)(struct intel_guc *guc); 45 44 void (*disable)(struct intel_guc *guc); 46 45 } interrupts; 46 + 47 + /* 48 + * contexts_lock protects the pool of free guc ids and a linked list of 49 + * guc ids available to be stolen 50 + */ 51 + spinlock_t contexts_lock; 52 + struct ida guc_ids; 53 + struct list_head guc_id_list; 47 54 48 55 bool submission_selected; 49 56 ··· 108 99 { 109 100 return intel_guc_ct_send(&guc->ct, action, len, 110 101 response_buf, response_buf_size, 0); 102 + } 103 + 104 + static inline int intel_guc_send_busy_loop(struct intel_guc *guc, 105 + const u32 *action, 106 + u32 len, 107 + bool loop) 108 + { 109 + int err; 110 + unsigned int sleep_period_ms = 1; 111 + bool not_atomic = !in_atomic() && !irqs_disabled(); 112 + 113 + /* 114 + * FIXME: Have caller pass in if we are in an atomic context to avoid 115 + * using in_atomic(). It is likely safe here as we check for irqs 116 + * disabled which basically all the spin locks in the i915 do but 117 + * regardless this should be cleaned up. 
118 + */ 119 + 120 + /* No sleeping with spin locks, just busy loop */ 121 + might_sleep_if(loop && not_atomic); 122 + 123 + retry: 124 + err = intel_guc_send_nb(guc, action, len); 125 + if (unlikely(err == -EBUSY && loop)) { 126 + if (likely(not_atomic)) { 127 + if (msleep_interruptible(sleep_period_ms)) 128 + return -EINTR; 129 + sleep_period_ms = sleep_period_ms << 1; 130 + } else { 131 + cpu_relax(); 132 + } 133 + goto retry; 134 + } 135 + 136 + return err; 111 137 } 112 138 113 139 static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) ··· 245 201 246 202 int intel_guc_reset_engine(struct intel_guc *guc, 247 203 struct intel_engine_cs *engine); 204 + 205 + int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 206 + const u32 *msg, u32 len); 248 207 249 208 void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); 250 209
+4
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
··· 928 928 case INTEL_GUC_ACTION_DEFAULT: 929 929 ret = intel_guc_to_host_process_recv_msg(guc, payload, len); 930 930 break; 931 + case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE: 932 + ret = intel_guc_deregister_done_process_msg(guc, payload, 933 + len); 934 + break; 931 935 default: 932 936 ret = -EOPNOTSUPP; 933 937 break;
+630 -62
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
··· 13 13 #include "gt/intel_gt.h" 14 14 #include "gt/intel_gt_irq.h" 15 15 #include "gt/intel_gt_pm.h" 16 + #include "gt/intel_gt_requests.h" 16 17 #include "gt/intel_lrc.h" 18 + #include "gt/intel_lrc_reg.h" 17 19 #include "gt/intel_mocs.h" 18 20 #include "gt/intel_ring.h" 19 21 ··· 87 85 &ce->guc_sched_state_no_lock); 88 86 } 89 87 88 + /* 89 + * Below is a set of functions which control the GuC scheduling state which 90 + * require a lock, aside from the special case where the functions are called 91 + * from guc_lrc_desc_pin(). In that case it isn't possible for any other code 92 + * path to be executing on the context. 93 + */ 94 + #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) 95 + #define SCHED_STATE_DESTROYED BIT(1) 96 + static inline void init_sched_state(struct intel_context *ce) 97 + { 98 + /* Only should be called from guc_lrc_desc_pin() */ 99 + atomic_set(&ce->guc_sched_state_no_lock, 0); 100 + ce->guc_state.sched_state = 0; 101 + } 102 + 103 + static inline bool 104 + context_wait_for_deregister_to_register(struct intel_context *ce) 105 + { 106 + return ce->guc_state.sched_state & 107 + SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 108 + } 109 + 110 + static inline void 111 + set_context_wait_for_deregister_to_register(struct intel_context *ce) 112 + { 113 + /* Only should be called from guc_lrc_desc_pin() */ 114 + ce->guc_state.sched_state |= 115 + SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 116 + } 117 + 118 + static inline void 119 + clr_context_wait_for_deregister_to_register(struct intel_context *ce) 120 + { 121 + lockdep_assert_held(&ce->guc_state.lock); 122 + ce->guc_state.sched_state &= 123 + ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 124 + } 125 + 126 + static inline bool 127 + context_destroyed(struct intel_context *ce) 128 + { 129 + return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; 130 + } 131 + 132 + static inline void 133 + set_context_destroyed(struct intel_context *ce) 134 + { 135 + 
lockdep_assert_held(&ce->guc_state.lock); 136 + ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; 137 + } 138 + 139 + static inline bool context_guc_id_invalid(struct intel_context *ce) 140 + { 141 + return ce->guc_id == GUC_INVALID_LRC_ID; 142 + } 143 + 144 + static inline void set_context_guc_id_invalid(struct intel_context *ce) 145 + { 146 + ce->guc_id = GUC_INVALID_LRC_ID; 147 + } 148 + 149 + static inline struct intel_guc *ce_to_guc(struct intel_context *ce) 150 + { 151 + return &ce->engine->gt->uc.guc; 152 + } 153 + 90 154 static inline struct i915_priolist *to_priolist(struct rb_node *rb) 91 155 { 92 156 return rb_entry(rb, struct i915_priolist, node); ··· 222 154 u32 action[3]; 223 155 int len = 0; 224 156 bool enabled = context_enabled(ce); 157 + 158 + GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); 159 + GEM_BUG_ON(context_guc_id_invalid(ce)); 225 160 226 161 if (!enabled) { 227 162 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; ··· 488 417 489 418 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 490 419 420 + spin_lock_init(&guc->contexts_lock); 421 + INIT_LIST_HEAD(&guc->guc_id_list); 422 + ida_init(&guc->guc_ids); 423 + 491 424 return 0; 492 425 } 493 426 ··· 502 427 503 428 guc_lrc_desc_pool_destroy(guc); 504 429 i915_sched_engine_put(guc->sched_engine); 505 - } 506 - 507 - static int guc_context_alloc(struct intel_context *ce) 508 - { 509 - return lrc_alloc(ce, ce->engine); 510 - } 511 - 512 - static int guc_context_pre_pin(struct intel_context *ce, 513 - struct i915_gem_ww_ctx *ww, 514 - void **vaddr) 515 - { 516 - return lrc_pre_pin(ce, ce->engine, ww, vaddr); 517 - } 518 - 519 - static int guc_context_pin(struct intel_context *ce, void *vaddr) 520 - { 521 - return lrc_pin(ce, ce->engine, vaddr); 522 - } 523 - 524 - static const struct intel_context_ops guc_context_ops = { 525 - .alloc = guc_context_alloc, 526 - 527 - .pre_pin = guc_context_pre_pin, 528 - .pin = guc_context_pin, 529 - .unpin = lrc_unpin, 530 - .post_unpin = 
lrc_post_unpin, 531 - 532 - .enter = intel_context_enter_engine, 533 - .exit = intel_context_exit_engine, 534 - 535 - .reset = lrc_reset, 536 - .destroy = lrc_destroy, 537 - }; 538 - 539 - static int guc_request_alloc(struct i915_request *request) 540 - { 541 - int ret; 542 - 543 - GEM_BUG_ON(!intel_context_is_pinned(request->context)); 544 - 545 - /* 546 - * Flush enough space to reduce the likelihood of waiting after 547 - * we start building the request - in which case we will just 548 - * have to repeat work. 549 - */ 550 - request->reserved_space += GUC_REQUEST_SIZE; 551 - 552 - /* 553 - * Note that after this point, we have committed to using 554 - * this request as it is being used to both track the 555 - * state of engine initialisation and liveness of the 556 - * golden renderstate above. Think twice before you try 557 - * to cancel/unwind this request now. 558 - */ 559 - 560 - /* Unconditionally invalidate GPU caches and TLBs. */ 561 - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); 562 - if (ret) 563 - return ret; 564 - 565 - request->reserved_space -= GUC_REQUEST_SIZE; 566 - return 0; 567 430 } 568 431 569 432 static inline void queue_request(struct i915_sched_engine *sched_engine, ··· 546 533 tasklet_hi_schedule(&sched_engine->tasklet); 547 534 548 535 spin_unlock_irqrestore(&sched_engine->lock, flags); 536 + } 537 + 538 + static int new_guc_id(struct intel_guc *guc) 539 + { 540 + return ida_simple_get(&guc->guc_ids, 0, 541 + GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL | 542 + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 543 + } 544 + 545 + static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 546 + { 547 + if (!context_guc_id_invalid(ce)) { 548 + ida_simple_remove(&guc->guc_ids, ce->guc_id); 549 + reset_lrc_desc(guc, ce->guc_id); 550 + set_context_guc_id_invalid(ce); 551 + } 552 + if (!list_empty(&ce->guc_id_link)) 553 + list_del_init(&ce->guc_id_link); 554 + } 555 + 556 + static void release_guc_id(struct intel_guc *guc, struct 
intel_context *ce) 557 + { 558 + unsigned long flags; 559 + 560 + spin_lock_irqsave(&guc->contexts_lock, flags); 561 + __release_guc_id(guc, ce); 562 + spin_unlock_irqrestore(&guc->contexts_lock, flags); 563 + } 564 + 565 + static int steal_guc_id(struct intel_guc *guc) 566 + { 567 + struct intel_context *ce; 568 + int guc_id; 569 + 570 + lockdep_assert_held(&guc->contexts_lock); 571 + 572 + if (!list_empty(&guc->guc_id_list)) { 573 + ce = list_first_entry(&guc->guc_id_list, 574 + struct intel_context, 575 + guc_id_link); 576 + 577 + GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); 578 + GEM_BUG_ON(context_guc_id_invalid(ce)); 579 + 580 + list_del_init(&ce->guc_id_link); 581 + guc_id = ce->guc_id; 582 + set_context_guc_id_invalid(ce); 583 + return guc_id; 584 + } else { 585 + return -EAGAIN; 586 + } 587 + } 588 + 589 + static int assign_guc_id(struct intel_guc *guc, u16 *out) 590 + { 591 + int ret; 592 + 593 + lockdep_assert_held(&guc->contexts_lock); 594 + 595 + ret = new_guc_id(guc); 596 + if (unlikely(ret < 0)) { 597 + ret = steal_guc_id(guc); 598 + if (ret < 0) 599 + return ret; 600 + } 601 + 602 + *out = ret; 603 + return 0; 604 + } 605 + 606 + #define PIN_GUC_ID_TRIES 4 607 + static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 608 + { 609 + int ret = 0; 610 + unsigned long flags, tries = PIN_GUC_ID_TRIES; 611 + 612 + GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); 613 + 614 + try_again: 615 + spin_lock_irqsave(&guc->contexts_lock, flags); 616 + 617 + if (context_guc_id_invalid(ce)) { 618 + ret = assign_guc_id(guc, &ce->guc_id); 619 + if (ret) 620 + goto out_unlock; 621 + ret = 1; /* Indidcates newly assigned guc_id */ 622 + } 623 + if (!list_empty(&ce->guc_id_link)) 624 + list_del_init(&ce->guc_id_link); 625 + atomic_inc(&ce->guc_id_ref); 626 + 627 + out_unlock: 628 + spin_unlock_irqrestore(&guc->contexts_lock, flags); 629 + 630 + /* 631 + * -EAGAIN indicates no guc_ids are available, let's retire any 632 + * outstanding requests to see if that frees up 
a guc_id. If the first 633 + * retire didn't help, insert a sleep with the timeslice duration before 634 + * attempting to retire more requests. Double the sleep period each 635 + * subsequent pass before finally giving up. The sleep period has max of 636 + * 100ms and minimum of 1ms. 637 + */ 638 + if (ret == -EAGAIN && --tries) { 639 + if (PIN_GUC_ID_TRIES - tries > 1) { 640 + unsigned int timeslice_shifted = 641 + ce->engine->props.timeslice_duration_ms << 642 + (PIN_GUC_ID_TRIES - tries - 2); 643 + unsigned int max = min_t(unsigned int, 100, 644 + timeslice_shifted); 645 + 646 + msleep(max_t(unsigned int, max, 1)); 647 + } 648 + intel_gt_retire_requests(guc_to_gt(guc)); 649 + goto try_again; 650 + } 651 + 652 + return ret; 653 + } 654 + 655 + static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 656 + { 657 + unsigned long flags; 658 + 659 + GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0); 660 + 661 + if (unlikely(context_guc_id_invalid(ce))) 662 + return; 663 + 664 + spin_lock_irqsave(&guc->contexts_lock, flags); 665 + if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) && 666 + !atomic_read(&ce->guc_id_ref)) 667 + list_add_tail(&ce->guc_id_link, &guc->guc_id_list); 668 + spin_unlock_irqrestore(&guc->contexts_lock, flags); 669 + } 670 + 671 + static int __guc_action_register_context(struct intel_guc *guc, 672 + u32 guc_id, 673 + u32 offset) 674 + { 675 + u32 action[] = { 676 + INTEL_GUC_ACTION_REGISTER_CONTEXT, 677 + guc_id, 678 + offset, 679 + }; 680 + 681 + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); 682 + } 683 + 684 + static int register_context(struct intel_context *ce) 685 + { 686 + struct intel_guc *guc = ce_to_guc(ce); 687 + u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + 688 + ce->guc_id * sizeof(struct guc_lrc_desc); 689 + 690 + return __guc_action_register_context(guc, ce->guc_id, offset); 691 + } 692 + 693 + static int __guc_action_deregister_context(struct intel_guc *guc, 694 + 
u32 guc_id) 695 + { 696 + u32 action[] = { 697 + INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 698 + guc_id, 699 + }; 700 + 701 + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); 702 + } 703 + 704 + static int deregister_context(struct intel_context *ce, u32 guc_id) 705 + { 706 + struct intel_guc *guc = ce_to_guc(ce); 707 + 708 + return __guc_action_deregister_context(guc, guc_id); 709 + } 710 + 711 + static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) 712 + { 713 + switch (class) { 714 + case RENDER_CLASS: 715 + return mask >> RCS0; 716 + case VIDEO_ENHANCEMENT_CLASS: 717 + return mask >> VECS0; 718 + case VIDEO_DECODE_CLASS: 719 + return mask >> VCS0; 720 + case COPY_ENGINE_CLASS: 721 + return mask >> BCS0; 722 + default: 723 + MISSING_CASE(class); 724 + return 0; 725 + } 726 + } 727 + 728 + static void guc_context_policy_init(struct intel_engine_cs *engine, 729 + struct guc_lrc_desc *desc) 730 + { 731 + desc->policy_flags = 0; 732 + 733 + desc->execution_quantum = CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US; 734 + desc->preemption_timeout = CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US; 735 + } 736 + 737 + static int guc_lrc_desc_pin(struct intel_context *ce) 738 + { 739 + struct intel_engine_cs *engine = ce->engine; 740 + struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 741 + struct intel_guc *guc = &engine->gt->uc.guc; 742 + u32 desc_idx = ce->guc_id; 743 + struct guc_lrc_desc *desc; 744 + bool context_registered; 745 + intel_wakeref_t wakeref; 746 + int ret = 0; 747 + 748 + GEM_BUG_ON(!engine->mask); 749 + 750 + /* 751 + * Ensure LRC + CT vmas are is same region as write barrier is done 752 + * based on CT vma region. 
753 + */ 754 + GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 755 + i915_gem_object_is_lmem(ce->ring->vma->obj)); 756 + 757 + context_registered = lrc_desc_registered(guc, desc_idx); 758 + 759 + reset_lrc_desc(guc, desc_idx); 760 + set_lrc_desc_registered(guc, desc_idx, ce); 761 + 762 + desc = __get_lrc_desc(guc, desc_idx); 763 + desc->engine_class = engine_class_to_guc_class(engine->class); 764 + desc->engine_submit_mask = adjust_engine_mask(engine->class, 765 + engine->mask); 766 + desc->hw_context_desc = ce->lrc.lrca; 767 + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; 768 + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 769 + guc_context_policy_init(engine, desc); 770 + init_sched_state(ce); 771 + 772 + /* 773 + * The context_lookup xarray is used to determine if the hardware 774 + * context is currently registered. There are two cases in which it 775 + * could be registered either the guc_id has been stolen from another 776 + * context or the lrc descriptor address of this context has changed. In 777 + * either case the context needs to be deregistered with the GuC before 778 + * registering this context. 779 + */ 780 + if (context_registered) { 781 + set_context_wait_for_deregister_to_register(ce); 782 + intel_context_get(ce); 783 + 784 + /* 785 + * If stealing the guc_id, this ce has the same guc_id as the 786 + * context whose guc_id was stolen. 
787 + */ 788 + with_intel_runtime_pm(runtime_pm, wakeref) 789 + ret = deregister_context(ce, ce->guc_id); 790 + } else { 791 + with_intel_runtime_pm(runtime_pm, wakeref) 792 + ret = register_context(ce); 793 + } 794 + 795 + return ret; 796 + } 797 + 798 + static int guc_context_pre_pin(struct intel_context *ce, 799 + struct i915_gem_ww_ctx *ww, 800 + void **vaddr) 801 + { 802 + return lrc_pre_pin(ce, ce->engine, ww, vaddr); 803 + } 804 + 805 + static int guc_context_pin(struct intel_context *ce, void *vaddr) 806 + { 807 + if (i915_ggtt_offset(ce->state) != 808 + (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 809 + set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 810 + 811 + /* 812 + * GuC context gets pinned in guc_request_alloc. See that function for 813 + * explaination of why. 814 + */ 815 + 816 + return lrc_pin(ce, ce->engine, vaddr); 817 + } 818 + 819 + static void guc_context_unpin(struct intel_context *ce) 820 + { 821 + struct intel_guc *guc = ce_to_guc(ce); 822 + 823 + unpin_guc_id(guc, ce); 824 + lrc_unpin(ce); 825 + } 826 + 827 + static void guc_context_post_unpin(struct intel_context *ce) 828 + { 829 + lrc_post_unpin(ce); 830 + } 831 + 832 + static inline void guc_lrc_desc_unpin(struct intel_context *ce) 833 + { 834 + struct intel_guc *guc = ce_to_guc(ce); 835 + unsigned long flags; 836 + 837 + GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id)); 838 + GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); 839 + 840 + spin_lock_irqsave(&ce->guc_state.lock, flags); 841 + set_context_destroyed(ce); 842 + spin_unlock_irqrestore(&ce->guc_state.lock, flags); 843 + 844 + deregister_context(ce, ce->guc_id); 845 + } 846 + 847 + static void guc_context_destroy(struct kref *kref) 848 + { 849 + struct intel_context *ce = container_of(kref, typeof(*ce), ref); 850 + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 851 + struct intel_guc *guc = ce_to_guc(ce); 852 + intel_wakeref_t wakeref; 853 + unsigned long flags; 854 + 855 + /* 856 + * If the guc_id is invalid this context 
has been stolen and we can free 857 + * it immediately. Also can be freed immediately if the context is not 858 + * registered with the GuC. 859 + */ 860 + if (context_guc_id_invalid(ce)) { 861 + lrc_destroy(kref); 862 + return; 863 + } else if (!lrc_desc_registered(guc, ce->guc_id)) { 864 + release_guc_id(guc, ce); 865 + lrc_destroy(kref); 866 + return; 867 + } 868 + 869 + /* 870 + * We have to acquire the context spinlock and check guc_id again, if it 871 + * is valid it hasn't been stolen and needs to be deregistered. We 872 + * delete this context from the list of unpinned guc_ids available to 873 + * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB 874 + * returns indicating this context has been deregistered the guc_id is 875 + * returned to the pool of available guc_ids. 876 + */ 877 + spin_lock_irqsave(&guc->contexts_lock, flags); 878 + if (context_guc_id_invalid(ce)) { 879 + spin_unlock_irqrestore(&guc->contexts_lock, flags); 880 + lrc_destroy(kref); 881 + return; 882 + } 883 + 884 + if (!list_empty(&ce->guc_id_link)) 885 + list_del_init(&ce->guc_id_link); 886 + spin_unlock_irqrestore(&guc->contexts_lock, flags); 887 + 888 + /* 889 + * We defer GuC context deregistration until the context is destroyed 890 + * in order to save on CTBs. With this optimization ideally we only need 891 + * 1 CTB to register the context during the first pin and 1 CTB to 892 + * deregister the context when the context is destroyed. Without this 893 + * optimization, a CTB would be needed every pin & unpin. 894 + * 895 + * XXX: Need to acqiure the runtime wakeref as this can be triggered 896 + * from context_free_worker when runtime wakeref is not held. 897 + * guc_lrc_desc_unpin requires the runtime as a GuC register is written 898 + * in H2G CTB to deregister the context. A future patch may defer this 899 + * H2G CTB if the runtime wakeref is zero. 
900 + */ 901 + with_intel_runtime_pm(runtime_pm, wakeref) 902 + guc_lrc_desc_unpin(ce); 903 + } 904 + 905 + static int guc_context_alloc(struct intel_context *ce) 906 + { 907 + return lrc_alloc(ce, ce->engine); 908 + } 909 + 910 + static const struct intel_context_ops guc_context_ops = { 911 + .alloc = guc_context_alloc, 912 + 913 + .pre_pin = guc_context_pre_pin, 914 + .pin = guc_context_pin, 915 + .unpin = guc_context_unpin, 916 + .post_unpin = guc_context_post_unpin, 917 + 918 + .enter = intel_context_enter_engine, 919 + .exit = intel_context_exit_engine, 920 + 921 + .reset = lrc_reset, 922 + .destroy = guc_context_destroy, 923 + }; 924 + 925 + static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 926 + { 927 + return new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 928 + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id); 929 + } 930 + 931 + static int guc_request_alloc(struct i915_request *rq) 932 + { 933 + struct intel_context *ce = rq->context; 934 + struct intel_guc *guc = ce_to_guc(ce); 935 + int ret; 936 + 937 + GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 938 + 939 + /* 940 + * Flush enough space to reduce the likelihood of waiting after 941 + * we start building the request - in which case we will just 942 + * have to repeat work. 943 + */ 944 + rq->reserved_space += GUC_REQUEST_SIZE; 945 + 946 + /* 947 + * Note that after this point, we have committed to using 948 + * this request as it is being used to both track the 949 + * state of engine initialisation and liveness of the 950 + * golden renderstate above. Think twice before you try 951 + * to cancel/unwind this request now. 952 + */ 953 + 954 + /* Unconditionally invalidate GPU caches and TLBs. 
*/ 955 + ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 956 + if (ret) 957 + return ret; 958 + 959 + rq->reserved_space -= GUC_REQUEST_SIZE; 960 + 961 + /* 962 + * Call pin_guc_id here rather than in the pinning step as with 963 + * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 964 + * guc_ids and creating horrible race conditions. This is especially bad 965 + * when guc_ids are being stolen due to over subscription. By the time 966 + * this function is reached, it is guaranteed that the guc_id will be 967 + * persistent until the generated request is retired. Thus, sealing these 968 + * race conditions. It is still safe to fail here if guc_ids are 969 + * exhausted and return -EAGAIN to the user indicating that they can try 970 + * again in the future. 971 + * 972 + * There is no need for a lock here as the timeline mutex ensures at 973 + * most one context can be executing this code path at once. The 974 + * guc_id_ref is incremented once for every request in flight and 975 + * decremented on each retire. When it is zero, a lock around the 976 + * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 
977 + */ 978 + if (atomic_add_unless(&ce->guc_id_ref, 1, 0)) 979 + return 0; 980 + 981 + ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 982 + if (unlikely(ret < 0)) 983 + return ret; 984 + if (context_needs_register(ce, !!ret)) { 985 + ret = guc_lrc_desc_pin(ce); 986 + if (unlikely(ret)) { /* unwind */ 987 + atomic_dec(&ce->guc_id_ref); 988 + unpin_guc_id(guc, ce); 989 + return ret; 990 + } 991 + } 992 + 993 + clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 994 + 995 + return 0; 549 996 } 550 997 551 998 static void sanitize_hwsp(struct intel_engine_cs *engine) ··· 1077 604 static void guc_set_default_submission(struct intel_engine_cs *engine) 1078 605 { 1079 606 engine->submit_request = guc_submit_request; 607 + } 608 + 609 + static inline void guc_kernel_context_pin(struct intel_guc *guc, 610 + struct intel_context *ce) 611 + { 612 + if (context_guc_id_invalid(ce)) 613 + pin_guc_id(guc, ce); 614 + guc_lrc_desc_pin(ce); 615 + } 616 + 617 + static inline void guc_init_lrc_mapping(struct intel_guc *guc) 618 + { 619 + struct intel_gt *gt = guc_to_gt(guc); 620 + struct intel_engine_cs *engine; 621 + enum intel_engine_id id; 622 + 623 + /* make sure all descriptors are clean... */ 624 + xa_destroy(&guc->context_lookup); 625 + 626 + /* 627 + * Some contexts might have been pinned before we enabled GuC 628 + * submission, so we need to add them to the GuC bookeeping. 629 + * Also, after a reset the of the GuC we want to make sure that the 630 + * information shared with GuC is properly reset. The kernel LRCs are 631 + * not attached to the gem_context, so they need to be added separately. 632 + * 633 + * Note: we purposefully do not check the return of guc_lrc_desc_pin, 634 + * because that function can only fail if a reset is just starting. This 635 + * is at the end of reset so presumably another reset isn't happening 636 + * and even it did this code would be run again. 
637 + */ 638 + 639 + for_each_engine(engine, gt, id) 640 + if (engine->kernel_context) 641 + guc_kernel_context_pin(guc, engine->kernel_context); 1080 642 } 1081 643 1082 644 static void guc_release(struct intel_engine_cs *engine) ··· 1226 718 1227 719 void intel_guc_submission_enable(struct intel_guc *guc) 1228 720 { 721 + guc_init_lrc_mapping(guc); 1229 722 } 1230 723 1231 724 void intel_guc_submission_disable(struct intel_guc *guc) ··· 1251 742 void intel_guc_submission_init_early(struct intel_guc *guc) 1252 743 { 1253 744 guc->submission_selected = __guc_submission_selected(guc); 745 + } 746 + 747 + static inline struct intel_context * 748 + g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) 749 + { 750 + struct intel_context *ce; 751 + 752 + if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) { 753 + drm_err(&guc_to_gt(guc)->i915->drm, 754 + "Invalid desc_idx %u", desc_idx); 755 + return NULL; 756 + } 757 + 758 + ce = __get_context(guc, desc_idx); 759 + if (unlikely(!ce)) { 760 + drm_err(&guc_to_gt(guc)->i915->drm, 761 + "Context is NULL, desc_idx %u", desc_idx); 762 + return NULL; 763 + } 764 + 765 + return ce; 766 + } 767 + 768 + int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 769 + const u32 *msg, 770 + u32 len) 771 + { 772 + struct intel_context *ce; 773 + u32 desc_idx = msg[0]; 774 + 775 + if (unlikely(len < 1)) { 776 + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); 777 + return -EPROTO; 778 + } 779 + 780 + ce = g2h_context_lookup(guc, desc_idx); 781 + if (unlikely(!ce)) 782 + return -EPROTO; 783 + 784 + if (context_wait_for_deregister_to_register(ce)) { 785 + struct intel_runtime_pm *runtime_pm = 786 + &ce->engine->gt->i915->runtime_pm; 787 + intel_wakeref_t wakeref; 788 + 789 + /* 790 + * Previous owner of this guc_id has been deregistered, now safe 791 + * register this context. 
792 + */ 793 + with_intel_runtime_pm(runtime_pm, wakeref) 794 + register_context(ce); 795 + clr_context_wait_for_deregister_to_register(ce); 796 + intel_context_put(ce); 797 + } else if (context_destroyed(ce)) { 798 + /* Context has been destroyed */ 799 + release_guc_id(guc, ce); 800 + lrc_destroy(&ce->ref); 801 + } 802 + 803 + return 0; 1254 804 }
+1
drivers/gpu/drm/i915/i915_reg.h
··· 4142 4142 FAULT_AND_CONTINUE /* Unsupported */ 4143 4143 }; 4144 4144 4145 + #define CTX_GTT_ADDRESS_MASK GENMASK(31, 12) 4145 4146 #define GEN8_CTX_VALID (1 << 0) 4146 4147 #define GEN8_CTX_FORCE_PD_RESTORE (1 << 1) 4147 4148 #define GEN8_CTX_FORCE_RESTORE (1 << 2)
+1
drivers/gpu/drm/i915/i915_request.c
··· 407 407 */ 408 408 if (!list_empty(&rq->sched.link)) 409 409 remove_from_engine(rq); 410 + atomic_dec(&rq->context->guc_id_ref); 410 411 GEM_BUG_ON(!llist_empty(&rq->execute_cb)); 411 412 412 413 __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */