Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/guc: Ensure G2H response has space in buffer

Ensure G2H response has space in the buffer before sending H2G CTB as
the GuC can't handle any backpressure on the G2H interface.

v2:
(Matthew)
- s/INTEL_GUC_SEND/INTEL_GUC_CT_SEND
v3:
(Matthew)
- Add G2H credit accounting to blocking path, add g2h_release_space
helper
(John H)
- CTB_G2H_BUFFER_SIZE / 4 == G2H_ROOM_BUFFER_SIZE

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-15-matthew.brost@intel.com

Authored by Matthew Brost and committed by John Harrison
f4eb1f3f 4dbd3944

+104 -26
+5 -3
drivers/gpu/drm/i915/gt/uc/intel_guc.h
··· 96 96 } 97 97 98 98 static 99 - inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len) 99 + inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len, 100 + u32 g2h_len_dw) 100 101 { 101 102 return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 102 - INTEL_GUC_CT_SEND_NB); 103 + MAKE_SEND_FLAGS(g2h_len_dw)); 103 104 } 104 105 105 106 static inline int ··· 114 113 static inline int intel_guc_send_busy_loop(struct intel_guc *guc, 115 114 const u32 *action, 116 115 u32 len, 116 + u32 g2h_len_dw, 117 117 bool loop) 118 118 { 119 119 int err; ··· 132 130 might_sleep_if(loop && not_atomic); 133 131 134 132 retry: 135 - err = intel_guc_send_nb(guc, action, len); 133 + err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 136 134 if (unlikely(err == -EBUSY && loop)) { 137 135 if (likely(not_atomic)) { 138 136 if (msleep_interruptible(sleep_period_ms))
+77 -17
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
··· 73 73 #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) 74 74 #define CTB_H2G_BUFFER_SIZE (SZ_4K) 75 75 #define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) 76 + #define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) 76 77 77 78 struct ct_request { 78 79 struct list_head link; ··· 130 129 131 130 static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb) 132 131 { 132 + u32 space; 133 + 133 134 ctb->broken = false; 134 135 ctb->tail = 0; 135 136 ctb->head = 0; 136 - ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size); 137 + space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size) - ctb->resv_space; 138 + atomic_set(&ctb->space, space); 137 139 138 140 guc_ct_buffer_desc_init(ctb->desc); 139 141 } 140 142 141 143 static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb, 142 144 struct guc_ct_buffer_desc *desc, 143 - u32 *cmds, u32 size_in_bytes) 145 + u32 *cmds, u32 size_in_bytes, u32 resv_space) 144 146 { 145 147 GEM_BUG_ON(size_in_bytes % 4); 146 148 147 149 ctb->desc = desc; 148 150 ctb->cmds = cmds; 149 151 ctb->size = size_in_bytes / 4; 152 + ctb->resv_space = resv_space / 4; 150 153 151 154 guc_ct_buffer_reset(ctb); 152 155 } ··· 231 226 struct guc_ct_buffer_desc *desc; 232 227 u32 blob_size; 233 228 u32 cmds_size; 229 + u32 resv_space; 234 230 void *blob; 235 231 u32 *cmds; 236 232 int err; ··· 256 250 desc = blob; 257 251 cmds = blob + 2 * CTB_DESC_SIZE; 258 252 cmds_size = CTB_H2G_BUFFER_SIZE; 259 - CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "send", 260 - ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); 253 + resv_space = 0; 254 + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send", 255 + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size, 256 + resv_space); 261 257 262 - guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size); 258 + guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size, resv_space); 263 259 264 260 /* store pointers to desc and cmds for recv ctb */ 265 261 desc = blob + 
CTB_DESC_SIZE; 266 262 cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE; 267 263 cmds_size = CTB_G2H_BUFFER_SIZE; 268 - CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "recv", 269 - ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); 264 + resv_space = G2H_ROOM_BUFFER_SIZE; 265 + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv", 266 + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size, 267 + resv_space); 270 268 271 - guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size); 269 + guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size, resv_space); 272 270 273 271 return 0; 274 272 } ··· 471 461 472 462 /* update local copies */ 473 463 ctb->tail = tail; 474 - GEM_BUG_ON(ctb->space < len + GUC_CTB_HDR_LEN); 475 - ctb->space -= len + GUC_CTB_HDR_LEN; 464 + GEM_BUG_ON(atomic_read(&ctb->space) < len + GUC_CTB_HDR_LEN); 465 + atomic_sub(len + GUC_CTB_HDR_LEN, &ctb->space); 476 466 477 467 /* now update descriptor */ 478 468 WRITE_ONCE(desc->tail, tail); ··· 547 537 return ret; 548 538 } 549 539 540 + static inline bool g2h_has_room(struct intel_guc_ct *ct, u32 g2h_len_dw) 541 + { 542 + struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv; 543 + 544 + /* 545 + * We leave a certain amount of space in the G2H CTB buffer for 546 + * unexpected G2H CTBs (e.g. logging, engine hang, etc...) 
547 + */ 548 + return !g2h_len_dw || atomic_read(&ctb->space) >= g2h_len_dw; 549 + } 550 + 551 + static inline void g2h_reserve_space(struct intel_guc_ct *ct, u32 g2h_len_dw) 552 + { 553 + lockdep_assert_held(&ct->ctbs.send.lock); 554 + 555 + GEM_BUG_ON(!g2h_has_room(ct, g2h_len_dw)); 556 + 557 + if (g2h_len_dw) 558 + atomic_sub(g2h_len_dw, &ct->ctbs.recv.space); 559 + } 560 + 561 + static inline void g2h_release_space(struct intel_guc_ct *ct, u32 g2h_len_dw) 562 + { 563 + atomic_add(g2h_len_dw, &ct->ctbs.recv.space); 564 + } 565 + 550 566 static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) 551 567 { 552 568 struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; ··· 580 544 u32 head; 581 545 u32 space; 582 546 583 - if (ctb->space >= len_dw) 547 + if (atomic_read(&ctb->space) >= len_dw) 584 548 return true; 585 549 586 550 head = READ_ONCE(desc->head); ··· 593 557 } 594 558 595 559 space = CIRC_SPACE(ctb->tail, head, ctb->size); 596 - ctb->space = space; 560 + atomic_set(&ctb->space, space); 597 561 598 562 return space >= len_dw; 599 563 } 600 564 601 - static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw) 565 + static int has_room_nb(struct intel_guc_ct *ct, u32 h2g_dw, u32 g2h_dw) 602 566 { 603 567 lockdep_assert_held(&ct->ctbs.send.lock); 604 568 605 - if (unlikely(!h2g_has_room(ct, len_dw))) { 569 + if (unlikely(!h2g_has_room(ct, h2g_dw) || !g2h_has_room(ct, g2h_dw))) { 606 570 if (ct->stall_time == KTIME_MAX) 607 571 ct->stall_time = ktime_get(); 608 572 ··· 616 580 return 0; 617 581 } 618 582 583 + #define G2H_LEN_DW(f) ({ \ 584 + typeof(f) f_ = (f); \ 585 + FIELD_GET(INTEL_GUC_CT_SEND_G2H_DW_MASK, f_) ? 
\ 586 + FIELD_GET(INTEL_GUC_CT_SEND_G2H_DW_MASK, f_) + \ 587 + GUC_CTB_HXG_MSG_MIN_LEN : 0; \ 588 + }) 619 589 static int ct_send_nb(struct intel_guc_ct *ct, 620 590 const u32 *action, 621 591 u32 len, ··· 629 587 { 630 588 struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; 631 589 unsigned long spin_flags; 590 + u32 g2h_len_dw = G2H_LEN_DW(flags); 632 591 u32 fence; 633 592 int ret; 634 593 635 594 spin_lock_irqsave(&ctb->lock, spin_flags); 636 595 637 - ret = has_room_nb(ct, len + GUC_CTB_HDR_LEN); 596 + ret = has_room_nb(ct, len + GUC_CTB_HDR_LEN, g2h_len_dw); 638 597 if (unlikely(ret)) 639 598 goto out; 640 599 ··· 644 601 if (unlikely(ret)) 645 602 goto out; 646 603 604 + g2h_reserve_space(ct, g2h_len_dw); 647 605 intel_guc_notify(ct_to_guc(ct)); 648 606 649 607 out: ··· 676 632 /* 677 633 * We use a lazy spin wait loop here as we believe that if the CT 678 634 * buffers are sized correctly the flow control condition should be 679 - * rare. 635 + * rare. Reserving the maximum size in the G2H credits as we don't know 636 + * how big the response is going to be. 
680 637 */ 681 638 retry: 682 639 spin_lock_irqsave(&ctb->lock, flags); 683 - if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) { 640 + if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN) || 641 + !g2h_has_room(ct, GUC_CTB_HXG_MSG_MAX_LEN))) { 684 642 if (ct->stall_time == KTIME_MAX) 685 643 ct->stall_time = ktime_get(); 686 644 spin_unlock_irqrestore(&ctb->lock, flags); ··· 710 664 spin_unlock(&ct->requests.lock); 711 665 712 666 err = ct_write(ct, action, len, fence, 0); 667 + g2h_reserve_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); 713 668 714 669 spin_unlock_irqrestore(&ctb->lock, flags); 715 670 ··· 720 673 intel_guc_notify(ct_to_guc(ct)); 721 674 722 675 err = wait_for_ct_request_update(&request, status); 676 + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); 723 677 if (unlikely(err)) 724 678 goto unlink; 725 679 ··· 1040 992 static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request) 1041 993 { 1042 994 const u32 *hxg = &request->msg[GUC_CTB_MSG_MIN_LEN]; 995 + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); 1043 996 unsigned long flags; 1044 997 1045 998 GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT); 999 + 1000 + /* 1001 + * Adjusting the space must be done in IRQ or deadlock can occur as the 1002 + * CTB processing in the below workqueue can send CTBs which creates a 1003 + * circular dependency if the space was returned there. 1004 + */ 1005 + switch (action) { 1006 + case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: 1007 + case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE: 1008 + g2h_release_space(ct, request->size); 1009 + } 1046 1010 1047 1011 spin_lock_irqsave(&ct->requests.lock, flags); 1048 1012 list_add_tail(&request->link, &ct->requests.incoming);
+10 -1
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
··· 33 33 * @desc: pointer to the buffer descriptor 34 34 * @cmds: pointer to the commands buffer 35 35 * @size: size of the commands buffer in dwords 36 + * @resv_space: reserved space in buffer in dwords 36 37 * @head: local shadow copy of head in dwords 37 38 * @tail: local shadow copy of tail in dwords 38 39 * @space: local shadow copy of space in dwords ··· 44 43 struct guc_ct_buffer_desc *desc; 45 44 u32 *cmds; 46 45 u32 size; 46 + u32 resv_space; 47 47 u32 tail; 48 48 u32 head; 49 - u32 space; 49 + atomic_t space; 50 50 bool broken; 51 51 }; 52 52 ··· 99 97 } 100 98 101 99 #define INTEL_GUC_CT_SEND_NB BIT(31) 100 + #define INTEL_GUC_CT_SEND_G2H_DW_SHIFT 0 101 + #define INTEL_GUC_CT_SEND_G2H_DW_MASK (0xff << INTEL_GUC_CT_SEND_G2H_DW_SHIFT) 102 + #define MAKE_SEND_FLAGS(len) ({ \ 103 + typeof(len) len_ = (len); \ 104 + GEM_BUG_ON(!FIELD_FIT(INTEL_GUC_CT_SEND_G2H_DW_MASK, len_)); \ 105 + (FIELD_PREP(INTEL_GUC_CT_SEND_G2H_DW_MASK, len_) | INTEL_GUC_CT_SEND_NB); \ 106 + }) 102 107 int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, 103 108 u32 *response_buf, u32 response_buf_size, u32 flags); 104 109 void intel_guc_ct_event_handler(struct intel_guc_ct *ct);
+4
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
··· 17 17 #include "abi/guc_communication_ctb_abi.h" 18 18 #include "abi/guc_messages_abi.h" 19 19 20 + /* Payload length only i.e. don't include G2H header length */ 21 + #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 2 22 + #define G2H_LEN_DW_DEREGISTER_CONTEXT 1 23 + 20 24 #define GUC_CONTEXT_DISABLE 0 21 25 #define GUC_CONTEXT_ENABLE 1 22 26
+8 -5
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
··· 258 258 struct intel_context *ce = rq->context; 259 259 u32 action[3]; 260 260 int len = 0; 261 + u32 g2h_len_dw = 0; 261 262 bool enabled = context_enabled(ce); 262 263 263 264 GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); ··· 270 269 action[len++] = GUC_CONTEXT_ENABLE; 271 270 set_context_pending_enable(ce); 272 271 intel_context_get(ce); 272 + g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 273 273 } else { 274 274 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 275 275 action[len++] = ce->guc_id; 276 276 } 277 277 278 - err = intel_guc_send_nb(guc, action, len); 279 - 278 + err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 280 279 if (!enabled && !err) { 281 280 set_context_enabled(ce); 282 281 } else if (!enabled) { ··· 735 734 offset, 736 735 }; 737 736 738 - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); 737 + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 739 738 } 740 739 741 740 static int register_context(struct intel_context *ce) ··· 755 754 guc_id, 756 755 }; 757 756 758 - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); 757 + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 758 + G2H_LEN_DW_DEREGISTER_CONTEXT, true); 759 759 } 760 760 761 761 static int deregister_context(struct intel_context *ce, u32 guc_id) ··· 901 899 902 900 intel_context_get(ce); 903 901 904 - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); 902 + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 903 + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 905 904 } 906 905 907 906 static u16 prep_context_pending_disable(struct intel_context *ce)