Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/guc: Ensure G2H response has space in buffer

Ensure G2H response has space in the buffer before sending H2G CTB as
the GuC can't handle any backpressure on the G2H interface.

v2:
(Matthew)
- s/INTEL_GUC_SEND/INTEL_GUC_CT_SEND
v3:
(Matthew)
- Add G2H credit accounting to blocking path, add g2h_release_space
helper
(John H)
- CTB_G2H_BUFFER_SIZE / 4 == G2H_ROOM_BUFFER_SIZE

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210721215101.139794-15-matthew.brost@intel.com

Authored by Matthew Brost and committed by John Harrison
f4eb1f3f 4dbd3944

+104 -26
+5 -3
drivers/gpu/drm/i915/gt/uc/intel_guc.h
··· 96 96 } 97 97 98 98 static 99 - inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len) 99 + inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len, 100 + u32 g2h_len_dw) 100 101 { 101 102 return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 102 - INTEL_GUC_CT_SEND_NB); 103 + MAKE_SEND_FLAGS(g2h_len_dw)); 103 104 } 104 105 105 106 static inline int ··· 114 113 static inline int intel_guc_send_busy_loop(struct intel_guc *guc, 115 114 const u32 *action, 116 115 u32 len, 116 + u32 g2h_len_dw, 117 117 bool loop) 118 118 { 119 119 int err; ··· 132 130 might_sleep_if(loop && not_atomic); 133 131 134 132 retry: 135 - err = intel_guc_send_nb(guc, action, len); 133 + err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 136 134 if (unlikely(err == -EBUSY && loop)) { 137 135 if (likely(not_atomic)) { 138 136 if (msleep_interruptible(sleep_period_ms))
+77 -17
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
··· 73 73 #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) 74 74 #define CTB_H2G_BUFFER_SIZE (SZ_4K) 75 75 #define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) 76 + #define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) 76 77 77 78 struct ct_request { 78 79 struct list_head link; ··· 130 129 131 130 static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb) 132 131 { 132 + u32 space; 133 + 133 134 ctb->broken = false; 134 135 ctb->tail = 0; 135 136 ctb->head = 0; 136 - ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size); 137 + space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size) - ctb->resv_space; 138 + atomic_set(&ctb->space, space); 137 139 138 140 guc_ct_buffer_desc_init(ctb->desc); 139 141 } 140 142 141 143 static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb, 142 144 struct guc_ct_buffer_desc *desc, 143 - u32 *cmds, u32 size_in_bytes) 145 + u32 *cmds, u32 size_in_bytes, u32 resv_space) 144 146 { 145 147 GEM_BUG_ON(size_in_bytes % 4); 146 148 147 149 ctb->desc = desc; 148 150 ctb->cmds = cmds; 149 151 ctb->size = size_in_bytes / 4; 152 + ctb->resv_space = resv_space / 4; 150 153 151 154 guc_ct_buffer_reset(ctb); 152 155 } ··· 231 226 struct guc_ct_buffer_desc *desc; 232 227 u32 blob_size; 233 228 u32 cmds_size; 229 + u32 resv_space; 234 230 void *blob; 235 231 u32 *cmds; 236 232 int err; ··· 256 250 desc = blob; 257 251 cmds = blob + 2 * CTB_DESC_SIZE; 258 252 cmds_size = CTB_H2G_BUFFER_SIZE; 259 - CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "send", 260 - ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); 253 + resv_space = 0; 254 + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send", 255 + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size, 256 + resv_space); 261 257 262 - guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size); 258 + guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size, resv_space); 263 259 264 260 /* store pointers to desc and cmds for recv ctb */ 265 261 desc = blob + 
CTB_DESC_SIZE; 266 262 cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE; 267 263 cmds_size = CTB_G2H_BUFFER_SIZE; 268 - CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "recv", 269 - ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); 264 + resv_space = G2H_ROOM_BUFFER_SIZE; 265 + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv", 266 + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size, 267 + resv_space); 270 268 271 - guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size); 269 + guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size, resv_space); 272 270 273 271 return 0; 274 272 } ··· 471 461 472 462 /* update local copies */ 473 463 ctb->tail = tail; 474 - GEM_BUG_ON(ctb->space < len + GUC_CTB_HDR_LEN); 475 - ctb->space -= len + GUC_CTB_HDR_LEN; 464 + GEM_BUG_ON(atomic_read(&ctb->space) < len + GUC_CTB_HDR_LEN); 465 + atomic_sub(len + GUC_CTB_HDR_LEN, &ctb->space); 476 466 477 467 /* now update descriptor */ 478 468 WRITE_ONCE(desc->tail, tail); ··· 547 537 return ret; 548 538 } 549 539 540 + static inline bool g2h_has_room(struct intel_guc_ct *ct, u32 g2h_len_dw) 541 + { 542 + struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv; 543 + 544 + /* 545 + * We leave a certain amount of space in the G2H CTB buffer for 546 + * unexpected G2H CTBs (e.g. logging, engine hang, etc...) 
547 + */ 548 + return !g2h_len_dw || atomic_read(&ctb->space) >= g2h_len_dw; 549 + } 550 + 551 + static inline void g2h_reserve_space(struct intel_guc_ct *ct, u32 g2h_len_dw) 552 + { 553 + lockdep_assert_held(&ct->ctbs.send.lock); 554 + 555 + GEM_BUG_ON(!g2h_has_room(ct, g2h_len_dw)); 556 + 557 + if (g2h_len_dw) 558 + atomic_sub(g2h_len_dw, &ct->ctbs.recv.space); 559 + } 560 + 561 + static inline void g2h_release_space(struct intel_guc_ct *ct, u32 g2h_len_dw) 562 + { 563 + atomic_add(g2h_len_dw, &ct->ctbs.recv.space); 564 + } 565 + 550 566 static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) 551 567 { 552 568 struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; ··· 580 544 u32 head; 581 545 u32 space; 582 546 583 - if (ctb->space >= len_dw) 547 + if (atomic_read(&ctb->space) >= len_dw) 584 548 return true; 585 549 586 550 head = READ_ONCE(desc->head); ··· 593 557 } 594 558 595 559 space = CIRC_SPACE(ctb->tail, head, ctb->size); 596 - ctb->space = space; 560 + atomic_set(&ctb->space, space); 597 561 598 562 return space >= len_dw; 599 563 } 600 564 601 - static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw) 565 + static int has_room_nb(struct intel_guc_ct *ct, u32 h2g_dw, u32 g2h_dw) 602 566 { 603 567 lockdep_assert_held(&ct->ctbs.send.lock); 604 568 605 - if (unlikely(!h2g_has_room(ct, len_dw))) { 569 + if (unlikely(!h2g_has_room(ct, h2g_dw) || !g2h_has_room(ct, g2h_dw))) { 606 570 if (ct->stall_time == KTIME_MAX) 607 571 ct->stall_time = ktime_get(); 608 572 ··· 616 580 return 0; 617 581 } 618 582 583 + #define G2H_LEN_DW(f) ({ \ 584 + typeof(f) f_ = (f); \ 585 + FIELD_GET(INTEL_GUC_CT_SEND_G2H_DW_MASK, f_) ? 
\ 586 + FIELD_GET(INTEL_GUC_CT_SEND_G2H_DW_MASK, f_) + \ 587 + GUC_CTB_HXG_MSG_MIN_LEN : 0; \ 588 + }) 619 589 static int ct_send_nb(struct intel_guc_ct *ct, 620 590 const u32 *action, 621 591 u32 len, ··· 629 587 { 630 588 struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; 631 589 unsigned long spin_flags; 590 + u32 g2h_len_dw = G2H_LEN_DW(flags); 632 591 u32 fence; 633 592 int ret; 634 593 635 594 spin_lock_irqsave(&ctb->lock, spin_flags); 636 595 637 - ret = has_room_nb(ct, len + GUC_CTB_HDR_LEN); 596 + ret = has_room_nb(ct, len + GUC_CTB_HDR_LEN, g2h_len_dw); 638 597 if (unlikely(ret)) 639 598 goto out; 640 599 ··· 644 601 if (unlikely(ret)) 645 602 goto out; 646 603 604 + g2h_reserve_space(ct, g2h_len_dw); 647 605 intel_guc_notify(ct_to_guc(ct)); 648 606 649 607 out: ··· 676 632 /* 677 633 * We use a lazy spin wait loop here as we believe that if the CT 678 634 * buffers are sized correctly the flow control condition should be 679 - * rare. 635 + * rare. Reserving the maximum size in the G2H credits as we don't know 636 + * how big the response is going to be. 
680 637 */ 681 638 retry: 682 639 spin_lock_irqsave(&ctb->lock, flags); 683 - if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) { 640 + if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN) || 641 + !g2h_has_room(ct, GUC_CTB_HXG_MSG_MAX_LEN))) { 684 642 if (ct->stall_time == KTIME_MAX) 685 643 ct->stall_time = ktime_get(); 686 644 spin_unlock_irqrestore(&ctb->lock, flags); ··· 710 664 spin_unlock(&ct->requests.lock); 711 665 712 666 err = ct_write(ct, action, len, fence, 0); 667 + g2h_reserve_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); 713 668 714 669 spin_unlock_irqrestore(&ctb->lock, flags); 715 670 ··· 720 673 intel_guc_notify(ct_to_guc(ct)); 721 674 722 675 err = wait_for_ct_request_update(&request, status); 676 + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); 723 677 if (unlikely(err)) 724 678 goto unlink; 725 679 ··· 1040 992 static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request) 1041 993 { 1042 994 const u32 *hxg = &request->msg[GUC_CTB_MSG_MIN_LEN]; 995 + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); 1043 996 unsigned long flags; 1044 997 1045 998 GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT); 999 + 1000 + /* 1001 + * Adjusting the space must be done in IRQ or deadlock can occur as the 1002 + * CTB processing in the below workqueue can send CTBs which creates a 1003 + * circular dependency if the space was returned there. 1004 + */ 1005 + switch (action) { 1006 + case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: 1007 + case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE: 1008 + g2h_release_space(ct, request->size); 1009 + } 1046 1010 1047 1011 spin_lock_irqsave(&ct->requests.lock, flags); 1048 1012 list_add_tail(&request->link, &ct->requests.incoming);
+10 -1
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
··· 33 33 * @desc: pointer to the buffer descriptor 34 34 * @cmds: pointer to the commands buffer 35 35 * @size: size of the commands buffer in dwords 36 + * @resv_space: reserved space in buffer in dwords 36 37 * @head: local shadow copy of head in dwords 37 38 * @tail: local shadow copy of tail in dwords 38 39 * @space: local shadow copy of space in dwords ··· 44 43 struct guc_ct_buffer_desc *desc; 45 44 u32 *cmds; 46 45 u32 size; 46 + u32 resv_space; 47 47 u32 tail; 48 48 u32 head; 49 - u32 space; 49 + atomic_t space; 50 50 bool broken; 51 51 }; 52 52 ··· 99 97 } 100 98 101 99 #define INTEL_GUC_CT_SEND_NB BIT(31) 100 + #define INTEL_GUC_CT_SEND_G2H_DW_SHIFT 0 101 + #define INTEL_GUC_CT_SEND_G2H_DW_MASK (0xff << INTEL_GUC_CT_SEND_G2H_DW_SHIFT) 102 + #define MAKE_SEND_FLAGS(len) ({ \ 103 + typeof(len) len_ = (len); \ 104 + GEM_BUG_ON(!FIELD_FIT(INTEL_GUC_CT_SEND_G2H_DW_MASK, len_)); \ 105 + (FIELD_PREP(INTEL_GUC_CT_SEND_G2H_DW_MASK, len_) | INTEL_GUC_CT_SEND_NB); \ 106 + }) 102 107 int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, 103 108 u32 *response_buf, u32 response_buf_size, u32 flags); 104 109 void intel_guc_ct_event_handler(struct intel_guc_ct *ct);
+4
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
··· 17 17 #include "abi/guc_communication_ctb_abi.h" 18 18 #include "abi/guc_messages_abi.h" 19 19 20 + /* Payload length only i.e. don't include G2H header length */ 21 + #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 2 22 + #define G2H_LEN_DW_DEREGISTER_CONTEXT 1 23 + 20 24 #define GUC_CONTEXT_DISABLE 0 21 25 #define GUC_CONTEXT_ENABLE 1 22 26
+8 -5
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
··· 258 258 struct intel_context *ce = rq->context; 259 259 u32 action[3]; 260 260 int len = 0; 261 + u32 g2h_len_dw = 0; 261 262 bool enabled = context_enabled(ce); 262 263 263 264 GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); ··· 270 269 action[len++] = GUC_CONTEXT_ENABLE; 271 270 set_context_pending_enable(ce); 272 271 intel_context_get(ce); 272 + g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 273 273 } else { 274 274 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 275 275 action[len++] = ce->guc_id; 276 276 } 277 277 278 - err = intel_guc_send_nb(guc, action, len); 279 - 278 + err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 280 279 if (!enabled && !err) { 281 280 set_context_enabled(ce); 282 281 } else if (!enabled) { ··· 735 734 offset, 736 735 }; 737 736 738 - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); 737 + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 739 738 } 740 739 741 740 static int register_context(struct intel_context *ce) ··· 755 754 guc_id, 756 755 }; 757 756 758 - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); 757 + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 758 + G2H_LEN_DW_DEREGISTER_CONTEXT, true); 759 759 } 760 760 761 761 static int deregister_context(struct intel_context *ce, u32 guc_id) ··· 901 899 902 900 intel_context_get(ce); 903 901 904 - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true); 902 + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 903 + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 905 904 } 906 905 907 906 static u16 prep_context_pending_disable(struct intel_context *ce)