Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/ringbuffer: Fix context restore upon reset

The discovery with trying to enable full-ppgtt was that we were
completely failing to load both the mm and context following the
reset. Although we were performing mmio to set the PP_DIR (per-process
GTT) and CCID (context), these were taking no effect (the assumption was
that this would trigger reload of the context and restore the page
tables). It was not until we performed the LRI + MI_SET_CONTEXT in a
following context switch that anything would occur.

Since we are then required to reset the context image and PP_DIR using
CS commands, we place those commands into every batch. The hardware
should recognise the no-ops and eliminate the expensive context loads,
but we still have to pay the cost of using cross-powerwell register
writes. In practice, this has no effect on actual context switch times,
and only adds a few hundred nanoseconds to no-op switches. We can improve
the latter by eliminating the w/a around known no-op switches, but there
is an ulterior motive to keeping them.

Always emitting the context switch at the beginning of the request (and
relying on HW to skip unneeded switches) does have one key advantage.
Should we implement request reordering on Haswell, we will not know in
advance what the previous executing context was on the GPU and so we
would not be able to elide the MI_SET_CONTEXT commands ourselves and
always have to emit them. Having our hand forced now actually prepares
us for later.

Now since that context and mm follow the request, we no longer (and not
for a long time since requests took over!) require a trace point to tell
when we write the switch into the ring, since it is always emitted. (This is
even more important when you remember that simply writing into the ring
bears no relation to the current mm.)

v2: Sandybridge has to agree to use LRI as well.

Testcase: igt/drv_selftests/live_hangcheck
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180611110845.31890-1-chris@chris-wilson.co.uk

+66 -156
-45
drivers/gpu/drm/i915/i915_gem_gtt.c
··· 1712 1712 wmb(); 1713 1713 } 1714 1714 1715 - static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1716 - { 1717 - GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1718 - return ppgtt->pd.base.ggtt_offset << 10; 1719 - } 1720 - 1721 - static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1722 - struct i915_request *rq) 1723 - { 1724 - struct intel_engine_cs *engine = rq->engine; 1725 - u32 *cs; 1726 - 1727 - /* NB: TLBs must be flushed and invalidated before a switch */ 1728 - cs = intel_ring_begin(rq, 6); 1729 - if (IS_ERR(cs)) 1730 - return PTR_ERR(cs); 1731 - 1732 - *cs++ = MI_LOAD_REGISTER_IMM(2); 1733 - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); 1734 - *cs++ = PP_DIR_DCLV_2G; 1735 - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); 1736 - *cs++ = get_pd_offset(ppgtt); 1737 - *cs++ = MI_NOOP; 1738 - intel_ring_advance(rq, cs); 1739 - 1740 - return 0; 1741 - } 1742 - 1743 - static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1744 - struct i915_request *rq) 1745 - { 1746 - struct intel_engine_cs *engine = rq->engine; 1747 - struct drm_i915_private *dev_priv = rq->i915; 1748 - 1749 - I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1750 - I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1751 - return 0; 1752 - } 1753 - 1754 1715 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) 1755 1716 { 1756 1717 struct intel_engine_cs *engine; ··· 1985 2024 ppgtt->vm.dma = &i915->drm.pdev->dev; 1986 2025 1987 2026 ppgtt->vm.pte_encode = ggtt->vm.pte_encode; 1988 - if (IS_GEN6(i915)) 1989 - ppgtt->switch_mm = gen6_mm_switch; 1990 - else if (IS_GEN7(i915)) 1991 - ppgtt->switch_mm = gen7_mm_switch; 1992 - else 1993 - BUG(); 1994 2027 1995 2028 err = gen6_ppgtt_alloc(ppgtt); 1996 2029 if (err)
-2
drivers/gpu/drm/i915/i915_gem_gtt.h
··· 406 406 407 407 gen6_pte_t __iomem *pd_addr; 408 408 409 - int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, 410 - struct i915_request *rq); 411 409 void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); 412 410 }; 413 411
+2
drivers/gpu/drm/i915/i915_request.c
··· 817 817 /* Keep a second pin for the dual retirement along engine and ring */ 818 818 __intel_context_pin(ce); 819 819 820 + rq->infix = rq->ring->emit; /* end of header; start of user payload */ 821 + 820 822 /* Check that we didn't interrupt ourselves with a new request */ 821 823 GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); 822 824 return rq;
+3
drivers/gpu/drm/i915/i915_request.h
··· 134 134 /** Position in the ring of the start of the request */ 135 135 u32 head; 136 136 137 + /** Position in the ring of the start of the user packets */ 138 + u32 infix; 139 + 137 140 /** 138 141 * Position in the ring of the start of the postfix. 139 142 * This is required to calculate the maximum available ring space
-33
drivers/gpu/drm/i915/i915_trace.h
··· 973 973 TP_ARGS(ctx) 974 974 ); 975 975 976 - /** 977 - * DOC: switch_mm tracepoint 978 - * 979 - * This tracepoint allows tracking of the mm switch, which is an important point 980 - * in the lifetime of the vm in the legacy submission path. This tracepoint is 981 - * called only if full ppgtt is enabled. 982 - */ 983 - TRACE_EVENT(switch_mm, 984 - TP_PROTO(struct intel_engine_cs *engine, struct i915_gem_context *to), 985 - 986 - TP_ARGS(engine, to), 987 - 988 - TP_STRUCT__entry( 989 - __field(u16, class) 990 - __field(u16, instance) 991 - __field(struct i915_gem_context *, to) 992 - __field(struct i915_address_space *, vm) 993 - __field(u32, dev) 994 - ), 995 - 996 - TP_fast_assign( 997 - __entry->class = engine->uabi_class; 998 - __entry->instance = engine->instance; 999 - __entry->to = to; 1000 - __entry->vm = to->ppgtt ? &to->ppgtt->vm : NULL; 1001 - __entry->dev = engine->i915->drm.primary->index; 1002 - ), 1003 - 1004 - TP_printk("dev=%u, engine=%u:%u, ctx=%p, ctx_vm=%p", 1005 - __entry->dev, __entry->class, __entry->instance, __entry->to, 1006 - __entry->vm) 1007 - ); 1008 - 1009 976 #endif /* _I915_TRACE_H_ */ 1010 977 1011 978 /* This part must be outside protection */
-3
drivers/gpu/drm/i915/intel_engine_cs.c
··· 1168 1168 1169 1169 lockdep_assert_held(&engine->i915->drm.struct_mutex); 1170 1170 1171 - engine->legacy_active_context = NULL; 1172 - engine->legacy_active_ppgtt = NULL; 1173 - 1174 1171 ce = fetch_and_zero(&engine->last_retired_context); 1175 1172 if (ce) 1176 1173 intel_context_unpin(ce);
+61 -64
drivers/gpu/drm/i915/intel_ringbuffer.c
··· 541 541 return i915_gem_find_active_request(engine); 542 542 } 543 543 544 - static void reset_ring(struct intel_engine_cs *engine, 545 - struct i915_request *request) 544 + static void skip_request(struct i915_request *rq) 546 545 { 547 - GEM_TRACE("%s seqno=%x\n", 548 - engine->name, request ? request->global_seqno : 0); 546 + void *vaddr = rq->ring->vaddr; 547 + u32 head; 548 + 549 + head = rq->infix; 550 + if (rq->postfix < head) { 551 + memset32(vaddr + head, MI_NOOP, 552 + (rq->ring->size - head) / sizeof(u32)); 553 + head = 0; 554 + } 555 + memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); 556 + } 557 + 558 + static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) 559 + { 560 + GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0); 549 561 550 562 /* 551 563 * RC6 must be prevented until the reset is complete and the engine ··· 581 569 * If the request was innocent, we try to replay the request with 582 570 * the restored context. 583 571 */ 584 - if (request) { 585 - struct drm_i915_private *dev_priv = request->i915; 586 - struct intel_context *ce = request->hw_context; 587 - struct i915_hw_ppgtt *ppgtt; 588 - 589 - if (ce->state) { 590 - I915_WRITE(CCID, 591 - i915_ggtt_offset(ce->state) | 592 - BIT(8) /* must be set! 
*/ | 593 - CCID_EXTENDED_STATE_SAVE | 594 - CCID_EXTENDED_STATE_RESTORE | 595 - CCID_EN); 596 - } 597 - 598 - ppgtt = request->gem_context->ppgtt ?: engine->i915->mm.aliasing_ppgtt; 599 - if (ppgtt) { 600 - u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10; 601 - 602 - I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 603 - I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset); 604 - 605 - /* Wait for the PD reload to complete */ 606 - if (intel_wait_for_register(dev_priv, 607 - RING_PP_DIR_BASE(engine), 608 - BIT(0), 0, 609 - 10)) 610 - DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n"); 611 - 612 - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); 613 - } 614 - 572 + if (rq) { 615 573 /* If the rq hung, jump to its breadcrumb and skip the batch */ 616 - if (request->fence.error == -EIO) 617 - request->ring->head = request->postfix; 618 - } else { 619 - engine->legacy_active_context = NULL; 620 - engine->legacy_active_ppgtt = NULL; 574 + rq->ring->head = intel_ring_wrap(rq->ring, rq->head); 575 + if (rq->fence.error == -EIO) 576 + skip_request(rq); 621 577 } 622 578 } 623 579 ··· 1426 1446 intel_ring_reset(engine->buffer, 0); 1427 1447 } 1428 1448 1449 + static int load_pd_dir(struct i915_request *rq, 1450 + const struct i915_hw_ppgtt *ppgtt) 1451 + { 1452 + const struct intel_engine_cs * const engine = rq->engine; 1453 + u32 *cs; 1454 + 1455 + cs = intel_ring_begin(rq, 6); 1456 + if (IS_ERR(cs)) 1457 + return PTR_ERR(cs); 1458 + 1459 + *cs++ = MI_LOAD_REGISTER_IMM(1); 1460 + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); 1461 + *cs++ = PP_DIR_DCLV_2G; 1462 + 1463 + *cs++ = MI_LOAD_REGISTER_IMM(1); 1464 + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); 1465 + *cs++ = ppgtt->pd.base.ggtt_offset << 10; 1466 + 1467 + intel_ring_advance(rq, cs); 1468 + 1469 + return 0; 1470 + } 1471 + 1429 1472 static inline int mi_set_context(struct i915_request *rq, u32 flags) 1430 1473 { 1431 1474 struct drm_i915_private *i915 = rq->i915; ··· 1593 
1590 static int switch_context(struct i915_request *rq) 1594 1591 { 1595 1592 struct intel_engine_cs *engine = rq->engine; 1596 - struct i915_gem_context *to_ctx = rq->gem_context; 1597 - struct i915_hw_ppgtt *to_mm = 1598 - to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; 1599 - struct i915_gem_context *from_ctx = engine->legacy_active_context; 1600 - struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt; 1593 + struct i915_gem_context *ctx = rq->gem_context; 1594 + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; 1595 + unsigned int unwind_mm = 0; 1601 1596 u32 hw_flags = 0; 1602 1597 int ret, i; 1603 1598 1604 1599 lockdep_assert_held(&rq->i915->drm.struct_mutex); 1605 1600 GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); 1606 1601 1607 - if (to_mm != from_mm || 1608 - (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) { 1609 - trace_switch_mm(engine, to_ctx); 1610 - ret = to_mm->switch_mm(to_mm, rq); 1602 + if (ppgtt) { 1603 + ret = load_pd_dir(rq, ppgtt); 1611 1604 if (ret) 1612 1605 goto err; 1613 1606 1614 - to_mm->pd_dirty_rings &= ~intel_engine_flag(engine); 1615 - engine->legacy_active_ppgtt = to_mm; 1616 - hw_flags = MI_FORCE_RESTORE; 1607 + if (intel_engine_flag(engine) & ppgtt->pd_dirty_rings) { 1608 + unwind_mm = intel_engine_flag(engine); 1609 + ppgtt->pd_dirty_rings &= ~unwind_mm; 1610 + hw_flags = MI_FORCE_RESTORE; 1611 + } 1617 1612 } 1618 1613 1619 - if (rq->hw_context->state && 1620 - (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { 1614 + if (rq->hw_context->state) { 1621 1615 GEM_BUG_ON(engine->id != RCS); 1622 1616 1623 1617 /* ··· 1624 1624 * as nothing actually executes using the kernel context; it 1625 1625 * is purely used for flushing user contexts. 
1626 1626 */ 1627 - if (i915_gem_context_is_kernel(to_ctx)) 1627 + if (i915_gem_context_is_kernel(ctx)) 1628 1628 hw_flags = MI_RESTORE_INHIBIT; 1629 1629 1630 1630 ret = mi_set_context(rq, hw_flags); 1631 1631 if (ret) 1632 1632 goto err_mm; 1633 - 1634 - engine->legacy_active_context = to_ctx; 1635 1633 } 1636 1634 1637 - if (to_ctx->remap_slice) { 1635 + if (ctx->remap_slice) { 1638 1636 for (i = 0; i < MAX_L3_SLICES; i++) { 1639 - if (!(to_ctx->remap_slice & BIT(i))) 1637 + if (!(ctx->remap_slice & BIT(i))) 1640 1638 continue; 1641 1639 1642 1640 ret = remap_l3(rq, i); 1643 1641 if (ret) 1644 - goto err_ctx; 1642 + goto err_mm; 1645 1643 } 1646 1644 1647 - to_ctx->remap_slice = 0; 1645 + ctx->remap_slice = 0; 1648 1646 } 1649 1647 1650 1648 return 0; 1651 1649 1652 - err_ctx: 1653 - engine->legacy_active_context = from_ctx; 1654 1650 err_mm: 1655 - engine->legacy_active_ppgtt = from_mm; 1651 + if (unwind_mm) 1652 + ppgtt->pd_dirty_rings |= unwind_mm; 1656 1653 err: 1657 1654 return ret; 1658 1655 }
-9
drivers/gpu/drm/i915/intel_ringbuffer.h
··· 557 557 */ 558 558 struct intel_context *last_retired_context; 559 559 560 - /* We track the current MI_SET_CONTEXT in order to eliminate 561 - * redudant context switches. This presumes that requests are not 562 - * reordered! Or when they are the tracking is updated along with 563 - * the emission of individual requests into the legacy command 564 - * stream (ring). 565 - */ 566 - struct i915_gem_context *legacy_active_context; 567 - struct i915_hw_ppgtt *legacy_active_ppgtt; 568 - 569 560 /* status_notifier: list of callbacks for context-switch changes */ 570 561 struct atomic_notifier_head context_status_notifier; 571 562