Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915: Allocate a common scratch page

Currently we allocate a scratch page for each engine, but since we only
ever write into it for post-sync operations, it is not exposed to
userspace nor do we care for coherency. As we then do not care about its
contents, we can use one page for all, reducing our allocations and
avoiding complications by not assuming per-engine isolation.

For later use, it simplifies engine initialisation (by removing the
allocation that required struct_mutex!) and means that we can always rely
on there being a scratch page.

v2: Check that we allocated a large enough scratch for I830 w/a

Fixes: 06e562e7f515 ("drm/i915/ringbuffer: Delay after EMIT_INVALIDATE for gen4/gen5") # v4.18.20
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108850
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181204141522.13640-1-chris@chris-wilson.co.uk
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.18.20+
(cherry picked from commit 5179749925933575a67f9d8f16d0cc204f98a29f)
[Joonas: Use new function in gen9_init_indirectctx_bb too]
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

authored by

Chris Wilson and committed by
Joonas Lahtinen
fe78742d cf66b8a0

+75 -87
+7
drivers/gpu/drm/i915/i915_drv.h
··· 2150 2150 struct delayed_work idle_work; 2151 2151 2152 2152 ktime_t last_init_time; 2153 + 2154 + struct i915_vma *scratch; 2153 2155 } gt; 2154 2156 2155 2157 /* perform PHY state sanity checks? */ ··· 3872 3870 return CNL_HWS_CSB_WRITE_INDEX; 3873 3871 else 3874 3872 return I915_HWS_CSB_WRITE_INDEX; 3873 + } 3874 + 3875 + static inline u32 i915_scratch_offset(const struct drm_i915_private *i915) 3876 + { 3877 + return i915_ggtt_offset(i915->gt.scratch); 3875 3878 } 3876 3879 3877 3880 #endif
+49 -1
drivers/gpu/drm/i915/i915_gem.c
··· 5500 5500 goto out_ctx; 5501 5501 } 5502 5502 5503 + static int 5504 + i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) 5505 + { 5506 + struct drm_i915_gem_object *obj; 5507 + struct i915_vma *vma; 5508 + int ret; 5509 + 5510 + obj = i915_gem_object_create_stolen(i915, size); 5511 + if (!obj) 5512 + obj = i915_gem_object_create_internal(i915, size); 5513 + if (IS_ERR(obj)) { 5514 + DRM_ERROR("Failed to allocate scratch page\n"); 5515 + return PTR_ERR(obj); 5516 + } 5517 + 5518 + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 5519 + if (IS_ERR(vma)) { 5520 + ret = PTR_ERR(vma); 5521 + goto err_unref; 5522 + } 5523 + 5524 + ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 5525 + if (ret) 5526 + goto err_unref; 5527 + 5528 + i915->gt.scratch = vma; 5529 + return 0; 5530 + 5531 + err_unref: 5532 + i915_gem_object_put(obj); 5533 + return ret; 5534 + } 5535 + 5536 + static void i915_gem_fini_scratch(struct drm_i915_private *i915) 5537 + { 5538 + i915_vma_unpin_and_release(&i915->gt.scratch, 0); 5539 + } 5540 + 5503 5541 int i915_gem_init(struct drm_i915_private *dev_priv) 5504 5542 { 5505 5543 int ret; ··· 5584 5546 goto err_unlock; 5585 5547 } 5586 5548 5587 - ret = i915_gem_contexts_init(dev_priv); 5549 + ret = i915_gem_init_scratch(dev_priv, 5550 + IS_GEN2(dev_priv) ? 
SZ_256K : PAGE_SIZE); 5588 5551 if (ret) { 5589 5552 GEM_BUG_ON(ret == -EIO); 5590 5553 goto err_ggtt; 5554 + } 5555 + 5556 + ret = i915_gem_contexts_init(dev_priv); 5557 + if (ret) { 5558 + GEM_BUG_ON(ret == -EIO); 5559 + goto err_scratch; 5591 5560 } 5592 5561 5593 5562 ret = intel_engines_init(dev_priv); ··· 5669 5624 err_context: 5670 5625 if (ret != -EIO) 5671 5626 i915_gem_contexts_fini(dev_priv); 5627 + err_scratch: 5628 + i915_gem_fini_scratch(dev_priv); 5672 5629 err_ggtt: 5673 5630 err_unlock: 5674 5631 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); ··· 5722 5675 intel_uc_fini(dev_priv); 5723 5676 i915_gem_cleanup_engines(dev_priv); 5724 5677 i915_gem_contexts_fini(dev_priv); 5678 + i915_gem_fini_scratch(dev_priv); 5725 5679 mutex_unlock(&dev_priv->drm.struct_mutex); 5726 5680 5727 5681 intel_wa_list_free(&dev_priv->gt_wa_list);
+1 -1
drivers/gpu/drm/i915/i915_gpu_error.c
··· 1495 1495 if (HAS_BROKEN_CS_TLB(i915)) 1496 1496 ee->wa_batchbuffer = 1497 1497 i915_error_object_create(i915, 1498 - engine->scratch); 1498 + i915->gt.scratch); 1499 1499 request_record_user_bo(request, ee); 1500 1500 1501 1501 ee->ctx =
-42
drivers/gpu/drm/i915/intel_engine_cs.c
··· 490 490 intel_engine_init_cmd_parser(engine); 491 491 } 492 492 493 - int intel_engine_create_scratch(struct intel_engine_cs *engine, 494 - unsigned int size) 495 - { 496 - struct drm_i915_gem_object *obj; 497 - struct i915_vma *vma; 498 - int ret; 499 - 500 - WARN_ON(engine->scratch); 501 - 502 - obj = i915_gem_object_create_stolen(engine->i915, size); 503 - if (!obj) 504 - obj = i915_gem_object_create_internal(engine->i915, size); 505 - if (IS_ERR(obj)) { 506 - DRM_ERROR("Failed to allocate scratch page\n"); 507 - return PTR_ERR(obj); 508 - } 509 - 510 - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); 511 - if (IS_ERR(vma)) { 512 - ret = PTR_ERR(vma); 513 - goto err_unref; 514 - } 515 - 516 - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 517 - if (ret) 518 - goto err_unref; 519 - 520 - engine->scratch = vma; 521 - return 0; 522 - 523 - err_unref: 524 - i915_gem_object_put(obj); 525 - return ret; 526 - } 527 - 528 - void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) 529 - { 530 - i915_vma_unpin_and_release(&engine->scratch, 0); 531 - } 532 - 533 493 static void cleanup_status_page(struct intel_engine_cs *engine) 534 494 { 535 495 if (HWS_NEEDS_PHYSICAL(engine->i915)) { ··· 663 703 void intel_engine_cleanup_common(struct intel_engine_cs *engine) 664 704 { 665 705 struct drm_i915_private *i915 = engine->i915; 666 - 667 - intel_engine_cleanup_scratch(engine); 668 706 669 707 cleanup_status_page(engine); 670 708
+6 -13
drivers/gpu/drm/i915/intel_lrc.c
··· 1448 1448 static u32 * 1449 1449 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) 1450 1450 { 1451 + /* NB no one else is allowed to scribble over scratch + 256! */ 1451 1452 *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; 1452 1453 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); 1453 - *batch++ = i915_ggtt_offset(engine->scratch) + 256; 1454 + *batch++ = i915_scratch_offset(engine->i915) + 256; 1454 1455 *batch++ = 0; 1455 1456 1456 1457 *batch++ = MI_LOAD_REGISTER_IMM(1); ··· 1465 1464 1466 1465 *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; 1467 1466 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); 1468 - *batch++ = i915_ggtt_offset(engine->scratch) + 256; 1467 + *batch++ = i915_scratch_offset(engine->i915) + 256; 1469 1468 *batch++ = 0; 1470 1469 1471 1470 return batch; ··· 1502 1501 PIPE_CONTROL_GLOBAL_GTT_IVB | 1503 1502 PIPE_CONTROL_CS_STALL | 1504 1503 PIPE_CONTROL_QW_WRITE, 1505 - i915_ggtt_offset(engine->scratch) + 1504 + i915_scratch_offset(engine->i915) + 1506 1505 2 * CACHELINE_BYTES); 1507 1506 1508 1507 *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; ··· 1579 1578 PIPE_CONTROL_GLOBAL_GTT_IVB | 1580 1579 PIPE_CONTROL_CS_STALL | 1581 1580 PIPE_CONTROL_QW_WRITE, 1582 - i915_ggtt_offset(engine->scratch) 1581 + i915_scratch_offset(engine->i915) 1583 1582 + 2 * CACHELINE_BYTES); 1584 1583 } 1585 1584 ··· 2147 2146 { 2148 2147 struct intel_engine_cs *engine = request->engine; 2149 2148 u32 scratch_addr = 2150 - i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; 2149 + i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES; 2151 2150 bool vf_flush_wa = false, dc_flush_wa = false; 2152 2151 u32 *cs, flags = 0; 2153 2152 int len; ··· 2484 2483 if (ret) 2485 2484 return ret; 2486 2485 2487 - ret = intel_engine_create_scratch(engine, PAGE_SIZE); 2488 - if (ret) 2489 - goto err_cleanup_common; 2490 - 2491 2486 ret = intel_init_workaround_bb(engine); 2492 2487 if (ret) { 2493 2488 /* ··· 2498 2501 
intel_engine_init_workarounds(engine); 2499 2502 2500 2503 return 0; 2501 - 2502 - err_cleanup_common: 2503 - intel_engine_cleanup_common(engine); 2504 - return ret; 2505 2504 } 2506 2505 2507 2506 int logical_xcs_ring_init(struct intel_engine_cs *engine)
+12 -25
drivers/gpu/drm/i915/intel_ringbuffer.c
··· 150 150 */ 151 151 if (mode & EMIT_INVALIDATE) { 152 152 *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; 153 - *cs++ = i915_ggtt_offset(rq->engine->scratch) | 154 - PIPE_CONTROL_GLOBAL_GTT; 153 + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; 155 154 *cs++ = 0; 156 155 *cs++ = 0; 157 156 ··· 158 159 *cs++ = MI_FLUSH; 159 160 160 161 *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; 161 - *cs++ = i915_ggtt_offset(rq->engine->scratch) | 162 - PIPE_CONTROL_GLOBAL_GTT; 162 + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; 163 163 *cs++ = 0; 164 164 *cs++ = 0; 165 165 } ··· 210 212 static int 211 213 intel_emit_post_sync_nonzero_flush(struct i915_request *rq) 212 214 { 213 - u32 scratch_addr = 214 - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; 215 + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; 215 216 u32 *cs; 216 217 217 218 cs = intel_ring_begin(rq, 6); ··· 243 246 static int 244 247 gen6_render_ring_flush(struct i915_request *rq, u32 mode) 245 248 { 246 - u32 scratch_addr = 247 - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; 249 + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; 248 250 u32 *cs, flags = 0; 249 251 int ret; 250 252 ··· 312 316 static int 313 317 gen7_render_ring_flush(struct i915_request *rq, u32 mode) 314 318 { 315 - u32 scratch_addr = 316 - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; 319 + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; 317 320 u32 *cs, flags = 0; 318 321 319 322 /* ··· 966 971 } 967 972 968 973 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */ 969 - #define I830_BATCH_LIMIT (256*1024) 974 + #define I830_BATCH_LIMIT SZ_256K 970 975 #define I830_TLB_ENTRIES (2) 971 976 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) 972 977 static int ··· 974 979 u64 offset, u32 len, 975 980 unsigned int dispatch_flags) 976 981 { 
977 - u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch); 982 + u32 *cs, cs_offset = i915_scratch_offset(rq->i915); 983 + 984 + GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE); 978 985 979 986 cs = intel_ring_begin(rq, 6); 980 987 if (IS_ERR(cs)) ··· 1434 1437 { 1435 1438 struct i915_timeline *timeline; 1436 1439 struct intel_ring *ring; 1437 - unsigned int size; 1438 1440 int err; 1439 1441 1440 1442 intel_engine_setup_common(engine); ··· 1458 1462 GEM_BUG_ON(engine->buffer); 1459 1463 engine->buffer = ring; 1460 1464 1461 - size = PAGE_SIZE; 1462 - if (HAS_BROKEN_CS_TLB(engine->i915)) 1463 - size = I830_WA_SIZE; 1464 - err = intel_engine_create_scratch(engine, size); 1465 + err = intel_engine_init_common(engine); 1465 1466 if (err) 1466 1467 goto err_unpin; 1467 1468 1468 - err = intel_engine_init_common(engine); 1469 - if (err) 1470 - goto err_scratch; 1471 - 1472 1469 return 0; 1473 1470 1474 - err_scratch: 1475 - intel_engine_cleanup_scratch(engine); 1476 1471 err_unpin: 1477 1472 intel_ring_unpin(ring); 1478 1473 err_ring: ··· 1537 1550 /* Stall until the page table load is complete */ 1538 1551 *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; 1539 1552 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); 1540 - *cs++ = i915_ggtt_offset(engine->scratch); 1553 + *cs++ = i915_scratch_offset(rq->i915); 1541 1554 *cs++ = MI_NOOP; 1542 1555 1543 1556 intel_ring_advance(rq, cs); ··· 1646 1659 /* Insert a delay before the next switch! */ 1647 1660 *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; 1648 1661 *cs++ = i915_mmio_reg_offset(last_reg); 1649 - *cs++ = i915_ggtt_offset(engine->scratch); 1662 + *cs++ = i915_scratch_offset(rq->i915); 1650 1663 *cs++ = MI_NOOP; 1651 1664 } 1652 1665 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-5
drivers/gpu/drm/i915/intel_ringbuffer.h
··· 442 442 struct intel_hw_status_page status_page; 443 443 struct i915_ctx_workarounds wa_ctx; 444 444 struct i915_wa_list wa_list; 445 - struct i915_vma *scratch; 446 445 447 446 u32 irq_keep_mask; /* always keep these interrupts */ 448 447 u32 irq_enable_mask; /* bitmask to enable ring interrupt */ ··· 898 899 void intel_engine_setup_common(struct intel_engine_cs *engine); 899 900 int intel_engine_init_common(struct intel_engine_cs *engine); 900 901 void intel_engine_cleanup_common(struct intel_engine_cs *engine); 901 - 902 - int intel_engine_create_scratch(struct intel_engine_cs *engine, 903 - unsigned int size); 904 - void intel_engine_cleanup_scratch(struct intel_engine_cs *engine); 905 902 906 903 int intel_init_render_ring_buffer(struct intel_engine_cs *engine); 907 904 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);