/* drivers/gpu/drm/i915/intel_ringbuffer.c @ Linux v4.18 (2137 lines, 56 kB) */
1/* 2 * Copyright © 2008-2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * Zou Nan hai <nanhai.zou@intel.com> 26 * Xiang Hai hao<haihao.xiang@intel.com> 27 * 28 */ 29 30#include <linux/log2.h> 31 32#include <drm/drmP.h> 33#include <drm/i915_drm.h> 34 35#include "i915_drv.h" 36#include "i915_gem_render_state.h" 37#include "i915_trace.h" 38#include "intel_drv.h" 39#include "intel_workarounds.h" 40 41/* Rough estimate of the typical request size, performing a flush, 42 * set-context and then emitting the batch. 43 */ 44#define LEGACY_REQUEST_SIZE 200 45 46static unsigned int __intel_ring_space(unsigned int head, 47 unsigned int tail, 48 unsigned int size) 49{ 50 /* 51 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the 52 * same cacheline, the Head Pointer must not be greater than the Tail 53 * Pointer." 
	 */
	GEM_BUG_ON(!is_power_of_2(size));
	/* Mask keeps the result in-ring; one cacheline is held back so that
	 * head can never catch up with tail on the same cacheline.
	 */
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}

/* Recompute the free space between emit and head, cache it in ring->space
 * and return it.
 */
unsigned int intel_ring_update_space(struct intel_ring *ring)
{
	unsigned int space;

	space = __intel_ring_space(ring->head, ring->emit, ring->size);

	ring->space = space;
	return space;
}

/* gen2: emit a bare MI_FLUSH, optionally with a read-cache flush, followed
 * by a NOOP for qword alignment. Returns 0 or the intel_ring_begin() error.
 */
static int
gen2_render_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;

	cmd = MI_FLUSH;

	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

/* gen4/gen5 render flush: MI_FLUSH with extra invalidate bits on request. */
static int
gen4_render_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		/* G4x/ilk additionally support an indirect-state invalidate */
		if (IS_G4X(rq->i915) || IS_GEN5(rq->i915))
			cmd |= MI_INVALIDATE_ISP;
	}

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

/*
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 * "1 of the following must also be set:
 * - Render Target Cache Flush Enable ([12] of DW1)
 * - Depth Cache Flush Enable ([0] of DW1)
 * - Stall at Pixel Scoreboard ([1] of DW1)
 * - Depth Stall ([13] of DW1)
 * - Post-Sync Operation ([13] of DW1)
 * - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it. Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either. Notify enable is IRQs, which aren't
 * really our business. That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
{
	/* Scratch QW in the engine's scratch page used as the post-sync
	 * write target (the value written is ignored).
	 */
	u32 scratch_addr =
		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
	u32 *cs;

	/* First: CS-stall + stall-at-scoreboard, as required before the
	 * post-sync-op PIPE_CONTROL below.
	 */
	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0; /* low dword */
	*cs++ = 0; /* high dword */
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	/* Second: the actual non-zero post-sync op (QW write to scratch). */
	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

/* gen6 (snb) render flush: PIPE_CONTROL preceded by the mandatory
 * post-sync-nonzero workaround sequence above.
 */
static int
gen6_render_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 scratch_addr =
		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
	u32 *cs, flags = 0;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(rq);
	if (ret)
		return ret;

	/* Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
235 */ 236 flags |= PIPE_CONTROL_CS_STALL; 237 } 238 if (mode & EMIT_INVALIDATE) { 239 flags |= PIPE_CONTROL_TLB_INVALIDATE; 240 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 241 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 242 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 243 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 244 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 245 /* 246 * TLB invalidate requires a post-sync write. 247 */ 248 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; 249 } 250 251 cs = intel_ring_begin(rq, 4); 252 if (IS_ERR(cs)) 253 return PTR_ERR(cs); 254 255 *cs++ = GFX_OP_PIPE_CONTROL(4); 256 *cs++ = flags; 257 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; 258 *cs++ = 0; 259 intel_ring_advance(rq, cs); 260 261 return 0; 262} 263 264static int 265gen7_render_ring_cs_stall_wa(struct i915_request *rq) 266{ 267 u32 *cs; 268 269 cs = intel_ring_begin(rq, 4); 270 if (IS_ERR(cs)) 271 return PTR_ERR(cs); 272 273 *cs++ = GFX_OP_PIPE_CONTROL(4); 274 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; 275 *cs++ = 0; 276 *cs++ = 0; 277 intel_ring_advance(rq, cs); 278 279 return 0; 280} 281 282static int 283gen7_render_ring_flush(struct i915_request *rq, u32 mode) 284{ 285 u32 scratch_addr = 286 i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; 287 u32 *cs, flags = 0; 288 289 /* 290 * Ensure that any following seqno writes only happen when the render 291 * cache is indeed flushed. 292 * 293 * Workaround: 4th PIPE_CONTROL command (except the ones with only 294 * read-cache invalidate bits set) must have the CS_STALL bit set. We 295 * don't try to be clever and just set it unconditionally. 296 */ 297 flags |= PIPE_CONTROL_CS_STALL; 298 299 /* Just flush everything. Experiments have shown that reducing the 300 * number of bits based on the write domains has little performance 301 * impact. 
302 */ 303 if (mode & EMIT_FLUSH) { 304 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 305 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 306 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 307 flags |= PIPE_CONTROL_FLUSH_ENABLE; 308 } 309 if (mode & EMIT_INVALIDATE) { 310 flags |= PIPE_CONTROL_TLB_INVALIDATE; 311 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 312 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 313 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 314 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 315 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 316 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; 317 /* 318 * TLB invalidate requires a post-sync write. 319 */ 320 flags |= PIPE_CONTROL_QW_WRITE; 321 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; 322 323 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; 324 325 /* Workaround: we must issue a pipe_control with CS-stall bit 326 * set before a pipe_control command that has the state cache 327 * invalidate bit set. */ 328 gen7_render_ring_cs_stall_wa(rq); 329 } 330 331 cs = intel_ring_begin(rq, 4); 332 if (IS_ERR(cs)) 333 return PTR_ERR(cs); 334 335 *cs++ = GFX_OP_PIPE_CONTROL(4); 336 *cs++ = flags; 337 *cs++ = scratch_addr; 338 *cs++ = 0; 339 intel_ring_advance(rq, cs); 340 341 return 0; 342} 343 344static void ring_setup_phys_status_page(struct intel_engine_cs *engine) 345{ 346 struct drm_i915_private *dev_priv = engine->i915; 347 u32 addr; 348 349 addr = dev_priv->status_page_dmah->busaddr; 350 if (INTEL_GEN(dev_priv) >= 4) 351 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0; 352 I915_WRITE(HWS_PGA, addr); 353} 354 355static void intel_ring_setup_status_page(struct intel_engine_cs *engine) 356{ 357 struct drm_i915_private *dev_priv = engine->i915; 358 i915_reg_t mmio; 359 360 /* The ring status page addresses are no longer next to the rest of 361 * the ring registers as of gen7. 362 */ 363 if (IS_GEN7(dev_priv)) { 364 switch (engine->id) { 365 /* 366 * No more rings exist on Gen7. 
		 * Default case is only to shut up
		 * gcc switch check warning.
		 */
		default:
			GEM_BUG_ON(engine->id);
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		case VECS:
			mmio = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(dev_priv)) {
		mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
	} else {
		/* pre-gen6: status page register lives next to the ring regs */
		mmio = RING_HWS_PGA(engine->mmio_base);
	}

	/* Mask off all hardware-status interrupts before repointing the page */
	if (INTEL_GEN(dev_priv) >= 6)
		I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);

	I915_WRITE(mmio, engine->status_page.ggtt_offset);
	POSTING_READ(mmio);

	/* Flush the TLB for this page */
	if (IS_GEN(dev_priv, 6, 7)) {
		i915_reg_t reg = RING_INSTPM(engine->mmio_base);

		/* ring should be idle before issuing a sync flush*/
		WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);

		I915_WRITE(reg,
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					      INSTPM_SYNC_FLUSH));
		/* Hardware clears INSTPM_SYNC_FLUSH when the flush completes */
		if (intel_wait_for_register(dev_priv,
					    reg, INSTPM_SYNC_FLUSH, 0,
					    1000))
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
				  engine->name);
	}
}

/*
 * Ask the engine to stop processing and clear its HEAD/TAIL/CTL registers.
 * Returns true if the ring ended up empty (HEAD == 0), false if the engine
 * refused to idle with work still queued.
 */
static bool stop_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (INTEL_GEN(dev_priv) > 2) {
		I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
		if (intel_wait_for_register(dev_priv,
					    RING_MI_MODE(engine->mmio_base),
					    MODE_IDLE,
					    MODE_IDLE,
					    1000)) {
			DRM_ERROR("%s : timed out trying to stop ring\n",
				  engine->name);
			/* Sometimes we observe that the idle flag is not
			 * set even though the ring is empty. So double
			 * check before giving up.
			 */
			if (I915_READ_HEAD(engine) != I915_READ_TAIL(engine))
				return false;
		}
	}

	/* First park HEAD at TAIL so the ring is empty, then zero both */
	I915_WRITE_HEAD(engine, I915_READ_TAIL(engine));

	I915_WRITE_HEAD(engine, 0);
	I915_WRITE_TAIL(engine, 0);

	/* The ring must be empty before it is disabled */
	I915_WRITE_CTL(engine, 0);

	return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0;
}

/*
 * Bring the engine's ring registers (START/HEAD/TAIL/CTL) back to a
 * working state. Returns 0 on success or -EIO if the engine cannot be
 * stopped or fails to report RING_VALID afterwards.
 */
static int init_ring_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_ring *ring = engine->buffer;
	int ret = 0;

	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (!stop_ring(engine)) {
		/* G45 ring initialization often fails to reset head to zero */
		DRM_DEBUG_DRIVER("%s head not reset to zero "
				 "ctl %08x head %08x tail %08x start %08x\n",
				 engine->name,
				 I915_READ_CTL(engine),
				 I915_READ_HEAD(engine),
				 I915_READ_TAIL(engine),
				 I915_READ_START(engine));

		/* Retry once before declaring the engine wedged */
		if (!stop_ring(engine)) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  engine->name,
				  I915_READ_CTL(engine),
				  I915_READ_HEAD(engine),
				  I915_READ_TAIL(engine),
				  I915_READ_START(engine));
			ret = -EIO;
			goto out;
		}
	}

	if (HWS_NEEDS_PHYSICAL(dev_priv))
		ring_setup_phys_status_page(engine);
	else
		intel_ring_setup_status_page(engine);

	intel_engine_reset_breadcrumbs(engine);

	/* Enforce ordering by reading HEAD register back */
	I915_READ_HEAD(engine);

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values.
	 */
	I915_WRITE_START(engine, i915_ggtt_offset(ring->vma));

	/* WaClearRingBufHeadRegAtInit:ctg,elk */
	if (I915_READ_HEAD(engine))
		DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n",
				 engine->name, I915_READ_HEAD(engine));

	/* Replay from ring->head up to ring->tail on restart */
	intel_ring_update_space(ring);
	I915_WRITE_HEAD(engine, ring->head);
	I915_WRITE_TAIL(engine, ring->tail);
	(void)I915_READ_TAIL(engine);

	I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (intel_wait_for_register(dev_priv, RING_CTL(engine->mmio_base),
				    RING_VALID, RING_VALID,
				    50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
			  engine->name,
			  I915_READ_CTL(engine),
			  I915_READ_CTL(engine) & RING_VALID,
			  I915_READ_HEAD(engine), ring->head,
			  I915_READ_TAIL(engine), ring->tail,
			  I915_READ_START(engine),
			  i915_ggtt_offset(ring->vma));
		ret = -EIO;
		goto out;
	}

	intel_engine_init_hangcheck(engine);

	if (INTEL_GEN(dev_priv) > 2)
		I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	return ret;
}

/*
 * Engine reset callback for the legacy ringbuffer backend: restore the
 * logical context (CCID) and page-directory registers so execution can
 * resume after a GPU reset, and skip over a guilty batch if @request hung.
 */
static void reset_ring_common(struct intel_engine_cs *engine,
			      struct i915_request *request)
{
	/*
	 * RC6 must be prevented until the reset is complete and the engine
	 * reinitialised. If it occurs in the middle of this sequence, the
	 * state written to/loaded from the power context is ill-defined (e.g.
	 * the PP_BASE_DIR may be lost).
	 */
	assert_forcewakes_active(engine->i915, FORCEWAKE_ALL);

	/*
	 * Try to restore the logical GPU state to match the continuation
	 * of the request queue. If we skip the context/PD restore, then
	 * the next request may try to execute assuming that its context
	 * is valid and loaded on the GPU and so may try to access invalid
	 * memory, prompting repeated GPU hangs.
	 *
	 * If the request was guilty, we still restore the logical state
	 * in case the next request requires it (e.g. the aliasing ppgtt),
	 * but skip over the hung batch.
	 *
	 * If the request was innocent, we try to replay the request with
	 * the restored context.
	 */
	if (request) {
		struct drm_i915_private *dev_priv = request->i915;
		struct intel_context *ce = to_intel_context(request->ctx,
							    engine);
		struct i915_hw_ppgtt *ppgtt;

		if (ce->state) {
			I915_WRITE(CCID,
				   i915_ggtt_offset(ce->state) |
				   BIT(8) /* must be set! */ |
				   CCID_EXTENDED_STATE_SAVE |
				   CCID_EXTENDED_STATE_RESTORE |
				   CCID_EN);
		}

		/* Fall back to the aliasing ppgtt if the ctx has no own PD */
		ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
		if (ppgtt) {
			u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;

			I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
			I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset);

			/* Wait for the PD reload to complete */
			if (intel_wait_for_register(dev_priv,
						    RING_PP_DIR_BASE(engine),
						    BIT(0), 0,
						    10))
				DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n");

			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
		}

		/* If the rq hung, jump to its breadcrumb and skip the batch */
		if (request->fence.error == -EIO)
			request->ring->head = request->postfix;
	} else {
		engine->legacy_active_context = NULL;
		engine->legacy_active_ppgtt = NULL;
	}
}

/*
 * Per-context render initialisation: emit the context workarounds and the
 * golden render state. Returns 0 or the first error encountered.
 */
static int intel_rcs_ctx_init(struct i915_request *rq)
{
	int ret;

	ret = intel_ctx_workarounds_emit(rq);
	if (ret != 0)
		return ret;

	ret = i915_gem_render_state_emit(rq);
	if (ret)
		return ret;

	return 0;
}

static int init_render_ring(struct
intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	/* Common ring setup first; the render-specific tweaks follow */
	int ret = init_ring_common(engine);
	if (ret)
		return ret;

	intel_whitelist_workarounds_apply(engine);

	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
	if (IS_GEN(dev_priv, 4, 6))
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 *
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
	 */
	if (IS_GEN(dev_priv, 6, 7))
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	/* WaEnableFlushTlbInvalidationMode:snb */
	if (IS_GEN6(dev_priv))
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));

	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
	if (IS_GEN7(dev_priv))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (IS_GEN6(dev_priv)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 * policy. [...] This bit must be reset. LRA replacement
		 * policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (IS_GEN(dev_priv, 6, 7))
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (INTEL_GEN(dev_priv) >= 6)
		I915_WRITE_IMR(engine, ~engine->irq_keep_mask);

	return 0;
}

/*
 * Emit MI_LOAD_REGISTER_IMM writes of rq's global seqno into the signal
 * mailbox register of every other gen6 semaphore-capable engine. Returns
 * the advanced command-stream pointer; @cs must have room for the writes
 * (the caller sized the breadcrumb accordingly). A trailing MI_NOOP keeps
 * the emitted dword count even.
 */
static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
{
	struct drm_i915_private *dev_priv = rq->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int num_rings = 0;

	for_each_engine(engine, dev_priv, id) {
		i915_reg_t mbox_reg;

		if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
			continue;

		mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id];
		if (i915_mmio_reg_valid(mbox_reg)) {
			*cs++ = MI_LOAD_REGISTER_IMM(1);
			*cs++ = i915_mmio_reg_offset(mbox_reg);
			*cs++ = rq->global_seqno;
			num_rings++;
		}
	}
	/* Each signal is 3 dwords; pad to an even total */
	if (num_rings & 1)
		*cs++ = MI_NOOP;

	return cs;
}

/*
 * Engine cancel callback: mark every submitted-but-incomplete request on
 * the engine timeline as failed (-EIO). Called with the GPU wedged.
 */
static void cancel_requests(struct intel_engine_cs *engine)
{
	struct i915_request *request;
	unsigned long flags;

	spin_lock_irqsave(&engine->timeline.lock, flags);

	/* Mark all submitted requests as skipped.
*/ 703 list_for_each_entry(request, &engine->timeline.requests, link) { 704 GEM_BUG_ON(!request->global_seqno); 705 if (!i915_request_completed(request)) 706 dma_fence_set_error(&request->fence, -EIO); 707 } 708 /* Remaining _unready_ requests will be nop'ed when submitted */ 709 710 spin_unlock_irqrestore(&engine->timeline.lock, flags); 711} 712 713static void i9xx_submit_request(struct i915_request *request) 714{ 715 struct drm_i915_private *dev_priv = request->i915; 716 717 i915_request_submit(request); 718 719 I915_WRITE_TAIL(request->engine, 720 intel_ring_set_tail(request->ring, request->tail)); 721} 722 723static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) 724{ 725 *cs++ = MI_STORE_DWORD_INDEX; 726 *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; 727 *cs++ = rq->global_seqno; 728 *cs++ = MI_USER_INTERRUPT; 729 730 rq->tail = intel_ring_offset(rq, cs); 731 assert_ring_tail_valid(rq->ring, rq->tail); 732} 733 734static const int i9xx_emit_breadcrumb_sz = 4; 735 736static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs) 737{ 738 return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs)); 739} 740 741static int 742gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal) 743{ 744 u32 dw1 = MI_SEMAPHORE_MBOX | 745 MI_SEMAPHORE_COMPARE | 746 MI_SEMAPHORE_REGISTER; 747 u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id]; 748 u32 *cs; 749 750 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); 751 752 cs = intel_ring_begin(rq, 4); 753 if (IS_ERR(cs)) 754 return PTR_ERR(cs); 755 756 *cs++ = dw1 | wait_mbox; 757 /* Throughout all of the GEM code, seqno passed implies our current 758 * seqno is >= the last seqno executed. However for hardware the 759 * comparison is strictly greater than. 
760 */ 761 *cs++ = signal->global_seqno - 1; 762 *cs++ = 0; 763 *cs++ = MI_NOOP; 764 intel_ring_advance(rq, cs); 765 766 return 0; 767} 768 769static void 770gen5_seqno_barrier(struct intel_engine_cs *engine) 771{ 772 /* MI_STORE are internally buffered by the GPU and not flushed 773 * either by MI_FLUSH or SyncFlush or any other combination of 774 * MI commands. 775 * 776 * "Only the submission of the store operation is guaranteed. 777 * The write result will be complete (coherent) some time later 778 * (this is practically a finite period but there is no guaranteed 779 * latency)." 780 * 781 * Empirically, we observe that we need a delay of at least 75us to 782 * be sure that the seqno write is visible by the CPU. 783 */ 784 usleep_range(125, 250); 785} 786 787static void 788gen6_seqno_barrier(struct intel_engine_cs *engine) 789{ 790 struct drm_i915_private *dev_priv = engine->i915; 791 792 /* Workaround to force correct ordering between irq and seqno writes on 793 * ivb (and maybe also on snb) by reading from a CS register (like 794 * ACTHD) before reading the status page. 795 * 796 * Note that this effectively stalls the read by the time it takes to 797 * do a memory transaction, which more or less ensures that the write 798 * from the GPU has sufficient time to invalidate the CPU cacheline. 799 * Alternatively we could delay the interrupt from the CS ring to give 800 * the write time to land, but that would incur a delay after every 801 * batch i.e. much more frequent than a delay when waiting for the 802 * interrupt (with the same net latency). 803 * 804 * Also note that to prevent whole machine hangs on gen7, we have to 805 * take the spinlock to guard against concurrent cacheline access. 
	 */
	spin_lock_irq(&dev_priv->uncore.lock);
	POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
	spin_unlock_irq(&dev_priv->uncore.lock);
}

/* gen5 (ilk): ring interrupts are routed through the GT IMR helpers */
static void
gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
}

static void
gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
}

/* gen3/gen4: single shared 32-bit IMR for all engines */
static void
i9xx_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask &= ~engine->irq_enable_mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ_FW(RING_IMR(engine->mmio_base));
}

static void
i9xx_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask |= engine->irq_enable_mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
}

/* gen2: IMR is only 16 bits wide, hence the 16-bit accessors */
static void
i8xx_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask &= ~engine->irq_enable_mask;
	I915_WRITE16(IMR, dev_priv->irq_mask);
	POSTING_READ16(RING_IMR(engine->mmio_base));
}

static void
i8xx_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask |= engine->irq_enable_mask;
	I915_WRITE16(IMR, dev_priv->irq_mask);
}

/* BSD (video) ring flush: a simple MI_FLUSH + padding NOOP */
static int
bsd_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);
	return 0;
}

/* gen6+: per-engine IMR plus the shared GT interrupt enable */
static void
gen6_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine,
		       ~(engine->irq_enable_mask |
			 engine->irq_keep_mask));
	gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask);
}

static void
gen6_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* Keep only the always-on bits unmasked */
	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
	gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask);
}

/* hsw VEBOX: interrupts are routed via the PM interrupt registers */
static void
hsw_vebox_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
	gen6_unmask_pm_irq(dev_priv, engine->irq_enable_mask);
}

static void
hsw_vebox_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine, ~0);
	gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask);
}

/* gen4 (i965): start a batch buffer via MI_BATCH_BUFFER_START */
static int
i965_emit_bb_start(struct i915_request *rq,
		   u64 offset, u32 length,
		   unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
		I915_DISPATCH_SECURE ?
		0 : MI_BATCH_NON_SECURE_I965);
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
#define I830_BATCH_LIMIT (256*1024)
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
/*
 * i830 batch start, working around a TLB invalidation erratum: first evict
 * stale PTE TLB entries with a dummy blit, and for non-pinned batches copy
 * the batch into a stable scratch area before executing it from there.
 */
static int
i830_emit_bb_start(struct i915_request *rq,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		/* The scratch area can only hold I830_BATCH_LIMIT bytes */
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(rq, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/* Blit the batch (which has now all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA;
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ?
		0 : MI_BATCH_NON_SECURE);
	intel_ring_advance(rq, cs);

	return 0;
}

/* gen3: plain MI_BATCH_BUFFER_START into the GTT */
static int
i915_emit_bb_start(struct i915_request *rq,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
		MI_BATCH_NON_SECURE);
	intel_ring_advance(rq, cs);

	return 0;
}



/*
 * Pin the ring's backing vma into the GGTT and map it for CPU writes
 * (WB on LLC platforms, WC otherwise; through the aperture for stolen
 * memory). On success ring->vaddr is set; returns 0 or a negative errno.
 */
int intel_ring_pin(struct intel_ring *ring,
		   struct drm_i915_private *i915,
		   unsigned int offset_bias)
{
	enum i915_map_type map = HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
	struct i915_vma *vma = ring->vma;
	unsigned int flags;
	void *addr;
	int ret;

	GEM_BUG_ON(ring->vaddr);


	flags = PIN_GLOBAL;
	if (offset_bias)
		flags |= PIN_OFFSET_BIAS | offset_bias;
	if (vma->obj->stolen)
		flags |= PIN_MAPPABLE;

	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
		/* Pick the cache domain matching how we will map the ring */
		if (flags & PIN_MAPPABLE || map == I915_MAP_WC)
			ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
		else
			ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
		if (unlikely(ret))
			return ret;
	}

	ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags);
	if (unlikely(ret))
		return ret;

	if (i915_vma_is_map_and_fenceable(vma))
		addr = (void __force *)i915_vma_pin_iomap(vma);
	else
		addr = i915_gem_object_pin_map(vma->obj, map);
	if (IS_ERR(addr))
		goto err;

	vma->obj->pin_global++;

	ring->vaddr = addr;
	return 0;

err:
	i915_vma_unpin(vma);
	return PTR_ERR(addr);
}

/* Reset all ring pointers (head == tail == emit) to @tail and recompute
 * the available space.
 */
void intel_ring_reset(struct intel_ring *ring, u32 tail)
{
	ring->tail = tail;
	ring->head = tail;
	ring->emit = tail;
	intel_ring_update_space(ring);
}

/* Undo intel_ring_pin(): drop the CPU mapping and the GGTT pin. */
void intel_ring_unpin(struct intel_ring *ring)
{
	GEM_BUG_ON(!ring->vma);
	GEM_BUG_ON(!ring->vaddr);

	/* Discard any unused bytes beyond that submitted to hw. */
	intel_ring_reset(ring, ring->tail);

	if (i915_vma_is_map_and_fenceable(ring->vma))
		i915_vma_unpin_iomap(ring->vma);
	else
		i915_gem_object_unpin_map(ring->vma->obj);
	ring->vaddr = NULL;

	ring->vma->obj->pin_global--;
	i915_vma_unpin(ring->vma);
}

/* Allocate the ring's backing object (preferring stolen memory) and
 * return a GGTT vma for it; the vma (or object) is not yet pinned.
 */
static struct i915_vma *
intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_stolen(dev_priv, size);
	if (!obj)
		obj = i915_gem_object_create_internal(dev_priv, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/* mark ring buffers as read-only from GPU side by default */
	obj->gt_ro = 1;

	vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
	if (IS_ERR(vma))
		goto err;

	return vma;

err:
	/* vma is an ERR_PTR here; release the object and propagate it */
	i915_gem_object_put(obj);
	return vma;
}

/*
 * Allocate and initialise an intel_ring of @size bytes on @timeline.
 * Returns the new ring or an ERR_PTR. Caller holds struct_mutex.
 */
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
			 struct i915_timeline *timeline,
			 int size)
{
	struct intel_ring *ring;
	struct i915_vma *vma;

	GEM_BUG_ON(!is_power_of_2(size));
	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
	GEM_BUG_ON(timeline == &engine->timeline);
	lockdep_assert_held(&engine->i915->drm.struct_mutex);

	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
	if (!ring)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ring->request_list);
	ring->timeline = i915_timeline_get(timeline);

	ring->size = size;
	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = size;
	if (IS_I830(engine->i915) || IS_I845G(engine->i915))
		ring->effective_size -= 2 * CACHELINE_BYTES;

	intel_ring_update_space(ring);

	vma = intel_ring_create_vma(engine->i915, size);
	if (IS_ERR(vma)) {
		kfree(ring);
		return ERR_CAST(vma);
	}
	ring->vma = vma;

	return ring;
}

/*
 * Release a ring created by intel_engine_create_ring(): close the vma,
 * drop the object (unless still active) and the timeline reference.
 */
void
intel_ring_free(struct intel_ring *ring)
{
	struct drm_i915_gem_object *obj = ring->vma->obj;

	i915_vma_close(ring->vma);
	__i915_gem_object_release_unless_active(obj);

	i915_timeline_put(ring->timeline);
	kfree(ring);
}

/* Pin a logical context image into the high part of the GGTT. */
static int context_pin(struct intel_context *ce)
{
	struct i915_vma *vma = ce->state;
	int ret;

	/*
	 * Clear this page out of any CPU caches for coherent swap-in/out.
	 * We only want to do this on the first bind so that we do not stall
	 * on an active context (which by nature is already on the GPU).
	 */
	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
		ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
		if (ret)
			return ret;
	}

	return i915_vma_pin(vma, 0, I915_GTT_MIN_ALIGNMENT,
			    PIN_GLOBAL | PIN_HIGH);
}

/*
 * Allocate the backing storage for a legacy (ringbuffer) context image,
 * seeding it from the engine's default state when one has been recorded.
 */
static struct i915_vma *
alloc_context_vma(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create(i915, engine->context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	if (engine->default_state) {
		void *defaults, *vaddr;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_obj;
		}

		defaults = i915_gem_object_pin_map(engine->default_state,
						   I915_MAP_WB);
		if (IS_ERR(defaults)) {
			err = PTR_ERR(defaults);
			goto err_map;
		}

		memcpy(vaddr, defaults, engine->context_size);

		i915_gem_object_unpin_map(engine->default_state);
		i915_gem_object_unpin_map(obj);
	}

	/*
	 * Try to make the context utilize L3 as well as LLC.
	 *
	 * On VLV we don't have L3 controls in the PTEs so we
	 * shouldn't touch the cache level, especially as that
	 * would make the object snooped which might have a
	 * negative performance impact.
	 *
	 * Snooping is required on non-llc platforms in execlist
	 * mode, but since all GGTT accesses use PAT entry 0 we
	 * get snooping anyway regardless of cache_level.
	 *
	 * This is only applicable for Ivy Bridge devices since
	 * later platforms don't have L3 control bits in the PTE.
	 */
	if (IS_IVYBRIDGE(i915)) {
		/* Ignore any error, regard it as a simple optimisation */
		i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
	}

	vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	return vma;

err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

/*
 * Acquire a pin on @ctx for @engine, allocating and pinning the context
 * image on first use.  All legacy contexts share the single engine
 * ringbuffer, which is what is returned.
 */
static struct intel_ring *
intel_ring_context_pin(struct intel_engine_cs *engine,
		       struct i915_gem_context *ctx)
{
	struct intel_context *ce = to_intel_context(ctx, engine);
	int ret;

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);

	if (likely(ce->pin_count++))
		goto out;
	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */

	if (!ce->state && engine->context_size) {
		struct i915_vma *vma;

		vma = alloc_context_vma(engine);
		if (IS_ERR(vma)) {
			ret = PTR_ERR(vma);
			goto err;
		}

		ce->state = vma;
	}

	if (ce->state) {
		ret = context_pin(ce);
		if (ret)
			goto err;

		ce->state->obj->pin_global++;
	}

	i915_gem_context_get(ctx);

out:
	/* One ringbuffer to rule them all */
	return engine->buffer;

err:
	ce->pin_count = 0;
	return ERR_PTR(ret);
}

/* Drop one pin on @ctx; on the last unpin release the state and context. */
static void intel_ring_context_unpin(struct intel_engine_cs *engine,
				     struct i915_gem_context *ctx)
{
	struct intel_context *ce = to_intel_context(ctx, engine);

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
	GEM_BUG_ON(ce->pin_count == 0);

	if (--ce->pin_count)
		return;

	if (ce->state) {
		ce->state->obj->pin_global--;
		i915_vma_unpin(ce->state);
	}

	i915_gem_context_put(ctx);
}

/*
 * Common initialisation for a legacy ringbuffer engine: engine setup,
 * a per-engine timeline, and a 128KiB ring pinned into the GGTT.
 */
static int intel_init_ring_buffer(struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_timeline *timeline;
	int err;

	intel_engine_setup_common(engine);

	err = intel_engine_init_common(engine);
	if (err)
		goto err;

	timeline = i915_timeline_create(engine->i915, engine->name);
	if (IS_ERR(timeline)) {
		err = PTR_ERR(timeline);
		goto err;
	}

	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
	i915_timeline_put(timeline);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err;
	}

	/* Ring wraparound at offset 0 sometimes hangs. No idea why.
	 */
	err = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE);
	if (err)
		goto err_ring;

	GEM_BUG_ON(engine->buffer);
	engine->buffer = ring;

	return 0;

err_ring:
	intel_ring_free(ring);
err:
	intel_engine_cleanup_common(engine);
	return err;
}

/* Tear down a legacy engine: ring, hw state and common engine state. */
void intel_engine_cleanup(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
		(I915_READ_MODE(engine) & MODE_IDLE) == 0);

	intel_ring_unpin(engine->buffer);
	intel_ring_free(engine->buffer);

	if (engine->cleanup)
		engine->cleanup(engine);

	intel_engine_cleanup_common(engine);

	dev_priv->engine[engine->id] = NULL;
	kfree(engine);
}

/* After resume, restart every engine's ring from offset 0. */
void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* Restart from the beginning of the rings for convenience */
	for_each_engine(engine, dev_priv, id)
		intel_ring_reset(engine->buffer, 0);
}

/*
 * Emit MI_SET_CONTEXT to switch the hardware context, wrapped in the
 * gen7 PSMI sleep-message workaround when cross-engine semaphores are
 * in use.  @flags selects save/restore behaviour (e.g. MI_FORCE_RESTORE,
 * MI_RESTORE_INHIBIT).
 */
static inline int mi_set_context(struct i915_request *rq, u32 flags)
{
	struct drm_i915_private *i915 = rq->i915;
	struct intel_engine_cs *engine = rq->engine;
	enum intel_engine_id id;
	const int num_rings =
		/* Use an extended w/a on gen7 if signalling from other rings */
		(HAS_LEGACY_SEMAPHORES(i915) && IS_GEN7(i915)) ?
		INTEL_INFO(i915)->num_rings - 1 :
		0;
	int len;
	u32 *cs;

	flags |= MI_MM_SPACE_GTT;
	if (IS_HASWELL(i915))
		/* These flags are for resource streamer on HSW+ */
		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
	else
		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;

	/*
	 * 4 dwords for the core sequence, plus on gen7: 2 for the ARB
	 * on/off pair and, per extra ring, the LRI/SRM w/a dwords
	 * (matches the emission below).
	 */
	len = 4;
	if (IS_GEN7(i915))
		len += 2 + (num_rings ? 4*num_rings + 6 : 0);

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (IS_GEN7(i915)) {
		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
		if (num_rings) {
			struct intel_engine_cs *signaller;

			/* Stop the other rings sending sleep messages */
			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
			for_each_engine(signaller, i915, id) {
				if (signaller == engine)
					continue;

				*cs++ = i915_mmio_reg_offset(
					   RING_PSMI_CTL(signaller->mmio_base));
				*cs++ = _MASKED_BIT_ENABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}
		}
	}

	*cs++ = MI_NOOP;
	*cs++ = MI_SET_CONTEXT;
	*cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, engine)->state) | flags;
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	*cs++ = MI_NOOP;

	if (IS_GEN7(i915)) {
		if (num_rings) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = {}; /* keep gcc quiet */

			/* Re-enable the sleep messages disabled above */
			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
			for_each_engine(signaller, i915, id) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				*cs++ = i915_mmio_reg_offset(last_reg);
				*cs++ = _MASKED_BIT_DISABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}

			/* Insert a delay before the next switch! */
			*cs++ = MI_STORE_REGISTER_MEM |
				MI_SRM_LRM_GLOBAL_GTT;
			*cs++ = i915_mmio_reg_offset(last_reg);
			*cs++ = i915_ggtt_offset(engine->scratch);
			*cs++ = MI_NOOP;
		}
		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

/* Replay the saved L3 remapping registers for @slice, if any. */
static int remap_l3(struct i915_request *rq, int slice)
{
	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
	int i;

	if (!remap_info)
		return 0;

	cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
		*cs++ = remap_info[i];
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

/*
 * Switch the engine to the request's ppGTT and hw context, emitting
 * MI_SET_CONTEXT and L3 remaps as needed.  On failure the cached
 * active-context/ppgtt pointers are rolled back so a later attempt
 * retries the full switch.
 */
static int switch_context(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct i915_gem_context *to_ctx = rq->ctx;
	struct i915_hw_ppgtt *to_mm =
		to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
	struct i915_gem_context *from_ctx = engine->legacy_active_context;
	struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt;
	u32 hw_flags = 0;
	int ret, i;

	lockdep_assert_held(&rq->i915->drm.struct_mutex);
	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));

	if (to_mm != from_mm ||
	    (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) {
		trace_switch_mm(engine, to_ctx);
		ret = to_mm->switch_mm(to_mm, rq);
		if (ret)
			goto err;

		to_mm->pd_dirty_rings &= ~intel_engine_flag(engine);
		engine->legacy_active_ppgtt = to_mm;
		hw_flags = MI_FORCE_RESTORE;
	}

	if
	    (to_intel_context(to_ctx, engine)->state &&
	    (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) {
		GEM_BUG_ON(engine->id != RCS);

		/*
		 * The kernel context(s) is treated as pure scratch and is not
		 * expected to retain any state (as we sacrifice it during
		 * suspend and on resume it may be corrupted). This is ok,
		 * as nothing actually executes using the kernel context; it
		 * is purely used for flushing user contexts.
		 */
		if (i915_gem_context_is_kernel(to_ctx))
			hw_flags = MI_RESTORE_INHIBIT;

		ret = mi_set_context(rq, hw_flags);
		if (ret)
			goto err_mm;

		engine->legacy_active_context = to_ctx;
	}

	if (to_ctx->remap_slice) {
		for (i = 0; i < MAX_L3_SLICES; i++) {
			if (!(to_ctx->remap_slice & BIT(i)))
				continue;

			ret = remap_l3(rq, i);
			if (ret)
				goto err_ctx;
		}

		to_ctx->remap_slice = 0;
	}

	return 0;

err_ctx:
	engine->legacy_active_context = from_ctx;
err_mm:
	engine->legacy_active_ppgtt = from_mm;
err:
	return ret;
}

/*
 * Per-request allocation hook for legacy rings: reserve ring space for
 * the request postamble and perform the context switch up front.
 */
static int ring_request_alloc(struct i915_request *request)
{
	int ret;

	GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count);

	/* Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += LEGACY_REQUEST_SIZE;

	ret = intel_ring_wait_for_space(request->ring, request->reserved_space);
	if (ret)
		return ret;

	ret = switch_context(request);
	if (ret)
		return ret;

	request->reserved_space -= LEGACY_REQUEST_SIZE;
	return 0;
}

/*
 * Block (interruptibly, under struct_mutex) until retiring requests on
 * this ring frees at least @bytes of space.
 */
static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
{
	struct i915_request *target;
	long timeout;

	lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex);

	if (intel_ring_update_space(ring) >= bytes)
		return 0;

	GEM_BUG_ON(list_empty(&ring->request_list));
	list_for_each_entry(target, &ring->request_list, ring_link) {
		/* Would completion of this request free enough space? */
		if (bytes <= __intel_ring_space(target->postfix,
						ring->emit, ring->size))
			break;
	}

	if (WARN_ON(&target->ring_link == &ring->request_list))
		return -ENOSPC;

	timeout = i915_request_wait(target,
				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
				    MAX_SCHEDULE_TIMEOUT);
	if (timeout < 0)
		return timeout;

	i915_request_retire_upto(target);

	intel_ring_update_space(ring);
	GEM_BUG_ON(ring->space < bytes);
	return 0;
}

/*
 * Ensure @bytes of contiguous space is available, accounting for the
 * unusable tail (effective_size) forcing an early wrap.
 */
int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes)
{
	GEM_BUG_ON(bytes > ring->effective_size);
	if (unlikely(bytes > ring->effective_size - ring->emit))
		bytes += ring->size - ring->emit;

	if (unlikely(bytes > ring->space)) {
		int ret = wait_for_space(ring, bytes);
		if (unlikely(ret))
			return ret;
	}

	GEM_BUG_ON(ring->space < bytes);
	return 0;
}

/*
 * Reserve @num_dwords (qword-aligned) in the request's ring, waiting for
 * and/or wrapping the ring as required.  Returns a pointer to write the
 * command dwords at, or an ERR_PTR.
 */
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
{
	struct intel_ring *ring = rq->ring;
	const unsigned int remain_usable = ring->effective_size - ring->emit;
	const unsigned int bytes = num_dwords * sizeof(u32);
	unsigned int need_wrap = 0;
	unsigned int total_bytes;
	u32 *cs;

	/* Packets must be qword aligned. */
	GEM_BUG_ON(num_dwords & 1);

	total_bytes = bytes + rq->reserved_space;
	GEM_BUG_ON(total_bytes > ring->effective_size);

	if (unlikely(total_bytes > remain_usable)) {
		const int remain_actual = ring->size - ring->emit;

		if (bytes > remain_usable) {
			/*
			 * Not enough space for the basic request. So need to
			 * flush out the remainder and then wait for
			 * base + reserved.
			 */
			total_bytes += remain_actual;
			/* low bit flags a wrap; high bits = bytes to skip */
			need_wrap = remain_actual | 1;
		} else {
			/*
			 * The base request will fit but the reserved space
			 * falls off the end. So we don't need an immediate
			 * wrap and only need to effectively wait for the
			 * reserved size from the start of ringbuffer.
			 */
			total_bytes = rq->reserved_space + remain_actual;
		}
	}

	if (unlikely(total_bytes > ring->space)) {
		int ret;

		/*
		 * Space is reserved in the ringbuffer for finalising the
		 * request, as that cannot be allowed to fail. During request
		 * finalisation, reserved_space is set to 0 to stop the
		 * overallocation and the assumption is that then we never need
		 * to wait (which has the risk of failing with EINTR).
		 *
		 * See also i915_request_alloc() and i915_request_add().
		 */
		GEM_BUG_ON(!rq->reserved_space);

		ret = wait_for_space(ring, total_bytes);
		if (unlikely(ret))
			return ERR_PTR(ret);
	}

	if (unlikely(need_wrap)) {
		need_wrap &= ~1;
		GEM_BUG_ON(need_wrap > ring->space);
		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
		GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));

		/* Fill the tail with MI_NOOP */
		memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
		ring->space -= need_wrap;
		ring->emit = 0;
	}

	GEM_BUG_ON(ring->emit > ring->size - bytes);
	GEM_BUG_ON(ring->space < bytes);
	cs = ring->vaddr + ring->emit;
	GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
	ring->emit += bytes;
	ring->space -= bytes;

	return cs;
}

/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct i915_request *rq)
{
	int num_dwords;
	void *cs;

	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
	if (num_dwords == 0)
		return 0;

	num_dwords = CACHELINE_DWORDS - num_dwords;
	GEM_BUG_ON(num_dwords & 1);

	cs = intel_ring_begin(rq, num_dwords);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
	intel_ring_advance(rq, cs);

	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
	return 0;
}

/*
 * gen6 BSD tail-write workaround: the ring must be awoken out of rc6
 * (PSMI sleep messages disabled) before the TAIL register is updated,
 * all under forcewake.
 */
static void gen6_bsd_submit_request(struct i915_request *request)
{
	struct drm_i915_private *dev_priv = request->i915;

	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL,
		      _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id.
	   Here be magic! */
	I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (__intel_wait_for_register_fw(dev_priv,
					 GEN6_BSD_SLEEP_PSMI_CONTROL,
					 GEN6_BSD_SLEEP_INDICATOR,
					 0,
					 1000, 0, NULL))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	i9xx_submit_request(request);

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL,
		      _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}

/* Emit an MI_FLUSH_DW for the gen6+ video (BSD) engine. */
static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW;

	/* We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;

	*cs++ = cmd;
	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);
	return 0;
}

/*
 * Haswell batch-buffer start: supports PPGTT dispatch and the resource
 * streamer enable bit.
 */
static int
hsw_emit_bb_start(struct i915_request *rq,
		  u64 offset, u32 len,
		  unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
		0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
		(dispatch_flags & I915_DISPATCH_RS ?
		MI_BATCH_RESOURCE_STREAMER : 0);
	/* bit0-7 is the length on GEN6+ */
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

/* gen6/gen7 batch-buffer start. */
static int
gen6_emit_bb_start(struct i915_request *rq,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
		0 : MI_BATCH_NON_SECURE_I965);
	/* bit0-7 is the length on GEN6+ */
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

/* Blitter support (SandyBridge+) */

/* Emit an MI_FLUSH_DW for the gen6+ blitter (and vebox) engines. */
static int gen6_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW;

	/* We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_INVALIDATE_TLB;
	*cs++ = cmd;
	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

/*
 * Populate the gen6/gen7 legacy semaphore mailbox tables (wait encodings
 * and signal registers) for each peer engine; self entries are INVALID.
 */
static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
				       struct intel_engine_cs *engine)
{
	int i;

	if (!HAS_LEGACY_SEMAPHORES(dev_priv))
		return;

	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
	engine->semaphore.sync_to = gen6_ring_sync_to;
	engine->semaphore.signal = gen6_signal;

	/*
	 * The current semaphore is only applied on pre-gen8
	 * platform. And there is no VCS2 ring on the pre-gen8
	 * platform. So the semaphore between RCS and VCS2 is
	 * initialized as INVALID.
	 */
	for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
		/* wait-mailbox encoding and signal register per (from, to) pair */
		static const struct {
			u32 wait_mbox;
			i915_reg_t mbox_reg;
		} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
			[RCS_HW] = {
				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
			},
			[VCS_HW] = {
				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
			},
			[BCS_HW] = {
				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
			},
			[VECS_HW] = {
				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
			},
		};
		u32 wait_mbox;
		i915_reg_t mbox_reg;

		if (i == engine->hw_id) {
			wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
			mbox_reg = GEN6_NOSYNC;
		} else {
			wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
			mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
		}

		engine->semaphore.mbox.wait[i] = wait_mbox;
		engine->semaphore.mbox.signal[i] = mbox_reg;
	}
}

/* Select the interrupt enable/disable hooks for the hardware generation. */
static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
				struct intel_engine_cs *engine)
{
	if (INTEL_GEN(dev_priv) >= 6) {
		engine->irq_enable = gen6_irq_enable;
		engine->irq_disable = gen6_irq_disable;
		engine->irq_seqno_barrier = gen6_seqno_barrier;
	} else if (INTEL_GEN(dev_priv) >= 5) {
		engine->irq_enable = gen5_irq_enable;
		engine->irq_disable = gen5_irq_disable;
		engine->irq_seqno_barrier = gen5_seqno_barrier;
	} else if (INTEL_GEN(dev_priv) >= 3) {
		engine->irq_enable = i9xx_irq_enable;
		engine->irq_disable = i9xx_irq_disable;
	} else {
		engine->irq_enable = i8xx_irq_enable;
		engine->irq_disable = i8xx_irq_disable;
	}
}

/* Default submission vfuncs for legacy rings (no park/unpark hooks). */
static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = i9xx_submit_request;
	engine->cancel_requests = cancel_requests;

	engine->park = NULL;
	engine->unpark = NULL;
}

/* As i9xx, but with the gen6 BSD tail-write workaround. */
static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
{
	i9xx_set_default_submission(engine);
	engine->submit_request = gen6_bsd_submit_request;
}

/* Install the shared vfuncs common to every legacy ringbuffer engine. */
static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
				      struct intel_engine_cs *engine)
{
	/* gen8+ are only supported with execlists */
	GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);

	intel_ring_init_irq(dev_priv, engine);
	intel_ring_init_semaphores(dev_priv, engine);

	engine->init_hw = init_ring_common;
	engine->reset_hw = reset_ring_common;

	engine->context_pin = intel_ring_context_pin;
	engine->context_unpin = intel_ring_context_unpin;

	engine->request_alloc = ring_request_alloc;

	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
	if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
		int num_rings;

		engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;

		/* 3 extra dwords per peer ring, padded to qword alignment */
		num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
		engine->emit_breadcrumb_sz += num_rings * 3;
		if (num_rings & 1)
			engine->emit_breadcrumb_sz++;
	}

	engine->set_default_submission = i9xx_set_default_submission;

	if (INTEL_GEN(dev_priv) >= 6)
		engine->emit_bb_start = gen6_emit_bb_start;
	else if (INTEL_GEN(dev_priv) >= 4)
		engine->emit_bb_start = i965_emit_bb_start;
	else if (IS_I830(dev_priv) || IS_I845G(dev_priv))
		engine->emit_bb_start = i830_emit_bb_start;
	else
		engine->emit_bb_start = i915_emit_bb_start;
}

/* Initialise the render (RCS) engine in legacy ringbuffer mode. */
int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	intel_ring_default_vfuncs(dev_priv, engine);

	if (HAS_L3_DPF(dev_priv))
		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;

	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;

	if (INTEL_GEN(dev_priv) >= 6) {
		engine->init_context = intel_rcs_ctx_init;
		engine->emit_flush = gen7_render_ring_flush;
		if (IS_GEN6(dev_priv))
			engine->emit_flush = gen6_render_ring_flush;
	} else if (IS_GEN5(dev_priv)) {
		engine->emit_flush = gen4_render_ring_flush;
	} else {
		if (INTEL_GEN(dev_priv) < 4)
			engine->emit_flush = gen2_render_ring_flush;
		else
			engine->emit_flush = gen4_render_ring_flush;
		engine->irq_enable_mask = I915_USER_INTERRUPT;
	}

	if (IS_HASWELL(dev_priv))
		engine->emit_bb_start = hsw_emit_bb_start;

	engine->init_hw = init_render_ring;

	ret = intel_init_ring_buffer(engine);
	if (ret)
		return ret;

	if (INTEL_GEN(dev_priv) >= 6) {
		ret = intel_engine_create_scratch(engine, PAGE_SIZE);
		if (ret)
			return ret;
	} else if (HAS_BROKEN_CS_TLB(dev_priv)) {
		ret = intel_engine_create_scratch(engine, I830_WA_SIZE);
		if (ret)
			return ret;
	}

	return 0;
}

/* Initialise the video (VCS/BSD) engine in legacy ringbuffer mode. */
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	intel_ring_default_vfuncs(dev_priv, engine);

	if (INTEL_GEN(dev_priv) >= 6) {
		/* gen6 bsd needs a special wa for tail updates */
		if (IS_GEN6(dev_priv))
			engine->set_default_submission = gen6_bsd_set_default_submission;
		engine->emit_flush = gen6_bsd_ring_flush;
		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
	} else {
		engine->emit_flush = bsd_ring_flush;
		if (IS_GEN5(dev_priv))
			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
		else
			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
	}

	return intel_init_ring_buffer(engine);
}

/* Initialise the blitter (BCS) engine in legacy ringbuffer mode. */
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	intel_ring_default_vfuncs(dev_priv, engine);

	engine->emit_flush = gen6_ring_flush;
	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;

	return intel_init_ring_buffer(engine);
}

/* Initialise the video-enhance (VECS) engine in legacy ringbuffer mode. */
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	intel_ring_default_vfuncs(dev_priv, engine);

	engine->emit_flush = gen6_ring_flush;
	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
	engine->irq_enable = hsw_vebox_irq_enable;
	engine->irq_disable = hsw_vebox_irq_disable;

	return intel_init_ring_buffer(engine);
}