Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v4.19
1/* 2 * Copyright © 2008-2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * Zou Nan hai <nanhai.zou@intel.com> 26 * Xiang Hai hao<haihao.xiang@intel.com> 27 * 28 */ 29 30#include <linux/log2.h> 31 32#include <drm/drmP.h> 33#include <drm/i915_drm.h> 34 35#include "i915_drv.h" 36#include "i915_gem_render_state.h" 37#include "i915_trace.h" 38#include "intel_drv.h" 39#include "intel_workarounds.h" 40 41/* Rough estimate of the typical request size, performing a flush, 42 * set-context and then emitting the batch. 43 */ 44#define LEGACY_REQUEST_SIZE 200 45 46static unsigned int __intel_ring_space(unsigned int head, 47 unsigned int tail, 48 unsigned int size) 49{ 50 /* 51 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the 52 * same cacheline, the Head Pointer must not be greater than the Tail 53 * Pointer." 54 */ 55 GEM_BUG_ON(!is_power_of_2(size)); 56 return (head - tail - CACHELINE_BYTES) & (size - 1); 57} 58 59unsigned int intel_ring_update_space(struct intel_ring *ring) 60{ 61 unsigned int space; 62 63 space = __intel_ring_space(ring->head, ring->emit, ring->size); 64 65 ring->space = space; 66 return space; 67} 68 69static int 70gen2_render_ring_flush(struct i915_request *rq, u32 mode) 71{ 72 u32 cmd, *cs; 73 74 cmd = MI_FLUSH; 75 76 if (mode & EMIT_INVALIDATE) 77 cmd |= MI_READ_FLUSH; 78 79 cs = intel_ring_begin(rq, 2); 80 if (IS_ERR(cs)) 81 return PTR_ERR(cs); 82 83 *cs++ = cmd; 84 *cs++ = MI_NOOP; 85 intel_ring_advance(rq, cs); 86 87 return 0; 88} 89 90static int 91gen4_render_ring_flush(struct i915_request *rq, u32 mode) 92{ 93 u32 cmd, *cs; 94 95 /* 96 * read/write caches: 97 * 98 * I915_GEM_DOMAIN_RENDER is always invalidated, but is 99 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is 100 * also flushed at 2d versus 3d pipeline switches. 101 * 102 * read-only caches: 103 * 104 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if 105 * MI_READ_FLUSH is set, and is always flushed on 965. 106 * 107 * I915_GEM_DOMAIN_COMMAND may not exist? 108 * 109 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is 110 * invalidated when MI_EXE_FLUSH is set. 111 * 112 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is 113 * invalidated with every MI_FLUSH. 
114 * 115 * TLBs: 116 * 117 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND 118 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and 119 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER 120 * are flushed at any MI_FLUSH. 121 */ 122 123 cmd = MI_FLUSH; 124 if (mode & EMIT_INVALIDATE) { 125 cmd |= MI_EXE_FLUSH; 126 if (IS_G4X(rq->i915) || IS_GEN5(rq->i915)) 127 cmd |= MI_INVALIDATE_ISP; 128 } 129 130 cs = intel_ring_begin(rq, 2); 131 if (IS_ERR(cs)) 132 return PTR_ERR(cs); 133 134 *cs++ = cmd; 135 *cs++ = MI_NOOP; 136 intel_ring_advance(rq, cs); 137 138 return 0; 139} 140 141/* 142 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for 143 * implementing two workarounds on gen6. From section 1.4.7.1 144 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: 145 * 146 * [DevSNB-C+{W/A}] Before any depth stall flush (including those 147 * produced by non-pipelined state commands), software needs to first 148 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 149 * 0. 150 * 151 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable 152 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. 153 * 154 * And the workaround for these two requires this workaround first: 155 * 156 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent 157 * BEFORE the pipe-control with a post-sync op and no write-cache 158 * flushes. 159 * 160 * And this last workaround is tricky because of the requirements on 161 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM 162 * volume 2 part 1: 163 * 164 * "1 of the following must also be set: 165 * - Render Target Cache Flush Enable ([12] of DW1) 166 * - Depth Cache Flush Enable ([0] of DW1) 167 * - Stall at Pixel Scoreboard ([1] of DW1) 168 * - Depth Stall ([13] of DW1) 169 * - Post-Sync Operation ([13] of DW1) 170 * - Notify Enable ([8] of DW1)" 171 * 172 * The cache flushes require the workaround flush that triggered this 173 * one, so we can't use it. Depth stall would trigger the same. 174 * Post-sync nonzero is what triggered this second workaround, so we 175 * can't use that one either. Notify enable is IRQs, which aren't 176 * really our business. That leaves only stall at scoreboard. 177 */ 178static int 179intel_emit_post_sync_nonzero_flush(struct i915_request *rq) 180{ 181 u32 scratch_addr = 182 i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; 183 u32 *cs; 184 185 cs = intel_ring_begin(rq, 6); 186 if (IS_ERR(cs)) 187 return PTR_ERR(cs); 188 189 *cs++ = GFX_OP_PIPE_CONTROL(5); 190 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; 191 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; 192 *cs++ = 0; /* low dword */ 193 *cs++ = 0; /* high dword */ 194 *cs++ = MI_NOOP; 195 intel_ring_advance(rq, cs); 196 197 cs = intel_ring_begin(rq, 6); 198 if (IS_ERR(cs)) 199 return PTR_ERR(cs); 200 201 *cs++ = GFX_OP_PIPE_CONTROL(5); 202 *cs++ = PIPE_CONTROL_QW_WRITE; 203 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; 204 *cs++ = 0; 205 *cs++ = 0; 206 *cs++ = MI_NOOP; 207 intel_ring_advance(rq, cs); 208 209 return 0; 210} 211 212static int 213gen6_render_ring_flush(struct i915_request *rq, u32 mode) 214{ 215 u32 scratch_addr = 216 i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; 217 u32 *cs, flags = 0; 218 int ret; 219 220 /* Force SNB workarounds for PIPE_CONTROL flushes */ 221 ret = intel_emit_post_sync_nonzero_flush(rq); 222 if (ret) 223 return ret; 224 225 /* Just flush everything. 
Experiments have shown that reducing the 226 * number of bits based on the write domains has little performance 227 * impact. 228 */ 229 if (mode & EMIT_FLUSH) { 230 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 231 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 232 /* 233 * Ensure that any following seqno writes only happen 234 * when the render cache is indeed flushed. 235 */ 236 flags |= PIPE_CONTROL_CS_STALL; 237 } 238 if (mode & EMIT_INVALIDATE) { 239 flags |= PIPE_CONTROL_TLB_INVALIDATE; 240 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 241 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 242 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 243 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 244 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 245 /* 246 * TLB invalidate requires a post-sync write. 247 */ 248 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; 249 } 250 251 cs = intel_ring_begin(rq, 4); 252 if (IS_ERR(cs)) 253 return PTR_ERR(cs); 254 255 *cs++ = GFX_OP_PIPE_CONTROL(4); 256 *cs++ = flags; 257 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; 258 *cs++ = 0; 259 intel_ring_advance(rq, cs); 260 261 return 0; 262} 263 264static int 265gen7_render_ring_cs_stall_wa(struct i915_request *rq) 266{ 267 u32 *cs; 268 269 cs = intel_ring_begin(rq, 4); 270 if (IS_ERR(cs)) 271 return PTR_ERR(cs); 272 273 *cs++ = GFX_OP_PIPE_CONTROL(4); 274 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; 275 *cs++ = 0; 276 *cs++ = 0; 277 intel_ring_advance(rq, cs); 278 279 return 0; 280} 281 282static int 283gen7_render_ring_flush(struct i915_request *rq, u32 mode) 284{ 285 u32 scratch_addr = 286 i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; 287 u32 *cs, flags = 0; 288 289 /* 290 * Ensure that any following seqno writes only happen when the render 291 * cache is indeed flushed. 292 * 293 * Workaround: 4th PIPE_CONTROL command (except the ones with only 294 * read-cache invalidate bits set) must have the CS_STALL bit set. We 295 * don't try to be clever and just set it unconditionally. 296 */ 297 flags |= PIPE_CONTROL_CS_STALL; 298 299 /* Just flush everything. Experiments have shown that reducing the 300 * number of bits based on the write domains has little performance 301 * impact. 302 */ 303 if (mode & EMIT_FLUSH) { 304 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 305 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 306 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 307 flags |= PIPE_CONTROL_FLUSH_ENABLE; 308 } 309 if (mode & EMIT_INVALIDATE) { 310 flags |= PIPE_CONTROL_TLB_INVALIDATE; 311 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 312 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 313 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 314 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 315 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 316 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; 317 /* 318 * TLB invalidate requires a post-sync write. 319 */ 320 flags |= PIPE_CONTROL_QW_WRITE; 321 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; 322 323 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; 324 325 /* Workaround: we must issue a pipe_control with CS-stall bit 326 * set before a pipe_control command that has the state cache 327 * invalidate bit set. 
*/ 328 gen7_render_ring_cs_stall_wa(rq); 329 } 330 331 cs = intel_ring_begin(rq, 4); 332 if (IS_ERR(cs)) 333 return PTR_ERR(cs); 334 335 *cs++ = GFX_OP_PIPE_CONTROL(4); 336 *cs++ = flags; 337 *cs++ = scratch_addr; 338 *cs++ = 0; 339 intel_ring_advance(rq, cs); 340 341 return 0; 342} 343 344static void ring_setup_phys_status_page(struct intel_engine_cs *engine) 345{ 346 struct drm_i915_private *dev_priv = engine->i915; 347 u32 addr; 348 349 addr = dev_priv->status_page_dmah->busaddr; 350 if (INTEL_GEN(dev_priv) >= 4) 351 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0; 352 I915_WRITE(HWS_PGA, addr); 353} 354 355static void intel_ring_setup_status_page(struct intel_engine_cs *engine) 356{ 357 struct drm_i915_private *dev_priv = engine->i915; 358 i915_reg_t mmio; 359 360 /* The ring status page addresses are no longer next to the rest of 361 * the ring registers as of gen7. 362 */ 363 if (IS_GEN7(dev_priv)) { 364 switch (engine->id) { 365 /* 366 * No more rings exist on Gen7. Default case is only to shut up 367 * gcc switch check warning. 368 */ 369 default: 370 GEM_BUG_ON(engine->id); 371 case RCS: 372 mmio = RENDER_HWS_PGA_GEN7; 373 break; 374 case BCS: 375 mmio = BLT_HWS_PGA_GEN7; 376 break; 377 case VCS: 378 mmio = BSD_HWS_PGA_GEN7; 379 break; 380 case VECS: 381 mmio = VEBOX_HWS_PGA_GEN7; 382 break; 383 } 384 } else if (IS_GEN6(dev_priv)) { 385 mmio = RING_HWS_PGA_GEN6(engine->mmio_base); 386 } else { 387 mmio = RING_HWS_PGA(engine->mmio_base); 388 } 389 390 if (INTEL_GEN(dev_priv) >= 6) { 391 u32 mask = ~0u; 392 393 /* 394 * Keep the render interrupt unmasked as this papers over 395 * lost interrupts following a reset. 396 */ 397 if (engine->id == RCS) 398 mask &= ~BIT(0); 399 400 I915_WRITE(RING_HWSTAM(engine->mmio_base), mask); 401 } 402 403 I915_WRITE(mmio, engine->status_page.ggtt_offset); 404 POSTING_READ(mmio); 405 406 /* Flush the TLB for this page */ 407 if (IS_GEN(dev_priv, 6, 7)) { 408 i915_reg_t reg = RING_INSTPM(engine->mmio_base); 409 410 /* ring should be idle before issuing a sync flush*/ 411 WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0); 412 413 I915_WRITE(reg, 414 _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | 415 INSTPM_SYNC_FLUSH)); 416 if (intel_wait_for_register(dev_priv, 417 reg, INSTPM_SYNC_FLUSH, 0, 418 1000)) 419 DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", 420 engine->name); 421 } 422} 423 424static bool stop_ring(struct intel_engine_cs *engine) 425{ 426 struct drm_i915_private *dev_priv = engine->i915; 427 428 if (INTEL_GEN(dev_priv) > 2) { 429 I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); 430 if (intel_wait_for_register(dev_priv, 431 RING_MI_MODE(engine->mmio_base), 432 MODE_IDLE, 433 MODE_IDLE, 434 1000)) { 435 DRM_ERROR("%s : timed out trying to stop ring\n", 436 engine->name); 437 /* Sometimes we observe that the idle flag is not 438 * set even though the ring is empty. So double 439 * check before giving up. 
440 */ 441 if (I915_READ_HEAD(engine) != I915_READ_TAIL(engine)) 442 return false; 443 } 444 } 445 446 I915_WRITE_HEAD(engine, I915_READ_TAIL(engine)); 447 448 I915_WRITE_HEAD(engine, 0); 449 I915_WRITE_TAIL(engine, 0); 450 451 /* The ring must be empty before it is disabled */ 452 I915_WRITE_CTL(engine, 0); 453 454 return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; 455} 456 457static int init_ring_common(struct intel_engine_cs *engine) 458{ 459 struct drm_i915_private *dev_priv = engine->i915; 460 struct intel_ring *ring = engine->buffer; 461 int ret = 0; 462 463 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 464 465 if (!stop_ring(engine)) { 466 /* G45 ring initialization often fails to reset head to zero */ 467 DRM_DEBUG_DRIVER("%s head not reset to zero " 468 "ctl %08x head %08x tail %08x start %08x\n", 469 engine->name, 470 I915_READ_CTL(engine), 471 I915_READ_HEAD(engine), 472 I915_READ_TAIL(engine), 473 I915_READ_START(engine)); 474 475 if (!stop_ring(engine)) { 476 DRM_ERROR("failed to set %s head to zero " 477 "ctl %08x head %08x tail %08x start %08x\n", 478 engine->name, 479 I915_READ_CTL(engine), 480 I915_READ_HEAD(engine), 481 I915_READ_TAIL(engine), 482 I915_READ_START(engine)); 483 ret = -EIO; 484 goto out; 485 } 486 } 487 488 if (HWS_NEEDS_PHYSICAL(dev_priv)) 489 ring_setup_phys_status_page(engine); 490 else 491 intel_ring_setup_status_page(engine); 492 493 intel_engine_reset_breadcrumbs(engine); 494 495 /* Enforce ordering by reading HEAD register back */ 496 I915_READ_HEAD(engine); 497 498 /* Initialize the ring. This must happen _after_ we've cleared the ring 499 * registers with the above sequence (the readback of the HEAD registers 500 * also enforces ordering), otherwise the hw might lose the new ring 501 * register values. */ 502 I915_WRITE_START(engine, i915_ggtt_offset(ring->vma)); 503 504 /* WaClearRingBufHeadRegAtInit:ctg,elk */ 505 if (I915_READ_HEAD(engine)) 506 DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n", 507 engine->name, I915_READ_HEAD(engine)); 508 509 /* Check that the ring offsets point within the ring! */ 510 GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); 511 GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); 512 513 intel_ring_update_space(ring); 514 I915_WRITE_HEAD(engine, ring->head); 515 I915_WRITE_TAIL(engine, ring->tail); 516 (void)I915_READ_TAIL(engine); 517 518 I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID); 519 520 /* If the head is still not zero, the ring is dead */ 521 if (intel_wait_for_register(dev_priv, RING_CTL(engine->mmio_base), 522 RING_VALID, RING_VALID, 523 50)) { 524 DRM_ERROR("%s initialization failed " 525 "ctl %08x (valid? 
%d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", 526 engine->name, 527 I915_READ_CTL(engine), 528 I915_READ_CTL(engine) & RING_VALID, 529 I915_READ_HEAD(engine), ring->head, 530 I915_READ_TAIL(engine), ring->tail, 531 I915_READ_START(engine), 532 i915_ggtt_offset(ring->vma)); 533 ret = -EIO; 534 goto out; 535 } 536 537 if (INTEL_GEN(dev_priv) > 2) 538 I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); 539 540out: 541 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 542 543 return ret; 544} 545 546static struct i915_request *reset_prepare(struct intel_engine_cs *engine) 547{ 548 intel_engine_stop_cs(engine); 549 550 if (engine->irq_seqno_barrier) 551 engine->irq_seqno_barrier(engine); 552 553 return i915_gem_find_active_request(engine); 554} 555 556static void skip_request(struct i915_request *rq) 557{ 558 void *vaddr = rq->ring->vaddr; 559 u32 head; 560 561 head = rq->infix; 562 if (rq->postfix < head) { 563 memset32(vaddr + head, MI_NOOP, 564 (rq->ring->size - head) / sizeof(u32)); 565 head = 0; 566 } 567 memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); 568} 569 570static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) 571{ 572 GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0); 573 574 /* 575 * Try to restore the logical GPU state to match the continuation 576 * of the request queue. If we skip the context/PD restore, then 577 * the next request may try to execute assuming that its context 578 * is valid and loaded on the GPU and so may try to access invalid 579 * memory, prompting repeated GPU hangs. 580 * 581 * If the request was guilty, we still restore the logical state 582 * in case the next request requires it (e.g. the aliasing ppgtt), 583 * but skip over the hung batch. 584 * 585 * If the request was innocent, we try to replay the request with 586 * the restored context. 587 */ 588 if (rq) { 589 /* If the rq hung, jump to its breadcrumb and skip the batch */ 590 rq->ring->head = intel_ring_wrap(rq->ring, rq->head); 591 if (rq->fence.error == -EIO) 592 skip_request(rq); 593 } 594} 595 596static void reset_finish(struct intel_engine_cs *engine) 597{ 598} 599 600static int intel_rcs_ctx_init(struct i915_request *rq) 601{ 602 int ret; 603 604 ret = intel_ctx_workarounds_emit(rq); 605 if (ret != 0) 606 return ret; 607 608 ret = i915_gem_render_state_emit(rq); 609 if (ret) 610 return ret; 611 612 return 0; 613} 614 615static int init_render_ring(struct intel_engine_cs *engine) 616{ 617 struct drm_i915_private *dev_priv = engine->i915; 618 int ret = init_ring_common(engine); 619 if (ret) 620 return ret; 621 622 intel_whitelist_workarounds_apply(engine); 623 624 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ 625 if (IS_GEN(dev_priv, 4, 6)) 626 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); 627 628 /* We need to disable the AsyncFlip performance optimisations in order 629 * to use MI_WAIT_FOR_EVENT within the CS. It should already be 630 * programmed to '1' on all products. 
631 * 632 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv 633 */ 634 if (IS_GEN(dev_priv, 6, 7)) 635 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); 636 637 /* Required for the hardware to program scanline values for waiting */ 638 /* WaEnableFlushTlbInvalidationMode:snb */ 639 if (IS_GEN6(dev_priv)) 640 I915_WRITE(GFX_MODE, 641 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT)); 642 643 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ 644 if (IS_GEN7(dev_priv)) 645 I915_WRITE(GFX_MODE_GEN7, 646 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) | 647 _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); 648 649 if (IS_GEN6(dev_priv)) { 650 /* From the Sandybridge PRM, volume 1 part 3, page 24: 651 * "If this bit is set, STCunit will have LRA as replacement 652 * policy. [...] This bit must be reset. LRA replacement 653 * policy is not supported." 654 */ 655 I915_WRITE(CACHE_MODE_0, 656 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 657 } 658 659 if (IS_GEN(dev_priv, 6, 7)) 660 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); 661 662 if (INTEL_GEN(dev_priv) >= 6) 663 I915_WRITE_IMR(engine, ~engine->irq_keep_mask); 664 665 return 0; 666} 667 668static u32 *gen6_signal(struct i915_request *rq, u32 *cs) 669{ 670 struct drm_i915_private *dev_priv = rq->i915; 671 struct intel_engine_cs *engine; 672 enum intel_engine_id id; 673 int num_rings = 0; 674 675 for_each_engine(engine, dev_priv, id) { 676 i915_reg_t mbox_reg; 677 678 if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) 679 continue; 680 681 mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id]; 682 if (i915_mmio_reg_valid(mbox_reg)) { 683 *cs++ = MI_LOAD_REGISTER_IMM(1); 684 *cs++ = i915_mmio_reg_offset(mbox_reg); 685 *cs++ = rq->global_seqno; 686 num_rings++; 687 } 688 } 689 if (num_rings & 1) 690 *cs++ = MI_NOOP; 691 692 return cs; 693} 694 695static void cancel_requests(struct intel_engine_cs *engine) 696{ 697 struct i915_request *request; 698 unsigned long flags; 699 700 spin_lock_irqsave(&engine->timeline.lock, flags); 701 702 /* Mark all submitted requests as skipped. 
*/ 703 list_for_each_entry(request, &engine->timeline.requests, link) { 704 GEM_BUG_ON(!request->global_seqno); 705 if (!i915_request_completed(request)) 706 dma_fence_set_error(&request->fence, -EIO); 707 } 708 /* Remaining _unready_ requests will be nop'ed when submitted */ 709 710 spin_unlock_irqrestore(&engine->timeline.lock, flags); 711} 712 713static void i9xx_submit_request(struct i915_request *request) 714{ 715 struct drm_i915_private *dev_priv = request->i915; 716 717 i915_request_submit(request); 718 719 I915_WRITE_TAIL(request->engine, 720 intel_ring_set_tail(request->ring, request->tail)); 721} 722 723static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) 724{ 725 *cs++ = MI_STORE_DWORD_INDEX; 726 *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; 727 *cs++ = rq->global_seqno; 728 *cs++ = MI_USER_INTERRUPT; 729 730 rq->tail = intel_ring_offset(rq, cs); 731 assert_ring_tail_valid(rq->ring, rq->tail); 732} 733 734static const int i9xx_emit_breadcrumb_sz = 4; 735 736static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs) 737{ 738 return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs)); 739} 740 741static int 742gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal) 743{ 744 u32 dw1 = MI_SEMAPHORE_MBOX | 745 MI_SEMAPHORE_COMPARE | 746 MI_SEMAPHORE_REGISTER; 747 u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id]; 748 u32 *cs; 749 750 WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); 751 752 cs = intel_ring_begin(rq, 4); 753 if (IS_ERR(cs)) 754 return PTR_ERR(cs); 755 756 *cs++ = dw1 | wait_mbox; 757 /* Throughout all of the GEM code, seqno passed implies our current 758 * seqno is >= the last seqno executed. However for hardware the 759 * comparison is strictly greater than. 760 */ 761 *cs++ = signal->global_seqno - 1; 762 *cs++ = 0; 763 *cs++ = MI_NOOP; 764 intel_ring_advance(rq, cs); 765 766 return 0; 767} 768 769static void 770gen5_seqno_barrier(struct intel_engine_cs *engine) 771{ 772 /* MI_STORE are internally buffered by the GPU and not flushed 773 * either by MI_FLUSH or SyncFlush or any other combination of 774 * MI commands. 775 * 776 * "Only the submission of the store operation is guaranteed. 777 * The write result will be complete (coherent) some time later 778 * (this is practically a finite period but there is no guaranteed 779 * latency)." 780 * 781 * Empirically, we observe that we need a delay of at least 75us to 782 * be sure that the seqno write is visible by the CPU. 783 */ 784 usleep_range(125, 250); 785} 786 787static void 788gen6_seqno_barrier(struct intel_engine_cs *engine) 789{ 790 struct drm_i915_private *dev_priv = engine->i915; 791 792 /* Workaround to force correct ordering between irq and seqno writes on 793 * ivb (and maybe also on snb) by reading from a CS register (like 794 * ACTHD) before reading the status page. 795 * 796 * Note that this effectively stalls the read by the time it takes to 797 * do a memory transaction, which more or less ensures that the write 798 * from the GPU has sufficient time to invalidate the CPU cacheline. 799 * Alternatively we could delay the interrupt from the CS ring to give 800 * the write time to land, but that would incur a delay after every 801 * batch i.e. much more frequent than a delay when waiting for the 802 * interrupt (with the same net latency). 803 * 804 * Also note that to prevent whole machine hangs on gen7, we have to 805 * take the spinlock to guard against concurrent cacheline access. 
806 */ 807 spin_lock_irq(&dev_priv->uncore.lock); 808 POSTING_READ_FW(RING_ACTHD(engine->mmio_base)); 809 spin_unlock_irq(&dev_priv->uncore.lock); 810} 811 812static void 813gen5_irq_enable(struct intel_engine_cs *engine) 814{ 815 gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask); 816} 817 818static void 819gen5_irq_disable(struct intel_engine_cs *engine) 820{ 821 gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask); 822} 823 824static void 825i9xx_irq_enable(struct intel_engine_cs *engine) 826{ 827 struct drm_i915_private *dev_priv = engine->i915; 828 829 dev_priv->irq_mask &= ~engine->irq_enable_mask; 830 I915_WRITE(IMR, dev_priv->irq_mask); 831 POSTING_READ_FW(RING_IMR(engine->mmio_base)); 832} 833 834static void 835i9xx_irq_disable(struct intel_engine_cs *engine) 836{ 837 struct drm_i915_private *dev_priv = engine->i915; 838 839 dev_priv->irq_mask |= engine->irq_enable_mask; 840 I915_WRITE(IMR, dev_priv->irq_mask); 841} 842 843static void 844i8xx_irq_enable(struct intel_engine_cs *engine) 845{ 846 struct drm_i915_private *dev_priv = engine->i915; 847 848 dev_priv->irq_mask &= ~engine->irq_enable_mask; 849 I915_WRITE16(IMR, dev_priv->irq_mask); 850 POSTING_READ16(RING_IMR(engine->mmio_base)); 851} 852 853static void 854i8xx_irq_disable(struct intel_engine_cs *engine) 855{ 856 struct drm_i915_private *dev_priv = engine->i915; 857 858 dev_priv->irq_mask |= engine->irq_enable_mask; 859 I915_WRITE16(IMR, dev_priv->irq_mask); 860} 861 862static int 863bsd_ring_flush(struct i915_request *rq, u32 mode) 864{ 865 u32 *cs; 866 867 cs = intel_ring_begin(rq, 2); 868 if (IS_ERR(cs)) 869 return PTR_ERR(cs); 870 871 *cs++ = MI_FLUSH; 872 *cs++ = MI_NOOP; 873 intel_ring_advance(rq, cs); 874 return 0; 875} 876 877static void 878gen6_irq_enable(struct intel_engine_cs *engine) 879{ 880 struct drm_i915_private *dev_priv = engine->i915; 881 882 I915_WRITE_IMR(engine, 883 ~(engine->irq_enable_mask | 884 engine->irq_keep_mask)); 885 gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask); 886} 887 888static void 889gen6_irq_disable(struct intel_engine_cs *engine) 890{ 891 struct drm_i915_private *dev_priv = engine->i915; 892 893 I915_WRITE_IMR(engine, ~engine->irq_keep_mask); 894 gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask); 895} 896 897static void 898hsw_vebox_irq_enable(struct intel_engine_cs *engine) 899{ 900 struct drm_i915_private *dev_priv = engine->i915; 901 902 I915_WRITE_IMR(engine, ~engine->irq_enable_mask); 903 gen6_unmask_pm_irq(dev_priv, engine->irq_enable_mask); 904} 905 906static void 907hsw_vebox_irq_disable(struct intel_engine_cs *engine) 908{ 909 struct drm_i915_private *dev_priv = engine->i915; 910 911 I915_WRITE_IMR(engine, ~0); 912 gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask); 913} 914 915static int 916i965_emit_bb_start(struct i915_request *rq, 917 u64 offset, u32 length, 918 unsigned int dispatch_flags) 919{ 920 u32 *cs; 921 922 cs = intel_ring_begin(rq, 2); 923 if (IS_ERR(cs)) 924 return PTR_ERR(cs); 925 926 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & 927 I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE_I965); 928 *cs++ = offset; 929 intel_ring_advance(rq, cs); 930 931 return 0; 932} 933 934/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ 935#define I830_BATCH_LIMIT (256*1024) 936#define I830_TLB_ENTRIES (2) 937#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) 938static int 939i830_emit_bb_start(struct i915_request *rq, 940 u64 offset, u32 len, 941 unsigned int dispatch_flags) 942{ 943 u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch); 944 945 cs = intel_ring_begin(rq, 6); 946 if (IS_ERR(cs)) 947 return PTR_ERR(cs); 948 949 /* Evict the invalid PTE TLBs */ 950 *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; 951 *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; 952 *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ 953 *cs++ = cs_offset; 954 *cs++ = 0xdeadbeef; 955 *cs++ = MI_NOOP; 956 intel_ring_advance(rq, cs); 957 958 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { 959 if (len > I830_BATCH_LIMIT) 960 return -ENOSPC; 961 962 cs = intel_ring_begin(rq, 6 + 2); 963 if (IS_ERR(cs)) 964 return PTR_ERR(cs); 965 966 /* Blit the batch (which has now all relocs applied) to the 967 * stable batch scratch bo area (so that the CS never 968 * stumbles over its tlb invalidation bug) ... 969 */ 970 *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; 971 *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; 972 *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; 973 *cs++ = cs_offset; 974 *cs++ = 4096; 975 *cs++ = offset; 976 977 *cs++ = MI_FLUSH; 978 *cs++ = MI_NOOP; 979 intel_ring_advance(rq, cs); 980 981 /* ... and execute it. */ 982 offset = cs_offset; 983 } 984 985 cs = intel_ring_begin(rq, 2); 986 if (IS_ERR(cs)) 987 return PTR_ERR(cs); 988 989 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; 990 *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : 991 MI_BATCH_NON_SECURE); 992 intel_ring_advance(rq, cs); 993 994 return 0; 995} 996 997static int 998i915_emit_bb_start(struct i915_request *rq, 999 u64 offset, u32 len, 1000 unsigned int dispatch_flags) 1001{ 1002 u32 *cs; 1003 1004 cs = intel_ring_begin(rq, 2); 1005 if (IS_ERR(cs)) 1006 return PTR_ERR(cs); 1007 1008 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; 1009 *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : 1010 MI_BATCH_NON_SECURE); 1011 intel_ring_advance(rq, cs); 1012 1013 return 0; 1014} 1015 1016 1017 1018int intel_ring_pin(struct intel_ring *ring, 1019 struct drm_i915_private *i915, 1020 unsigned int offset_bias) 1021{ 1022 enum i915_map_type map = HAS_LLC(i915) ? 
I915_MAP_WB : I915_MAP_WC; 1023 struct i915_vma *vma = ring->vma; 1024 unsigned int flags; 1025 void *addr; 1026 int ret; 1027 1028 GEM_BUG_ON(ring->vaddr); 1029 1030 1031 flags = PIN_GLOBAL; 1032 if (offset_bias) 1033 flags |= PIN_OFFSET_BIAS | offset_bias; 1034 if (vma->obj->stolen) 1035 flags |= PIN_MAPPABLE; 1036 else 1037 flags |= PIN_HIGH; 1038 1039 if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { 1040 if (flags & PIN_MAPPABLE || map == I915_MAP_WC) 1041 ret = i915_gem_object_set_to_gtt_domain(vma->obj, true); 1042 else 1043 ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); 1044 if (unlikely(ret)) 1045 return ret; 1046 } 1047 1048 ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags); 1049 if (unlikely(ret)) 1050 return ret; 1051 1052 if (i915_vma_is_map_and_fenceable(vma)) 1053 addr = (void __force *)i915_vma_pin_iomap(vma); 1054 else 1055 addr = i915_gem_object_pin_map(vma->obj, map); 1056 if (IS_ERR(addr)) 1057 goto err; 1058 1059 vma->obj->pin_global++; 1060 1061 ring->vaddr = addr; 1062 return 0; 1063 1064err: 1065 i915_vma_unpin(vma); 1066 return PTR_ERR(addr); 1067} 1068 1069void intel_ring_reset(struct intel_ring *ring, u32 tail) 1070{ 1071 GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); 1072 1073 ring->tail = tail; 1074 ring->head = tail; 1075 ring->emit = tail; 1076 intel_ring_update_space(ring); 1077} 1078 1079void intel_ring_unpin(struct intel_ring *ring) 1080{ 1081 GEM_BUG_ON(!ring->vma); 1082 GEM_BUG_ON(!ring->vaddr); 1083 1084 /* Discard any unused bytes beyond that submitted to hw. */ 1085 intel_ring_reset(ring, ring->tail); 1086 1087 if (i915_vma_is_map_and_fenceable(ring->vma)) 1088 i915_vma_unpin_iomap(ring->vma); 1089 else 1090 i915_gem_object_unpin_map(ring->vma->obj); 1091 ring->vaddr = NULL; 1092 1093 ring->vma->obj->pin_global--; 1094 i915_vma_unpin(ring->vma); 1095} 1096 1097static struct i915_vma * 1098intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) 1099{ 1100 struct i915_address_space *vm = &dev_priv->ggtt.vm; 1101 struct drm_i915_gem_object *obj; 1102 struct i915_vma *vma; 1103 1104 obj = i915_gem_object_create_stolen(dev_priv, size); 1105 if (!obj) 1106 obj = i915_gem_object_create_internal(dev_priv, size); 1107 if (IS_ERR(obj)) 1108 return ERR_CAST(obj); 1109 1110 /* 1111 * Mark ring buffers as read-only from GPU side (so no stray overwrites) 1112 * if supported by the platform's GGTT. 1113 */ 1114 if (vm->has_read_only) 1115 i915_gem_object_set_readonly(obj); 1116 1117 vma = i915_vma_instance(obj, vm, NULL); 1118 if (IS_ERR(vma)) 1119 goto err; 1120 1121 return vma; 1122 1123err: 1124 i915_gem_object_put(obj); 1125 return vma; 1126} 1127 1128struct intel_ring * 1129intel_engine_create_ring(struct intel_engine_cs *engine, 1130 struct i915_timeline *timeline, 1131 int size) 1132{ 1133 struct intel_ring *ring; 1134 struct i915_vma *vma; 1135 1136 GEM_BUG_ON(!is_power_of_2(size)); 1137 GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); 1138 GEM_BUG_ON(timeline == &engine->timeline); 1139 lockdep_assert_held(&engine->i915->drm.struct_mutex); 1140 1141 ring = kzalloc(sizeof(*ring), GFP_KERNEL); 1142 if (!ring) 1143 return ERR_PTR(-ENOMEM); 1144 1145 INIT_LIST_HEAD(&ring->request_list); 1146 ring->timeline = i915_timeline_get(timeline); 1147 1148 ring->size = size; 1149 /* Workaround an erratum on the i830 which causes a hang if 1150 * the TAIL pointer points to within the last 2 cachelines 1151 * of the buffer. 
1152 */ 1153 ring->effective_size = size; 1154 if (IS_I830(engine->i915) || IS_I845G(engine->i915)) 1155 ring->effective_size -= 2 * CACHELINE_BYTES; 1156 1157 intel_ring_update_space(ring); 1158 1159 vma = intel_ring_create_vma(engine->i915, size); 1160 if (IS_ERR(vma)) { 1161 kfree(ring); 1162 return ERR_CAST(vma); 1163 } 1164 ring->vma = vma; 1165 1166 return ring; 1167} 1168 1169void 1170intel_ring_free(struct intel_ring *ring) 1171{ 1172 struct drm_i915_gem_object *obj = ring->vma->obj; 1173 1174 i915_vma_close(ring->vma); 1175 __i915_gem_object_release_unless_active(obj); 1176 1177 i915_timeline_put(ring->timeline); 1178 kfree(ring); 1179} 1180 1181static void intel_ring_context_destroy(struct intel_context *ce) 1182{ 1183 GEM_BUG_ON(ce->pin_count); 1184 1185 if (!ce->state) 1186 return; 1187 1188 GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); 1189 i915_gem_object_put(ce->state->obj); 1190} 1191 1192static int __context_pin_ppgtt(struct i915_gem_context *ctx) 1193{ 1194 struct i915_hw_ppgtt *ppgtt; 1195 int err = 0; 1196 1197 ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt; 1198 if (ppgtt) 1199 err = gen6_ppgtt_pin(ppgtt); 1200 1201 return err; 1202} 1203 1204static void __context_unpin_ppgtt(struct i915_gem_context *ctx) 1205{ 1206 struct i915_hw_ppgtt *ppgtt; 1207 1208 ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt; 1209 if (ppgtt) 1210 gen6_ppgtt_unpin(ppgtt); 1211} 1212 1213static int __context_pin(struct intel_context *ce) 1214{ 1215 struct i915_vma *vma; 1216 int err; 1217 1218 vma = ce->state; 1219 if (!vma) 1220 return 0; 1221 1222 /* 1223 * Clear this page out of any CPU caches for coherent swap-in/out. 1224 * We only want to do this on the first bind so that we do not stall 1225 * on an active context (which by nature is already on the GPU). 1226 */ 1227 if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { 1228 err = i915_gem_object_set_to_gtt_domain(vma->obj, true); 1229 if (err) 1230 return err; 1231 } 1232 1233 err = i915_vma_pin(vma, 0, I915_GTT_MIN_ALIGNMENT, 1234 PIN_GLOBAL | PIN_HIGH); 1235 if (err) 1236 return err; 1237 1238 /* 1239 * And mark is as a globally pinned object to let the shrinker know 1240 * it cannot reclaim the object until we release it. 
1241 */ 1242 vma->obj->pin_global++; 1243 1244 return 0; 1245} 1246 1247static void __context_unpin(struct intel_context *ce) 1248{ 1249 struct i915_vma *vma; 1250 1251 vma = ce->state; 1252 if (!vma) 1253 return; 1254 1255 vma->obj->pin_global--; 1256 i915_vma_unpin(vma); 1257} 1258 1259static void intel_ring_context_unpin(struct intel_context *ce) 1260{ 1261 __context_unpin_ppgtt(ce->gem_context); 1262 __context_unpin(ce); 1263 1264 i915_gem_context_put(ce->gem_context); 1265} 1266 1267static struct i915_vma * 1268alloc_context_vma(struct intel_engine_cs *engine) 1269{ 1270 struct drm_i915_private *i915 = engine->i915; 1271 struct drm_i915_gem_object *obj; 1272 struct i915_vma *vma; 1273 int err; 1274 1275 obj = i915_gem_object_create(i915, engine->context_size); 1276 if (IS_ERR(obj)) 1277 return ERR_CAST(obj); 1278 1279 if (engine->default_state) { 1280 void *defaults, *vaddr; 1281 1282 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); 1283 if (IS_ERR(vaddr)) { 1284 err = PTR_ERR(vaddr); 1285 goto err_obj; 1286 } 1287 1288 defaults = i915_gem_object_pin_map(engine->default_state, 1289 I915_MAP_WB); 1290 if (IS_ERR(defaults)) { 1291 err = PTR_ERR(defaults); 1292 goto err_map; 1293 } 1294 1295 memcpy(vaddr, defaults, engine->context_size); 1296 1297 i915_gem_object_unpin_map(engine->default_state); 1298 i915_gem_object_unpin_map(obj); 1299 } 1300 1301 /* 1302 * Try to make the context utilize L3 as well as LLC. 1303 * 1304 * On VLV we don't have L3 controls in the PTEs so we 1305 * shouldn't touch the cache level, especially as that 1306 * would make the object snooped which might have a 1307 * negative performance impact. 1308 * 1309 * Snooping is required on non-llc platforms in execlist 1310 * mode, but since all GGTT accesses use PAT entry 0 we 1311 * get snooping anyway regardless of cache_level. 1312 * 1313 * This is only applicable for Ivy Bridge devices since 1314 * later platforms don't have L3 control bits in the PTE. 
1315 */ 1316 if (IS_IVYBRIDGE(i915)) { 1317 /* Ignore any error, regard it as a simple optimisation */ 1318 i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); 1319 } 1320 1321 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 1322 if (IS_ERR(vma)) { 1323 err = PTR_ERR(vma); 1324 goto err_obj; 1325 } 1326 1327 return vma; 1328 1329err_map: 1330 i915_gem_object_unpin_map(obj); 1331err_obj: 1332 i915_gem_object_put(obj); 1333 return ERR_PTR(err); 1334} 1335 1336static struct intel_context * 1337__ring_context_pin(struct intel_engine_cs *engine, 1338 struct i915_gem_context *ctx, 1339 struct intel_context *ce) 1340{ 1341 int err; 1342 1343 if (!ce->state && engine->context_size) { 1344 struct i915_vma *vma; 1345 1346 vma = alloc_context_vma(engine); 1347 if (IS_ERR(vma)) { 1348 err = PTR_ERR(vma); 1349 goto err; 1350 } 1351 1352 ce->state = vma; 1353 } 1354 1355 err = __context_pin(ce); 1356 if (err) 1357 goto err; 1358 1359 err = __context_pin_ppgtt(ce->gem_context); 1360 if (err) 1361 goto err_unpin; 1362 1363 i915_gem_context_get(ctx); 1364 1365 /* One ringbuffer to rule them all */ 1366 GEM_BUG_ON(!engine->buffer); 1367 ce->ring = engine->buffer; 1368 1369 return ce; 1370 1371err_unpin: 1372 __context_unpin(ce); 1373err: 1374 ce->pin_count = 0; 1375 return ERR_PTR(err); 1376} 1377 1378static const struct intel_context_ops ring_context_ops = { 1379 .unpin = intel_ring_context_unpin, 1380 .destroy = intel_ring_context_destroy, 1381}; 1382 1383static struct intel_context * 1384intel_ring_context_pin(struct intel_engine_cs *engine, 1385 struct i915_gem_context *ctx) 1386{ 1387 struct intel_context *ce = to_intel_context(ctx, engine); 1388 1389 lockdep_assert_held(&ctx->i915->drm.struct_mutex); 1390 1391 if (likely(ce->pin_count++)) 1392 return ce; 1393 GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ 1394 1395 ce->ops = &ring_context_ops; 1396 1397 return __ring_context_pin(engine, ctx, ce); 1398} 1399 1400static int intel_init_ring_buffer(struct intel_engine_cs *engine) 1401{ 1402 struct i915_timeline *timeline; 1403 struct intel_ring *ring; 1404 unsigned int size; 1405 int err; 1406 1407 intel_engine_setup_common(engine); 1408 1409 timeline = i915_timeline_create(engine->i915, engine->name); 1410 if (IS_ERR(timeline)) { 1411 err = PTR_ERR(timeline); 1412 goto err; 1413 } 1414 1415 ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); 1416 i915_timeline_put(timeline); 1417 if (IS_ERR(ring)) { 1418 err = PTR_ERR(ring); 1419 goto err; 1420 } 1421 1422 /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ 1423 err = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE); 1424 if (err) 1425 goto err_ring; 1426 1427 GEM_BUG_ON(engine->buffer); 1428 engine->buffer = ring; 1429 1430 size = PAGE_SIZE; 1431 if (HAS_BROKEN_CS_TLB(engine->i915)) 1432 size = I830_WA_SIZE; 1433 err = intel_engine_create_scratch(engine, size); 1434 if (err) 1435 goto err_unpin; 1436 1437 err = intel_engine_init_common(engine); 1438 if (err) 1439 goto err_scratch; 1440 1441 return 0; 1442 1443err_scratch: 1444 intel_engine_cleanup_scratch(engine); 1445err_unpin: 1446 intel_ring_unpin(ring); 1447err_ring: 1448 intel_ring_free(ring); 1449err: 1450 intel_engine_cleanup_common(engine); 1451 return err; 1452} 1453 1454void intel_engine_cleanup(struct intel_engine_cs *engine) 1455{ 1456 struct drm_i915_private *dev_priv = engine->i915; 1457 1458 WARN_ON(INTEL_GEN(dev_priv) > 2 && 1459 (I915_READ_MODE(engine) & MODE_IDLE) == 0); 1460 1461 intel_ring_unpin(engine->buffer); 1462 intel_ring_free(engine->buffer); 1463 1464 if (engine->cleanup) 1465 engine->cleanup(engine); 1466 1467 intel_engine_cleanup_common(engine); 1468 1469 dev_priv->engine[engine->id] = NULL; 1470 kfree(engine); 1471} 1472 1473void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) 1474{ 1475 struct intel_engine_cs *engine; 1476 enum intel_engine_id id; 1477 1478 /* Restart from the beginning of the rings for convenience */ 1479 for_each_engine(engine, dev_priv, id) 1480 intel_ring_reset(engine->buffer, 0); 1481} 1482 1483static int load_pd_dir(struct i915_request *rq, 1484 const struct i915_hw_ppgtt *ppgtt) 1485{ 1486 const struct intel_engine_cs * const engine = rq->engine; 1487 u32 *cs; 1488 1489 cs = intel_ring_begin(rq, 6); 1490 if (IS_ERR(cs)) 1491 return PTR_ERR(cs); 1492 1493 *cs++ = MI_LOAD_REGISTER_IMM(1); 1494 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); 1495 *cs++ = PP_DIR_DCLV_2G; 1496 1497 *cs++ = MI_LOAD_REGISTER_IMM(1); 1498 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); 1499 *cs++ = ppgtt->pd.base.ggtt_offset << 10; 1500 1501 intel_ring_advance(rq, cs); 1502 1503 return 0; 1504} 1505 1506static int flush_pd_dir(struct i915_request *rq) 1507{ 1508 const struct intel_engine_cs * const engine = rq->engine; 1509 u32 *cs; 1510 1511 cs = intel_ring_begin(rq, 4); 1512 if (IS_ERR(cs)) 1513 return PTR_ERR(cs); 1514 1515 /* Stall until the page table load is complete */ 1516 *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; 1517 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); 1518 *cs++ = i915_ggtt_offset(engine->scratch); 1519 *cs++ = MI_NOOP; 1520 1521 intel_ring_advance(rq, cs); 1522 return 0; 1523} 1524 1525static inline int mi_set_context(struct i915_request *rq, u32 flags) 1526{ 1527 struct drm_i915_private *i915 = rq->i915; 1528 struct intel_engine_cs *engine = rq->engine; 1529 enum intel_engine_id id; 1530 const int num_rings = 1531 /* Use an extended w/a on gen7 if signalling from other rings */ 1532 (HAS_LEGACY_SEMAPHORES(i915) && IS_GEN7(i915)) ? 1533 INTEL_INFO(i915)->num_rings - 1 : 1534 0; 1535 bool force_restore = false; 1536 int len; 1537 u32 *cs; 1538 1539 flags |= MI_MM_SPACE_GTT; 1540 if (IS_HASWELL(i915)) 1541 /* These flags are for resource streamer on HSW+ */ 1542 flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; 1543 else 1544 flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; 1545 1546 len = 4; 1547 if (IS_GEN7(i915)) 1548 len += 2 + (num_rings ? 
4*num_rings + 6 : 0); 1549 if (flags & MI_FORCE_RESTORE) { 1550 GEM_BUG_ON(flags & MI_RESTORE_INHIBIT); 1551 flags &= ~MI_FORCE_RESTORE; 1552 force_restore = true; 1553 len += 2; 1554 } 1555 1556 cs = intel_ring_begin(rq, len); 1557 if (IS_ERR(cs)) 1558 return PTR_ERR(cs); 1559 1560 /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ 1561 if (IS_GEN7(i915)) { 1562 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 1563 if (num_rings) { 1564 struct intel_engine_cs *signaller; 1565 1566 *cs++ = MI_LOAD_REGISTER_IMM(num_rings); 1567 for_each_engine(signaller, i915, id) { 1568 if (signaller == engine) 1569 continue; 1570 1571 *cs++ = i915_mmio_reg_offset( 1572 RING_PSMI_CTL(signaller->mmio_base)); 1573 *cs++ = _MASKED_BIT_ENABLE( 1574 GEN6_PSMI_SLEEP_MSG_DISABLE); 1575 } 1576 } 1577 } 1578 1579 if (force_restore) { 1580 /* 1581 * The HW doesn't handle being told to restore the current 1582 * context very well. Quite often it likes goes to go off and 1583 * sulk, especially when it is meant to be reloading PP_DIR. 1584 * A very simple fix to force the reload is to simply switch 1585 * away from the current context and back again. 1586 * 1587 * Note that the kernel_context will contain random state 1588 * following the INHIBIT_RESTORE. We accept this since we 1589 * never use the kernel_context state; it is merely a 1590 * placeholder we use to flush other contexts. 1591 */ 1592 *cs++ = MI_SET_CONTEXT; 1593 *cs++ = i915_ggtt_offset(to_intel_context(i915->kernel_context, 1594 engine)->state) | 1595 MI_MM_SPACE_GTT | 1596 MI_RESTORE_INHIBIT; 1597 } 1598 1599 *cs++ = MI_NOOP; 1600 *cs++ = MI_SET_CONTEXT; 1601 *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags; 1602 /* 1603 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP 1604 * WaMiSetContext_Hang:snb,ivb,vlv 1605 */ 1606 *cs++ = MI_NOOP; 1607 1608 if (IS_GEN7(i915)) { 1609 if (num_rings) { 1610 struct intel_engine_cs *signaller; 1611 i915_reg_t last_reg = {}; /* keep gcc quiet */ 1612 1613 *cs++ = MI_LOAD_REGISTER_IMM(num_rings); 1614 for_each_engine(signaller, i915, id) { 1615 if (signaller == engine) 1616 continue; 1617 1618 last_reg = RING_PSMI_CTL(signaller->mmio_base); 1619 *cs++ = i915_mmio_reg_offset(last_reg); 1620 *cs++ = _MASKED_BIT_DISABLE( 1621 GEN6_PSMI_SLEEP_MSG_DISABLE); 1622 } 1623 1624 /* Insert a delay before the next switch! */ 1625 *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; 1626 *cs++ = i915_mmio_reg_offset(last_reg); 1627 *cs++ = i915_ggtt_offset(engine->scratch); 1628 *cs++ = MI_NOOP; 1629 } 1630 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 1631 } 1632 1633 intel_ring_advance(rq, cs); 1634 1635 return 0; 1636} 1637 1638static int remap_l3(struct i915_request *rq, int slice) 1639{ 1640 u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; 1641 int i; 1642 1643 if (!remap_info) 1644 return 0; 1645 1646 cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2); 1647 if (IS_ERR(cs)) 1648 return PTR_ERR(cs); 1649 1650 /* 1651 * Note: We do not worry about the concurrent register cacheline hang 1652 * here because no other code should access these registers other than 1653 * at initialization time. 
1654 */ 1655 *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); 1656 for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { 1657 *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); 1658 *cs++ = remap_info[i]; 1659 } 1660 *cs++ = MI_NOOP; 1661 intel_ring_advance(rq, cs); 1662 1663 return 0; 1664} 1665 1666static int switch_context(struct i915_request *rq) 1667{ 1668 struct intel_engine_cs *engine = rq->engine; 1669 struct i915_gem_context *ctx = rq->gem_context; 1670 struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; 1671 unsigned int unwind_mm = 0; 1672 u32 hw_flags = 0; 1673 int ret, i; 1674 1675 lockdep_assert_held(&rq->i915->drm.struct_mutex); 1676 GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); 1677 1678 if (ppgtt) { 1679 ret = load_pd_dir(rq, ppgtt); 1680 if (ret) 1681 goto err; 1682 1683 if (intel_engine_flag(engine) & ppgtt->pd_dirty_rings) { 1684 unwind_mm = intel_engine_flag(engine); 1685 ppgtt->pd_dirty_rings &= ~unwind_mm; 1686 hw_flags = MI_FORCE_RESTORE; 1687 } 1688 } 1689 1690 if (rq->hw_context->state) { 1691 GEM_BUG_ON(engine->id != RCS); 1692 1693 /* 1694 * The kernel context(s) is treated as pure scratch and is not 1695 * expected to retain any state (as we sacrifice it during 1696 * suspend and on resume it may be corrupted). This is ok, 1697 * as nothing actually executes using the kernel context; it 1698 * is purely used for flushing user contexts. 1699 */ 1700 if (i915_gem_context_is_kernel(ctx)) 1701 hw_flags = MI_RESTORE_INHIBIT; 1702 1703 ret = mi_set_context(rq, hw_flags); 1704 if (ret) 1705 goto err_mm; 1706 } 1707 1708 if (ppgtt) { 1709 ret = flush_pd_dir(rq); 1710 if (ret) 1711 goto err_mm; 1712 } 1713 1714 if (ctx->remap_slice) { 1715 for (i = 0; i < MAX_L3_SLICES; i++) { 1716 if (!(ctx->remap_slice & BIT(i))) 1717 continue; 1718 1719 ret = remap_l3(rq, i); 1720 if (ret) 1721 goto err_mm; 1722 } 1723 1724 ctx->remap_slice = 0; 1725 } 1726 1727 return 0; 1728 1729err_mm: 1730 if (unwind_mm) 1731 ppgtt->pd_dirty_rings |= unwind_mm; 1732err: 1733 return ret; 1734} 1735 1736static int ring_request_alloc(struct i915_request *request) 1737{ 1738 int ret; 1739 1740 GEM_BUG_ON(!request->hw_context->pin_count); 1741 1742 /* Flush enough space to reduce the likelihood of waiting after 1743 * we start building the request - in which case we will just 1744 * have to repeat work. 1745 */ 1746 request->reserved_space += LEGACY_REQUEST_SIZE; 1747 1748 ret = intel_ring_wait_for_space(request->ring, request->reserved_space); 1749 if (ret) 1750 return ret; 1751 1752 ret = switch_context(request); 1753 if (ret) 1754 return ret; 1755 1756 request->reserved_space -= LEGACY_REQUEST_SIZE; 1757 return 0; 1758} 1759 1760static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) 1761{ 1762 struct i915_request *target; 1763 long timeout; 1764 1765 lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); 1766 1767 if (intel_ring_update_space(ring) >= bytes) 1768 return 0; 1769 1770 GEM_BUG_ON(list_empty(&ring->request_list)); 1771 list_for_each_entry(target, &ring->request_list, ring_link) { 1772 /* Would completion of this request free enough space? 
*/ 1773 if (bytes <= __intel_ring_space(target->postfix, 1774 ring->emit, ring->size)) 1775 break; 1776 } 1777 1778 if (WARN_ON(&target->ring_link == &ring->request_list)) 1779 return -ENOSPC; 1780 1781 timeout = i915_request_wait(target, 1782 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, 1783 MAX_SCHEDULE_TIMEOUT); 1784 if (timeout < 0) 1785 return timeout; 1786 1787 i915_request_retire_upto(target); 1788 1789 intel_ring_update_space(ring); 1790 GEM_BUG_ON(ring->space < bytes); 1791 return 0; 1792} 1793 1794int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes) 1795{ 1796 GEM_BUG_ON(bytes > ring->effective_size); 1797 if (unlikely(bytes > ring->effective_size - ring->emit)) 1798 bytes += ring->size - ring->emit; 1799 1800 if (unlikely(bytes > ring->space)) { 1801 int ret = wait_for_space(ring, bytes); 1802 if (unlikely(ret)) 1803 return ret; 1804 } 1805 1806 GEM_BUG_ON(ring->space < bytes); 1807 return 0; 1808} 1809 1810u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) 1811{ 1812 struct intel_ring *ring = rq->ring; 1813 const unsigned int remain_usable = ring->effective_size - ring->emit; 1814 const unsigned int bytes = num_dwords * sizeof(u32); 1815 unsigned int need_wrap = 0; 1816 unsigned int total_bytes; 1817 u32 *cs; 1818 1819 /* Packets must be qword aligned. */ 1820 GEM_BUG_ON(num_dwords & 1); 1821 1822 total_bytes = bytes + rq->reserved_space; 1823 GEM_BUG_ON(total_bytes > ring->effective_size); 1824 1825 if (unlikely(total_bytes > remain_usable)) { 1826 const int remain_actual = ring->size - ring->emit; 1827 1828 if (bytes > remain_usable) { 1829 /* 1830 * Not enough space for the basic request. So need to 1831 * flush out the remainder and then wait for 1832 * base + reserved. 1833 */ 1834 total_bytes += remain_actual; 1835 need_wrap = remain_actual | 1; 1836 } else { 1837 /* 1838 * The base request will fit but the reserved space 1839 * falls off the end. So we don't need an immediate 1840 * wrap and only need to effectively wait for the 1841 * reserved size from the start of ringbuffer. 1842 */ 1843 total_bytes = rq->reserved_space + remain_actual; 1844 } 1845 } 1846 1847 if (unlikely(total_bytes > ring->space)) { 1848 int ret; 1849 1850 /* 1851 * Space is reserved in the ringbuffer for finalising the 1852 * request, as that cannot be allowed to fail. During request 1853 * finalisation, reserved_space is set to 0 to stop the 1854 * overallocation and the assumption is that then we never need 1855 * to wait (which has the risk of failing with EINTR). 1856 * 1857 * See also i915_request_alloc() and i915_request_add(). 
1858 */ 1859 GEM_BUG_ON(!rq->reserved_space); 1860 1861 ret = wait_for_space(ring, total_bytes); 1862 if (unlikely(ret)) 1863 return ERR_PTR(ret); 1864 } 1865 1866 if (unlikely(need_wrap)) { 1867 need_wrap &= ~1; 1868 GEM_BUG_ON(need_wrap > ring->space); 1869 GEM_BUG_ON(ring->emit + need_wrap > ring->size); 1870 GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); 1871 1872 /* Fill the tail with MI_NOOP */ 1873 memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); 1874 ring->space -= need_wrap; 1875 ring->emit = 0; 1876 } 1877 1878 GEM_BUG_ON(ring->emit > ring->size - bytes); 1879 GEM_BUG_ON(ring->space < bytes); 1880 cs = ring->vaddr + ring->emit; 1881 GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); 1882 ring->emit += bytes; 1883 ring->space -= bytes; 1884 1885 return cs; 1886} 1887 1888/* Align the ring tail to a cacheline boundary */ 1889int intel_ring_cacheline_align(struct i915_request *rq) 1890{ 1891 int num_dwords; 1892 void *cs; 1893 1894 num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); 1895 if (num_dwords == 0) 1896 return 0; 1897 1898 num_dwords = CACHELINE_DWORDS - num_dwords; 1899 GEM_BUG_ON(num_dwords & 1); 1900 1901 cs = intel_ring_begin(rq, num_dwords); 1902 if (IS_ERR(cs)) 1903 return PTR_ERR(cs); 1904 1905 memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); 1906 intel_ring_advance(rq, cs); 1907 1908 GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); 1909 return 0; 1910} 1911 1912static void gen6_bsd_submit_request(struct i915_request *request) 1913{ 1914 struct drm_i915_private *dev_priv = request->i915; 1915 1916 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 1917 1918 /* Every tail move must follow the sequence below */ 1919 1920 /* Disable notification that the ring is IDLE. The GT 1921 * will then assume that it is busy and bring it out of rc6. 1922 */ 1923 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL, 1924 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); 1925 1926 /* Clear the context id. Here be magic! */ 1927 I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0); 1928 1929 /* Wait for the ring not to be idle, i.e. for it to wake up. */ 1930 if (__intel_wait_for_register_fw(dev_priv, 1931 GEN6_BSD_SLEEP_PSMI_CONTROL, 1932 GEN6_BSD_SLEEP_INDICATOR, 1933 0, 1934 1000, 0, NULL)) 1935 DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); 1936 1937 /* Now that the ring is fully powered up, update the tail */ 1938 i9xx_submit_request(request); 1939 1940 /* Let the ring send IDLE messages to the GT again, 1941 * and so let it sleep to conserve power when idle. 1942 */ 1943 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL, 1944 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); 1945 1946 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 1947} 1948 1949static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode) 1950{ 1951 u32 cmd, *cs; 1952 1953 cs = intel_ring_begin(rq, 4); 1954 if (IS_ERR(cs)) 1955 return PTR_ERR(cs); 1956 1957 cmd = MI_FLUSH_DW; 1958 1959 /* We always require a command barrier so that subsequent 1960 * commands, such as breadcrumb interrupts, are strictly ordered 1961 * wrt the contents of the write cache being flushed to memory 1962 * (and thus being coherent from the CPU). 1963 */ 1964 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; 1965 1966 /* 1967 * Bspec vol 1c.5 - video engine command streamer: 1968 * "If ENABLED, all TLBs will be invalidated once the flush 1969 * operation is complete. This bit is only valid when the 1970 * Post-Sync Operation field is a value of 1h or 3h." 
1971 */ 1972 if (mode & EMIT_INVALIDATE) 1973 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; 1974 1975 *cs++ = cmd; 1976 *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; 1977 *cs++ = 0; 1978 *cs++ = MI_NOOP; 1979 intel_ring_advance(rq, cs); 1980 return 0; 1981} 1982 1983static int 1984hsw_emit_bb_start(struct i915_request *rq, 1985 u64 offset, u32 len, 1986 unsigned int dispatch_flags) 1987{ 1988 u32 *cs; 1989 1990 cs = intel_ring_begin(rq, 2); 1991 if (IS_ERR(cs)) 1992 return PTR_ERR(cs); 1993 1994 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 1995 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | 1996 (dispatch_flags & I915_DISPATCH_RS ? 1997 MI_BATCH_RESOURCE_STREAMER : 0); 1998 /* bit0-7 is the length on GEN6+ */ 1999 *cs++ = offset; 2000 intel_ring_advance(rq, cs); 2001 2002 return 0; 2003} 2004 2005static int 2006gen6_emit_bb_start(struct i915_request *rq, 2007 u64 offset, u32 len, 2008 unsigned int dispatch_flags) 2009{ 2010 u32 *cs; 2011 2012 cs = intel_ring_begin(rq, 2); 2013 if (IS_ERR(cs)) 2014 return PTR_ERR(cs); 2015 2016 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 2017 0 : MI_BATCH_NON_SECURE_I965); 2018 /* bit0-7 is the length on GEN6+ */ 2019 *cs++ = offset; 2020 intel_ring_advance(rq, cs); 2021 2022 return 0; 2023} 2024 2025/* Blitter support (SandyBridge+) */ 2026 2027static int gen6_ring_flush(struct i915_request *rq, u32 mode) 2028{ 2029 u32 cmd, *cs; 2030 2031 cs = intel_ring_begin(rq, 4); 2032 if (IS_ERR(cs)) 2033 return PTR_ERR(cs); 2034 2035 cmd = MI_FLUSH_DW; 2036 2037 /* We always require a command barrier so that subsequent 2038 * commands, such as breadcrumb interrupts, are strictly ordered 2039 * wrt the contents of the write cache being flushed to memory 2040 * (and thus being coherent from the CPU). 2041 */ 2042 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; 2043 2044 /* 2045 * Bspec vol 1c.3 - blitter engine command streamer: 2046 * "If ENABLED, all TLBs will be invalidated once the flush 2047 * operation is complete. This bit is only valid when the 2048 * Post-Sync Operation field is a value of 1h or 3h." 2049 */ 2050 if (mode & EMIT_INVALIDATE) 2051 cmd |= MI_INVALIDATE_TLB; 2052 *cs++ = cmd; 2053 *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; 2054 *cs++ = 0; 2055 *cs++ = MI_NOOP; 2056 intel_ring_advance(rq, cs); 2057 2058 return 0; 2059} 2060 2061static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, 2062 struct intel_engine_cs *engine) 2063{ 2064 int i; 2065 2066 if (!HAS_LEGACY_SEMAPHORES(dev_priv)) 2067 return; 2068 2069 GEM_BUG_ON(INTEL_GEN(dev_priv) < 6); 2070 engine->semaphore.sync_to = gen6_ring_sync_to; 2071 engine->semaphore.signal = gen6_signal; 2072 2073 /* 2074 * The current semaphore is only applied on pre-gen8 2075 * platform. And there is no VCS2 ring on the pre-gen8 2076 * platform. So the semaphore between RCS and VCS2 is 2077 * initialized as INVALID. 
static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
				       struct intel_engine_cs *engine)
{
	int i;

	if (!HAS_LEGACY_SEMAPHORES(dev_priv))
		return;

	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
	engine->semaphore.sync_to = gen6_ring_sync_to;
	engine->semaphore.signal = gen6_signal;

	/*
	 * The current semaphore is only applied on pre-gen8
	 * platform. And there is no VCS2 ring on the pre-gen8
	 * platform. So the semaphore between RCS and VCS2 is
	 * initialized as INVALID.
	 */
	for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
		static const struct {
			u32 wait_mbox;
			i915_reg_t mbox_reg;
		} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
			[RCS_HW] = {
				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
			},
			[VCS_HW] = {
				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
			},
			[BCS_HW] = {
				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
			},
			[VECS_HW] = {
				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
			},
		};
		u32 wait_mbox;
		i915_reg_t mbox_reg;

		if (i == engine->hw_id) {
			wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
			mbox_reg = GEN6_NOSYNC;
		} else {
			wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
			mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
		}

		engine->semaphore.mbox.wait[i] = wait_mbox;
		engine->semaphore.mbox.signal[i] = mbox_reg;
	}
}

static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
				struct intel_engine_cs *engine)
{
	if (INTEL_GEN(dev_priv) >= 6) {
		engine->irq_enable = gen6_irq_enable;
		engine->irq_disable = gen6_irq_disable;
		engine->irq_seqno_barrier = gen6_seqno_barrier;
	} else if (INTEL_GEN(dev_priv) >= 5) {
		engine->irq_enable = gen5_irq_enable;
		engine->irq_disable = gen5_irq_disable;
		engine->irq_seqno_barrier = gen5_seqno_barrier;
	} else if (INTEL_GEN(dev_priv) >= 3) {
		engine->irq_enable = i9xx_irq_enable;
		engine->irq_disable = i9xx_irq_disable;
	} else {
		engine->irq_enable = i8xx_irq_enable;
		engine->irq_disable = i8xx_irq_disable;
	}
}

static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = i9xx_submit_request;
	engine->cancel_requests = cancel_requests;

	engine->park = NULL;
	engine->unpark = NULL;
}

static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
{
	i9xx_set_default_submission(engine);
	engine->submit_request = gen6_bsd_submit_request;
}
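/*
 * Illustrative note (added here, not from the original source): in
 * intel_ring_default_vfuncs() below, the legacy-semaphore breadcrumb grows
 * by 3 dwords for every other ring that must be signalled.  For example, on
 * a part with four rings, num_rings ends up as 3, adding 9 dwords; because
 * 3 is odd, one further dword is added, which (presumably) keeps
 * emit_breadcrumb_sz an even number of dwords so the tail stays qword
 * aligned.
 */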
static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
				      struct intel_engine_cs *engine)
{
	/* gen8+ are only supported with execlists */
	GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);

	intel_ring_init_irq(dev_priv, engine);
	intel_ring_init_semaphores(dev_priv, engine);

	engine->init_hw = init_ring_common;
	engine->reset.prepare = reset_prepare;
	engine->reset.reset = reset_ring;
	engine->reset.finish = reset_finish;

	engine->context_pin = intel_ring_context_pin;
	engine->request_alloc = ring_request_alloc;

	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
	if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
		int num_rings;

		engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;

		num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
		engine->emit_breadcrumb_sz += num_rings * 3;
		if (num_rings & 1)
			engine->emit_breadcrumb_sz++;
	}

	engine->set_default_submission = i9xx_set_default_submission;

	if (INTEL_GEN(dev_priv) >= 6)
		engine->emit_bb_start = gen6_emit_bb_start;
	else if (INTEL_GEN(dev_priv) >= 4)
		engine->emit_bb_start = i965_emit_bb_start;
	else if (IS_I830(dev_priv) || IS_I845G(dev_priv))
		engine->emit_bb_start = i830_emit_bb_start;
	else
		engine->emit_bb_start = i915_emit_bb_start;
}

int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	intel_ring_default_vfuncs(dev_priv, engine);

	if (HAS_L3_DPF(dev_priv))
		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;

	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;

	if (INTEL_GEN(dev_priv) >= 6) {
		engine->init_context = intel_rcs_ctx_init;
		engine->emit_flush = gen7_render_ring_flush;
		if (IS_GEN6(dev_priv))
			engine->emit_flush = gen6_render_ring_flush;
	} else if (IS_GEN5(dev_priv)) {
		engine->emit_flush = gen4_render_ring_flush;
	} else {
		if (INTEL_GEN(dev_priv) < 4)
			engine->emit_flush = gen2_render_ring_flush;
		else
			engine->emit_flush = gen4_render_ring_flush;
		engine->irq_enable_mask = I915_USER_INTERRUPT;
	}

	if (IS_HASWELL(dev_priv))
		engine->emit_bb_start = hsw_emit_bb_start;

	engine->init_hw = init_render_ring;

	ret = intel_init_ring_buffer(engine);
	if (ret)
		return ret;

	return 0;
}

int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	intel_ring_default_vfuncs(dev_priv, engine);

	if (INTEL_GEN(dev_priv) >= 6) {
		/* gen6 bsd needs a special wa for tail updates */
		if (IS_GEN6(dev_priv))
			engine->set_default_submission = gen6_bsd_set_default_submission;
		engine->emit_flush = gen6_bsd_ring_flush;
		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
	} else {
		engine->emit_flush = bsd_ring_flush;
		if (IS_GEN5(dev_priv))
			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
		else
			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
	}

	return intel_init_ring_buffer(engine);
}

int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	intel_ring_default_vfuncs(dev_priv, engine);

	engine->emit_flush = gen6_ring_flush;
	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;

	return intel_init_ring_buffer(engine);
}

int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	intel_ring_default_vfuncs(dev_priv, engine);

	engine->emit_flush = gen6_ring_flush;
	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
	engine->irq_enable = hsw_vebox_irq_enable;
	engine->irq_disable = hsw_vebox_irq_disable;

	return intel_init_ring_buffer(engine);
}
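/*
 * Illustrative note (added here, not from the original source, and based on
 * recollection of the surrounding driver rather than this file): the
 * intel_init_*_ring_buffer() entry points above are the legacy ring-buffer
 * submission setup for each engine class; on platforms that use execlists
 * instead, the equivalent per-engine setup lives in intel_lrc.c and these
 * paths are not taken.
 */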