Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v3.18 (2766 lines, 76 kB)
/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao<haihao.xiang@intel.com>
 *
 */

#include <drm/drmP.h>
#include "i915_drv.h"
#include <drm/i915_drm.h>
#include "i915_trace.h"
#include "intel_drv.h"

bool
intel_ring_initialized(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;

	if (!dev)
		return false;

	if (i915.enable_execlists) {
		struct intel_context *dctx = ring->default_context;
		struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf;

		return ringbuf->obj;
	} else
		return ring->buffer && ring->buffer->obj;
}

int __intel_ring_space(int head, int tail, int size)
{
	int space = head - (tail + I915_RING_FREE_SPACE);
	if (space < 0)
		space += size;
	return space;
}

int intel_ring_space(struct intel_ringbuffer *ringbuf)
{
	return __intel_ring_space(ringbuf->head & HEAD_ADDR,
				  ringbuf->tail, ringbuf->size);
}

bool intel_ring_stopped(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
}

void __intel_ring_advance(struct intel_engine_cs *ring)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	ringbuf->tail &= ringbuf->size - 1;
	if (intel_ring_stopped(ring))
		return;
	ring->write_tail(ring, ringbuf->tail);
}
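
/*
 * A standalone worked example (not part of this file) of the wrap
 * arithmetic in __intel_ring_space() above.  It assumes the reserve
 * I915_RING_FREE_SPACE is 64 bytes, as defined in this kernel's
 * intel_ringbuffer.h; compile it on its own to check the numbers.
 */
#include <assert.h>

#define EXAMPLE_RING_FREE_SPACE 64	/* assumed I915_RING_FREE_SPACE */

static int example_ring_space(int head, int tail, int size)
{
	int space = head - (tail + EXAMPLE_RING_FREE_SPACE);
	if (space < 0)
		space += size;	/* tail is ahead of head: wrap around */
	return space;
}

int main(void)
{
	/* 4096-byte ring, head at 512, tail at 3584:
	 * 512 - (3584 + 64) = -3136, plus 4096 = 960 bytes free. */
	assert(example_ring_space(512, 3584, 4096) == 960);
	return 0;
}
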
static int
gen2_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	u32 cmd;
	int ret;

	cmd = MI_FLUSH;
	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
		cmd |= MI_NO_WRITE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
		cmd |= MI_READ_FLUSH;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen4_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	struct drm_device *dev = ring->dev;
	u32 cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
{
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
{
	int ret;

	if (!ring->fbc_dirty)
		return 0;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;
	/* WaFbcNukeOn3DBlt:ivb/hsw */
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, MSG_FBC_REND_STATE);
	intel_ring_emit(ring, value);
	intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
	intel_ring_emit(ring, MSG_FBC_REND_STATE);
	intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
	intel_ring_advance(ring);

	ring->fbc_dirty = false;
	return 0;
}

static int
gen7_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		/* Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set.
		 */
		gen7_render_ring_cs_stall_wa(ring);
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	if (!invalidate_domains && flush_domains)
		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);

	return 0;
}

static int
gen8_emit_pipe_control(struct intel_engine_cs *ring,
		       u32 flags, u32 scratch_addr)
{
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen8_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	flags |= PIPE_CONTROL_CS_STALL;

	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
		ret = gen8_emit_pipe_control(ring,
					     PIPE_CONTROL_CS_STALL |
					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
					     0);
		if (ret)
			return ret;
	}

	ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
	if (ret)
		return ret;

	if (!invalidate_domains && flush_domains)
		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);

	return 0;
}

static void ring_write_tail(struct intel_engine_cs *ring,
			    u32 value)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u64 acthd;

	if (INTEL_INFO(ring->dev)->gen >= 8)
		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
					 RING_ACTHD_UDW(ring->mmio_base));
	else if (INTEL_INFO(ring->dev)->gen >= 4)
		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
	else
		acthd = I915_READ(ACTHD);

	return acthd;
}

static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 addr;

	addr = dev_priv->status_page_dmah->busaddr;
	if (INTEL_INFO(ring->dev)->gen >= 4)
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
	I915_WRITE(HWS_PGA, addr);
}

static bool stop_ring(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = to_i915(ring->dev);

	if (!IS_GEN2(ring->dev)) {
		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
		if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
			DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
			/* Sometimes we observe that the idle flag is not
			 * set even though the ring is empty.  So double
			 * check before giving up.
			 */
			if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
				return false;
		}
	}

	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	if (!IS_GEN2(ring->dev)) {
		(void)I915_READ_CTL(ring);
		I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
	}

	return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
}

static int init_ring_common(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	struct drm_i915_gem_object *obj = ringbuf->obj;
	int ret = 0;

	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);

	if (!stop_ring(ring)) {
		/* G45 ring initialization often fails to reset head to zero */
		DRM_DEBUG_KMS("%s head not reset to zero "
			      "ctl %08x head %08x tail %08x start %08x\n",
			      ring->name,
			      I915_READ_CTL(ring),
			      I915_READ_HEAD(ring),
			      I915_READ_TAIL(ring),
			      I915_READ_START(ring));

		if (!stop_ring(ring)) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
			ret = -EIO;
			goto out;
		}
	}

	if (I915_NEED_GFX_HWS(dev))
		intel_ring_setup_status_page(ring);
	else
		ring_setup_phys_status_page(ring);

	/* Enforce ordering by reading HEAD register back */
	I915_READ_HEAD(ring);

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values. */
	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));

	/* WaClearRingBufHeadRegAtInit:ctg,elk */
	if (I915_READ_HEAD(ring))
		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
			  ring->name, I915_READ_HEAD(ring));
	I915_WRITE_HEAD(ring, 0);
	(void)I915_READ_HEAD(ring);

	I915_WRITE_CTL(ring,
			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
			  ring->name,
			  I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
			  I915_READ_HEAD(ring), I915_READ_TAIL(ring),
			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
		ret = -EIO;
		goto out;
	}

	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
	else {
		ringbuf->head = I915_READ_HEAD(ring);
		ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
		ringbuf->space = intel_ring_space(ringbuf);
		ringbuf->last_retired_head = -1;
	}

	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));

out:
	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);

	return ret;
}

void
intel_fini_pipe_control(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;

	if (ring->scratch.obj == NULL)
		return;

	if (INTEL_INFO(dev)->gen >= 5) {
		kunmap(sg_page(ring->scratch.obj->pages->sgl));
		i915_gem_object_ggtt_unpin(ring->scratch.obj);
	}

	drm_gem_object_unreference(&ring->scratch.obj->base);
	ring->scratch.obj = NULL;
}

int
intel_init_pipe_control(struct intel_engine_cs *ring)
{
	int ret;

	if (ring->scratch.obj)
		return 0;

	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
	if (ring->scratch.obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
	if (ret)
		goto err_unref;

	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
	if (ret)
		goto err_unref;

	ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
	ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
	if (ring->scratch.cpu_page == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}

	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
			 ring->name, ring->scratch.gtt_offset);
	return 0;

err_unpin:
	i915_gem_object_ggtt_unpin(ring->scratch.obj);
err_unref:
	drm_gem_object_unreference(&ring->scratch.obj->base);
err:
	return ret;
}

static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
				      u32 addr, u32 value)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
		return;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, addr);
	intel_ring_emit(ring, value);

	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
	/* value is updated with the status of remaining bits of this
	 * register when it is read from debugfs file
	 */
	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
	dev_priv->num_wa_regs++;
}
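
/*
 * A standalone sketch (not part of this file) of the masked-register
 * convention used by the workaround writes below.  It assumes
 * _MASKED_BIT_ENABLE(x) expands to ((x) << 16) | (x), as in this
 * kernel's i915_reg.h: bits 31:16 select which register bits the
 * write may touch, bits 15:0 give their new values.
 */
#include <assert.h>
#include <stdint.h>

#define EXAMPLE_MASKED_BIT_ENABLE(x)	(((x) << 16) | (x))

int main(void)
{
	uint32_t bit = 1u << 2;

	/* Only bit 2 of the target register changes; every bit whose
	 * mask half is 0 is left alone by the hardware. */
	assert(EXAMPLE_MASKED_BIT_ENABLE(bit) == 0x00040004);
	return 0;
}
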
static int bdw_init_workarounds(struct intel_engine_cs *ring)
{
	int ret;
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	/*
	 * workarounds applied in this fn are part of register state context,
	 * they need to be re-initialized following gpu reset, suspend/resume
	 * or module reload.
	 */
	dev_priv->num_wa_regs = 0;
	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));

	/*
	 * update the number of dwords required based on the
	 * actual number of workarounds applied
	 */
	ret = intel_ring_begin(ring, 18);
	if (ret)
		return ret;

	/* WaDisablePartialInstShootdown:bdw */
	/* WaDisableThreadStallDopClockGating:bdw */
	/* FIXME: Unclear whether we really need this on production bdw. */
	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
					      | STALL_DOP_GATING_DISABLE));

	/* WaDisableDopClockGating:bdw May not be needed for production */
	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));

	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
			   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	intel_ring_emit_wa(ring, HDC_CHICKEN0,
			   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));

	/* Wa4x4STCOptimizationDisable:bdw */
	intel_ring_emit_wa(ring, CACHE_MODE_1,
			   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	intel_ring_emit_wa(ring, GEN7_GT_MODE,
			   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);

	intel_ring_advance(ring);

	DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
			 dev_priv->num_wa_regs);

	return 0;
}

static int chv_init_workarounds(struct intel_engine_cs *ring)
{
	int ret;
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	/*
	 * workarounds applied in this fn are part of register state context,
	 * they need to be re-initialized following gpu reset, suspend/resume
	 * or module reload.
	 */
	dev_priv->num_wa_regs = 0;
	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));

	ret = intel_ring_begin(ring, 12);
	if (ret)
		return ret;

	/* WaDisablePartialInstShootdown:chv */
	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));

	/* WaDisableThreadStallDopClockGating:chv */
	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
			   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));

	/* WaDisableDopClockGating:chv (pre-production hw) */
	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));

	/* WaDisableSamplerPowerBypass:chv (pre-production hw) */
	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
			   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));

	intel_ring_advance(ring);

	return 0;
}

static int init_render_ring(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);
	if (ret)
		return ret;

	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 *
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
	 */
	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	/* WaEnableFlushTlbInvalidationMode:snb */
	if (INTEL_INFO(dev)->gen == 6)
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));

	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
	if (IS_GEN7(dev))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (INTEL_INFO(dev)->gen >= 5) {
		ret = intel_init_pipe_control(ring);
		if (ret)
			return ret;
	}

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (HAS_L3_DPF(dev))
		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));

	return ret;
}

static void render_ring_cleanup(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (dev_priv->semaphore_obj) {
		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
		drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
		dev_priv->semaphore_obj = NULL;
	}

	intel_fini_pipe_control(ring);
}

static int gen8_rcs_signal(struct intel_engine_cs *signaller,
			   unsigned int num_dwords)
{
#define MBOX_UPDATE_DWORDS 8
	struct drm_device *dev = signaller->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *waiter;
	int i, ret, num_rings;

	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
#undef MBOX_UPDATE_DWORDS

	ret = intel_ring_begin(signaller, num_dwords);
	if (ret)
		return ret;

	for_each_ring(waiter, dev_priv, i) {
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
			continue;

		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
					   PIPE_CONTROL_QW_WRITE |
					   PIPE_CONTROL_FLUSH_ENABLE);
		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
		intel_ring_emit(signaller, 0);
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
					   MI_SEMAPHORE_TARGET(waiter->id));
		intel_ring_emit(signaller, 0);
	}

	return 0;
}

static int gen8_xcs_signal(struct intel_engine_cs *signaller,
			   unsigned int num_dwords)
{
#define MBOX_UPDATE_DWORDS 6
	struct drm_device *dev = signaller->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *waiter;
	int i, ret, num_rings;

	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
#undef MBOX_UPDATE_DWORDS

	ret = intel_ring_begin(signaller, num_dwords);
	if (ret)
		return ret;

	for_each_ring(waiter, dev_priv, i) {
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
			continue;

		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
					   MI_FLUSH_DW_OP_STOREDW);
		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
					   MI_FLUSH_DW_USE_GTT);
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
					   MI_SEMAPHORE_TARGET(waiter->id));
		intel_ring_emit(signaller, 0);
	}

	return 0;
}

static int gen6_signal(struct intel_engine_cs *signaller,
		       unsigned int num_dwords)
{
	struct drm_device *dev = signaller->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *useless;
	int i, ret, num_rings;

#define MBOX_UPDATE_DWORDS 3
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
#undef MBOX_UPDATE_DWORDS

	ret = intel_ring_begin(signaller, num_dwords);
	if (ret)
		return ret;

	for_each_ring(useless, dev_priv, i) {
		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
		if (mbox_reg != GEN6_NOSYNC) {
			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
			intel_ring_emit(signaller, mbox_reg);
			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
		}
	}

	/* If num_dwords was rounded, make sure the tail pointer is correct */
	if (num_rings % 2 == 0)
		intel_ring_emit(signaller, MI_NOOP);

	return 0;
}

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 * @seqno - return seqno stuck into the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static int
gen6_add_request(struct intel_engine_cs *ring)
{
	int ret;

	if (ring->semaphore.signal)
		ret = ring->semaphore.signal(ring, 4);
	else
		ret = intel_ring_begin(ring, 4);

	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	__intel_ring_advance(ring);

	return 0;
}
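
/*
 * A standalone check (not part of this file) of the padding arithmetic
 * in gen6_signal() above: each mailbox update costs 3 dwords
 * (MI_LOAD_REGISTER_IMM + register + seqno), and the total is rounded
 * up to an even dword count, presumably so the tail stays
 * qword-aligned.  The spare slot exists exactly when the ring count is
 * even, which is when the trailing MI_NOOP is emitted.
 */
#include <assert.h>

#define EXAMPLE_ROUND_UP2(n)	(((n) + 1) & ~1)	/* round_up(n, 2) */

int main(void)
{
	int num_rings;

	for (num_rings = 1; num_rings <= 4; num_rings++) {
		int mbox = (num_rings - 1) * 3;	/* MBOX_UPDATE_DWORDS */
		int padded = EXAMPLE_ROUND_UP2(mbox);

		/* e.g. 4 rings -> 9 dwords, rounded to 10: pad needed */
		assert((padded != mbox) == (num_rings % 2 == 0));
	}
	return 0;
}
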
static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
					      u32 seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	return dev_priv->last_seqno < seqno;
}

/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */

static int
gen8_ring_sync(struct intel_engine_cs *waiter,
	       struct intel_engine_cs *signaller,
	       u32 seqno)
{
	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
	int ret;

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
				MI_SEMAPHORE_GLOBAL_GTT |
				MI_SEMAPHORE_POLL |
				MI_SEMAPHORE_SAD_GTE_SDD);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter,
			lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
	intel_ring_emit(waiter,
			upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
	intel_ring_advance(waiter);
	return 0;
}

static int
gen6_ring_sync(struct intel_engine_cs *waiter,
	       struct intel_engine_cs *signaller,
	       u32 seqno)
{
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;
	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
	int ret;

	/* Throughout all of the GEM code, seqno passed implies our current
	 * seqno is >= the last seqno executed. However for hardware the
	 * comparison is strictly greater than.
	 */
	seqno -= 1;

	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	/* If seqno wrap happened, omit the wait with no-ops */
	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
		intel_ring_emit(waiter, dw1 | wait_mbox);
		intel_ring_emit(waiter, seqno);
		intel_ring_emit(waiter, 0);
		intel_ring_emit(waiter, MI_NOOP);
	} else {
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
	}
	intel_ring_advance(waiter);

	return 0;
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
		 PIPE_CONTROL_DEPTH_STALL);				\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)

static int
pc_render_add_request(struct intel_engine_cs *ring)
{
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, 0);
	__intel_ring_advance(ring);

	return 0;
}
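
/*
 * A standalone check (not part of this file) of the scratch offsets
 * used by pc_render_add_request() above, assuming CACHELINE_BYTES is
 * 64 as in this kernel: stepping by 2 * CACHELINE_BYTES puts the six
 * workaround writes at +128, +256, ... +768, each on its own
 * cacheline, none sharing a line with the seqno write at +0.
 */
#include <assert.h>

#define EXAMPLE_CACHELINE_BYTES 64

int main(void)
{
	int i;

	for (i = 0; i < 6; i++) {
		unsigned int addr = (2 + 2 * i) * EXAMPLE_CACHELINE_BYTES;

		assert(addr % EXAMPLE_CACHELINE_BYTES == 0);	/* line-aligned */
		assert(addr / EXAMPLE_CACHELINE_BYTES >= 2);	/* clear of +0  */
	}
	return 0;
}
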
static u32
gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page.
	 */
	if (!lazy_coherency) {
		struct drm_i915_private *dev_priv = ring->dev->dev_private;
		POSTING_READ(RING_ACTHD(ring->mmio_base));
	}

	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static void
ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
}

static u32
pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	return ring->scratch.cpu_page[0];
}

static void
pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	ring->scratch.cpu_page[0] = seqno;
}

static bool
gen5_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0)
		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
gen5_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0)
		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
i9xx_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
i9xx_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
i8xx_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
i8xx_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

void intel_ring_setup_status_page(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		/*
		 * VCS2 actually doesn't exist on Gen7. Only shut up
		 * gcc switch check warning
		 */
		case VCS2:
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		case VECS:
			mmio = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(ring->dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		/* XXX: gen8 returns to sanity */
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);

	/*
	 * Flush the TLB for this page
	 *
	 * FIXME: These two bits have disappeared on gen8, so a question
	 * arises: do we still need this and if so how should we go about
	 * invalidating the TLB?
	 */
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
		u32 reg = RING_INSTPM(ring->mmio_base);

		/* ring should be idle before issuing a sync flush */
		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);

		I915_WRITE(reg,
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					      INSTPM_SYNC_FLUSH));
		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
			     1000))
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
				  ring->name);
	}
}

static int
bsd_ring_flush(struct intel_engine_cs *ring,
	       u32 invalidate_domains,
	       u32 flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
i9xx_add_request(struct intel_engine_cs *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	__intel_ring_advance(ring);

	return 0;
}

static bool
gen6_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring,
				       ~(ring->irq_enable_mask |
					 GT_PARITY_ERROR(dev)));
		else
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
gen6_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
		else
			I915_WRITE_IMR(ring, ~0);
		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
hsw_vebox_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
hsw_vebox_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		I915_WRITE_IMR(ring, ~0);
		gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
gen8_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS) {
			I915_WRITE_IMR(ring,
				       ~(ring->irq_enable_mask |
					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
		} else {
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		}
		POSTING_READ(RING_IMR(ring->mmio_base));
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
gen8_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS) {
			I915_WRITE_IMR(ring,
				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
		} else {
			I915_WRITE_IMR(ring, ~0);
		}
		POSTING_READ(RING_IMR(ring->mmio_base));
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}
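
/*
 * A standalone sketch (not part of this file) of the refcounted irq
 * pattern shared by the *_get_irq/*_put_irq pairs above: the interrupt
 * mask is only touched on the 0 -> 1 and 1 -> 0 transitions, so nested
 * users can overlap safely.
 */
#include <assert.h>

static int refcount, unmasked;

static void example_get_irq(void)
{
	if (refcount++ == 0)
		unmasked = 1;	/* unmask on first user */
}

static void example_put_irq(void)
{
	if (--refcount == 0)
		unmasked = 0;	/* mask again on last user */
}

int main(void)
{
	example_get_irq();
	example_get_irq();	/* nested user */
	example_put_irq();
	assert(unmasked);	/* still on: one user remains */
	example_put_irq();
	assert(!unmasked);
	return 0;
}
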
static int
i965_dispatch_execbuffer(struct intel_engine_cs *ring,
			 u64 offset, u32 length,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			MI_BATCH_GTT |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT (256*1024)
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
static int
i830_dispatch_execbuffer(struct intel_engine_cs *ring,
			 u64 offset, u32 len,
			 unsigned flags)
{
	u32 cs_offset = ring->scratch.gtt_offset;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	/* Evict the invalid PTE TLBs */
	intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
	intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
	intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
	intel_ring_emit(ring, cs_offset);
	intel_ring_emit(ring, 0xdeadbeef);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	if ((flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		ret = intel_ring_begin(ring, 6 + 2);
		if (ret)
			return ret;

		/* Blit the batch (which has now all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
		intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
		intel_ring_emit(ring, cs_offset);
		intel_ring_emit(ring, 4096);
		intel_ring_emit(ring, offset);

		intel_ring_emit(ring, MI_FLUSH);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);

		/* ... and execute it. */
		offset = cs_offset;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER);
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
	intel_ring_emit(ring, offset + len - 8);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
i915_dispatch_execbuffer(struct intel_engine_cs *ring,
			 u64 offset, u32 len,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	kunmap(sg_page(obj->pages->sgl));
	i915_gem_object_ggtt_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;
}

static int init_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_object *obj;

	if ((obj = ring->status_page.obj) == NULL) {
		unsigned flags;
		int ret;

		obj = i915_gem_alloc_object(ring->dev, 4096);
		if (obj == NULL) {
			DRM_ERROR("Failed to allocate status page\n");
			return -ENOMEM;
		}

		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
		if (ret)
			goto err_unref;

		flags = 0;
		if (!HAS_LLC(ring->dev))
			/* On g33, we cannot place HWS above 256MiB, so
			 * restrict its pinning to the low mappable arena.
			 * Though this restriction is not documented for
			 * gen4, gen5, or byt, they also behave similarly
			 * and hang if the HWS is placed at the top of the
			 * GTT. To generalise, it appears that all !llc
			 * platforms have issues with us placing the HWS
			 * above the mappable region (even though we never
			 * actually map it).
			 */
			flags |= PIN_MAPPABLE;
		ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
		if (ret) {
err_unref:
			drm_gem_object_unreference(&obj->base);
			return ret;
		}

		ring->status_page.obj = obj;
	}

	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
			 ring->name, ring->status_page.gfx_addr);

	return 0;
}

static int init_phys_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	if (!dev_priv->status_page_dmah) {
		dev_priv->status_page_dmah =
			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
		if (!dev_priv->status_page_dmah)
			return -ENOMEM;
	}

	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	return 0;
}

void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
{
	if (!ringbuf->obj)
		return;

	iounmap(ringbuf->virtual_start);
	i915_gem_object_ggtt_unpin(ringbuf->obj);
	drm_gem_object_unreference(&ringbuf->obj->base);
	ringbuf->obj = NULL;
}

int intel_alloc_ringbuffer_obj(struct drm_device *dev,
			       struct intel_ringbuffer *ringbuf)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_object *obj;
	int ret;

	if (ringbuf->obj)
		return 0;

	obj = NULL;
	if (!HAS_LLC(dev))
		obj = i915_gem_object_create_stolen(dev, ringbuf->size);
	if (obj == NULL)
		obj = i915_gem_alloc_object(dev, ringbuf->size);
	if (obj == NULL)
		return -ENOMEM;

	/* mark ring buffers as read-only from GPU side by default */
	obj->gt_ro = 1;

	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
	if (ret)
		goto err_unref;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto err_unpin;

	ringbuf->virtual_start =
		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
			   ringbuf->size);
	if (ringbuf->virtual_start == NULL) {
		ret = -EINVAL;
		goto err_unpin;
	}

	ringbuf->obj = obj;
	return 0;

err_unpin:
	i915_gem_object_ggtt_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	return ret;
}

static int intel_init_ring_buffer(struct drm_device *dev,
				  struct intel_engine_cs *ring)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	int ret;

	if (ringbuf == NULL) {
		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
		if (!ringbuf)
			return -ENOMEM;
		ring->buffer = ringbuf;
	}

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->execlist_queue);
	ringbuf->size = 32 * PAGE_SIZE;
	ringbuf->ring = ring;
	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));

	init_waitqueue_head(&ring->irq_queue);

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			goto error;
	} else {
		BUG_ON(ring->id != RCS);
		ret = init_phys_status_page(ring);
		if (ret)
			goto error;
	}

	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
	if (ret) {
		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret);
		goto error;
	}

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ringbuf->effective_size = ringbuf->size;
	if (IS_I830(dev) || IS_845G(dev))
		ringbuf->effective_size -= 2 * CACHELINE_BYTES;

	ret = i915_cmd_parser_init_ring(ring);
	if (ret)
		goto error;

	ret = ring->init(ring);
	if (ret)
		goto error;

	return 0;

error:
	kfree(ringbuf);
	ring->buffer = NULL;
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
	struct intel_ringbuffer *ringbuf = ring->buffer;

	if (!intel_ring_initialized(ring))
		return;

	intel_stop_ring_buffer(ring);
	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);

	intel_destroy_ringbuffer_obj(ringbuf);
	ring->preallocated_lazy_request = NULL;
	ring->outstanding_lazy_seqno = 0;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);

	i915_cmd_parser_fini_ring(ring);

	kfree(ringbuf);
	ring->buffer = NULL;
}

static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	if (ringbuf->last_retired_head != -1) {
		ringbuf->head = ringbuf->last_retired_head;
		ringbuf->last_retired_head = -1;

		ringbuf->space = intel_ring_space(ringbuf);
		if (ringbuf->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		if (__intel_ring_space(request->tail, ringbuf->tail,
				       ringbuf->size) >= n) {
			seqno = request->seqno;
			break;
		}
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	i915_gem_retire_requests_ring(ring);
	ringbuf->head = ringbuf->last_retired_head;
	ringbuf->last_retired_head = -1;

	ringbuf->space = intel_ring_space(ringbuf);
	return 0;
}

static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	unsigned long end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	/* force the tail write in case we have been skipping them */
	__intel_ring_advance(ring);

	/* With GEM the hangcheck timer should kick us out of the loop,
	 * leaving it early runs the risk of corrupting GEM state (due
	 * to running on almost untested codepaths). But on resume
	 * timers don't work yet, so prevent a complete hang in that
	 * case by choosing an insanely large timeout. */
	end = jiffies + 60 * HZ;

	trace_i915_ring_wait_begin(ring);
	do {
		ringbuf->head = I915_READ_HEAD(ring);
		ringbuf->space = intel_ring_space(ringbuf);
		if (ringbuf->space >= n) {
			ret = 0;
			break;
		}

		if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
		    dev->primary->master) {
			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
		}

		msleep(1);

		if (dev_priv->mm.interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
					   dev_priv->mm.interruptible);
		if (ret)
			break;

		if (time_after(jiffies, end)) {
			ret = -EBUSY;
			break;
		}
	} while (1);
	trace_i915_ring_wait_end(ring);
	return ret;
}

static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
{
	uint32_t __iomem *virt;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	int rem = ringbuf->size - ringbuf->tail;

	if (ringbuf->space < rem) {
		int ret = ring_wait_for_space(ring, rem);
		if (ret)
			return ret;
	}

	virt = ringbuf->virtual_start + ringbuf->tail;
	rem /= 4;
	while (rem--)
		iowrite32(MI_NOOP, virt++);

	ringbuf->tail = 0;
	ringbuf->space = intel_ring_space(ringbuf);

	return 0;
}

int intel_ring_idle(struct intel_engine_cs *ring)
{
	u32 seqno;
	int ret;

	/* We need to add any requests required to flush the objects and ring */
	if (ring->outstanding_lazy_seqno) {
		ret = i915_add_request(ring, NULL);
		if (ret)
			return ret;
	}

	/* Wait upon the last request to be completed */
	if (list_empty(&ring->request_list))
		return 0;

	seqno = list_entry(ring->request_list.prev,
			   struct drm_i915_gem_request,
			   list)->seqno;

	return i915_wait_seqno(ring, seqno);
}

static int
intel_ring_alloc_seqno(struct intel_engine_cs *ring)
{
	if (ring->outstanding_lazy_seqno)
		return 0;

	if (ring->preallocated_lazy_request == NULL) {
		struct drm_i915_gem_request *request;

		request = kmalloc(sizeof(*request), GFP_KERNEL);
		if (request == NULL)
			return -ENOMEM;

		ring->preallocated_lazy_request = request;
	}

	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
}
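
/*
 * For orientation, every emitter in this file follows the same contract
 * around intel_ring_begin() below: reserve a fixed number of dwords
 * (which may wait or wrap via __intel_ring_prepare()), write exactly
 * that many with intel_ring_emit(), then publish the new tail with
 * intel_ring_advance().  A hypothetical minimal caller, sketched for
 * illustration only:
 */
static int example_emit_noops(struct intel_engine_cs *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 2);	/* may wait for space */
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_NOOP);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);		/* publish new tail */

	return 0;
}
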
static int __intel_ring_prepare(struct intel_engine_cs *ring,
				int bytes)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	int ret;

	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
		ret = intel_wrap_ring_buffer(ring);
		if (unlikely(ret))
			return ret;
	}

	if (unlikely(ringbuf->space < bytes)) {
		ret = ring_wait_for_space(ring, bytes);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int intel_ring_begin(struct intel_engine_cs *ring,
		     int num_dwords)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int ret;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
				   dev_priv->mm.interruptible);
	if (ret)
		return ret;

	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
	if (ret)
		return ret;

	/* Preallocate the olr (outstanding lazy request) before touching the ring */
	ret = intel_ring_alloc_seqno(ring);
	if (ret)
		return ret;

	ring->buffer->space -= num_dwords * sizeof(uint32_t);
	return 0;
}

/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct intel_engine_cs *ring)
{
	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
	int ret;

	if (num_dwords == 0)
		return 0;

	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
	ret = intel_ring_begin(ring, num_dwords);
	if (ret)
		return ret;

	while (num_dwords--)
		intel_ring_emit(ring, MI_NOOP);

	intel_ring_advance(ring);

	return 0;
}

void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	BUG_ON(ring->outstanding_lazy_seqno);

	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
		if (HAS_VEBOX(dev))
			I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
	}

	ring->set_seqno(ring, seqno);
	ring->hangcheck.seqno = seqno;
}

static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
				     u32 value)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
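	 *
	 * (Editor's note: the complete sequence is therefore: mask the
	 * ring's IDLE messages so the GT treats it as busy and brings it
	 * out of rc6, wait for GEN6_BSD_SLEEP_INDICATOR to clear, write
	 * TAIL with a posting read, then unmask IDLE messages so the ring
	 * can sleep again.)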
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}

static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
			       u32 invalidate, u32 flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (INTEL_INFO(ring->dev)->gen >= 8)
		cmd += 1;
	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	if (INTEL_INFO(ring->dev)->gen >= 8) {
		intel_ring_emit(ring, 0); /* upper addr */
		intel_ring_emit(ring, 0); /* value */
	} else {
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, MI_NOOP);
	}
	intel_ring_advance(ring);
	return 0;
}

static int
gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
			      u64 offset, u32 len,
			      unsigned flags)
{
	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	/* FIXME(BDW): Address space and security selectors. */
	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
	intel_ring_emit(ring, lower_32_bits(offset));
	intel_ring_emit(ring, upper_32_bits(offset));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
			     u64 offset, u32 len,
			     unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			(flags & I915_DISPATCH_SECURE ?
			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
			      u64 offset, u32 len,
			      unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Blitter support (SandyBridge+) */

static int gen6_ring_flush(struct intel_engine_cs *ring,
			   u32 invalidate, u32 flush)
{
	struct drm_device *dev = ring->dev;
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (INTEL_INFO(ring->dev)->gen >= 8)
		cmd += 1;
	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
			MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	if (INTEL_INFO(ring->dev)->gen >= 8) {
		intel_ring_emit(ring, 0); /* upper addr */
		intel_ring_emit(ring, 0); /* value */
	} else {
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, MI_NOOP);
	}
	intel_ring_advance(ring);

	if (IS_GEN7(dev) && !invalidate && flush)
		return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);

	return 0;
}

int intel_init_render_ring_buffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
	struct drm_i915_gem_object *obj;
	int ret;

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 8) {
		if (i915_semaphore_is_enabled(dev)) {
			obj = i915_gem_alloc_object(dev, 4096);
			if (obj == NULL) {
				DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
				i915.semaphores = 0;
			} else {
				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
				ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
				if (ret != 0) {
					drm_gem_object_unreference(&obj->base);
					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
					i915.semaphores = 0;
				} else
					dev_priv->semaphore_obj = obj;
			}
		}
		if (IS_CHERRYVIEW(dev))
			ring->init_context = chv_init_workarounds;
		else
			ring->init_context = bdw_init_workarounds;
		ring->add_request = gen6_add_request;
		ring->flush = gen8_render_ring_flush;
		ring->irq_get = gen8_ring_get_irq;
		ring->irq_put = gen8_ring_put_irq;
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->set_seqno = ring_set_seqno;
		if (i915_semaphore_is_enabled(dev)) {
			WARN_ON(!dev_priv->semaphore_obj);
			ring->semaphore.sync_to = gen8_ring_sync;
			ring->semaphore.signal = gen8_rcs_signal;
			GEN8_RING_SEMAPHORE_INIT;
		}
	} else if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->flush = gen7_render_ring_flush;
		if (INTEL_INFO(dev)->gen == 6)
			ring->flush = gen6_render_ring_flush;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->set_seqno = ring_set_seqno;
		if (i915_semaphore_is_enabled(dev)) {
			ring->semaphore.sync_to = gen6_ring_sync;
			ring->semaphore.signal = gen6_signal;
			/*
			 * These semaphores are only used on pre-gen8
			 * platforms, and there is no VCS2 ring there, so
			 * the semaphore between RCS and VCS2 is initialized
			 * as INVALID. Gen8 initializes the semaphore
			 * between VCS2 and RCS separately.
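			 *
			 * (Editor's note: in the table below, each wait[]
			 * entry is the MI_SEMAPHORE_SYNC_* selector used
			 * when synchronizing with that ring, and each
			 * signal[] entry is the GEN6_*SYNC mailbox register
			 * used to signal it; INVALID/NOSYNC entries mark
			 * pairings that are never synchronized.)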
			 */
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
			ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
			ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
			ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
		}
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->flush = gen4_render_ring_flush;
		ring->get_seqno = pc_render_get_seqno;
		ring->set_seqno = pc_render_set_seqno;
		ring->irq_get = gen5_ring_get_irq;
		ring->irq_put = gen5_ring_put_irq;
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
					GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
	} else {
		ring->add_request = i9xx_add_request;
		if (INTEL_INFO(dev)->gen < 4)
			ring->flush = gen2_render_ring_flush;
		else
			ring->flush = gen4_render_ring_flush;
		ring->get_seqno = ring_get_seqno;
		ring->set_seqno = ring_set_seqno;
		if (IS_GEN2(dev)) {
			ring->irq_get = i8xx_ring_get_irq;
			ring->irq_put = i8xx_ring_put_irq;
		} else {
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->irq_enable_mask = I915_USER_INTERRUPT;
	}
	ring->write_tail = ring_write_tail;

	if (IS_HASWELL(dev))
		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
	else if (IS_GEN8(dev))
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 6)
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	/* Workaround batchbuffer to combat the CS TLB bug.
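	 *
	 * (Editor's note: the scratch bo allocated below is used by
	 * i830_dispatch_execbuffer, selected above for these parts:
	 * batches are first copied into this permanently pinned buffer
	 * and executed from there, and oversized batches are rejected.)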
	 */
	if (HAS_BROKEN_CS_TLB(dev)) {
		obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
		if (obj == NULL) {
			DRM_ERROR("Failed to allocate batch bo\n");
			return -ENOMEM;
		}

		ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
		if (ret != 0) {
			drm_gem_object_unreference(&obj->base);
			DRM_ERROR("Failed to pin batch bo\n");
			return ret;
		}

		ring->scratch.obj = obj;
		ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
	}

	return intel_init_ring_buffer(dev, ring);
}

int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
	struct intel_ringbuffer *ringbuf = ring->buffer;
	int ret;

	if (ringbuf == NULL) {
		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
		if (!ringbuf)
			return -ENOMEM;
		ring->buffer = ringbuf;
	}

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 6) {
		/* non-kms not supported on gen6+ */
		ret = -ENODEV;
		goto err_ringbuf;
	}

	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
	 * the special gen5 functions. */
	ring->add_request = i9xx_add_request;
	if (INTEL_INFO(dev)->gen < 4)
		ring->flush = gen2_render_ring_flush;
	else
		ring->flush = gen4_render_ring_flush;
	ring->get_seqno = ring_get_seqno;
	ring->set_seqno = ring_set_seqno;
	if (IS_GEN2(dev)) {
		ring->irq_get = i8xx_ring_get_irq;
		ring->irq_put = i8xx_ring_put_irq;
	} else {
		ring->irq_get = i9xx_ring_get_irq;
		ring->irq_put = i9xx_ring_put_irq;
	}
	ring->irq_enable_mask = I915_USER_INTERRUPT;
	ring->write_tail = ring_write_tail;
	if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);

	ringbuf->size = size;
	ringbuf->effective_size = ringbuf->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ringbuf->effective_size -= 2 * CACHELINE_BYTES;

	ringbuf->virtual_start = ioremap_wc(start, size);
	if (ringbuf->virtual_start == NULL) {
		DRM_ERROR("cannot ioremap virtual address for ring buffer\n");
		ret = -ENOMEM;
		goto err_ringbuf;
	}

	if (!I915_NEED_GFX_HWS(dev)) {
		ret = init_phys_status_page(ring);
		if (ret)
			goto err_vstart;
	}

	return 0;

err_vstart:
	iounmap(ringbuf->virtual_start);
err_ringbuf:
	kfree(ringbuf);
	ring->buffer = NULL;
	return ret;
}

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[VCS];

	ring->name = "bsd ring";
	ring->id = VCS;

	ring->write_tail = ring_write_tail;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->mmio_base = GEN6_BSD_RING_BASE;
		/* gen6 bsd needs a special wa for tail updates */
		if (IS_GEN6(dev))
			ring->write_tail = gen6_bsd_ring_write_tail;
		ring->flush = gen6_bsd_ring_flush;
		ring->add_request = gen6_add_request;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->set_seqno = ring_set_seqno;
		if (INTEL_INFO(dev)->gen >= 8) {
			ring->irq_enable_mask =
				GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
			ring->irq_get = gen8_ring_get_irq;
			ring->irq_put = gen8_ring_put_irq;
			ring->dispatch_execbuffer =
				gen8_ring_dispatch_execbuffer;
			if (i915_semaphore_is_enabled(dev)) {
				ring->semaphore.sync_to = gen8_ring_sync;
				ring->semaphore.signal = gen8_xcs_signal;
				GEN8_RING_SEMAPHORE_INIT;
			}
		} else {
			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
			ring->irq_get = gen6_ring_get_irq;
			ring->irq_put = gen6_ring_put_irq;
			ring->dispatch_execbuffer =
				gen6_ring_dispatch_execbuffer;
			if (i915_semaphore_is_enabled(dev)) {
				ring->semaphore.sync_to = gen6_ring_sync;
				ring->semaphore.signal = gen6_signal;
				ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
				ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
				ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
				ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
				ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
				ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
				ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
				ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
				ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
				ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
			}
		}
	} else {
		ring->mmio_base = BSD_RING_BASE;
		ring->flush = bsd_ring_flush;
		ring->add_request = i9xx_add_request;
		ring->get_seqno = ring_get_seqno;
		ring->set_seqno = ring_set_seqno;
		if (IS_GEN5(dev)) {
			ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
			ring->irq_get = gen5_ring_get_irq;
			ring->irq_put = gen5_ring_put_irq;
		} else {
			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

/**
 * Initialize the second BSD ring for Broadwell GT3; this ring exists
 * only on Broadwell GT3.
 */
int intel_init_bsd2_ring_buffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];

	if (INTEL_INFO(dev)->gen != 8) {
		DRM_ERROR("No dual-BSD ring on non-BDW machine\n");
		return -EINVAL;
	}

	ring->name = "bsd2 ring";
	ring->id = VCS2;

	ring->write_tail = ring_write_tail;
	ring->mmio_base = GEN8_BSD2_RING_BASE;
	ring->flush = gen6_bsd_ring_flush;
	ring->add_request = gen6_add_request;
	ring->get_seqno = gen6_ring_get_seqno;
	ring->set_seqno = ring_set_seqno;
	ring->irq_enable_mask =
		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
	ring->irq_get = gen8_ring_get_irq;
	ring->irq_put = gen8_ring_put_irq;
	ring->dispatch_execbuffer =
		gen8_ring_dispatch_execbuffer;
	if (i915_semaphore_is_enabled(dev)) {
		ring->semaphore.sync_to = gen8_ring_sync;
		ring->semaphore.signal = gen8_xcs_signal;
		GEN8_RING_SEMAPHORE_INIT;
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[BCS];

	ring->name = "blitter ring";
	ring->id = BCS;

	ring->mmio_base = BLT_RING_BASE;
	ring->write_tail = ring_write_tail;
	ring->flush = gen6_ring_flush;
	ring->add_request = gen6_add_request;
	ring->get_seqno = gen6_ring_get_seqno;
	ring->set_seqno = ring_set_seqno;
	if (INTEL_INFO(dev)->gen >= 8) {
		ring->irq_enable_mask =
			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
		ring->irq_get = gen8_ring_get_irq;
		ring->irq_put = gen8_ring_put_irq;
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
		if (i915_semaphore_is_enabled(dev)) {
			ring->semaphore.sync_to = gen8_ring_sync;
			ring->semaphore.signal = gen8_xcs_signal;
			GEN8_RING_SEMAPHORE_INIT;
		}
	} else {
		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
		if (i915_semaphore_is_enabled(dev)) {
			ring->semaphore.signal = gen6_signal;
			ring->semaphore.sync_to = gen6_ring_sync;
			/*
			 * These semaphores are only used on pre-gen8
			 * platforms, and there is no VCS2 ring there, so
			 * the semaphore between BCS and VCS2 is initialized
			 * as INVALID. Gen8 initializes the semaphore
			 * between BCS and VCS2 separately.
			 */
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
			ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
			ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
			ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
		}
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_vebox_ring_buffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[VECS];

	ring->name = "video enhancement ring";
	ring->id = VECS;

	ring->mmio_base = VEBOX_RING_BASE;
	ring->write_tail = ring_write_tail;
	ring->flush = gen6_ring_flush;
	ring->add_request = gen6_add_request;
	ring->get_seqno = gen6_ring_get_seqno;
	ring->set_seqno = ring_set_seqno;

	if (INTEL_INFO(dev)->gen >= 8) {
		ring->irq_enable_mask =
			GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
		ring->irq_get = gen8_ring_get_irq;
		ring->irq_put = gen8_ring_put_irq;
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
		if (i915_semaphore_is_enabled(dev)) {
			ring->semaphore.sync_to = gen8_ring_sync;
			ring->semaphore.signal = gen8_xcs_signal;
			GEN8_RING_SEMAPHORE_INIT;
		}
	} else {
		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
		ring->irq_get = hsw_vebox_get_irq;
		ring->irq_put = hsw_vebox_put_irq;
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
		if (i915_semaphore_is_enabled(dev)) {
			ring->semaphore.sync_to = gen6_ring_sync;
			ring->semaphore.signal = gen6_signal;
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
			ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
			ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
			ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
		}
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int
intel_ring_flush_all_caches(struct intel_engine_cs *ring)
{
	int ret;

	if (!ring->gpu_caches_dirty)
		return 0;

	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);

	ring->gpu_caches_dirty = false;
	return 0;
}

int
intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
{
	uint32_t flush_domains;
	int ret;

	flush_domains = 0;
	if (ring->gpu_caches_dirty)
		flush_domains = I915_GEM_GPU_DOMAINS;

	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
	if (ret)
		return ret;

	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);

	ring->gpu_caches_dirty = false;
	return 0;
}

void
intel_stop_ring_buffer(struct intel_engine_cs *ring)
{
	int ret;

	if (!intel_ring_initialized(ring))
		return;

	ret = intel_ring_idle(ring);
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);

	stop_ring(ring);
}
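
/*
 * Editor's illustration (not part of the driver): the caller-side pattern
 * for the emit API used throughout this file. intel_ring_begin() must be
 * passed the exact number of dwords subsequently written with
 * intel_ring_emit(); intel_ring_advance() closes out the emission (the
 * hardware TAIL write itself happens later, e.g. via __intel_ring_advance()).
 * The helper name is hypothetical and the block is never compiled.
 */
#if 0
static int example_emit_flush(struct intel_engine_cs *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 2);	/* reserve two dwords */
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);	/* dword 0 */
	intel_ring_emit(ring, MI_NOOP);		/* dword 1 */
	intel_ring_advance(ring);

	return 0;
}
#endif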