/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao <haihao.xiang@intel.com>
 *
 */

#include <drm/drmP.h>
#include "i915_drv.h"
#include <drm/i915_drm.h>
#include "i915_trace.h"
#include "intel_drv.h"

/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
 */
struct pipe_control {
	struct drm_i915_gem_object *obj;
	volatile u32 *cpu_page;
	u32 gtt_offset;
};

static inline int ring_space(struct intel_ring_buffer *ring)
{
	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
	if (space < 0)
		space += ring->size;
	return space;
}

static int
gen2_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	u32 cmd;
	int ret;

	cmd = MI_FLUSH;
	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
		cmd |= MI_NO_WRITE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
		cmd |= MI_READ_FLUSH;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen4_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	struct drm_device *dev = ring->dev;
	u32 cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;


	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen7_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE;

		/* Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set. */
		gen7_render_ring_cs_stall_wa(ring);
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static void ring_write_tail(struct intel_ring_buffer *ring,
			    u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
			RING_ACTHD(ring->mmio_base) : ACTHD;

	return I915_READ(acthd_reg);
}

static int init_ring_common(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = ring->obj;
	int ret = 0;
	u32 head;

	if (HAS_FORCE_WAKE(dev))
		gen6_gt_force_wake_get(dev_priv);

	/* Stop the ring if it's running. */
	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	head = I915_READ_HEAD(ring) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_DEBUG_KMS("%s head not reset to zero "
			      "ctl %08x head %08x tail %08x start %08x\n",
			      ring->name,
			      I915_READ_CTL(ring),
			      I915_READ_HEAD(ring),
			      I915_READ_TAIL(ring),
			      I915_READ_START(ring));

		I915_WRITE_HEAD(ring, 0);

		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
		}
	}

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values. */
	I915_WRITE_START(ring, obj->gtt_offset);
	I915_WRITE_CTL(ring,
			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
		     I915_READ_START(ring) == obj->gtt_offset &&
		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));
		ret = -EIO;
		goto out;
	}

	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
	else {
		ring->head = I915_READ_HEAD(ring);
		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
		ring->space = ring_space(ring);
		ring->last_retired_head = -1;
	}

out:
	if (HAS_FORCE_WAKE(dev))
		gen6_gt_force_wake_put(dev_priv);

	return ret;
}

static int
init_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc;
	struct drm_i915_gem_object *obj;
	int ret;

	if (ring->private)
		return 0;

	pc = kmalloc(sizeof(*pc), GFP_KERNEL);
	if (!pc)
		return -ENOMEM;

	obj = i915_gem_alloc_object(ring->dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true, false);
	if (ret)
		goto err_unref;

	pc->gtt_offset = obj->gtt_offset;
	pc->cpu_page = kmap(sg_page(obj->pages->sgl));
	if (pc->cpu_page == NULL)
		goto err_unpin;

	pc->obj = obj;
	ring->private = pc;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	kfree(pc);
	return ret;
}

static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	struct drm_i915_gem_object *obj;

	if (!ring->private)
		return;

	obj = pc->obj;

	kunmap(sg_page(obj->pages->sgl));
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);

	kfree(pc);
	ring->private = NULL;
}

static int init_render_ring(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);

	if (INTEL_INFO(dev)->gen > 3)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products. */
514 */ 515 if (INTEL_INFO(dev)->gen >= 6) 516 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); 517 518 /* Required for the hardware to program scanline values for waiting */ 519 if (INTEL_INFO(dev)->gen == 6) 520 I915_WRITE(GFX_MODE, 521 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS)); 522 523 if (IS_GEN7(dev)) 524 I915_WRITE(GFX_MODE_GEN7, 525 _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) | 526 _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); 527 528 if (INTEL_INFO(dev)->gen >= 5) { 529 ret = init_pipe_control(ring); 530 if (ret) 531 return ret; 532 } 533 534 if (IS_GEN6(dev)) { 535 /* From the Sandybridge PRM, volume 1 part 3, page 24: 536 * "If this bit is set, STCunit will have LRA as replacement 537 * policy. [...] This bit must be reset. LRA replacement 538 * policy is not supported." 539 */ 540 I915_WRITE(CACHE_MODE_0, 541 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 542 543 /* This is not explicitly set for GEN6, so read the register. 544 * see intel_ring_mi_set_context() for why we care. 545 * TODO: consider explicitly setting the bit for GEN5 546 */ 547 ring->itlb_before_ctx_switch = 548 !!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS); 549 } 550 551 if (INTEL_INFO(dev)->gen >= 6) 552 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); 553 554 if (HAS_L3_GPU_CACHE(dev)) 555 I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); 556 557 return ret; 558} 559 560static void render_ring_cleanup(struct intel_ring_buffer *ring) 561{ 562 struct drm_device *dev = ring->dev; 563 564 if (!ring->private) 565 return; 566 567 if (HAS_BROKEN_CS_TLB(dev)) 568 drm_gem_object_unreference(to_gem_object(ring->private)); 569 570 cleanup_pipe_control(ring); 571} 572 573static void 574update_mboxes(struct intel_ring_buffer *ring, 575 u32 mmio_offset) 576{ 577 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 578 intel_ring_emit(ring, mmio_offset); 579 intel_ring_emit(ring, ring->outstanding_lazy_request); 580} 581 582/** 583 * gen6_add_request - Update the semaphore mailbox registers 584 * 585 * @ring - ring that is adding a request 586 * @seqno - return seqno stuck into the ring 587 * 588 * Update the mailbox registers in the *other* rings with the current seqno. 589 * This acts like a signal in the canonical semaphore. 590 */ 591static int 592gen6_add_request(struct intel_ring_buffer *ring) 593{ 594 u32 mbox1_reg; 595 u32 mbox2_reg; 596 int ret; 597 598 ret = intel_ring_begin(ring, 10); 599 if (ret) 600 return ret; 601 602 mbox1_reg = ring->signal_mbox[0]; 603 mbox2_reg = ring->signal_mbox[1]; 604 605 update_mboxes(ring, mbox1_reg); 606 update_mboxes(ring, mbox2_reg); 607 intel_ring_emit(ring, MI_STORE_DWORD_INDEX); 608 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 609 intel_ring_emit(ring, ring->outstanding_lazy_request); 610 intel_ring_emit(ring, MI_USER_INTERRUPT); 611 intel_ring_advance(ring); 612 613 return 0; 614} 615 616/** 617 * intel_ring_sync - sync the waiter to the signaller on seqno 618 * 619 * @waiter - ring that is waiting 620 * @signaller - ring which has, or will signal 621 * @seqno - seqno which the waiter will block on 622 */ 623static int 624gen6_ring_sync(struct intel_ring_buffer *waiter, 625 struct intel_ring_buffer *signaller, 626 u32 seqno) 627{ 628 int ret; 629 u32 dw1 = MI_SEMAPHORE_MBOX | 630 MI_SEMAPHORE_COMPARE | 631 MI_SEMAPHORE_REGISTER; 632 633 /* Throughout all of the GEM code, seqno passed implies our current 634 * seqno is >= the last seqno executed. 
	 */
	seqno -= 1;

	WARN_ON(signaller->semaphore_register[waiter->id] ==
		MI_SEMAPHORE_SYNC_INVALID);

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter,
			dw1 | signaller->semaphore_register[waiter->id]);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter, 0);
	intel_ring_emit(waiter, MI_NOOP);
	intel_ring_advance(waiter);

	return 0;
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
		 PIPE_CONTROL_DEPTH_STALL);				\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)

static int
pc_render_add_request(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
	if (!lazy_coherency)
		intel_ring_get_active_head(ring);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	struct pipe_control *pc = ring->private;
	return pc->cpu_page[0];
}

static bool
gen5_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
gen5_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
i9xx_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
i9xx_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
i8xx_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
i8xx_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	u32 mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(ring->dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);
}

static int
bsd_ring_flush(struct intel_ring_buffer *ring,
	       u32 invalidate_domains,
	       u32 flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
i9xx_add_request(struct intel_ring_buffer *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_request);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}

static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!dev->irq_enabled)
		return false;

	/* It looks like we need to prevent the gt from suspending while waiting
	 * for a notify irq, otherwise irqs seem to get lost on at least the
	 * blt/bsd rings on ivb. */
	gen6_gt_force_wake_get(dev_priv);

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
						GEN6_RENDER_L3_PARITY_ERROR));
		else
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
gen6_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
		else
			I915_WRITE_IMR(ring, ~0);
		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	gen6_gt_force_wake_put(dev_priv);
}

static int
i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 length,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			MI_BATCH_GTT |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT (256*1024)
static int
i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 len,
			 unsigned flags)
{
	int ret;

	if (flags & I915_DISPATCH_PINNED) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
		intel_ring_emit(ring, offset + len - 8);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	} else {
		struct drm_i915_gem_object *obj = ring->private;
		u32 cs_offset = obj->gtt_offset;

		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		ret = intel_ring_begin(ring, 9+3);
		if (ret)
			return ret;
		/* Blit the batch (which has now all relocs applied) to the stable batch
		 * scratch bo area (so that the CS never stumbles over its tlb
		 * invalidation bug) ... */
		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
				XY_SRC_COPY_BLT_WRITE_ALPHA |
				XY_SRC_COPY_BLT_WRITE_RGB);
		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
		intel_ring_emit(ring, cs_offset);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 4096);
		intel_ring_emit(ring, offset);
		intel_ring_emit(ring, MI_FLUSH);

		/* ... and execute it. */
		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
		intel_ring_emit(ring, cs_offset + len - 8);
		intel_ring_advance(ring);
	}

	return 0;
}

static int
i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 len,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	kunmap(sg_page(obj->pages->sgl));
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;
}

static int init_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true, false);
	if (ret != 0) {
		goto err_unref;
	}

	ring->status_page.gfx_addr = obj->gtt_offset;
	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
	if (ring->status_page.page_addr == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}
	ring->status_page.obj = obj;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	intel_ring_setup_status_page(ring);
	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
			 ring->name, ring->status_page.gfx_addr);

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	return ret;
}

static int init_phys_hws_pga(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 addr;

	if (!dev_priv->status_page_dmah) {
		dev_priv->status_page_dmah =
			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
		if (!dev_priv->status_page_dmah)
			return -ENOMEM;
	}

	addr = dev_priv->status_page_dmah->busaddr;
	if (INTEL_INFO(ring->dev)->gen >= 4)
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
	I915_WRITE(HWS_PGA, addr);

	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	return 0;
}

static int intel_init_ring_buffer(struct drm_device *dev,
				  struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	ring->size = 32 * PAGE_SIZE;
	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));

	init_waitqueue_head(&ring->irq_queue);

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			return ret;
	} else {
		BUG_ON(ring->id != RCS);
		ret = init_phys_hws_pga(ring);
		if (ret)
			return ret;
	}

	obj = i915_gem_alloc_object(dev, ring->size);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		ret = -ENOMEM;
		goto err_hws;
	}

	ring->obj = obj;

	ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
	if (ret)
		goto err_unref;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto err_unpin;

	ring->virtual_start =
		ioremap_wc(dev_priv->mm.gtt->gma_bus_addr + obj->gtt_offset,
			   ring->size);
	if (ring->virtual_start == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		ret = -EINVAL;
		goto err_unpin;
	}

	ret = ring->init(ring);
	if (ret)
		goto err_unmap;

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	return 0;

err_unmap:
	iounmap(ring->virtual_start);
err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	ring->obj = NULL;
err_hws:
	cleanup_status_page(ring);
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv;
	int ret;

	if (ring->obj == NULL)
		return;

	/* Disable the ring buffer. The ring must be idle at this point */
	dev_priv = ring->dev->dev_private;
	ret = intel_ring_idle(ring);
	if (ret)
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);

	I915_WRITE_CTL(ring, 0);

	iounmap(ring->virtual_start);

	i915_gem_object_unpin(ring->obj);
	drm_gem_object_unreference(&ring->obj->base);
	ring->obj = NULL;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);
}

static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	ret = i915_wait_seqno(ring, seqno);
	if (!ret)
		i915_gem_retire_requests_ring(ring);

	return ret;
}

static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
{
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	i915_gem_retire_requests_ring(ring);

	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
		ring->space = ring_space(ring);
		if (ring->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		int space;

		if (request->tail == -1)
			continue;

		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
		if (space < 0)
			space += ring->size;
		if (space >= n) {
			seqno = request->seqno;
			break;
		}

		/* Consume this request in case we need more space than
		 * is available and so need to prevent a race between
		 * updating last_retired_head and direct reads of
		 * I915_RING_HEAD. It also provides a nice sanity check.
		 */
		request->tail = -1;
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = intel_ring_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	if (WARN_ON(ring->last_retired_head == -1))
		return -ENOSPC;

	ring->head = ring->last_retired_head;
	ring->last_retired_head = -1;
	ring->space = ring_space(ring);
	if (WARN_ON(ring->space < n))
		return -ENOSPC;

	return 0;
}

static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	trace_i915_ring_wait_begin(ring);
	/* With GEM the hangcheck timer should kick us out of the loop,
	 * leaving it early runs the risk of corrupting GEM state (due
	 * to running on almost untested codepaths). But on resume
	 * timers don't work yet, so prevent a complete hang in that
	 * case by choosing an insanely large timeout. */
	end = jiffies + 60 * HZ;

	do {
		ring->head = I915_READ_HEAD(ring);
		ring->space = ring_space(ring);
		if (ring->space >= n) {
			trace_i915_ring_wait_end(ring);
			return 0;
		}

		if (dev->primary->master) {
			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
		}

		msleep(1);

		ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
		if (ret)
			return ret;
	} while (!time_after(jiffies, end));
	trace_i915_ring_wait_end(ring);
	return -EBUSY;
}

static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
{
	uint32_t __iomem *virt;
	int rem = ring->size - ring->tail;

	if (ring->space < rem) {
		int ret = ring_wait_for_space(ring, rem);
		if (ret)
			return ret;
	}

	virt = ring->virtual_start + ring->tail;
	rem /= 4;
	while (rem--)
		iowrite32(MI_NOOP, virt++);

	ring->tail = 0;
	ring->space = ring_space(ring);

	return 0;
}

int intel_ring_idle(struct intel_ring_buffer *ring)
{
	u32 seqno;
	int ret;

	/* We need to add any requests required to flush the objects and ring */
	if (ring->outstanding_lazy_request) {
		ret = i915_add_request(ring, NULL, NULL);
		if (ret)
			return ret;
	}

	/* Wait upon the last request to be completed */
	if (list_empty(&ring->request_list))
		return 0;

	seqno = list_entry(ring->request_list.prev,
			   struct drm_i915_gem_request,
			   list)->seqno;

	return i915_wait_seqno(ring, seqno);
}

static int
intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
{
	if (ring->outstanding_lazy_request)
		return 0;

	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
}

int intel_ring_begin(struct intel_ring_buffer *ring,
		     int num_dwords)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	int n = 4*num_dwords;
	int ret;

	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
	if (ret)
		return ret;

	/* Preallocate the olr before touching the ring */
	ret = intel_ring_alloc_seqno(ring);
	if (ret)
		return ret;

	if (unlikely(ring->tail + n > ring->effective_size)) {
		ret = intel_wrap_ring_buffer(ring);
		if (unlikely(ret))
			return ret;
	}

	if (unlikely(ring->space < n)) {
		ret = ring_wait_for_space(ring, n);
		if (unlikely(ret))
			return ret;
	}

	ring->space -= n;
	return 0;
}

void intel_ring_advance(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	ring->tail &= ring->size - 1;
	if (dev_priv->stop_rings & intel_ring_flag(ring))
		return;
	ring->write_tail(ring, ring->tail);
}


static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
				     u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}

static int gen6_ring_flush(struct intel_ring_buffer *ring,
			   u32 invalidate, u32 flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			     u32 offset, u32 len,
			     unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			      u32 offset, u32 len,
			      unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Blitter support (SandyBridge+) */

static int blt_ring_flush(struct intel_ring_buffer *ring,
			  u32 invalidate, u32 flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
			MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

int intel_init_render_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->flush = gen7_render_ring_flush;
		if (INTEL_INFO(dev)->gen == 6)
			ring->flush = gen6_render_ring_flush;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->irq_enable_mask = GT_USER_INTERRUPT;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->sync_to = gen6_ring_sync;
		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
		ring->signal_mbox[0] = GEN6_VRSYNC;
		ring->signal_mbox[1] = GEN6_BRSYNC;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->flush = gen4_render_ring_flush;
		ring->get_seqno = pc_render_get_seqno;
		ring->irq_get = gen5_ring_get_irq;
		ring->irq_put = gen5_ring_put_irq;
		ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
	} else {
		ring->add_request = i9xx_add_request;
		if (INTEL_INFO(dev)->gen < 4)
			ring->flush = gen2_render_ring_flush;
		else
			ring->flush = gen4_render_ring_flush;
		ring->get_seqno = ring_get_seqno;
		if (IS_GEN2(dev)) {
			ring->irq_get = i8xx_ring_get_irq;
			ring->irq_put = i8xx_ring_put_irq;
		} else {
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->irq_enable_mask = I915_USER_INTERRUPT;
	}
	ring->write_tail = ring_write_tail;
	if (IS_HASWELL(dev))
		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 6)
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	/* Workaround batchbuffer to combat CS tlb bug. */
	if (HAS_BROKEN_CS_TLB(dev)) {
		struct drm_i915_gem_object *obj;
		int ret;

		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
		if (obj == NULL) {
			DRM_ERROR("Failed to allocate batch bo\n");
			return -ENOMEM;
		}

		ret = i915_gem_object_pin(obj, 0, true, false);
		if (ret != 0) {
			drm_gem_object_unreference(&obj->base);
			DRM_ERROR("Failed to pin batch bo\n");
			return ret;
		}

		ring->private = obj;
	}

	return intel_init_ring_buffer(dev, ring);
}

int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
	int ret;

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 6) {
		/* non-kms not supported on gen6+ */
		return -ENODEV;
	}

	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
	 * the special gen5 functions. */
	ring->add_request = i9xx_add_request;
	if (INTEL_INFO(dev)->gen < 4)
		ring->flush = gen2_render_ring_flush;
	else
		ring->flush = gen4_render_ring_flush;
	ring->get_seqno = ring_get_seqno;
	if (IS_GEN2(dev)) {
		ring->irq_get = i8xx_ring_get_irq;
		ring->irq_put = i8xx_ring_put_irq;
	} else {
		ring->irq_get = i9xx_ring_get_irq;
		ring->irq_put = i9xx_ring_put_irq;
	}
	ring->irq_enable_mask = I915_USER_INTERRUPT;
	ring->write_tail = ring_write_tail;
	if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);

	ring->size = size;
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	ring->virtual_start = ioremap_wc(start, size);
	if (ring->virtual_start == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
			  " ring buffer\n");
		return -ENOMEM;
	}

	if (!I915_NEED_GFX_HWS(dev)) {
		ret = init_phys_hws_pga(ring);
		if (ret)
			return ret;
	}

	return 0;
}

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];

	ring->name = "bsd ring";
	ring->id = VCS;

	ring->write_tail = ring_write_tail;
	if (IS_GEN6(dev) || IS_GEN7(dev)) {
		ring->mmio_base = GEN6_BSD_RING_BASE;
		/* gen6 bsd needs a special wa for tail updates */
		if (IS_GEN6(dev))
			ring->write_tail = gen6_bsd_ring_write_tail;
		ring->flush = gen6_ring_flush;
		ring->add_request = gen6_add_request;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
		ring->sync_to = gen6_ring_sync;
		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
		ring->signal_mbox[0] = GEN6_RVSYNC;
		ring->signal_mbox[1] = GEN6_BVSYNC;
	} else {
		ring->mmio_base = BSD_RING_BASE;
		ring->flush = bsd_ring_flush;
		ring->add_request = i9xx_add_request;
		ring->get_seqno = ring_get_seqno;
		if (IS_GEN5(dev)) {
			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
			ring->irq_get = gen5_ring_get_irq;
			ring->irq_put = gen5_ring_put_irq;
		} else {
			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];

	ring->name = "blitter ring";
	ring->id = BCS;

	ring->mmio_base = BLT_RING_BASE;
	ring->write_tail = ring_write_tail;
	ring->flush = blt_ring_flush;
	ring->add_request = gen6_add_request;
	ring->get_seqno = gen6_ring_get_seqno;
	ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
	ring->irq_get = gen6_ring_get_irq;
	ring->irq_put = gen6_ring_put_irq;
	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
	ring->sync_to = gen6_ring_sync;
	ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
	ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
	ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
	ring->signal_mbox[0] = GEN6_RBSYNC;
	ring->signal_mbox[1] = GEN6_VBSYNC;
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int
intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
{
	int ret;

	if (!ring->gpu_caches_dirty)
		return 0;

	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);

	ring->gpu_caches_dirty = false;
	return 0;
}

int
intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
{
	uint32_t flush_domains;
	int ret;

	flush_domains = 0;
	if (ring->gpu_caches_dirty)
		flush_domains = I915_GEM_GPU_DOMAINS;

	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
	if (ret)
		return ret;

	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);

	ring->gpu_caches_dirty = false;
	return 0;
}