/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "i915_drv.h"
#include "intel_ringbuffer.h"
#include "intel_lrc.h"

/* Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
/* Same as Haswell, but 72064 bytes now. */
#define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)

struct engine_class_info {
	const char *name;
	int (*init_legacy)(struct intel_engine_cs *engine);
	int (*init_execlists)(struct intel_engine_cs *engine);
};

static const struct engine_class_info intel_engine_classes[] = {
	[RENDER_CLASS] = {
		.name = "rcs",
		.init_execlists = logical_render_ring_init,
		.init_legacy = intel_init_render_ring_buffer,
	},
	[COPY_ENGINE_CLASS] = {
		.name = "bcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_blt_ring_buffer,
	},
	[VIDEO_DECODE_CLASS] = {
		.name = "vcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_bsd_ring_buffer,
	},
	[VIDEO_ENHANCEMENT_CLASS] = {
		.name = "vecs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_vebox_ring_buffer,
	},
};

struct engine_info {
	unsigned int hw_id;
	unsigned int uabi_id;
	u8 class;
	u8 instance;
	u32 mmio_base;
	unsigned irq_shift;
};

static const struct engine_info intel_engines[] = {
	[RCS] = {
		.hw_id = RCS_HW,
		.uabi_id = I915_EXEC_RENDER,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_base = RENDER_RING_BASE,
		.irq_shift = GEN8_RCS_IRQ_SHIFT,
	},
	[BCS] = {
		.hw_id = BCS_HW,
		.uabi_id = I915_EXEC_BLT,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_base = BLT_RING_BASE,
		.irq_shift = GEN8_BCS_IRQ_SHIFT,
	},
	[VCS] = {
		.hw_id = VCS_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_base = GEN6_BSD_RING_BASE,
		.irq_shift = GEN8_VCS1_IRQ_SHIFT,
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_base = GEN8_BSD2_RING_BASE,
		.irq_shift = GEN8_VCS2_IRQ_SHIFT,
	},
	[VECS] = {
		.hw_id = VECS_HW,
		.uabi_id = I915_EXEC_VEBOX,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_base = VEBOX_RING_BASE,
		.irq_shift = GEN8_VECS_IRQ_SHIFT,
	},
};

/**
 * __intel_engine_context_size() - return the size of the context for an engine
 * @dev_priv: i915 device private
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
static u32
__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
{
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
	case RENDER_CLASS:
		switch (INTEL_GEN(dev_priv)) {
		default:
			MISSING_CASE(INTEL_GEN(dev_priv));
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
			return i915.enable_execlists ?
			       GEN8_LR_CONTEXT_RENDER_SIZE :
			       GEN8_CXT_TOTAL_SIZE;
		case 7:
			if (IS_HASWELL(dev_priv))
				return HSW_CXT_TOTAL_SIZE;

			cxt_size = I915_READ(GEN7_CXT_SIZE);
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
			cxt_size = I915_READ(CXT_SIZE);
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
		case 4:
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
		if (INTEL_GEN(dev_priv) < 8)
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}

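/* Allocate an intel_engine_cs and populate it from the static tables above. */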
static int
intel_engine_setup(struct drm_i915_private *dev_priv,
		   enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	const struct engine_class_info *class_info;
	struct intel_engine_cs *engine;

	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
	class_info = &intel_engine_classes[info->class];

	GEM_BUG_ON(dev_priv->engine[id]);
	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	engine->id = id;
	engine->i915 = dev_priv;
	WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
			 class_info->name, info->instance) >=
		sizeof(engine->name));
	engine->uabi_id = info->uabi_id;
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = info->mmio_base;
	engine->irq_shift = info->irq_shift;
	engine->class = info->class;
	engine->instance = info->instance;

	engine->context_size = __intel_engine_context_size(dev_priv,
							   engine->class);
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	dev_priv->engine[id] = engine;
	return 0;
}

/**
 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
{
	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
	const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int mask = 0;
	unsigned int i;
	int err;

	WARN_ON(ring_mask == 0);
	WARN_ON(ring_mask &
		GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES));

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		if (!HAS_ENGINE(dev_priv, i))
			continue;

		err = intel_engine_setup(dev_priv, i);
		if (err)
			goto cleanup;

		mask |= ENGINE_MASK(i);
	}

	/*
	 * Catch failures to update intel_engines table when the new engines
	 * are added to the driver by a warning and disabling the forgotten
	 * engines.
	 */
	if (WARN_ON(mask != ring_mask))
		device_info->ring_mask = mask;

	/* We always presume we have at least RCS available for later probing */
	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) {
		err = -ENODEV;
		goto cleanup;
	}

	device_info->num_rings = hweight32(mask);

	return 0;

cleanup:
	for_each_engine(engine, dev_priv, id)
		kfree(engine);
	return err;
}

/**
 * intel_engines_init() - init the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init(struct drm_i915_private *dev_priv)
{
	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
	struct intel_engine_cs *engine;
	enum intel_engine_id id, err_id;
	unsigned int mask = 0;
	int err = 0;

	for_each_engine(engine, dev_priv, id) {
		const struct engine_class_info *class_info =
			&intel_engine_classes[engine->class];
		int (*init)(struct intel_engine_cs *engine);

		if (i915.enable_execlists)
			init = class_info->init_execlists;
		else
			init = class_info->init_legacy;
		if (!init) {
			kfree(engine);
			dev_priv->engine[id] = NULL;
			continue;
		}

		err = init(engine);
		if (err) {
			err_id = id;
			goto cleanup;
		}

		GEM_BUG_ON(!engine->submit_request);
		mask |= ENGINE_MASK(id);
	}

	/*
	 * Catch failures to update intel_engines table when the new engines
	 * are added to the driver by a warning and disabling the forgotten
	 * engines.
	 */
	if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask))
		device_info->ring_mask = mask;

	device_info->num_rings = hweight32(mask);

	return 0;

cleanup:
	for_each_engine(engine, dev_priv, id) {
		if (id >= err_id)
			kfree(engine);
		else
			dev_priv->gt.cleanup_engine(engine);
	}
	return err;
}

void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
{
	struct drm_i915_private *dev_priv = engine->i915;

	GEM_BUG_ON(!intel_engine_is_idle(engine));
	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));

	/* Our semaphore implementation is strictly monotonic (i.e. we proceed
	 * so long as the semaphore value in the register/page is greater
	 * than the sync value), so whenever we reset the seqno,
	 * so long as we reset the tracking semaphore value to 0, it will
	 * always be before the next request's seqno. If we don't reset
	 * the semaphore value, then when the seqno moves backwards all
	 * future waits will complete instantly (causing rendering corruption).
	 */
	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
		I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
		I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
		if (HAS_VEBOX(dev_priv))
			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
	}
	if (dev_priv->semaphore) {
		struct page *page = i915_vma_first_page(dev_priv->semaphore);
		void *semaphores;

		/* Semaphores are in noncoherent memory, flush to be safe */
		semaphores = kmap_atomic(page);
		memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
		       0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
		drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
				       I915_NUM_ENGINES * gen8_semaphore_seqno_size);
		kunmap_atomic(semaphores);
	}

	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);

	/* After manually advancing the seqno, fake the interrupt in case
	 * there are any waiters for that seqno.
	 */
	intel_engine_wakeup(engine);

	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
}

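/* Each engine uses its own slot in the device-wide global timeline. */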
static void intel_engine_init_timeline(struct intel_engine_cs *engine)
{
	engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
}

/**
 * intel_engine_setup_common - setup engine state not requiring hw access
 * @engine: Engine to setup.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do not require hardware access.
 *
 * Typically done early in the submission mode specific engine setup stage.
 */
void intel_engine_setup_common(struct intel_engine_cs *engine)
{
	engine->execlist_queue = RB_ROOT;
	engine->execlist_first = NULL;

	intel_engine_init_timeline(engine);
	intel_engine_init_hangcheck(engine);
	i915_gem_batch_pool_init(engine, &engine->batch_pool);

	intel_engine_init_cmd_parser(engine);
}

int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	WARN_ON(engine->scratch);

	obj = i915_gem_object_create_stolen(engine->i915, size);
	if (!obj)
		obj = i915_gem_object_create_internal(engine->i915, size);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH);
	if (ret)
		goto err_unref;

	engine->scratch = vma;
	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
			 engine->name, i915_ggtt_offset(vma));
	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->scratch);
}

/**
 * intel_engine_init_common - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
int intel_engine_init_common(struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	int ret;

	engine->set_default_submission(engine);

	/* We may need to do things with the shrinker which
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
	ring = engine->context_pin(engine, engine->i915->kernel_context);
	if (IS_ERR(ring))
		return PTR_ERR(ring);

	ret = intel_engine_init_breadcrumbs(engine);
	if (ret)
		goto err_unpin;

	ret = i915_gem_render_state_init(engine);
	if (ret)
		goto err_unpin;

	return 0;

err_unpin:
	engine->context_unpin(engine, engine->i915->kernel_context);
	return ret;
}

/**
 * intel_engine_cleanup_common - cleans up the engine state created by
 * the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	intel_engine_cleanup_scratch(engine);

	i915_gem_render_state_fini(engine);
	intel_engine_fini_breadcrumbs(engine);
	intel_engine_cleanup_cmd_parser(engine);
	i915_gem_batch_pool_fini(&engine->batch_pool);

	engine->context_unpin(engine, engine->i915->kernel_context);
}

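/* Read the current active head (ACTHD); the register layout varies with gen. */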
u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u64 acthd;

	if (INTEL_GEN(dev_priv) >= 8)
		acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
					 RING_ACTHD_UDW(engine->mmio_base));
	else if (INTEL_GEN(dev_priv) >= 4)
		acthd = I915_READ(RING_ACTHD(engine->mmio_base));
	else
		acthd = I915_READ(ACTHD);

	return acthd;
}

u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u64 bbaddr;

	if (INTEL_GEN(dev_priv) >= 8)
		bbaddr = I915_READ64_2x32(RING_BBADDR(engine->mmio_base),
					  RING_BBADDR_UDW(engine->mmio_base));
	else
		bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));

	return bbaddr;
}

const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
{
	switch (type) {
	case I915_CACHE_NONE: return " uncached";
	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
	case I915_CACHE_L3_LLC: return " L3+LLC";
	case I915_CACHE_WT: return " WT";
	default: return "";
	}
}

static inline uint32_t
read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
		  int subslice, i915_reg_t reg)
{
	uint32_t mcr;
	uint32_t ret;
	enum forcewake_domains fw_domains;

	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&dev_priv->uncore.lock);
	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);

	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
	/*
	 * The HW expects the slice and subslice selectors to be reset to 0
	 * after reading out the registers.
	 */
	WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK));
	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
	mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);

	ret = I915_READ_FW(reg);

	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
	spin_unlock_irq(&dev_priv->uncore.lock);

	return ret;
}

/* NB: please notice the memset */
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(dev_priv)) {
	default:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id != RCS)
			break;

		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id != RCS)
			break;

		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
		instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id == RCS)
			/* HACK: Using the wrong struct member */
			instdone->slice_common = I915_READ(GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = I915_READ(GEN2_INSTDONE);
		break;
	}
}

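/* Record a single register workaround, to be replayed later by
 * intel_ring_workarounds_emit().
 */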
static int wa_add(struct drm_i915_private *dev_priv,
		  i915_reg_t addr,
		  const u32 mask, const u32 val)
{
	const u32 idx = dev_priv->workarounds.count;

	if (WARN_ON(idx >= I915_MAX_WA_REGS))
		return -ENOSPC;

	dev_priv->workarounds.reg[idx].addr = addr;
	dev_priv->workarounds.reg[idx].value = val;
	dev_priv->workarounds.reg[idx].mask = mask;

	dev_priv->workarounds.count++;

	return 0;
}

#define WA_REG(addr, mask, val) do { \
		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
		if (r) \
			return r; \
	} while (0)

#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))

#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))

#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)

static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
				 i915_reg_t reg)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct i915_workarounds *wa = &dev_priv->workarounds;
	const uint32_t index = wa->hw_whitelist_count[engine->id];

	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
		return -EINVAL;

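	/* Program the next free FORCE_TO_NONPRIV slot, whitelisting the
	 * register for non-privileged (userspace) access.
	 */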
	WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
		 i915_mmio_reg_offset(reg));
	wa->hw_whitelist_count[engine->id]++;

	return 0;
}

static int gen8_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 * polygons in the same 8x4 pixel/sample area to be processed without
	 * stalling waiting for the earlier ones to write to Hierarchical Z
	 * buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);

	return 0;
}

static int bdw_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

	return 0;
}

static int chv_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);

	return 0;
}

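/* Workarounds common to all gen9-based platforms (skl, bxt, kbl, glk, cfl). */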
static int gen9_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));

	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
			   ECOCHK_DIS_TLB);

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	/* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
				  GEN9_DG_MIRROR_FIX_ENABLE);

	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
				  GEN9_RHWO_OPTIMIZATION_DISABLE);
		/*
		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
		 * but we do that in per ctx batchbuffer as there is an issue
		 * with this register not getting restored on ctx restore
		 */
	}

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaDisableMaskBasedCammingInRCC:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
				  PIXEL_MASK_CAMMING_DISABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
		   BDW_DISABLE_HDC_INVALIDATION);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(dev_priv) ||
	    IS_KABYLAKE(dev_priv) ||
	    IS_COFFEELAKE(dev_priv) ||
	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
				    GEN8_LQSC_FLUSH_COHERENT_LINES));

	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
	if (ret)
		return ret;

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */
	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
	if (ret)
		return ret;

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
	if (ret)
		return ret;

	return 0;
}

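/* Bias IZ hashing towards slices where exactly one subslice has 7 EUs. */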
static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return 0;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));

	return 0;
}

static int skl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/*
	 * Actual WA is to disable percontext preemption granularity control
	 * until D0 which is the default case so this is equivalent to
	 * !WaDisablePerCtxtPreemptionGranularityControl:skl
	 */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));

	/* WaEnableGapsTsvCreditFix:skl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaDisableGafsUnitClkGating:skl */
	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaDisableLSQCROPERFforOCL:skl */
	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
	if (ret)
		return ret;

	return skl_tune_iz_hashing(engine);
}

static int bxt_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaStoreMultiplePTEenable:bxt */
	/* This is a requirement according to Hardware specification */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);

	/* WaSetClckGatingDisableMedia:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
	}

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaDisablePooledEuLoadBalancingFix:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
		WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2,
				  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	/* WaDisableSbeCacheDispatchPortSharing:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
		WA_SET_BIT_MASKED(
			GEN7_HALF_SLICE_CHICKEN1,
			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
	}

	/* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
	/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
	/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
	/* WaDisableLSQCROPERFforOCL:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
		if (ret)
			return ret;

		ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
		if (ret)
			return ret;
	}

	/* WaProgramL3SqcReg1DefaultForPerf:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
		I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
					   L3_HIGH_PRIO_CREDITS(2));

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaInPlaceDecompressionHang:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	return 0;
}

static int kbl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaEnableGapsTsvCreditFix:kbl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
		WA_SET_BIT(GAMT_CHKN_BIT_REG,
			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
				  HDC_FENCE_DEST_SLM_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableGafsUnitClkGating:kbl */
	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(
		GEN7_HALF_SLICE_CHICKEN1,
		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaDisableLSQCROPERFforOCL:kbl */
	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
	if (ret)
		return ret;

	return 0;
}

static int glk_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	return 0;
}

static int cfl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaEnableGapsTsvCreditFix:cfl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableGafsUnitClkGating:cfl */
	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(
		GEN7_HALF_SLICE_CHICKEN1,
		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	return 0;
}

int init_workarounds_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int err;

	WARN_ON(engine->id != RCS);

	dev_priv->workarounds.count = 0;
	dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;

	if (IS_BROADWELL(dev_priv))
		err = bdw_init_workarounds(engine);
	else if (IS_CHERRYVIEW(dev_priv))
		err = chv_init_workarounds(engine);
	else if (IS_SKYLAKE(dev_priv))
		err = skl_init_workarounds(engine);
	else if (IS_BROXTON(dev_priv))
		err = bxt_init_workarounds(engine);
	else if (IS_KABYLAKE(dev_priv))
		err = kbl_init_workarounds(engine);
	else if (IS_GEMINILAKE(dev_priv))
		err = glk_init_workarounds(engine);
	else if (IS_COFFEELAKE(dev_priv))
		err = cfl_init_workarounds(engine);
	else
		err = 0;
	if (err)
		return err;

	DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
			 engine->name, dev_priv->workarounds.count);
	return 0;
}

int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
	struct i915_workarounds *w = &req->i915->workarounds;
	u32 *cs;
	int ret, i;

	if (w->count == 0)
		return 0;

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(req, (w->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
	for (i = 0; i < w->count; i++) {
		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
		*cs++ = w->reg[i].value;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(req, cs);

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

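/* Check the hardware ring registers: idle means HEAD has caught up with TAIL
 * and, on gen3+, the CS parser reports idle.
 */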
static bool ring_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	bool idle = true;

	intel_runtime_pm_get(dev_priv);

	/* First check that no commands are left in the ring */
	if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
	    (I915_READ_TAIL(engine) & TAIL_ADDR))
		idle = false;

	/* No bit for gen2, so assume the CS parser is idle */
	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
		idle = false;

	intel_runtime_pm_put(dev_priv);

	return idle;
}

/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and that the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* More white lies, if wedged, hw state is inconsistent */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return true;

	/* Any inflight/incomplete requests? */
	if (!i915_seqno_passed(intel_engine_get_seqno(engine),
			       intel_engine_last_submit(engine)))
		return false;

	if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
		return true;

	/* Interrupt/tasklet pending? */
	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
		return false;

	/* Both ports drained, no more ELSP submission? */
	if (port_request(&engine->execlist_port[0]))
		return false;

	/* Ring stopped? */
	if (!ring_is_idle(engine))
		return false;

	return true;
}

bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (READ_ONCE(dev_priv->gt.active_requests))
		return false;

	/* If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
	 */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return true;

	for_each_engine(engine, dev_priv, id) {
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}

void intel_engines_reset_default_submission(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		engine->set_default_submission(engine);
}

void intel_engines_mark_idle(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id) {
		intel_engine_disarm_breadcrumbs(engine);
		i915_gem_batch_pool_fini(&engine->batch_pool);
		engine->no_priolist = false;
	}
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_engine.c"
#endif