Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v5.1 1694 lines 45 kB view raw
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drm_print.h>

#include "i915_drv.h"
#include "i915_reset.h"
#include "intel_ringbuffer.h"
#include "intel_lrc.h"

/* Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)

/* Per-gen render context image sizes, in pages. DEFAULT is the fallback
 * used by __intel_engine_context_size() when a new gen has not yet been
 * given an explicit entry. */
#define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE	(18 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)

/* Non-render (copy/video/vebox) engines share one context image size. */
#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)

/*
 * Static description of an engine class: the name prefix used to build
 * engine->name, the submission-mode specific init hooks, and the class id
 * reported to userspace.
 */
struct engine_class_info {
	const char *name;
	int (*init_legacy)(struct intel_engine_cs *engine);
	int (*init_execlists)(struct intel_engine_cs *engine);

	u8 uabi_class;
};

static const struct engine_class_info intel_engine_classes[] = {
	[RENDER_CLASS] = {
		.name = "rcs",
		.init_execlists = logical_render_ring_init,
		.init_legacy = intel_init_render_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_RENDER,
	},
	[COPY_ENGINE_CLASS] = {
		.name = "bcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_blt_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_COPY,
	},
	[VIDEO_DECODE_CLASS] = {
		.name = "vcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_bsd_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_VIDEO,
	},
	[VIDEO_ENHANCEMENT_CLASS] = {
		.name = "vecs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_vebox_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
	},
};

#define MAX_MMIO_BASES 3
/*
 * Static description of a single engine instance. The mmio base moved
 * between gens for some engines, hence a small per-engine table keyed by
 * the first gen each base applies to.
 */
struct engine_info {
	unsigned int hw_id;
	unsigned int uabi_id;
	u8 class;
	u8 instance;
	/* mmio bases table *must* be sorted in reverse gen order */
	struct engine_mmio_base {
		u32 gen : 8;
		u32 base : 24;
	} mmio_bases[MAX_MMIO_BASES];
};

static const struct engine_info intel_engines[] = {
	[RCS] = {
		.hw_id = RCS_HW,
		.uabi_id = I915_EXEC_RENDER,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 1, .base = RENDER_RING_BASE }
		},
	},
	[BCS] = {
		.hw_id = BCS_HW,
		.uabi_id = I915_EXEC_BLT,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 6, .base = BLT_RING_BASE }
		},
	},
	[VCS] = {
		.hw_id = VCS_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
			{ .gen = 4, .base = BSD_RING_BASE }
		},
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
		},
	},
	[VCS3] = {
		.hw_id = VCS3_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
		},
	},
	[VCS4] = {
		.hw_id = VCS4_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
		},
	},
	[VECS] = {
		.hw_id = VECS_HW,
		.uabi_id = I915_EXEC_VEBOX,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .gen = 7, .base = VEBOX_RING_BASE }
		},
	},
	[VECS2] = {
		.hw_id = VECS2_HW,
		.uabi_id = I915_EXEC_VEBOX,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
};
/**
 * __intel_engine_context_size() - return the size of the context for an engine
 * @dev_priv: i915 device private
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
static u32
__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
{
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
	case RENDER_CLASS:
		switch (INTEL_GEN(dev_priv)) {
		default:
			MISSING_CASE(INTEL_GEN(dev_priv));
			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
		case 11:
			return GEN11_LR_CONTEXT_RENDER_SIZE;
		case 10:
			return GEN10_LR_CONTEXT_RENDER_SIZE;
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
			return GEN8_LR_CONTEXT_RENDER_SIZE;
		case 7:
			if (IS_HASWELL(dev_priv))
				return HSW_CXT_TOTAL_SIZE;

			/* gen6/7 report the context size in 64-byte units */
			cxt_size = I915_READ(GEN7_CXT_SIZE);
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
			cxt_size = I915_READ(CXT_SIZE);
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
		case 4:
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
		/* fall through */
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
		if (INTEL_GEN(dev_priv) < 8)
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}

/*
 * Pick the mmio base for this device from the engine's table. The table
 * is sorted in reverse gen order, so the first entry whose .gen is <= the
 * device's gen is the one that applies.
 */
static u32 __engine_mmio_base(struct drm_i915_private *i915,
			      const struct engine_mmio_base *bases)
{
	int i;

	for (i = 0; i < MAX_MMIO_BASES; i++)
		if (INTEL_GEN(i915) >= bases[i].gen)
			break;

	GEM_BUG_ON(i == MAX_MMIO_BASES);
	GEM_BUG_ON(!bases[i].base);

	return bases[i].base;
}

/* Build "<class-name><instance>" (e.g. "vcs2") into a fixed-size buffer,
 * warning if the name would have been truncated. */
static void __sprint_engine_name(char *name, const struct engine_info *info)
{
	WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
			 intel_engine_classes[info->class].name,
			 info->instance) >= INTEL_ENGINE_CS_MAX_NAME);
}
/* Apply @mask to the engine's HWSTAM register, gating which interrupt
 * sources may write into the hardware status page. */
void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
{
	struct drm_i915_private *dev_priv = engine->i915;
	i915_reg_t hwstam;

	/*
	 * Though they added more rings on g4x/ilk, they did not add
	 * per-engine HWSTAM until gen6.
	 */
	if (INTEL_GEN(dev_priv) < 6 && engine->class != RENDER_CLASS)
		return;

	hwstam = RING_HWSTAM(engine->mmio_base);
	if (INTEL_GEN(dev_priv) >= 3)
		I915_WRITE(hwstam, mask);
	else
		I915_WRITE16(hwstam, mask);
}

static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}

/*
 * Allocate and populate the software state (struct intel_engine_cs) for a
 * single engine described by intel_engines[@id], and register it in the
 * device's engine lookup tables. No hardware access beyond scrubbing the
 * HWSP write mask. Returns 0 or a negative error code.
 */
static int
intel_engine_setup(struct drm_i915_private *dev_priv,
		   enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	struct intel_engine_cs *engine;

	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));

	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));

	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
		return -EINVAL;

	GEM_BUG_ON(dev_priv->engine[id]);
	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	engine->id = id;
	engine->i915 = dev_priv;
	__sprint_engine_name(engine->name, info);
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
	engine->class = info->class;
	engine->instance = info->instance;

	engine->uabi_id = info->uabi_id;
	engine->uabi_class = intel_engine_classes[info->class].uabi_class;

	engine->context_size = __intel_engine_context_size(dev_priv,
							   engine->class);
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;
	if (engine->context_size)
		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	seqlock_init(&engine->stats.lock);

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	/* Scrub mmio state on takeover */
	intel_engine_sanitize_mmio(engine);

	dev_priv->engine_class[info->class][info->instance] = engine;
	dev_priv->engine[id] = engine;
	return 0;
}

/**
 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
{
	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
	const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int mask = 0;
	unsigned int i;
	int err;

	WARN_ON(ring_mask == 0);
	WARN_ON(ring_mask &
		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));

	if (i915_inject_load_failure())
		return -ENODEV;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		if (!HAS_ENGINE(dev_priv, i))
			continue;

		err = intel_engine_setup(dev_priv, i);
		if (err)
			goto cleanup;

		mask |= ENGINE_MASK(i);
	}

	/*
	 * Catch failures to update intel_engines table when the new engines
	 * are added to the driver by a warning and disabling the forgotten
	 * engines.
	 */
	if (WARN_ON(mask != ring_mask))
		device_info->ring_mask = mask;

	/* We always presume we have at least RCS available for later probing */
	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) {
		err = -ENODEV;
		goto cleanup;
	}

	RUNTIME_INFO(dev_priv)->num_rings = hweight32(mask);

	i915_check_and_clear_faults(dev_priv);

	return 0;

cleanup:
	for_each_engine(engine, dev_priv, id)
		kfree(engine);
	return err;
}

/**
 * intel_engines_init() - init the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id, err_id;
	int err;

	for_each_engine(engine, dev_priv, id) {
		const struct engine_class_info *class_info =
			&intel_engine_classes[engine->class];
		int (*init)(struct intel_engine_cs *engine);

		/* Pick the submission-mode specific init hook. */
		if (HAS_EXECLISTS(dev_priv))
			init = class_info->init_execlists;
		else
			init = class_info->init_legacy;

		err = -EINVAL;
		err_id = id;

		if (GEM_DEBUG_WARN_ON(!init))
			goto cleanup;

		err = init(engine);
		if (err)
			goto cleanup;

		GEM_BUG_ON(!engine->submit_request);
	}

	return 0;

cleanup:
	/* Engines before err_id were fully initialized and need the full
	 * teardown; err_id and later were only set up by mmio init, so a
	 * plain kfree suffices. */
	for_each_engine(engine, dev_priv, id) {
		if (id >= err_id) {
			kfree(engine);
			dev_priv->engine[id] = NULL;
		} else {
			dev_priv->gt.cleanup_engine(engine);
		}
	}
	return err;
}

/* Write @seqno into the HWSP slot used for the global seqno and verify
 * the write landed. */
void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
{
	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
}

static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
{
	i915_gem_batch_pool_init(&engine->batch_pool, engine);
}
/* Initialize the execlists submission state to its idle defaults. */
static void intel_engine_init_execlist(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

	execlists->port_mask = 1;
	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);

	execlists->queue_priority_hint = INT_MIN;
	execlists->queue = RB_ROOT_CACHED;
}

/* Undo init_status_page(): mask HWSP writes, then unpin and release the
 * backing object. Safe to call if the page was never allocated. */
static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;

	/* Prevent writes into HWSP after returning the page to the system */
	intel_engine_set_hwsp_writemask(engine, ~0u);

	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		i915_vma_unpin(vma);

	i915_gem_object_unpin_map(vma->obj);
	__i915_gem_object_release_unless_active(vma->obj);
}

static int pin_ggtt_status_page(struct intel_engine_cs *engine,
				struct i915_vma *vma)
{
	unsigned int flags;

	flags = PIN_GLOBAL;
	if (!HAS_LLC(engine->i915))
		/*
		 * On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
		flags |= PIN_MAPPABLE;
	else
		flags |= PIN_HIGH;

	return i915_vma_pin(vma, 0, 0, flags);
}

/* Allocate, map and (unless physically addressed) pin the hardware status
 * page for @engine. Returns 0 or a negative error code. */
static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	void *vaddr;
	int ret;

	/*
	 * Though the HWS register does support 36bit addresses, historically
	 * we have had hangs and corruption reported due to wild writes if
	 * the HWS is placed above 4G. We only allow objects to be allocated
	 * in GFP_DMA32 for i965, and no earlier physical address users had
	 * access to more than 4G.
	 */
	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate status page\n");
		return PTR_ERR(obj);
	}

	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
	if (ret)
		goto err;

	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err;
	}

	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
	engine->status_page.vma = vma;

	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
		ret = pin_ggtt_status_page(engine, vma);
		if (ret)
			goto err_unpin;
	}

	return 0;

err_unpin:
	i915_gem_object_unpin_map(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}
/**
 * intel_engine_setup_common - setup engine state not requiring hw access
 * @engine: Engine to setup.
 *
 * Initializes @engine@ structure members shared between legacy and execlists
 * submission modes which do not require hardware access.
 *
 * Typically done early in the submission mode specific engine setup stage.
 */
int intel_engine_setup_common(struct intel_engine_cs *engine)
{
	int err;

	err = init_status_page(engine);
	if (err)
		return err;

	err = i915_timeline_init(engine->i915,
				 &engine->timeline,
				 engine->name,
				 engine->status_page.vma);
	if (err)
		goto err_hwsp;

	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);

	intel_engine_init_breadcrumbs(engine);
	intel_engine_init_execlist(engine);
	intel_engine_init_hangcheck(engine);
	intel_engine_init_batch_pool(engine);
	intel_engine_init_cmd_parser(engine);

	return 0;

err_hwsp:
	cleanup_status_page(engine);
	return err;
}

/* Unpin the per-engine intel_context embedded in @ctx. */
static void __intel_context_unpin(struct i915_gem_context *ctx,
				  struct intel_engine_cs *engine)
{
	intel_context_unpin(to_intel_context(ctx, engine));
}

/* Scratch request/ring/timeline used only to measure how many dwords the
 * engine's fini-breadcrumb emission requires. */
struct measure_breadcrumb {
	struct i915_request rq;
	struct i915_timeline timeline;
	struct intel_ring ring;
	u32 cs[1024];
};

/* Emit a fini breadcrumb into a throwaway ring and return the number of
 * dwords written (or a negative error code). */
static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
{
	struct measure_breadcrumb *frame;
	int dw = -ENOMEM;

	GEM_BUG_ON(!engine->i915->gt.scratch);

	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		return -ENOMEM;

	if (i915_timeline_init(engine->i915,
			       &frame->timeline, "measure",
			       engine->status_page.vma))
		goto out_frame;

	INIT_LIST_HEAD(&frame->ring.request_list);
	frame->ring.timeline = &frame->timeline;
	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
	frame->ring.effective_size = frame->ring.size;
	intel_ring_update_space(&frame->ring);

	frame->rq.i915 = engine->i915;
	frame->rq.engine = engine;
	frame->rq.ring = &frame->ring;
	frame->rq.timeline = &frame->timeline;

	dw = i915_timeline_pin(&frame->timeline);
	if (dw < 0)
		goto out_timeline;

	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;

	i915_timeline_unpin(&frame->timeline);

out_timeline:
	i915_timeline_fini(&frame->timeline);
out_frame:
	kfree(frame);
	return dw;
}

/**
 * intel_engines_init_common - initialize cengine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine@ structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typcally done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
int intel_engine_init_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_context *ce;
	int ret;

	engine->set_default_submission(engine);

	/* We may need to do things with the shrinker which
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
	ce = intel_context_pin(i915->kernel_context, engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	/*
	 * Similarly the preempt context must always be available so that
	 * we can interrupt the engine at any time.
	 */
	if (i915->preempt_context) {
		ce = intel_context_pin(i915->preempt_context, engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			goto err_unpin_kernel;
		}
	}

	ret = measure_breadcrumb_dw(engine);
	if (ret < 0)
		goto err_unpin_preempt;

	engine->emit_fini_breadcrumb_dw = ret;

	return 0;

err_unpin_preempt:
	if (i915->preempt_context)
		__intel_context_unpin(i915->preempt_context, engine);

err_unpin_kernel:
	__intel_context_unpin(i915->kernel_context, engine);
	return ret;
}

/**
 * intel_engines_cleanup_common - cleans up the engine state created by
 * the common initiailizers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	cleanup_status_page(engine);

	intel_engine_fini_breadcrumbs(engine);
	intel_engine_cleanup_cmd_parser(engine);
	i915_gem_batch_pool_fini(&engine->batch_pool);

	if (engine->default_state)
		i915_gem_object_put(engine->default_state);

	if (i915->preempt_context)
		__intel_context_unpin(i915->preempt_context, engine);
	__intel_context_unpin(i915->kernel_context, engine);

	i915_timeline_fini(&engine->timeline);

	intel_wa_list_free(&engine->ctx_wa_list);
	intel_wa_list_free(&engine->wa_list);
	intel_wa_list_free(&engine->whitelist);
}

/* Read the engine's current active head (ACTHD) pointer; a 64b read on
 * gen8+, 32b on gen4+, and the legacy global ACTHD before that. */
u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u64 acthd;

	if (INTEL_GEN(dev_priv) >= 8)
		acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
					 RING_ACTHD_UDW(engine->mmio_base));
	else if (INTEL_GEN(dev_priv) >= 4)
		acthd = I915_READ(RING_ACTHD(engine->mmio_base));
	else
		acthd = I915_READ(ACTHD);

	return acthd;
}
/* Read the engine's last batch buffer head address (BBADDR). */
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u64 bbaddr;

	if (INTEL_GEN(dev_priv) >= 8)
		bbaddr = I915_READ64_2x32(RING_BBADDR(engine->mmio_base),
					  RING_BBADDR_UDW(engine->mmio_base));
	else
		bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));

	return bbaddr;
}

/* Ask the command streamer to stop and wait for it to report idle.
 * Returns 0, -ENODEV on gen < 3 (no MI_MODE), or -ETIMEDOUT. */
int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	const u32 base = engine->mmio_base;
	const i915_reg_t mode = RING_MI_MODE(base);
	int err;

	if (INTEL_GEN(dev_priv) < 3)
		return -ENODEV;

	GEM_TRACE("%s\n", engine->name);

	I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING));

	err = 0;
	if (__intel_wait_for_register_fw(dev_priv,
					 mode, MODE_IDLE, MODE_IDLE,
					 1000, 0,
					 NULL)) {
		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
		err = -ETIMEDOUT;
	}

	/* A final mmio read to let GPU writes be hopefully flushed to memory */
	POSTING_READ_FW(mode);

	return err;
}

/* Undo intel_engine_stop_cs(): clear the STOP_RING request. */
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	GEM_TRACE("%s\n", engine->name);

	I915_WRITE_FW(RING_MI_MODE(engine->mmio_base),
		      _MASKED_BIT_DISABLE(STOP_RING));
}

/* Human-readable label for a cache level, used in debug dumps. */
const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
{
	switch (type) {
	case I915_CACHE_NONE: return " uncached";
	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
	case I915_CACHE_L3_LLC: return " L3+LLC";
	case I915_CACHE_WT: return " WT";
	default: return "";
	}
}

/* Compute the default MCR slice/subslice steering selector for this
 * device (gen10 and gen11+ encode it differently; 0 otherwise). */
u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
	u32 mcr_s_ss_select;
	u32 slice = fls(sseu->slice_mask);
	u32 subslice = fls(sseu->subslice_mask[slice]);

	if (IS_GEN(dev_priv, 10))
		mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
				  GEN8_MCR_SUBSLICE(subslice);
	else if (INTEL_GEN(dev_priv) >= 11)
		mcr_s_ss_select = GEN11_MCR_SLICE(slice) |
				  GEN11_MCR_SUBSLICE(subslice);
	else
		mcr_s_ss_select = 0;

	return mcr_s_ss_select;
}

/*
 * Read a per-slice/subslice register: steer the MCR selector to the
 * requested slice/subslice under the uncore lock (with forcewake held for
 * both the selector and the target register), read, then restore the
 * default steering before releasing the lock.
 */
static inline u32
read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
		  int subslice, i915_reg_t reg)
{
	u32 mcr_slice_subslice_mask;
	u32 mcr_slice_subslice_select;
	u32 default_mcr_s_ss_select;
	u32 mcr;
	u32 ret;
	enum forcewake_domains fw_domains;

	if (INTEL_GEN(dev_priv) >= 11) {
		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
					  GEN11_MCR_SUBSLICE_MASK;
		mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) |
					    GEN11_MCR_SUBSLICE(subslice);
	} else {
		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
					  GEN8_MCR_SUBSLICE_MASK;
		mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) |
					    GEN8_MCR_SUBSLICE(subslice);
	}

	default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv);

	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&dev_priv->uncore.lock);
	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);

	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);

	WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) !=
		     default_mcr_s_ss_select);

	mcr &= ~mcr_slice_subslice_mask;
	mcr |= mcr_slice_subslice_select;
	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);

	ret = I915_READ_FW(reg);

	mcr &= ~mcr_slice_subslice_mask;
	mcr |= default_mcr_s_ss_select;

	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
	spin_unlock_irq(&dev_priv->uncore.lock);

	return ret;
}

/* NB: please notice the memset */
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(dev_priv)) {
	default:
		/* gen8+: sampler/row instdone are per slice/subslice */
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id != RCS)
			break;

		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id != RCS)
			break;

		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
		instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id == RCS)
			/* HACK: Using the wrong struct member */
			instdone->slice_common = I915_READ(GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = I915_READ(GEN2_INSTDONE);
		break;
	}
}
/* Report whether the engine's ring and CS parser are idle, taking a
 * runtime-pm wakeref only if the device is already awake. */
static bool ring_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	intel_wakeref_t wakeref;
	bool idle = true;

	if (I915_SELFTEST_ONLY(!engine->mmio_base))
		return true;

	/* If the whole device is asleep, the engine must be idle */
	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
	if (!wakeref)
		return true;

	/* First check that no commands are left in the ring */
	if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
	    (I915_READ_TAIL(engine) & TAIL_ADDR))
		idle = false;

	/* No bit for gen2, so assume the CS parser is idle */
	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
		idle = false;

	intel_runtime_pm_put(dev_priv, wakeref);

	return idle;
}

/**
 * intel_engine_is_idle() - Report if the engine has finished process all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and that the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* More white lies, if wedged, hw state is inconsistent */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return true;

	/* Any inflight/incomplete requests? */
	if (!intel_engine_signaled(engine, intel_engine_last_submit(engine)))
		return false;

	/* Waiting to drain ELSP? */
	if (READ_ONCE(engine->execlists.active)) {
		struct tasklet_struct *t = &engine->execlists.tasklet;

		local_bh_disable();
		if (tasklet_trylock(t)) {
			/* Must wait for any GPU reset in progress. */
			if (__tasklet_is_enabled(t))
				t->func(t->data);
			tasklet_unlock(t);
		}
		local_bh_enable();

		/* Otherwise flush the tasklet if it was on another cpu */
		tasklet_unlock_wait(t);

		if (READ_ONCE(engine->execlists.active))
			return false;
	}

	/* ELSP is empty, but there are ready requests? E.g. after reset */
	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
		return false;

	/* Ring stopped? */
	return ring_is_idle(engine);
}

bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
	 */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return true;

	for_each_engine(engine, dev_priv, id) {
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}

/**
 * intel_engine_has_kernel_context:
 * @engine: the engine
 *
 * Returns true if the last context to be executed on this engine, or has been
 * executed if the engine is already idle, is the kernel context
 * (#i915.kernel_context).
 */
bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
{
	const struct intel_context *kernel_context =
		to_intel_context(engine->i915->kernel_context, engine);
	struct i915_request *rq;

	lockdep_assert_held(&engine->i915->drm.struct_mutex);

	/*
	 * Check the last context seen by the engine. If active, it will be
	 * the last request that remains in the timeline. When idle, it is
	 * the last executed context as tracked by retirement.
	 */
	rq = __i915_active_request_peek(&engine->timeline.last_request);
	if (rq)
		return rq->hw_context == kernel_context;
	else
		return engine->last_retired_context == kernel_context;
}

void intel_engines_reset_default_submission(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		engine->set_default_submission(engine);
}

/* Try a full-GPU reset; refuse if the reset would clobber the display. */
static bool reset_engines(struct drm_i915_private *i915)
{
	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
		return false;

	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
}

/**
 * intel_engines_sanitize: called after the GPU has lost power
 * @i915: the i915 device
 * @force: ignore a failed reset and sanitize engine state anyway
 *
 * Anytime we reset the GPU, either with an explicit GPU reset or through a
 * PCI power cycle, the GPU loses state and we must reset our state tracking
 * to match. Note that calling intel_engines_sanitize() if the GPU has not
 * been reset results in much confusion!
 */
void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	GEM_TRACE("\n");

	if (!reset_engines(i915) && !force)
		return;

	for_each_engine(engine, i915, id)
		intel_engine_reset(engine, false);
}
1144 */ 1145void intel_engines_park(struct drm_i915_private *i915) 1146{ 1147 struct intel_engine_cs *engine; 1148 enum intel_engine_id id; 1149 1150 for_each_engine(engine, i915, id) { 1151 /* Flush the residual irq tasklets first. */ 1152 intel_engine_disarm_breadcrumbs(engine); 1153 tasklet_kill(&engine->execlists.tasklet); 1154 1155 /* 1156 * We are committed now to parking the engines, make sure there 1157 * will be no more interrupts arriving later and the engines 1158 * are truly idle. 1159 */ 1160 if (wait_for(intel_engine_is_idle(engine), 10)) { 1161 struct drm_printer p = drm_debug_printer(__func__); 1162 1163 dev_err(i915->drm.dev, 1164 "%s is not idle before parking\n", 1165 engine->name); 1166 intel_engine_dump(engine, &p, NULL); 1167 } 1168 1169 /* Must be reset upon idling, or we may miss the busy wakeup. */ 1170 GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); 1171 1172 if (engine->park) 1173 engine->park(engine); 1174 1175 if (engine->pinned_default_state) { 1176 i915_gem_object_unpin_map(engine->default_state); 1177 engine->pinned_default_state = NULL; 1178 } 1179 1180 i915_gem_batch_pool_fini(&engine->batch_pool); 1181 engine->execlists.no_priolist = false; 1182 } 1183} 1184 1185/** 1186 * intel_engines_unpark: called when the GT is transitioning from idle->busy 1187 * @i915: the i915 device 1188 * 1189 * The GT was idle and now about to fire up with some new user requests. 1190 */ 1191void intel_engines_unpark(struct drm_i915_private *i915) 1192{ 1193 struct intel_engine_cs *engine; 1194 enum intel_engine_id id; 1195 1196 for_each_engine(engine, i915, id) { 1197 void *map; 1198 1199 /* Pin the default state for fast resets from atomic context. 
*/ 1200 map = NULL; 1201 if (engine->default_state) 1202 map = i915_gem_object_pin_map(engine->default_state, 1203 I915_MAP_WB); 1204 if (!IS_ERR_OR_NULL(map)) 1205 engine->pinned_default_state = map; 1206 1207 if (engine->unpark) 1208 engine->unpark(engine); 1209 1210 intel_engine_init_hangcheck(engine); 1211 } 1212} 1213 1214/** 1215 * intel_engine_lost_context: called when the GPU is reset into unknown state 1216 * @engine: the engine 1217 * 1218 * We have either reset the GPU or otherwise about to lose state tracking of 1219 * the current GPU logical state (e.g. suspend). On next use, it is therefore 1220 * imperative that we make no presumptions about the current state and load 1221 * from scratch. 1222 */ 1223void intel_engine_lost_context(struct intel_engine_cs *engine) 1224{ 1225 struct intel_context *ce; 1226 1227 lockdep_assert_held(&engine->i915->drm.struct_mutex); 1228 1229 ce = fetch_and_zero(&engine->last_retired_context); 1230 if (ce) 1231 intel_context_unpin(ce); 1232} 1233 1234bool intel_engine_can_store_dword(struct intel_engine_cs *engine) 1235{ 1236 switch (INTEL_GEN(engine->i915)) { 1237 case 2: 1238 return false; /* uses physical not virtual addresses */ 1239 case 3: 1240 /* maybe only uses physical not virtual addresses */ 1241 return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915)); 1242 case 6: 1243 return engine->class != VIDEO_DECODE_CLASS; /* b0rked */ 1244 default: 1245 return true; 1246 } 1247} 1248 1249unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) 1250{ 1251 struct intel_engine_cs *engine; 1252 enum intel_engine_id id; 1253 unsigned int which; 1254 1255 which = 0; 1256 for_each_engine(engine, i915, id) 1257 if (engine->default_state) 1258 which |= BIT(engine->uabi_class); 1259 1260 return which; 1261} 1262 1263static int print_sched_attr(struct drm_i915_private *i915, 1264 const struct i915_sched_attr *attr, 1265 char *buf, int x, int len) 1266{ 1267 if (attr->priority == I915_PRIORITY_INVALID) 
		return x;

	x += snprintf(buf + x, len - x,
		      " prio=%d", attr->priority);

	return x;
}

/*
 * Emit a one-line summary of a request: global seqno, completion/start
 * markers, fence context:seqno, scheduler attributes and age in ms.
 */
static void print_request(struct drm_printer *m,
			  struct i915_request *rq,
			  const char *prefix)
{
	const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
	char buf[80] = "";
	int x = 0;

	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));

	drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n",
		   prefix,
		   rq->global_seqno,
		   i915_request_completed(rq) ? "!" :
		   i915_request_started(rq) ? "*" :
		   "",
		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			    &rq->fence.flags) ? "+" : "",
		   rq->fence.context, rq->fence.seqno,
		   buf,
		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
		   name);
}

/*
 * Dump @len bytes at @buf through the drm_printer, 32 bytes per row,
 * collapsing runs of rows identical to the previous one into a single
 * "*" line (similar to hexdump(1) without -v).
 */
static void hexdump(struct drm_printer *m, const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		char line[128];

		if (prev && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				drm_printf(m, "*\n");
				skip = true;
			}
			continue;
		}

		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
		drm_printf(m, "[%04zx] %s\n", pos, line);

		prev = buf + pos;
		skip = false;
	}
}

/*
 * Dump the engine's MMIO state: ring registers, fault/batch addresses,
 * and either the execlists CSB/ELSP state or the ppGTT registers as
 * appropriate for the platform. The device must be awake — the caller
 * (intel_engine_dump) only invokes this under a runtime-pm wakeref.
 */
static void intel_engine_print_registers(const struct intel_engine_cs *engine,
					 struct drm_printer *m)
{
	struct drm_i915_private *dev_priv = engine->i915;
	const struct intel_engine_execlists * const execlists =
		&engine->execlists;
	u64 addr;

	if (engine->id == RCS && IS_GEN_RANGE(dev_priv, 4, 7))
		drm_printf(m, "\tCCID: 0x%08x\n", I915_READ(CCID));
	drm_printf(m, "\tRING_START: 0x%08x\n",
		   I915_READ(RING_START(engine->mmio_base)));
	drm_printf(m, "\tRING_HEAD: 0x%08x\n",
		   I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR);
	drm_printf(m, "\tRING_TAIL: 0x%08x\n",
		   I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR);
	drm_printf(m, "\tRING_CTL: 0x%08x%s\n",
		   I915_READ(RING_CTL(engine->mmio_base)),
		   I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
	if (INTEL_GEN(engine->i915) > 2) {
		drm_printf(m, "\tRING_MODE: 0x%08x%s\n",
			   I915_READ(RING_MI_MODE(engine->mmio_base)),
			   I915_READ(RING_MI_MODE(engine->mmio_base)) & (MODE_IDLE) ? " [idle]" : "");
	}

	if (INTEL_GEN(dev_priv) >= 6) {
		drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine));
	}

	addr = intel_engine_get_active_head(engine);
	drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	addr = intel_engine_get_last_batch_head(engine);
	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	/* The DMA fault address register moved and widened over the gens. */
	if (INTEL_GEN(dev_priv) >= 8)
		addr = I915_READ64_2x32(RING_DMA_FADD(engine->mmio_base),
					RING_DMA_FADD_UDW(engine->mmio_base));
	else if (INTEL_GEN(dev_priv) >= 4)
		addr = I915_READ(RING_DMA_FADD(engine->mmio_base));
	else
		addr = I915_READ(DMA_FADD_I8XX);
	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (INTEL_GEN(dev_priv) >= 4) {
		drm_printf(m, "\tIPEIR: 0x%08x\n",
			   I915_READ(RING_IPEIR(engine->mmio_base)));
		drm_printf(m, "\tIPEHR: 0x%08x\n",
			   I915_READ(RING_IPEHR(engine->mmio_base)));
	} else {
		drm_printf(m, "\tIPEIR: 0x%08x\n", I915_READ(IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n", I915_READ(IPEHR));
	}

	if (HAS_EXECLISTS(dev_priv)) {
		const u32 *hws =
			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
		unsigned int idx;
		u8 read, write;

		drm_printf(m, "\tExeclist status: 0x%08x %08x\n",
			   I915_READ(RING_EXECLIST_STATUS_LO(engine)),
			   I915_READ(RING_EXECLIST_STATUS_HI(engine)));

		read = execlists->csb_head;
		write = READ_ONCE(*execlists->csb_write);

		drm_printf(m, "\tExeclist CSB read %d, write %d [mmio:%d], tasklet queued? %s (%s)\n",
			   read, write,
			   GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine))),
			   yesno(test_bit(TASKLET_STATE_SCHED,
					  &engine->execlists.tasklet.state)),
			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
		/*
		 * Normalise the read/write pointers and walk the CSB entries
		 * between them, wrapping modulo GEN8_CSB_ENTRIES.
		 */
		if (read >= GEN8_CSB_ENTRIES)
			read = 0;
		if (write >= GEN8_CSB_ENTRIES)
			write = 0;
		if (read > write)
			write += GEN8_CSB_ENTRIES;
		while (read < write) {
			idx = ++read % GEN8_CSB_ENTRIES;
			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [mmio:0x%08x], context: %d [mmio:%d]\n",
				   idx,
				   hws[idx * 2],
				   I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
				   hws[idx * 2 + 1],
				   I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)));
		}

		/* The requests in the ELSP ports are only stable under RCU. */
		rcu_read_lock();
		for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
			struct i915_request *rq;
			unsigned int count;

			rq = port_unpack(&execlists->port[idx], &count);
			if (rq) {
				char hdr[80];

				snprintf(hdr, sizeof(hdr),
					 "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ",
					 idx, count,
					 i915_ggtt_offset(rq->ring->vma),
					 rq->timeline->hwsp_offset);
				print_request(m, rq, hdr);
			} else {
				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
			}
		}
		drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
		rcu_read_unlock();
	} else if (INTEL_GEN(dev_priv) > 6) {
		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
			   I915_READ(RING_PP_DIR_BASE(engine)));
		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
			   I915_READ(RING_PP_DIR_BASE_READ(engine)));
		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
			   I915_READ(RING_PP_DIR_DCLV(engine)));
	}
}

/*
 * Copy the request's payload out of its ring (handling the wrap at the
 * end of the ring buffer) and hexdump it. Best effort only: silently
 * skipped if the GFP_ATOMIC allocation fails.
 */
static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
	void *ring;
	int size;

	drm_printf(m,
		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
		   rq->head, rq->postfix, rq->tail,
		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);

	size = rq->tail - rq->head;
	if (rq->tail < rq->head)
		size += rq->ring->size;

	ring = kmalloc(size, GFP_ATOMIC);
	if (ring) {
		const void *vaddr = rq->ring->vaddr;
		unsigned int head = rq->head;
		unsigned int len = 0;

		/* Unwrap: copy [head, end) then [0, tail) contiguously. */
		if (rq->tail < head) {
			len = rq->ring->size - head;
			memcpy(ring, vaddr + head, len);
			head = 0;
		}
		memcpy(ring + len, vaddr + head, size - len);

		hexdump(m, ring, size);
		kfree(ring);
	}
}

void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...)
{
	struct i915_gpu_error * const error = &engine->i915->gpu_error;
	struct i915_request *rq;
	intel_wakeref_t wakeref;

	/* Optional caller-provided header, printf-style varargs. */
	if (header) {
		va_list ap;

		va_start(ap, header);
		drm_vprintf(m, header, &ap);
		va_end(ap);
	}

	if (i915_terminally_wedged(&engine->i915->gpu_error))
		drm_printf(m, "*** WEDGED ***\n");

	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
		   intel_engine_get_seqno(engine),
		   intel_engine_last_submit(engine),
		   engine->hangcheck.seqno,
		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
	drm_printf(m, "\tReset count: %d (global %d)\n",
		   i915_reset_engine_count(error, engine),
		   i915_reset_count(error));

	/* Peek at the timeline's request list under RCU protection. */
	rcu_read_lock();

	drm_printf(m, "\tRequests:\n");

	rq = list_first_entry(&engine->timeline.requests,
			      struct i915_request, link);
	if (&rq->link != &engine->timeline.requests)
		print_request(m, rq, "\t\tfirst ");

	rq = list_last_entry(&engine->timeline.requests,
			     struct i915_request, link);
	if (&rq->link != &engine->timeline.requests)
		print_request(m, rq, "\t\tlast ");

	rq = i915_gem_find_active_request(engine);
	if (rq) {
		print_request(m, rq, "\t\tactive ");

		drm_printf(m, "\t\tring->start: 0x%08x\n",
			   i915_ggtt_offset(rq->ring->vma));
		drm_printf(m, "\t\tring->head: 0x%08x\n",
			   rq->ring->head);
		drm_printf(m, "\t\tring->tail: 0x%08x\n",
			   rq->ring->tail);
		drm_printf(m, "\t\tring->emit: 0x%08x\n",
			   rq->ring->emit);
		drm_printf(m, "\t\tring->space: 0x%08x\n",
			   rq->ring->space);
		drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
			   rq->timeline->hwsp_offset);

		print_request_ring(m, rq);
	}

	rcu_read_unlock();

	/* Only touch the MMIO registers if the device is already awake. */
	wakeref = intel_runtime_pm_get_if_in_use(engine->i915);
	if (wakeref) {
		intel_engine_print_registers(engine, m);
		intel_runtime_pm_put(engine->i915, wakeref);
	} else {
		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
	}

	intel_execlists_show_requests(engine, m, print_request, 8);

	drm_printf(m, "HWSP:\n");
	hexdump(m, engine->status_page.addr, PAGE_SIZE);

	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));

	intel_engine_print_breadcrumbs(engine, m);
}

/* Translate a uabi (I915_ENGINE_CLASS_*) class to the HW engine class. */
static u8 user_class_map[] = {
	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
};

/*
 * Look up an engine from the class/instance pair supplied by userspace;
 * returns NULL for an unknown class or out-of-range instance.
 */
struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
{
	if (class >= ARRAY_SIZE(user_class_map))
		return NULL;

	class = user_class_map[class];

	GEM_BUG_ON(class > MAX_ENGINE_CLASS);

	if (instance > MAX_ENGINE_INSTANCE)
		return NULL;

	return i915->engine_class[class][instance];
}

/**
 * intel_enable_engine_stats() - Enable engine busy tracking on engine
 * @engine: engine to enable stats collection
 *
 * Start collecting the engine busyness data for @engine.
 *
 * Returns 0 on success or a negative error code.
 */
int intel_enable_engine_stats(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *execlists = &engine->execlists;
	unsigned long flags;
	int err = 0;

	if (!intel_engine_supports_stats(engine))
		return -ENODEV;

	/* Serialise against submission (timeline.lock) and readers. */
	spin_lock_irqsave(&engine->timeline.lock, flags);
	write_seqlock(&engine->stats.lock);

	/* Refuse to wrap the reference count back to zero. */
	if (unlikely(engine->stats.enabled == ~0)) {
		err = -EBUSY;
		goto unlock;
	}

	if (engine->stats.enabled++ == 0) {
		const struct execlist_port *port = execlists->port;
		unsigned int num_ports = execlists_num_ports(execlists);

		engine->stats.enabled_at = ktime_get();

		/* XXX submission method oblivious? */
		while (num_ports-- && port_isset(port)) {
			engine->stats.active++;
			port++;
		}

		if (engine->stats.active)
			engine->stats.start = engine->stats.enabled_at;
	}

unlock:
	write_sequnlock(&engine->stats.lock);
	spin_unlock_irqrestore(&engine->timeline.lock, flags);

	return err;
}

/*
 * Accumulated busy time, including the period currently executing.
 * Callers provide serialisation against concurrent updates via
 * engine->stats.lock (write side or seqlock retry loop).
 */
static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
	ktime_t total = engine->stats.total;

	/*
	 * If the engine is executing something at the moment
	 * add it to the total.
	 */
	if (engine->stats.active)
		total = ktime_add(total,
				  ktime_sub(ktime_get(), engine->stats.start));

	return total;
}

/**
 * intel_engine_get_busy_time() - Return current accumulated engine busyness
 * @engine: engine to report on
 *
 * Returns accumulated time @engine was busy since engine stats were enabled.
 */
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
	unsigned int seq;
	ktime_t total;

	/* Lockless read: retry if a writer updated the stats meanwhile. */
	do {
		seq = read_seqbegin(&engine->stats.lock);
		total = __intel_engine_get_busy_time(engine);
	} while (read_seqretry(&engine->stats.lock, seq));

	return total;
}

/**
 * intel_disable_engine_stats() - Disable engine busy tracking on engine
 * @engine: engine to disable stats collection
 *
 * Stops collecting the engine busyness data for @engine.
 */
void intel_disable_engine_stats(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (!intel_engine_supports_stats(engine))
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);
	WARN_ON_ONCE(engine->stats.enabled == 0);
	/* On the last disable, fold the in-progress period into the total. */
	if (--engine->stats.enabled == 0) {
		engine->stats.total = __intel_engine_get_busy_time(engine);
		engine->stats.active = 0;
	}
	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_engine.c"
#include "selftests/intel_engine_cs.c"
#endif