Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/gem: Always test execution status on closing the context

Verify that, if a context is active at the time it is closed, it is
either persistent and preemptible (with hangcheck running) or it shall
be removed from execution.

Fixes: 9a40bddd47ca ("drm/i915/gt: Expose heartbeat interval via sysfs")
Testcase: igt/gem_ctx_persistence/heartbeat-close
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.7+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200928221510.26044-3-chris@chris-wilson.co.uk
(cherry picked from commit d3bb2f9b5ee66d5e000293edd6b6575e59d11db9)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

Authored by Chris Wilson and committed by Rodrigo Vivi
651dabe2 ca65fc0d

+10 -38
+10 -38
drivers/gpu/drm/i915/gem/i915_gem_context.c
··· 390 390 return rcu_dereference_protected(ctx->engines, true); 391 391 } 392 392 393 - static bool __reset_engine(struct intel_engine_cs *engine) 394 - { 395 - struct intel_gt *gt = engine->gt; 396 - bool success = false; 397 - 398 - if (!intel_has_reset_engine(gt)) 399 - return false; 400 - 401 - if (!test_and_set_bit(I915_RESET_ENGINE + engine->id, 402 - &gt->reset.flags)) { 403 - success = intel_engine_reset(engine, NULL) == 0; 404 - clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, 405 - &gt->reset.flags); 406 - } 407 - 408 - return success; 409 - } 410 - 411 393 static void __reset_context(struct i915_gem_context *ctx, 412 394 struct intel_engine_cs *engine) 413 395 { ··· 413 431 * kill the banned context, we fallback to doing a local reset 414 432 * instead. 415 433 */ 416 - if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) && 417 - !intel_engine_pulse(engine)) 418 - return true; 419 - 420 - /* If we are unable to send a pulse, try resetting this engine. */ 421 - return __reset_engine(engine); 434 + return intel_engine_pulse(engine) == 0; 422 435 } 423 436 424 437 static bool ··· 483 506 return engine; 484 507 } 485 508 486 - static void kill_engines(struct i915_gem_engines *engines) 509 + static void kill_engines(struct i915_gem_engines *engines, bool ban) 487 510 { 488 511 struct i915_gem_engines_iter it; 489 512 struct intel_context *ce; ··· 498 521 for_each_gem_engine(ce, engines, it) { 499 522 struct intel_engine_cs *engine; 500 523 501 - if (intel_context_set_banned(ce)) 524 + if (ban && intel_context_set_banned(ce)) 502 525 continue; 503 526 504 527 /* ··· 511 534 engine = active_engine(ce); 512 535 513 536 /* First attempt to gracefully cancel the context */ 514 - if (engine && !__cancel_engine(engine)) 537 + if (engine && !__cancel_engine(engine) && ban) 515 538 /* 516 539 * If we are unable to send a preemptive pulse to bump 517 540 * the context from the GPU, we have to resort to a full ··· 521 544 } 522 545 } 523 546 524 - static void 
kill_stale_engines(struct i915_gem_context *ctx) 547 + static void kill_context(struct i915_gem_context *ctx) 525 548 { 549 + bool ban = (!i915_gem_context_is_persistent(ctx) || 550 + !ctx->i915->params.enable_hangcheck); 526 551 struct i915_gem_engines *pos, *next; 527 552 528 553 spin_lock_irq(&ctx->stale.lock); ··· 537 558 538 559 spin_unlock_irq(&ctx->stale.lock); 539 560 540 - kill_engines(pos); 561 + kill_engines(pos, ban); 541 562 542 563 spin_lock_irq(&ctx->stale.lock); 543 564 GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence)); ··· 547 568 i915_sw_fence_complete(&pos->fence); 548 569 } 549 570 spin_unlock_irq(&ctx->stale.lock); 550 - } 551 - 552 - static void kill_context(struct i915_gem_context *ctx) 553 - { 554 - kill_stale_engines(ctx); 555 571 } 556 572 557 573 static void engines_idle_release(struct i915_gem_context *ctx, ··· 583 609 584 610 kill: 585 611 if (list_empty(&engines->link)) /* raced, already closed */ 586 - kill_engines(engines); 612 + kill_engines(engines, true); 587 613 588 614 i915_sw_fence_commit(&engines->fence); 589 615 } ··· 641 667 * case we opt to forcibly kill off all remaining requests on 642 668 * context close. 643 669 */ 644 - if (!i915_gem_context_is_persistent(ctx) || 645 - !ctx->i915->params.enable_hangcheck) 646 - kill_context(ctx); 670 + kill_context(ctx); 647 671 648 672 i915_gem_context_put(ctx); 649 673 }