Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/selftests: Add tests for GT and engine workaround verification

Two simple selftests which verify that both GT and engine workarounds are
not lost after either a full GPU reset or a per-engine reset.

(Including checks that resetting one engine does not affect workarounds
belonging to the other engines.)

v2:
* Rebase for series refactoring.
* Add spinner for actual engine reset!
* Add idle reset test as well. (Chris Wilson)
* Share existing global_reset_lock. (Chris Wilson)

v3:
* intel_engine_verify_workarounds can be static.
* API rename. (Chris Wilson)
* Move global reset lock out of the loop. (Chris Wilson)

v4:
* Add missing rpm puts. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20181203125014.3219-5-tvrtko.ursulin@linux.intel.com

+214 -47
+1
drivers/gpu/drm/i915/Makefile
··· 166 166 selftests/i915_random.o \ 167 167 selftests/i915_selftest.o \ 168 168 selftests/igt_flush_test.o \ 169 + selftests/igt_reset.o \ 169 170 selftests/igt_spinner.o 170 171 171 172 # virtual gpu code
+6
drivers/gpu/drm/i915/intel_workarounds.c
··· 1303 1303 } 1304 1304 1305 1305 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1306 + static bool intel_engine_verify_workarounds(struct intel_engine_cs *engine, 1307 + const char *from) 1308 + { 1309 + return wa_list_verify(engine->i915, &engine->wa_list, from); 1310 + } 1311 + 1306 1312 #include "selftests/intel_workarounds.c" 1307 1313 #endif
+44
drivers/gpu/drm/i915/selftests/igt_reset.c
··· 1 + /* 2 + * SPDX-License-Identifier: MIT 3 + * 4 + * Copyright © 2018 Intel Corporation 5 + */ 6 + 7 + #include "igt_reset.h" 8 + 9 + #include "../i915_drv.h" 10 + #include "../intel_ringbuffer.h" 11 + 12 + void igt_global_reset_lock(struct drm_i915_private *i915) 13 + { 14 + struct intel_engine_cs *engine; 15 + enum intel_engine_id id; 16 + 17 + pr_debug("%s: current gpu_error=%08lx\n", 18 + __func__, i915->gpu_error.flags); 19 + 20 + while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) 21 + wait_event(i915->gpu_error.reset_queue, 22 + !test_bit(I915_RESET_BACKOFF, 23 + &i915->gpu_error.flags)); 24 + 25 + for_each_engine(engine, i915, id) { 26 + while (test_and_set_bit(I915_RESET_ENGINE + id, 27 + &i915->gpu_error.flags)) 28 + wait_on_bit(&i915->gpu_error.flags, 29 + I915_RESET_ENGINE + id, 30 + TASK_UNINTERRUPTIBLE); 31 + } 32 + } 33 + 34 + void igt_global_reset_unlock(struct drm_i915_private *i915) 35 + { 36 + struct intel_engine_cs *engine; 37 + enum intel_engine_id id; 38 + 39 + for_each_engine(engine, i915, id) 40 + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); 41 + 42 + clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); 43 + wake_up_all(&i915->gpu_error.reset_queue); 44 + }
+15
drivers/gpu/drm/i915/selftests/igt_reset.h
··· 1 + /* 2 + * SPDX-License-Identifier: MIT 3 + * 4 + * Copyright © 2018 Intel Corporation 5 + */ 6 + 7 + #ifndef __I915_SELFTESTS_IGT_RESET_H__ 8 + #define __I915_SELFTESTS_IGT_RESET_H__ 9 + 10 + #include "../i915_drv.h" 11 + 12 + void igt_global_reset_lock(struct drm_i915_private *i915); 13 + void igt_global_reset_unlock(struct drm_i915_private *i915); 14 + 15 + #endif
+9 -42
drivers/gpu/drm/i915/selftests/intel_hangcheck.c
··· 27 27 #include "../i915_selftest.h" 28 28 #include "i915_random.h" 29 29 #include "igt_flush_test.h" 30 + #include "igt_reset.h" 30 31 #include "igt_wedge_me.h" 31 32 32 33 #include "mock_context.h" ··· 355 354 return err; 356 355 } 357 356 358 - static void global_reset_lock(struct drm_i915_private *i915) 359 - { 360 - struct intel_engine_cs *engine; 361 - enum intel_engine_id id; 362 - 363 - pr_debug("%s: current gpu_error=%08lx\n", 364 - __func__, i915->gpu_error.flags); 365 - 366 - while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) 367 - wait_event(i915->gpu_error.reset_queue, 368 - !test_bit(I915_RESET_BACKOFF, 369 - &i915->gpu_error.flags)); 370 - 371 - for_each_engine(engine, i915, id) { 372 - while (test_and_set_bit(I915_RESET_ENGINE + id, 373 - &i915->gpu_error.flags)) 374 - wait_on_bit(&i915->gpu_error.flags, 375 - I915_RESET_ENGINE + id, 376 - TASK_UNINTERRUPTIBLE); 377 - } 378 - } 379 - 380 - static void global_reset_unlock(struct drm_i915_private *i915) 381 - { 382 - struct intel_engine_cs *engine; 383 - enum intel_engine_id id; 384 - 385 - for_each_engine(engine, i915, id) 386 - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); 387 - 388 - clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); 389 - wake_up_all(&i915->gpu_error.reset_queue); 390 - } 391 - 392 357 static int igt_global_reset(void *arg) 393 358 { 394 359 struct drm_i915_private *i915 = arg; ··· 363 396 364 397 /* Check that we can issue a global GPU reset */ 365 398 366 - global_reset_lock(i915); 399 + igt_global_reset_lock(i915); 367 400 set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); 368 401 369 402 mutex_lock(&i915->drm.struct_mutex); ··· 378 411 mutex_unlock(&i915->drm.struct_mutex); 379 412 380 413 GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); 381 - global_reset_unlock(i915); 414 + igt_global_reset_unlock(i915); 382 415 383 416 if (i915_terminally_wedged(&i915->gpu_error)) 384 417 err = -EIO; ··· 909 942 910 943 /* Check that 
we detect a stuck waiter and issue a reset */ 911 944 912 - global_reset_lock(i915); 945 + igt_global_reset_lock(i915); 913 946 914 947 mutex_lock(&i915->drm.struct_mutex); 915 948 err = hang_init(&h, i915); ··· 961 994 hang_fini(&h); 962 995 unlock: 963 996 mutex_unlock(&i915->drm.struct_mutex); 964 - global_reset_unlock(i915); 997 + igt_global_reset_unlock(i915); 965 998 966 999 if (i915_terminally_wedged(&i915->gpu_error)) 967 1000 return -EIO; ··· 1039 1072 1040 1073 /* Check that we can recover an unbind stuck on a hanging request */ 1041 1074 1042 - global_reset_lock(i915); 1075 + igt_global_reset_lock(i915); 1043 1076 1044 1077 mutex_lock(&i915->drm.struct_mutex); 1045 1078 err = hang_init(&h, i915); ··· 1159 1192 hang_fini(&h); 1160 1193 unlock: 1161 1194 mutex_unlock(&i915->drm.struct_mutex); 1162 - global_reset_unlock(i915); 1195 + igt_global_reset_unlock(i915); 1163 1196 1164 1197 if (i915_terminally_wedged(&i915->gpu_error)) 1165 1198 return -EIO; ··· 1239 1272 1240 1273 /* Check that we replay pending requests following a hang */ 1241 1274 1242 - global_reset_lock(i915); 1275 + igt_global_reset_lock(i915); 1243 1276 1244 1277 mutex_lock(&i915->drm.struct_mutex); 1245 1278 err = hang_init(&h, i915); ··· 1370 1403 hang_fini(&h); 1371 1404 unlock: 1372 1405 mutex_unlock(&i915->drm.struct_mutex); 1373 - global_reset_unlock(i915); 1406 + igt_global_reset_unlock(i915); 1374 1407 1375 1408 if (i915_terminally_wedged(&i915->gpu_error)) 1376 1409 return -EIO;
+139 -5
drivers/gpu/drm/i915/selftests/intel_workarounds.c
··· 6 6 7 7 #include "../i915_selftest.h" 8 8 9 + #include "igt_flush_test.h" 10 + #include "igt_reset.h" 9 11 #include "igt_spinner.h" 10 12 #include "igt_wedge_me.h" 11 13 #include "mock_context.h" ··· 292 290 { 293 291 struct drm_i915_private *i915 = arg; 294 292 struct intel_engine_cs *engine = i915->engine[RCS]; 295 - struct i915_gpu_error *error = &i915->gpu_error; 296 293 struct whitelist w; 297 294 int err = 0; 298 295 ··· 303 302 if (!whitelist_build(engine, &w)) 304 303 return 0; 305 304 306 - set_bit(I915_RESET_BACKOFF, &error->flags); 307 - set_bit(I915_RESET_ENGINE + engine->id, &error->flags); 305 + igt_global_reset_lock(i915); 308 306 309 307 if (intel_has_reset_engine(i915)) { 310 308 err = check_whitelist_across_reset(engine, ··· 322 322 } 323 323 324 324 out: 325 - clear_bit(I915_RESET_ENGINE + engine->id, &error->flags); 326 - clear_bit(I915_RESET_BACKOFF, &error->flags); 325 + igt_global_reset_unlock(i915); 327 326 return err; 327 + } 328 + 329 + static bool verify_gt_engine_wa(struct drm_i915_private *i915, const char *str) 330 + { 331 + struct intel_engine_cs *engine; 332 + enum intel_engine_id id; 333 + bool ok = true; 334 + 335 + ok &= intel_gt_verify_workarounds(i915, str); 336 + 337 + for_each_engine(engine, i915, id) 338 + ok &= intel_engine_verify_workarounds(engine, str); 339 + 340 + return ok; 341 + } 342 + 343 + static int 344 + live_gpu_reset_gt_engine_workarounds(void *arg) 345 + { 346 + struct drm_i915_private *i915 = arg; 347 + struct i915_gpu_error *error = &i915->gpu_error; 348 + bool ok; 349 + 350 + if (!intel_has_gpu_reset(i915)) 351 + return 0; 352 + 353 + pr_info("Verifying after GPU reset...\n"); 354 + 355 + igt_global_reset_lock(i915); 356 + 357 + ok = verify_gt_engine_wa(i915, "before reset"); 358 + if (!ok) 359 + goto out; 360 + 361 + intel_runtime_pm_get(i915); 362 + set_bit(I915_RESET_HANDOFF, &error->flags); 363 + i915_reset(i915, ALL_ENGINES, "live_workarounds"); 364 + intel_runtime_pm_put(i915); 365 + 366 + ok = 
verify_gt_engine_wa(i915, "after reset"); 367 + 368 + out: 369 + igt_global_reset_unlock(i915); 370 + 371 + return ok ? 0 : -ESRCH; 372 + } 373 + 374 + static int 375 + live_engine_reset_gt_engine_workarounds(void *arg) 376 + { 377 + struct drm_i915_private *i915 = arg; 378 + struct intel_engine_cs *engine; 379 + struct i915_gem_context *ctx; 380 + struct igt_spinner spin; 381 + enum intel_engine_id id; 382 + struct i915_request *rq; 383 + int ret = 0; 384 + 385 + if (!intel_has_reset_engine(i915)) 386 + return 0; 387 + 388 + ctx = kernel_context(i915); 389 + if (IS_ERR(ctx)) 390 + return PTR_ERR(ctx); 391 + 392 + igt_global_reset_lock(i915); 393 + 394 + for_each_engine(engine, i915, id) { 395 + bool ok; 396 + 397 + pr_info("Verifying after %s reset...\n", engine->name); 398 + 399 + ok = verify_gt_engine_wa(i915, "before reset"); 400 + if (!ok) { 401 + ret = -ESRCH; 402 + goto err; 403 + } 404 + 405 + intel_runtime_pm_get(i915); 406 + i915_reset_engine(engine, "live_workarounds"); 407 + intel_runtime_pm_put(i915); 408 + 409 + ok = verify_gt_engine_wa(i915, "after idle reset"); 410 + if (!ok) { 411 + ret = -ESRCH; 412 + goto err; 413 + } 414 + 415 + ret = igt_spinner_init(&spin, i915); 416 + if (ret) 417 + goto err; 418 + 419 + intel_runtime_pm_get(i915); 420 + 421 + rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP); 422 + if (IS_ERR(rq)) { 423 + ret = PTR_ERR(rq); 424 + igt_spinner_fini(&spin); 425 + intel_runtime_pm_put(i915); 426 + goto err; 427 + } 428 + 429 + i915_request_add(rq); 430 + 431 + if (!igt_wait_for_spinner(&spin, rq)) { 432 + pr_err("Spinner failed to start\n"); 433 + igt_spinner_fini(&spin); 434 + intel_runtime_pm_put(i915); 435 + ret = -ETIMEDOUT; 436 + goto err; 437 + } 438 + 439 + i915_reset_engine(engine, "live_workarounds"); 440 + 441 + intel_runtime_pm_put(i915); 442 + 443 + igt_spinner_end(&spin); 444 + igt_spinner_fini(&spin); 445 + 446 + ok = verify_gt_engine_wa(i915, "after busy reset"); 447 + if (!ok) { 448 + ret = -ESRCH; 449 
+ goto err; 450 + } 451 + } 452 + 453 + err: 454 + igt_global_reset_unlock(i915); 455 + kernel_context_close(ctx); 456 + 457 + igt_flush_test(i915, I915_WAIT_LOCKED); 458 + 459 + return ret; 328 460 } 329 461 330 462 int intel_workarounds_live_selftests(struct drm_i915_private *i915) 331 463 { 332 464 static const struct i915_subtest tests[] = { 333 465 SUBTEST(live_reset_whitelist), 466 + SUBTEST(live_gpu_reset_gt_engine_workarounds), 467 + SUBTEST(live_engine_reset_gt_engine_workarounds), 334 468 }; 335 469 int err; 336 470