Linux kernel mirror (for testing) · git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v5.13 · 597 lines · 13 kB
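The file shown is the i915 GEM blitter selftest suite (judging by its entry points, likely drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c): the perf_* subtests benchmark fill and copy throughput on each copy engine, while the igt_* subtests stress fill/copy correctness from many concurrent threads.
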
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"

static int wrap_ktime_compare(const void *A, const void *B)
{
        const ktime_t *a = A, *b = B;

        return ktime_compare(*a, *b);
}

static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        int inst = 0;

        do {
                struct intel_engine_cs *engine;
                ktime_t t[5];
                int pass;
                int err;

                engine = intel_engine_lookup_user(i915,
                                                  I915_ENGINE_CLASS_COPY,
                                                  inst++);
                if (!engine)
                        return 0;

                intel_engine_pm_get(engine);
                for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
                        struct intel_context *ce = engine->kernel_context;
                        ktime_t t0, t1;

                        t0 = ktime_get();

                        err = i915_gem_object_fill_blt(obj, ce, 0);
                        if (err)
                                break;

                        err = i915_gem_object_wait(obj,
                                                   I915_WAIT_ALL,
                                                   MAX_SCHEDULE_TIMEOUT);
                        if (err)
                                break;

                        t1 = ktime_get();
                        t[pass] = ktime_sub(t1, t0);
                }
                intel_engine_pm_put(engine);
                if (err)
                        return err;

                sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
                pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
                        engine->name,
                        obj->base.size >> 10,
                        div64_u64(mul_u32_u32(4 * obj->base.size,
                                              1000 * 1000 * 1000),
                                  t[1] + 2 * t[2] + t[3]) >> 20);
        } while (1);
}

static int perf_fill_blt(void *arg)
{
        struct drm_i915_private *i915 = arg;
        static const unsigned long sizes[] = {
                SZ_4K,
                SZ_64K,
                SZ_2M,
                SZ_64M
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(sizes); i++) {
                struct drm_i915_gem_object *obj;
                int err;

                obj = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(obj))
                        return PTR_ERR(obj);

                err = __perf_fill_blt(obj);
                i915_gem_object_put(obj);
                if (err)
                        return err;
        }

        return 0;
}

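/*
 * A note on the reporting above and in __perf_copy_blt() below: each
 * copy engine is timed over five passes, the samples are sorted, and
 * the fastest and slowest are discarded. The middle three are combined
 * with the median weighted double (t[1] + 2 * t[2] + t[3]), which is
 * why the numerator carries 4 * the object size. Multiplying by 10^9
 * and dividing by the summed nanoseconds yields bytes per second, and
 * ">> 20" scales that to MiB/s.
 */
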
static int __perf_copy_blt(struct drm_i915_gem_object *src,
                           struct drm_i915_gem_object *dst)
{
        struct drm_i915_private *i915 = to_i915(src->base.dev);
        int inst = 0;

        do {
                struct intel_engine_cs *engine;
                ktime_t t[5];
                int pass;
                int err = 0;

                engine = intel_engine_lookup_user(i915,
                                                  I915_ENGINE_CLASS_COPY,
                                                  inst++);
                if (!engine)
                        return 0;

                intel_engine_pm_get(engine);
                for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
                        struct intel_context *ce = engine->kernel_context;
                        ktime_t t0, t1;

                        t0 = ktime_get();

                        err = i915_gem_object_copy_blt(src, dst, ce);
                        if (err)
                                break;

                        err = i915_gem_object_wait(dst,
                                                   I915_WAIT_ALL,
                                                   MAX_SCHEDULE_TIMEOUT);
                        if (err)
                                break;

                        t1 = ktime_get();
                        t[pass] = ktime_sub(t1, t0);
                }
                intel_engine_pm_put(engine);
                if (err)
                        return err;

                sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
                pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
                        engine->name,
                        src->base.size >> 10,
                        div64_u64(mul_u32_u32(4 * src->base.size,
                                              1000 * 1000 * 1000),
                                  t[1] + 2 * t[2] + t[3]) >> 20);
        } while (1);
}

static int perf_copy_blt(void *arg)
{
        struct drm_i915_private *i915 = arg;
        static const unsigned long sizes[] = {
                SZ_4K,
                SZ_64K,
                SZ_2M,
                SZ_64M
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(sizes); i++) {
                struct drm_i915_gem_object *src, *dst;
                int err;

                src = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(src))
                        return PTR_ERR(src);

                dst = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(dst)) {
                        err = PTR_ERR(dst);
                        goto err_src;
                }

                err = __perf_copy_blt(src, dst);

                i915_gem_object_put(dst);
err_src:
                i915_gem_object_put(src);
                if (err)
                        return err;
        }

        return 0;
}

struct igt_thread_arg {
        struct intel_engine_cs *engine;
        struct i915_gem_context *ctx;
        struct file *file;
        struct rnd_state prng;
        unsigned int n_cpus;
};

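/*
 * The igt_*_thread() functions below are the worker bodies for the
 * threaded stress tests. igt_threaded_blt() (further below) spawns
 * num_online_cpus() + 1 of them per engine; each worker repeatedly
 * creates a randomly sized huge_gem_object(), blits into it and
 * verifies the result until its IGT_TIMEOUT expires. Without
 * SINGLE_CTX every worker creates its own context with a random
 * scheduling priority; with SINGLE_CTX they all share thread[0].ctx,
 * and (as when working on the shared GGTT) the per-worker
 * address-space budget is divided by n_cpus.
 */
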
static int igt_fill_blt_thread(void *arg)
{
        struct igt_thread_arg *thread = arg;
        struct intel_engine_cs *engine = thread->engine;
        struct rnd_state *prng = &thread->prng;
        struct drm_i915_gem_object *obj;
        struct i915_gem_context *ctx;
        struct intel_context *ce;
        unsigned int prio;
        IGT_TIMEOUT(end);
        u64 total, max;
        int err;

        ctx = thread->ctx;
        if (!ctx) {
                ctx = live_context_for_engine(engine, thread->file);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);

                prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
                ctx->sched.priority = prio;
        }

        ce = i915_gem_context_get_engine(ctx, 0);
        GEM_BUG_ON(IS_ERR(ce));

        /*
         * If we have a tiny shared address space, like for the GGTT
         * then we can't be too greedy.
         */
        max = ce->vm->total;
        if (i915_is_ggtt(ce->vm) || thread->ctx)
                max = div_u64(max, thread->n_cpus);
        max >>= 4;

        total = PAGE_SIZE;
        do {
                /* Aim to keep the runtime under reasonable bounds! */
                const u32 max_phys_size = SZ_64K;
                u32 val = prandom_u32_state(prng);
                u32 phys_sz;
                u32 sz;
                u32 *vaddr;
                u32 i;

                total = min(total, max);
                sz = i915_prandom_u32_max_state(total, prng) + 1;
                phys_sz = sz % max_phys_size + 1;

                sz = round_up(sz, PAGE_SIZE);
                phys_sz = round_up(phys_sz, PAGE_SIZE);
                phys_sz = min(phys_sz, sz);

                pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
                         phys_sz, sz, val);

                obj = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(obj)) {
                        err = PTR_ERR(obj);
                        goto err_flush;
                }

                vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put;
                }

                /*
                 * Make sure the potentially async clflush does its job, if
                 * required.
                 */
                memset32(vaddr, val ^ 0xdeadbeaf,
                         huge_gem_object_phys_size(obj) / sizeof(u32));

                if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                        obj->cache_dirty = true;

                err = i915_gem_object_fill_blt(obj, ce, val);
                if (err)
                        goto err_unpin;

                err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
                if (err)
                        goto err_unpin;

                for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) {
                        if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                                drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

                        if (vaddr[i] != val) {
                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
                                       vaddr[i], val);
                                err = -EINVAL;
                                goto err_unpin;
                        }
                }

                i915_gem_object_unpin_map(obj);
                i915_gem_object_put(obj);

                total <<= 1;
        } while (!time_after(jiffies, end));

        goto err_flush;

err_unpin:
        i915_gem_object_unpin_map(obj);
err_put:
        i915_gem_object_put(obj);
err_flush:
        if (err == -ENOMEM)
                err = 0;

        intel_context_put(ce);
        return err;
}

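/*
 * The copy variant below mirrors igt_fill_blt_thread(): the source is
 * seeded with val, the destination is poisoned with val ^ 0xdeadbeaf,
 * and after i915_gem_object_copy_blt() plus a wait the destination is
 * spot-checked at every 17th u32, flushing the cacheline first when
 * the object is not cache coherent for reads.
 */
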
static int igt_copy_blt_thread(void *arg)
{
        struct igt_thread_arg *thread = arg;
        struct intel_engine_cs *engine = thread->engine;
        struct rnd_state *prng = &thread->prng;
        struct drm_i915_gem_object *src, *dst;
        struct i915_gem_context *ctx;
        struct intel_context *ce;
        unsigned int prio;
        IGT_TIMEOUT(end);
        u64 total, max;
        int err;

        ctx = thread->ctx;
        if (!ctx) {
                ctx = live_context_for_engine(engine, thread->file);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);

                prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
                ctx->sched.priority = prio;
        }

        ce = i915_gem_context_get_engine(ctx, 0);
        GEM_BUG_ON(IS_ERR(ce));

        /*
         * If we have a tiny shared address space, like for the GGTT
         * then we can't be too greedy.
         */
        max = ce->vm->total;
        if (i915_is_ggtt(ce->vm) || thread->ctx)
                max = div_u64(max, thread->n_cpus);
        max >>= 4;

        total = PAGE_SIZE;
        do {
                /* Aim to keep the runtime under reasonable bounds! */
                const u32 max_phys_size = SZ_64K;
                u32 val = prandom_u32_state(prng);
                u32 phys_sz;
                u32 sz;
                u32 *vaddr;
                u32 i;

                total = min(total, max);
                sz = i915_prandom_u32_max_state(total, prng) + 1;
                phys_sz = sz % max_phys_size + 1;

                sz = round_up(sz, PAGE_SIZE);
                phys_sz = round_up(phys_sz, PAGE_SIZE);
                phys_sz = min(phys_sz, sz);

                pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
                         phys_sz, sz, val);

                src = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(src)) {
                        err = PTR_ERR(src);
                        goto err_flush;
                }

                vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put_src;
                }

                memset32(vaddr, val,
                         huge_gem_object_phys_size(src) / sizeof(u32));

                i915_gem_object_unpin_map(src);

                if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                        src->cache_dirty = true;

                dst = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(dst)) {
                        err = PTR_ERR(dst);
                        goto err_put_src;
                }

                vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put_dst;
                }

                memset32(vaddr, val ^ 0xdeadbeaf,
                         huge_gem_object_phys_size(dst) / sizeof(u32));

                if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                        dst->cache_dirty = true;

                err = i915_gem_object_copy_blt(src, dst, ce);
                if (err)
                        goto err_unpin;

                err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT);
                if (err)
                        goto err_unpin;

                for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) {
                        if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                                drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

                        if (vaddr[i] != val) {
                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
                                       vaddr[i], val);
                                err = -EINVAL;
                                goto err_unpin;
                        }
                }

                i915_gem_object_unpin_map(dst);

                i915_gem_object_put(src);
                i915_gem_object_put(dst);

                total <<= 1;
        } while (!time_after(jiffies, end));

        goto err_flush;

err_unpin:
        i915_gem_object_unpin_map(dst);
err_put_dst:
        i915_gem_object_put(dst);
err_put_src:
        i915_gem_object_put(src);
err_flush:
        if (err == -ENOMEM)
                err = 0;

        intel_context_put(ce);
        return err;
}

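/*
 * Thread orchestration for the workers above: kthread_run() starts
 * each worker immediately, yield() lets them get going, and
 * kthread_stop() then waits for each worker's timed loop to finish
 * and collects its exit status (the references taken with
 * get_task_struct() keep that status readable even if a worker has
 * already exited). The first non-zero status wins. The extra worker
 * beyond num_online_cpus() presumably keeps the engine oversubscribed
 * while other workers block in i915_gem_object_wait().
 */
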
static int igt_threaded_blt(struct intel_engine_cs *engine,
                            int (*blt_fn)(void *arg),
                            unsigned int flags)
#define SINGLE_CTX BIT(0)
{
        struct igt_thread_arg *thread;
        struct task_struct **tsk;
        unsigned int n_cpus, i;
        I915_RND_STATE(prng);
        int err = 0;

        n_cpus = num_online_cpus() + 1;

        tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
        if (!tsk)
                return 0;

        thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
        if (!thread)
                goto out_tsk;

        thread[0].file = mock_file(engine->i915);
        if (IS_ERR(thread[0].file)) {
                err = PTR_ERR(thread[0].file);
                goto out_thread;
        }

        if (flags & SINGLE_CTX) {
                thread[0].ctx = live_context_for_engine(engine, thread[0].file);
                if (IS_ERR(thread[0].ctx)) {
                        err = PTR_ERR(thread[0].ctx);
                        goto out_file;
                }
        }

        for (i = 0; i < n_cpus; ++i) {
                thread[i].engine = engine;
                thread[i].file = thread[0].file;
                thread[i].ctx = thread[0].ctx;
                thread[i].n_cpus = n_cpus;
                thread[i].prng =
                        I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

                tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
                if (IS_ERR(tsk[i])) {
                        err = PTR_ERR(tsk[i]);
                        break;
                }

                get_task_struct(tsk[i]);
        }

        yield(); /* start all threads before we kthread_stop() */

        for (i = 0; i < n_cpus; ++i) {
                int status;

                if (IS_ERR_OR_NULL(tsk[i]))
                        continue;

                status = kthread_stop(tsk[i]);
                if (status && !err)
                        err = status;

                put_task_struct(tsk[i]);
        }

out_file:
        fput(thread[0].file);
out_thread:
        kfree(thread);
out_tsk:
        kfree(tsk);
        return err;
}

static int test_copy_engines(struct drm_i915_private *i915,
                             int (*fn)(void *arg),
                             unsigned int flags)
{
        struct intel_engine_cs *engine;
        int ret;

        for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) {
                ret = igt_threaded_blt(engine, fn, flags);
                if (ret)
                        return ret;
        }

        return 0;
}

static int igt_fill_blt(void *arg)
{
        return test_copy_engines(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
        return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
        return test_copy_engines(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
        return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX);
}

int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_fill_blt),
                SUBTEST(igt_fill_blt_ctx0),
                SUBTEST(igt_copy_blt),
                SUBTEST(igt_copy_blt_ctx0),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(perf_fill_blt),
                SUBTEST(perf_copy_blt),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return i915_live_subtests(tests, i915);
}
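
Both entry points bail out early via intel_gt_is_wedged() if the GPU is already unusable. On a kernel built with CONFIG_DRM_I915_SELFTEST these suites are driven by the i915 selftest framework, presumably via module parameters along the lines of i915.live_selftests=-1 and i915.perf_selftests=-1 (parameter names taken from that framework, not from this file).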