Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/gt: Keep a no-frills swappable copy of the default context state

We need to keep the default context state around to instantiate new
contexts (aka golden rendercontext), and we also keep it pinned while
the engine is active so that we can quickly reset a hanging context.
However, the default contexts are large enough to merit keeping in
swappable memory as opposed to kernel memory, so we store them inside
shmemfs. Currently, we use the normal GEM objects to create the default
context image, but we can throw away all but the shmemfs file.

This greatly simplifies the tricky power management code which wants to
run underneath the normal GT locking, and we definitely do not want to
use any high level objects that may appear to recurse back into the GT.
Though perhaps the primary advantage of the complex GEM object is that
we aggressively cache the mapping, but here we are recreating the
vm_area every time we unpark. At the worst, we add a lightweight
cache, but first find a microbenchmark that is impacted.

Having started to create some utility functions to make working with
shmemfs objects easier, we can start putting them to wider use, where
GEM objects are overkill, such as storing persistent error state.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200429172429.6054-1-chris@chris-wilson.co.uk

+291 -123
+1
drivers/gpu/drm/i915/Makefile
··· 111 111 gt/intel_sseu.o \ 112 112 gt/intel_timeline.o \ 113 113 gt/intel_workarounds.o \ 114 + gt/shmem_utils.o \ 114 115 gt/sysfs_engines.o 115 116 # autogenerated null render state 116 117 gt-y += \
+1 -1
drivers/gpu/drm/i915/gt/intel_engine_cs.c
··· 834 834 intel_engine_cleanup_cmd_parser(engine); 835 835 836 836 if (engine->default_state) 837 - i915_gem_object_put(engine->default_state); 837 + fput(engine->default_state); 838 838 839 839 if (engine->kernel_context) { 840 840 intel_context_unpin(engine->kernel_context);
+5 -5
drivers/gpu/drm/i915/gt/intel_engine_pm.c
··· 15 15 #include "intel_gt_pm.h" 16 16 #include "intel_rc6.h" 17 17 #include "intel_ring.h" 18 + #include "shmem_utils.h" 18 19 19 20 static int __engine_unpark(struct intel_wakeref *wf) 20 21 { ··· 31 30 /* Pin the default state for fast resets from atomic context. */ 32 31 map = NULL; 33 32 if (engine->default_state) 34 - map = i915_gem_object_pin_map(engine->default_state, 35 - I915_MAP_WB); 36 - if (!IS_ERR_OR_NULL(map)) 37 - engine->pinned_default_state = map; 33 + map = shmem_pin_map(engine->default_state); 34 + engine->pinned_default_state = map; 38 35 39 36 /* Discard stale context state from across idling */ 40 37 ce = engine->kernel_context; ··· 263 264 engine->park(engine); 264 265 265 266 if (engine->pinned_default_state) { 266 - i915_gem_object_unpin_map(engine->default_state); 267 + shmem_unpin_map(engine->default_state, 268 + engine->pinned_default_state); 267 269 engine->pinned_default_state = NULL; 268 270 } 269 271
+1 -1
drivers/gpu/drm/i915/gt/intel_engine_types.h
··· 339 339 340 340 unsigned long wakeref_serial; 341 341 struct intel_wakeref wakeref; 342 - struct drm_i915_gem_object *default_state; 342 + struct file *default_state; 343 343 void *pinned_default_state; 344 344 345 345 struct {
+8 -52
drivers/gpu/drm/i915/gt/intel_gt.c
··· 16 16 #include "intel_rps.h" 17 17 #include "intel_uncore.h" 18 18 #include "intel_pm.h" 19 + #include "shmem_utils.h" 19 20 20 21 void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) 21 22 { ··· 372 371 return i915_vm_get(&gt->ggtt->vm); 373 372 } 374 373 375 - static int __intel_context_flush_retire(struct intel_context *ce) 376 - { 377 - struct intel_timeline *tl; 378 - 379 - tl = intel_context_timeline_lock(ce); 380 - if (IS_ERR(tl)) 381 - return PTR_ERR(tl); 382 - 383 - intel_context_timeline_unlock(tl); 384 - return 0; 385 - } 386 - 387 374 static int __engines_record_defaults(struct intel_gt *gt) 388 375 { 389 376 struct i915_request *requests[I915_NUM_ENGINES] = {}; ··· 437 448 438 449 for (id = 0; id < ARRAY_SIZE(requests); id++) { 439 450 struct i915_request *rq; 440 - struct i915_vma *state; 441 - void *vaddr; 451 + struct file *state; 442 452 443 453 rq = requests[id]; 444 454 if (!rq) ··· 449 461 } 450 462 451 463 GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); 452 - state = rq->context->state; 453 - if (!state) 464 + if (!rq->context->state) 454 465 continue; 455 466 456 - /* Serialise with retirement on another CPU */ 457 - GEM_BUG_ON(!i915_request_completed(rq)); 458 - err = __intel_context_flush_retire(rq->context); 459 - if (err) 460 - goto out; 461 - 462 - /* We want to be able to unbind the state from the GGTT */ 463 - GEM_BUG_ON(intel_context_is_pinned(rq->context)); 464 - 465 - /* 466 - * As we will hold a reference to the logical state, it will 467 - * not be torn down with the context, and importantly the 468 - * object will hold onto its vma (making it possible for a 469 - * stray GTT write to corrupt our defaults). Unmap the vma 470 - * from the GTT to prevent such accidents and reclaim the 471 - * space. 
472 - */ 473 - err = i915_vma_unbind(state); 474 - if (err) 475 - goto out; 476 - 477 - i915_gem_object_lock(state->obj); 478 - err = i915_gem_object_set_to_cpu_domain(state->obj, false); 479 - i915_gem_object_unlock(state->obj); 480 - if (err) 481 - goto out; 482 - 483 - i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC); 484 - 485 - /* Check we can acquire the image of the context state */ 486 - vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB); 487 - if (IS_ERR(vaddr)) { 488 - err = PTR_ERR(vaddr); 467 + /* Keep a copy of the state's backing pages; free the obj */ 468 + state = shmem_create_from_object(rq->context->state->obj); 469 + if (IS_ERR(state)) { 470 + err = PTR_ERR(state); 489 471 goto out; 490 472 } 491 - 492 - rq->engine->default_state = i915_gem_object_get(state->obj); 493 - i915_gem_object_unpin_map(state->obj); 473 + rq->engine->default_state = state; 494 474 } 495 475 496 476 out:
+6 -19
drivers/gpu/drm/i915/gt/intel_lrc.c
··· 147 147 #include "intel_reset.h" 148 148 #include "intel_ring.h" 149 149 #include "intel_workarounds.h" 150 + #include "shmem_utils.h" 150 151 151 152 #define RING_EXECLIST_QFULL (1 << 0x2) 152 153 #define RING_EXECLIST1_VALID (1 << 0x3) ··· 5084 5083 { 5085 5084 bool inhibit = true; 5086 5085 void *vaddr; 5087 - int ret; 5088 5086 5089 5087 vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); 5090 5088 if (IS_ERR(vaddr)) { 5091 - ret = PTR_ERR(vaddr); 5092 - drm_dbg(&engine->i915->drm, 5093 - "Could not map object pages! (%d)\n", ret); 5094 - return ret; 5089 + drm_dbg(&engine->i915->drm, "Could not map object pages!\n"); 5090 + return PTR_ERR(vaddr); 5095 5091 } 5096 5092 5097 5093 set_redzone(vaddr, engine); 5098 5094 5099 5095 if (engine->default_state) { 5100 - void *defaults; 5101 - 5102 - defaults = i915_gem_object_pin_map(engine->default_state, 5103 - I915_MAP_WB); 5104 - if (IS_ERR(defaults)) { 5105 - ret = PTR_ERR(defaults); 5106 - goto err_unpin_ctx; 5107 - } 5108 - 5109 - memcpy(vaddr, defaults, engine->context_size); 5110 - i915_gem_object_unpin_map(engine->default_state); 5096 + shmem_read(engine->default_state, 0, 5097 + vaddr, engine->context_size); 5111 5098 __set_bit(CONTEXT_VALID_BIT, &ce->flags); 5112 5099 inhibit = false; 5113 5100 } ··· 5110 5121 execlists_init_reg_state(vaddr + LRC_STATE_OFFSET, 5111 5122 ce, engine, ring, inhibit); 5112 5123 5113 - ret = 0; 5114 - err_unpin_ctx: 5115 5124 __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); 5116 5125 i915_gem_object_unpin_map(ctx_obj); 5117 - return ret; 5126 + return 0; 5118 5127 } 5119 5128 5120 5129 static int __execlists_context_alloc(struct intel_context *ce,
+4 -12
drivers/gpu/drm/i915/gt/intel_ring_submission.c
··· 42 42 #include "intel_reset.h" 43 43 #include "intel_ring.h" 44 44 #include "intel_workarounds.h" 45 + #include "shmem_utils.h" 45 46 46 47 /* Rough estimate of the typical request size, performing a flush, 47 48 * set-context and then emitting the batch. ··· 1242 1241 i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC); 1243 1242 1244 1243 if (engine->default_state) { 1245 - void *defaults, *vaddr; 1244 + void *vaddr; 1246 1245 1247 1246 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); 1248 1247 if (IS_ERR(vaddr)) { ··· 1250 1249 goto err_obj; 1251 1250 } 1252 1251 1253 - defaults = i915_gem_object_pin_map(engine->default_state, 1254 - I915_MAP_WB); 1255 - if (IS_ERR(defaults)) { 1256 - err = PTR_ERR(defaults); 1257 - goto err_map; 1258 - } 1259 - 1260 - memcpy(vaddr, defaults, engine->context_size); 1261 - i915_gem_object_unpin_map(engine->default_state); 1252 + shmem_read(engine->default_state, 0, 1253 + vaddr, engine->context_size); 1262 1254 1263 1255 i915_gem_object_flush_map(obj); 1264 1256 i915_gem_object_unpin_map(obj); ··· 1265 1271 1266 1272 return vma; 1267 1273 1268 - err_map: 1269 - i915_gem_object_unpin_map(obj); 1270 1274 err_obj: 1271 1275 i915_gem_object_put(obj); 1272 1276 return ERR_PTR(err);
+1 -1
drivers/gpu/drm/i915/gt/selftest_context.c
··· 155 155 156 156 for_each_engine(engine, gt, id) { 157 157 struct { 158 - struct drm_i915_gem_object *state; 158 + struct file *state; 159 159 void *pinned; 160 160 } saved; 161 161
+4 -6
drivers/gpu/drm/i915/gt/selftest_lrc.c
··· 4452 4452 if (!engine->default_state) 4453 4453 continue; 4454 4454 4455 - hw = i915_gem_object_pin_map(engine->default_state, 4456 - I915_MAP_WB); 4455 + hw = shmem_pin_map(engine->default_state); 4457 4456 if (IS_ERR(hw)) { 4458 4457 err = PTR_ERR(hw); 4459 4458 break; ··· 4524 4525 hexdump(lrc, PAGE_SIZE); 4525 4526 } 4526 4527 4527 - i915_gem_object_unpin_map(engine->default_state); 4528 + shmem_unpin_map(engine->default_state, hw); 4528 4529 if (err) 4529 4530 break; 4530 4531 } ··· 4629 4630 if (!engine->default_state) 4630 4631 continue; 4631 4632 4632 - hw = i915_gem_object_pin_map(engine->default_state, 4633 - I915_MAP_WB); 4633 + hw = shmem_pin_map(engine->default_state); 4634 4634 if (IS_ERR(hw)) { 4635 4635 err = PTR_ERR(hw); 4636 4636 break; ··· 4650 4652 } 4651 4653 } 4652 4654 4653 - i915_gem_object_unpin_map(engine->default_state); 4655 + shmem_unpin_map(engine->default_state, hw); 4654 4656 } 4655 4657 4656 4658 return err;
+173
drivers/gpu/drm/i915/gt/shmem_utils.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2020 Intel Corporation 4 + */ 5 + 6 + #include <linux/mm.h> 7 + #include <linux/pagemap.h> 8 + #include <linux/shmem_fs.h> 9 + 10 + #include "gem/i915_gem_object.h" 11 + #include "shmem_utils.h" 12 + 13 + struct file *shmem_create_from_data(const char *name, void *data, size_t len) 14 + { 15 + struct file *file; 16 + int err; 17 + 18 + file = shmem_file_setup(name, PAGE_ALIGN(len), VM_NORESERVE); 19 + if (IS_ERR(file)) 20 + return file; 21 + 22 + err = shmem_write(file, 0, data, len); 23 + if (err) { 24 + fput(file); 25 + return ERR_PTR(err); 26 + } 27 + 28 + return file; 29 + } 30 + 31 + struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) 32 + { 33 + struct file *file; 34 + void *ptr; 35 + 36 + if (obj->ops == &i915_gem_shmem_ops) { 37 + file = obj->base.filp; 38 + atomic_long_inc(&file->f_count); 39 + return file; 40 + } 41 + 42 + ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); 43 + if (IS_ERR(ptr)) 44 + return ERR_CAST(ptr); 45 + 46 + file = shmem_create_from_data("", ptr, obj->base.size); 47 + i915_gem_object_unpin_map(obj); 48 + 49 + return file; 50 + } 51 + 52 + static size_t shmem_npte(struct file *file) 53 + { 54 + return file->f_mapping->host->i_size >> PAGE_SHIFT; 55 + } 56 + 57 + static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte) 58 + { 59 + unsigned long pfn; 60 + 61 + vunmap(ptr); 62 + 63 + for (pfn = 0; pfn < n_pte; pfn++) { 64 + struct page *page; 65 + 66 + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, 67 + GFP_KERNEL); 68 + if (!WARN_ON(IS_ERR(page))) { 69 + put_page(page); 70 + put_page(page); 71 + } 72 + } 73 + } 74 + 75 + void *shmem_pin_map(struct file *file) 76 + { 77 + const size_t n_pte = shmem_npte(file); 78 + pte_t *stack[32], **ptes, **mem; 79 + struct vm_struct *area; 80 + unsigned long pfn; 81 + 82 + mem = stack; 83 + if (n_pte > ARRAY_SIZE(stack)) { 84 + mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL); 85 + if 
(!mem) 86 + return NULL; 87 + } 88 + 89 + area = alloc_vm_area(n_pte << PAGE_SHIFT, mem); 90 + if (!area) { 91 + if (mem != stack) 92 + kvfree(mem); 93 + return NULL; 94 + } 95 + 96 + ptes = mem; 97 + for (pfn = 0; pfn < n_pte; pfn++) { 98 + struct page *page; 99 + 100 + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, 101 + GFP_KERNEL); 102 + if (IS_ERR(page)) 103 + goto err_page; 104 + 105 + **ptes++ = mk_pte(page, PAGE_KERNEL); 106 + } 107 + 108 + if (mem != stack) 109 + kvfree(mem); 110 + 111 + mapping_set_unevictable(file->f_mapping); 112 + return area->addr; 113 + 114 + err_page: 115 + if (mem != stack) 116 + kvfree(mem); 117 + 118 + __shmem_unpin_map(file, area->addr, pfn); 119 + return NULL; 120 + } 121 + 122 + void shmem_unpin_map(struct file *file, void *ptr) 123 + { 124 + mapping_clear_unevictable(file->f_mapping); 125 + __shmem_unpin_map(file, ptr, shmem_npte(file)); 126 + } 127 + 128 + static int __shmem_rw(struct file *file, loff_t off, 129 + void *ptr, size_t len, 130 + bool write) 131 + { 132 + unsigned long pfn; 133 + 134 + for (pfn = off >> PAGE_SHIFT; len; pfn++) { 135 + unsigned int this = 136 + min_t(size_t, PAGE_SIZE - offset_in_page(off), len); 137 + struct page *page; 138 + void *vaddr; 139 + 140 + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, 141 + GFP_KERNEL); 142 + if (IS_ERR(page)) 143 + return PTR_ERR(page); 144 + 145 + vaddr = kmap(page); 146 + if (write) 147 + memcpy(vaddr + offset_in_page(off), ptr, this); 148 + else 149 + memcpy(ptr, vaddr + offset_in_page(off), this); 150 + kunmap(page); 151 + put_page(page); 152 + 153 + len -= this; 154 + ptr += this; 155 + off = 0; 156 + } 157 + 158 + return 0; 159 + } 160 + 161 + int shmem_read(struct file *file, loff_t off, void *dst, size_t len) 162 + { 163 + return __shmem_rw(file, off, dst, len, false); 164 + } 165 + 166 + int shmem_write(struct file *file, loff_t off, void *src, size_t len) 167 + { 168 + return __shmem_rw(file, off, src, len, true); 169 + } 170 + 171 + 
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 172 + #include "st_shmem_utils.c" 173 + #endif
+23
drivers/gpu/drm/i915/gt/shmem_utils.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2020 Intel Corporation 4 + */ 5 + 6 + #ifndef SHMEM_UTILS_H 7 + #define SHMEM_UTILS_H 8 + 9 + #include <linux/types.h> 10 + 11 + struct drm_i915_gem_object; 12 + struct file; 13 + 14 + struct file *shmem_create_from_data(const char *name, void *data, size_t len); 15 + struct file *shmem_create_from_object(struct drm_i915_gem_object *obj); 16 + 17 + void *shmem_pin_map(struct file *file); 18 + void shmem_unpin_map(struct file *file, void *ptr); 19 + 20 + int shmem_read(struct file *file, loff_t off, void *dst, size_t len); 21 + int shmem_write(struct file *file, loff_t off, void *src, size_t len); 22 + 23 + #endif /* SHMEM_UTILS_H */
+63
drivers/gpu/drm/i915/gt/st_shmem_utils.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2020 Intel Corporation 4 + */ 5 + 6 + /* Just a quick and causal check of the shmem_utils API */ 7 + 8 + static int igt_shmem_basic(void *ignored) 9 + { 10 + u32 datum = 0xdeadbeef, result; 11 + struct file *file; 12 + u32 *map; 13 + int err; 14 + 15 + file = shmem_create_from_data("mock", &datum, sizeof(datum)); 16 + if (IS_ERR(file)) 17 + return PTR_ERR(file); 18 + 19 + result = 0; 20 + err = shmem_read(file, 0, &result, sizeof(result)); 21 + if (err) 22 + goto out_file; 23 + 24 + if (result != datum) { 25 + pr_err("Incorrect read back from shmemfs: %x != %x\n", 26 + result, datum); 27 + err = -EINVAL; 28 + goto out_file; 29 + } 30 + 31 + result = 0xc0ffee; 32 + err = shmem_write(file, 0, &result, sizeof(result)); 33 + if (err) 34 + goto out_file; 35 + 36 + map = shmem_pin_map(file); 37 + if (!map) { 38 + err = -ENOMEM; 39 + goto out_file; 40 + } 41 + 42 + if (*map != result) { 43 + pr_err("Incorrect read back via mmap of last write: %x != %x\n", 44 + *map, result); 45 + err = -EINVAL; 46 + goto out_map; 47 + } 48 + 49 + out_map: 50 + shmem_unpin_map(file, map); 51 + out_file: 52 + fput(file); 53 + return err; 54 + } 55 + 56 + int shmem_utils_mock_selftests(void) 57 + { 58 + static const struct i915_subtest tests[] = { 59 + SUBTEST(igt_shmem_basic), 60 + }; 61 + 62 + return i915_subtests(tests, NULL); 63 + }
-26
drivers/gpu/drm/i915/i915_gpu_error.c
··· 1320 1320 return capture; 1321 1321 } 1322 1322 1323 - static struct i915_vma_coredump * 1324 - capture_object(const struct intel_gt *gt, 1325 - struct drm_i915_gem_object *obj, 1326 - const char *name, 1327 - struct i915_vma_compress *compress) 1328 - { 1329 - if (obj && i915_gem_object_has_pages(obj)) { 1330 - struct i915_vma fake = { 1331 - .node = { .start = U64_MAX, .size = obj->base.size }, 1332 - .size = obj->base.size, 1333 - .pages = obj->mm.pages, 1334 - .obj = obj, 1335 - }; 1336 - 1337 - return i915_vma_coredump_create(gt, &fake, name, compress); 1338 - } else { 1339 - return NULL; 1340 - } 1341 - } 1342 - 1343 1323 static void add_vma(struct intel_engine_coredump *ee, 1344 1324 struct i915_vma_coredump *vma) 1345 1325 { ··· 1408 1428 engine->wa_ctx.vma, 1409 1429 "WA context", 1410 1430 compress)); 1411 - 1412 - add_vma(ee, 1413 - capture_object(engine->gt, 1414 - engine->default_state, 1415 - "NULL context", 1416 - compress)); 1417 1431 } 1418 1432 1419 1433 static struct intel_engine_coredump *
+1
drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
··· 16 16 * Tests are executed in order by igt/drv_selftest 17 17 */ 18 18 selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ 19 + selftest(shmem, shmem_utils_mock_selftests) 19 20 selftest(fence, i915_sw_fence_mock_selftests) 20 21 selftest(scatterlist, scatterlist_mock_selftests) 21 22 selftest(syncmap, i915_syncmap_mock_selftests)