
drm/i915/region: support contiguous allocations

Some kernel-internal objects may need to be allocated as a contiguous
block; also, thinking ahead, the various kernel io_mapping interfaces
seem to expect it. That is purely a limitation of the kernel API,
though... so perhaps something to be improved.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
Cc: Michael J Ruhl <michael.j.ruhl@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20191008160116.18379-3-matthew.auld@intel.com
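
As a usage illustration (not part of this patch): a minimal, hypothetical
caller of the new flag. The helper name example_create_contiguous is
invented here; i915_gem_object_create_region() and
i915_gem_object_is_contiguous() are the interfaces touched by the diff below.

/*
 * Hypothetical caller, for illustration only: allocate a kernel-internal
 * object whose backing store must be a single physically contiguous
 * block, e.g. ahead of mapping it through the io_mapping interfaces.
 */
static struct drm_i915_gem_object *
example_create_contiguous(struct intel_memory_region *mem,
                          resource_size_t size)
{
        struct drm_i915_gem_object *obj;

        obj = i915_gem_object_create_region(mem, size,
                                            I915_BO_ALLOC_CONTIGUOUS);
        if (IS_ERR(obj))
                return obj;

        /* The allocation flag is recorded on the object and queryable. */
        GEM_BUG_ON(!i915_gem_object_is_contiguous(obj));

        return obj;
}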

Authored by Matthew Auld, committed by Chris Wilson
2f0b97ca 232a6eba

9 changed files, +245 -45
+6
drivers/gpu/drm/i915/gem/i915_gem_object.h
···
 }
 
 static inline bool
+i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj)
+{
+        return obj->flags & I915_BO_ALLOC_CONTIGUOUS;
+}
+
+static inline bool
 i915_gem_object_type_has(const struct drm_i915_gem_object *obj,
                          unsigned long flags)
 {
+4
drivers/gpu/drm/i915/gem/i915_gem_object_types.h
···
 
         I915_SELFTEST_DECLARE(struct list_head st_link);
 
+        unsigned long flags;
+#define I915_BO_ALLOC_CONTIGUOUS BIT(0)
+#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS)
+
         /*
          * Is the object to be mapped as read-only to the GPU
          * Only honoured if hardware has relevant pte bit
+12 -3
drivers/gpu/drm/i915/gem/i915_gem_region.c
···
 {
         struct intel_memory_region *mem = obj->mm.region;
         struct list_head *blocks = &obj->mm.blocks;
-        unsigned int flags = I915_ALLOC_MIN_PAGE_SIZE;
         resource_size_t size = obj->base.size;
         resource_size_t prev_end;
         struct i915_buddy_block *block;
+        unsigned int flags;
         struct sg_table *st;
         struct scatterlist *sg;
         unsigned int sg_page_sizes;
···
                 kfree(st);
                 return -ENOMEM;
         }
+
+        flags = I915_ALLOC_MIN_PAGE_SIZE;
+        if (obj->flags & I915_BO_ALLOC_CONTIGUOUS)
+                flags |= I915_ALLOC_CONTIGUOUS;
 
         ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks);
         if (ret)
···
         list_for_each_entry(block, blocks, link) {
                 u64 block_size, offset;
 
-                block_size = i915_buddy_block_size(&mem->mm, block);
+                block_size = min_t(u64, size,
+                                   i915_buddy_block_size(&mem->mm, block));
                 offset = i915_buddy_block_offset(block);
 
                 GEM_BUG_ON(overflows_type(block_size, sg->length));
···
 }
 
 void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
-                                        struct intel_memory_region *mem)
+                                        struct intel_memory_region *mem,
+                                        unsigned long flags)
 {
         INIT_LIST_HEAD(&obj->mm.blocks);
         obj->mm.region = intel_memory_region_get(mem);
+        obj->flags |= flags;
 }
 
 void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
···
          * resource for the mem->region. We might need to revisit this in the
          * future.
          */
+
+        GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);
 
         if (!mem)
                 return ERR_PTR(-ENODEV);
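
Why the min_t() clamp above: with I915_BO_ALLOC_CONTIGUOUS the backing
allocation is rounded up to a power of two (see intel_memory_region.c
below), while obj->base.size keeps the originally requested size, so the
sg entry must not include the rounded-up tail. A standalone model of the
clamp in plain C, with illustrative numbers (a 768KiB object backed by a
1MiB block):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t size = 768 * 1024;   /* obj->base.size: the requested size */
        uint64_t block = 1024 * 1024; /* rounded-up contiguous buddy block */

        /* Plain-C equivalent of min_t(u64, size, i915_buddy_block_size()) */
        uint64_t sg_len = size < block ? size : block;

        /* Prints 786432: internal fragmentation stays out of the object */
        printf("sg entry length = %llu\n", (unsigned long long)sg_len);
        return 0;
}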
+2 -1
drivers/gpu/drm/i915/gem/i915_gem_region.h
···
                                   struct sg_table *pages);
 
 void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
-                                        struct intel_memory_region *mem);
+                                        struct intel_memory_region *mem,
+                                        unsigned long flags);
 void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj);
 
 struct drm_i915_gem_object *
+45 -38
drivers/gpu/drm/i915/gem/selftests/huge_pages.c
···
 static int igt_mock_memory_region_huge_pages(void *arg)
 {
+        const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS };
         struct i915_ppgtt *ppgtt = arg;
         struct drm_i915_private *i915 = ppgtt->vm.i915;
         unsigned long supported = INTEL_INFO(i915)->page_sizes;
···
         for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
                 unsigned int page_size = BIT(bit);
                 resource_size_t phys;
+                int i;
 
-                obj = i915_gem_object_create_region(mem, page_size, 0);
-                if (IS_ERR(obj)) {
-                        err = PTR_ERR(obj);
-                        goto out_region;
+                for (i = 0; i < ARRAY_SIZE(flags); ++i) {
+                        obj = i915_gem_object_create_region(mem, page_size,
+                                                            flags[i]);
+                        if (IS_ERR(obj)) {
+                                err = PTR_ERR(obj);
+                                goto out_region;
+                        }
+
+                        vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+                        if (IS_ERR(vma)) {
+                                err = PTR_ERR(vma);
+                                goto out_put;
+                        }
+
+                        err = i915_vma_pin(vma, 0, 0, PIN_USER);
+                        if (err)
+                                goto out_close;
+
+                        err = igt_check_page_sizes(vma);
+                        if (err)
+                                goto out_unpin;
+
+                        phys = i915_gem_object_get_dma_address(obj, 0);
+                        if (!IS_ALIGNED(phys, page_size)) {
+                                pr_err("%s addr misaligned(%pa) page_size=%u\n",
+                                       __func__, &phys, page_size);
+                                err = -EINVAL;
+                                goto out_unpin;
+                        }
+
+                        if (vma->page_sizes.gtt != page_size) {
+                                pr_err("%s page_sizes.gtt=%u, expected=%u\n",
+                                       __func__, vma->page_sizes.gtt,
+                                       page_size);
+                                err = -EINVAL;
+                                goto out_unpin;
+                        }
+
+                        i915_vma_unpin(vma);
+                        i915_vma_close(vma);
+
+                        __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+                        i915_gem_object_put(obj);
                 }
-
-                vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
-                if (IS_ERR(vma)) {
-                        err = PTR_ERR(vma);
-                        goto out_put;
-                }
-
-                err = i915_vma_pin(vma, 0, 0, PIN_USER);
-                if (err)
-                        goto out_close;
-
-                err = igt_check_page_sizes(vma);
-                if (err)
-                        goto out_unpin;
-
-                phys = i915_gem_object_get_dma_address(obj, 0);
-                if (!IS_ALIGNED(phys, page_size)) {
-                        pr_err("%s addr misaligned(%pa) page_size=%u\n",
-                               __func__, &phys, page_size);
-                        err = -EINVAL;
-                        goto out_unpin;
-                }
-
-                if (vma->page_sizes.gtt != page_size) {
-                        pr_err("%s page_sizes.gtt=%u, expected=%u\n",
-                               __func__, vma->page_sizes.gtt, page_size);
-                        err = -EINVAL;
-                        goto out_unpin;
-                }
-
-                i915_vma_unpin(vma);
-                i915_vma_close(vma);
-
-                i915_gem_object_put(obj);
         }
 
         goto out_region;
+8 -1
drivers/gpu/drm/i915/intel_memory_region.c
···
                                       unsigned int flags,
                                       struct list_head *blocks)
 {
-        unsigned long n_pages = size >> ilog2(mem->mm.chunk_size);
         unsigned int min_order = 0;
+        unsigned long n_pages;
 
         GEM_BUG_ON(!IS_ALIGNED(size, mem->mm.chunk_size));
         GEM_BUG_ON(!list_empty(blocks));
···
                 min_order = ilog2(mem->min_page_size) -
                             ilog2(mem->mm.chunk_size);
         }
+
+        if (flags & I915_ALLOC_CONTIGUOUS) {
+                size = roundup_pow_of_two(size);
+                min_order = ilog2(size) - ilog2(mem->mm.chunk_size);
+        }
+
+        n_pages = size >> ilog2(mem->mm.chunk_size);
 
         mutex_lock(&mem->mm_lock);
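
To see why raising min_order forces contiguity: the buddy allocator only
hands out power-of-two blocks, so once the minimum order equals the order
of the whole rounded-up size, the request can only be satisfied by one
single block. A userspace model of the arithmetic above, assuming a 4KiB
chunk size; the helper names mirror the kernel's roundup_pow_of_two() and
ilog2() but are reimplemented here for illustration:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel helpers */
static uint64_t roundup_pow_of_two_u64(uint64_t x)
{
        uint64_t r = 1;

        while (r < x)
                r <<= 1;
        return r;
}

static unsigned int ilog2_u64(uint64_t x)
{
        unsigned int n = 0;

        while (x >>= 1)
                n++;
        return n;
}

int main(void)
{
        const uint64_t chunk_size = 4096; /* mem->mm.chunk_size */
        uint64_t size = 768 * 1024;       /* requested object size */

        /* The I915_ALLOC_CONTIGUOUS path above */
        uint64_t alloc = roundup_pow_of_two_u64(size);
        unsigned int min_order = ilog2_u64(alloc) - ilog2_u64(chunk_size);
        uint64_t n_pages = alloc >> ilog2_u64(chunk_size);

        /*
         * 768KiB rounds up to 1MiB: min_order = 20 - 12 = 8 and
         * n_pages = 256 = 1 << min_order, so exactly one order-8
         * (physically contiguous) block can satisfy the request.
         */
        printf("alloc=%llu min_order=%u n_pages=%llu\n",
               (unsigned long long)alloc, min_order,
               (unsigned long long)n_pages);
        return 0;
}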
+2 -1
drivers/gpu/drm/i915/intel_memory_region.h
···
 struct intel_memory_region;
 struct sg_table;
 
-#define I915_ALLOC_MIN_PAGE_SIZE BIT(0)
+#define I915_ALLOC_MIN_PAGE_SIZE BIT(0)
+#define I915_ALLOC_CONTIGUOUS    BIT(1)
 
 struct intel_memory_region_ops {
         unsigned int flags;
+165
drivers/gpu/drm/i915/selftests/intel_memory_region.c
···
 
 #include "gem/i915_gem_region.h"
 #include "gem/selftests/mock_context.h"
+#include "selftests/i915_random.h"
 
 static void close_objects(struct intel_memory_region *mem,
                           struct list_head *objects)
···
         return err;
 }
 
+static struct drm_i915_gem_object *
+igt_object_create(struct intel_memory_region *mem,
+                  struct list_head *objects,
+                  u64 size,
+                  unsigned int flags)
+{
+        struct drm_i915_gem_object *obj;
+        int err;
+
+        obj = i915_gem_object_create_region(mem, size, flags);
+        if (IS_ERR(obj))
+                return obj;
+
+        err = i915_gem_object_pin_pages(obj);
+        if (err)
+                goto put;
+
+        list_add(&obj->st_link, objects);
+        return obj;
+
+put:
+        i915_gem_object_put(obj);
+        return ERR_PTR(err);
+}
+
+static void igt_object_release(struct drm_i915_gem_object *obj)
+{
+        i915_gem_object_unpin_pages(obj);
+        __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+        list_del(&obj->st_link);
+        i915_gem_object_put(obj);
+}
+
+static int igt_mock_contiguous(void *arg)
+{
+        struct intel_memory_region *mem = arg;
+        struct drm_i915_gem_object *obj;
+        unsigned long n_objects;
+        LIST_HEAD(objects);
+        LIST_HEAD(holes);
+        I915_RND_STATE(prng);
+        resource_size_t target;
+        resource_size_t total;
+        resource_size_t min;
+        int err = 0;
+
+        total = resource_size(&mem->region);
+
+        /* Min size */
+        obj = igt_object_create(mem, &objects, mem->mm.chunk_size,
+                                I915_BO_ALLOC_CONTIGUOUS);
+        if (IS_ERR(obj))
+                return PTR_ERR(obj);
+
+        if (obj->mm.pages->nents != 1) {
+                pr_err("%s min object spans multiple sg entries\n", __func__);
+                err = -EINVAL;
+                goto err_close_objects;
+        }
+
+        igt_object_release(obj);
+
+        /* Max size */
+        obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS);
+        if (IS_ERR(obj))
+                return PTR_ERR(obj);
+
+        if (obj->mm.pages->nents != 1) {
+                pr_err("%s max object spans multiple sg entries\n", __func__);
+                err = -EINVAL;
+                goto err_close_objects;
+        }
+
+        igt_object_release(obj);
+
+        /* Internal fragmentation should not bleed into the object size */
+        target = round_up(prandom_u32_state(&prng) % total, PAGE_SIZE);
+        target = max_t(u64, PAGE_SIZE, target);
+
+        obj = igt_object_create(mem, &objects, target,
+                                I915_BO_ALLOC_CONTIGUOUS);
+        if (IS_ERR(obj))
+                return PTR_ERR(obj);
+
+        if (obj->base.size != target) {
+                pr_err("%s obj->base.size(%llx) != target(%llx)\n", __func__,
+                       (u64)obj->base.size, (u64)target);
+                err = -EINVAL;
+                goto err_close_objects;
+        }
+
+        if (obj->mm.pages->nents != 1) {
+                pr_err("%s object spans multiple sg entries\n", __func__);
+                err = -EINVAL;
+                goto err_close_objects;
+        }
+
+        igt_object_release(obj);
+
+        /*
+         * Try to fragment the address space, such that half of it is free, but
+         * the max contiguous block size is SZ_64K.
+         */
+
+        target = SZ_64K;
+        n_objects = div64_u64(total, target);
+
+        while (n_objects--) {
+                struct list_head *list;
+
+                if (n_objects % 2)
+                        list = &holes;
+                else
+                        list = &objects;
+
+                obj = igt_object_create(mem, list, target,
+                                        I915_BO_ALLOC_CONTIGUOUS);
+                if (IS_ERR(obj)) {
+                        err = PTR_ERR(obj);
+                        goto err_close_objects;
+                }
+        }
+
+        close_objects(mem, &holes);
+
+        min = target;
+        target = total >> 1;
+
+        /* Make sure we can still allocate all the fragmented space */
+        obj = igt_object_create(mem, &objects, target, 0);
+        if (IS_ERR(obj)) {
+                err = PTR_ERR(obj);
+                goto err_close_objects;
+        }
+
+        igt_object_release(obj);
+
+        /*
+         * Even though we have enough free space, we don't have a big enough
+         * contiguous block. Make sure that holds true.
+         */
+
+        do {
+                bool should_fail = target > min;
+
+                obj = igt_object_create(mem, &objects, target,
+                                        I915_BO_ALLOC_CONTIGUOUS);
+                if (should_fail != IS_ERR(obj)) {
+                        pr_err("%s target allocation(%llx) mismatch\n",
+                               __func__, (u64)target);
+                        err = -EINVAL;
+                        goto err_close_objects;
+                }
+
+                target >>= 1;
+        } while (target >= mem->mm.chunk_size);
+
+err_close_objects:
+        list_splice_tail(&holes, &objects);
+        close_objects(mem, &objects);
+        return err;
+}
+
 int intel_memory_region_mock_selftests(void)
 {
         static const struct i915_subtest tests[] = {
                 SUBTEST(igt_mock_fill),
+                SUBTEST(igt_mock_contiguous),
         };
         struct intel_memory_region *mem;
         struct drm_i915_private *i915;
+1 -1
drivers/gpu/drm/i915/selftests/mock_region.c
···
         i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
 
-        i915_gem_object_init_memory_region(obj, mem);
+        i915_gem_object_init_memory_region(obj, mem, flags);
 
         return obj;
 }