Linux kernel mirror (for testing) · git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v5.13 · 597 lines · 13 kB
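The file shown is the i915 GEM blitter selftest suite (judging by its entry points, likely drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c): the perf_* subtests benchmark fill and copy throughput on each copy engine, while the igt_* subtests stress fill/copy correctness from many concurrent threads.
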
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"

static int wrap_ktime_compare(const void *A, const void *B)
{
        const ktime_t *a = A, *b = B;

        return ktime_compare(*a, *b);
}

static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        int inst = 0;

        do {
                struct intel_engine_cs *engine;
                ktime_t t[5];
                int pass;
                int err;

                engine = intel_engine_lookup_user(i915,
                                                  I915_ENGINE_CLASS_COPY,
                                                  inst++);
                if (!engine)
                        return 0;

                intel_engine_pm_get(engine);
                for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
                        struct intel_context *ce = engine->kernel_context;
                        ktime_t t0, t1;

                        t0 = ktime_get();

                        err = i915_gem_object_fill_blt(obj, ce, 0);
                        if (err)
                                break;

                        err = i915_gem_object_wait(obj,
                                                   I915_WAIT_ALL,
                                                   MAX_SCHEDULE_TIMEOUT);
                        if (err)
                                break;

                        t1 = ktime_get();
                        t[pass] = ktime_sub(t1, t0);
                }
                intel_engine_pm_put(engine);
                if (err)
                        return err;

                sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
                pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
                        engine->name,
                        obj->base.size >> 10,
                        div64_u64(mul_u32_u32(4 * obj->base.size,
                                              1000 * 1000 * 1000),
                                  t[1] + 2 * t[2] + t[3]) >> 20);
        } while (1);
}

static int perf_fill_blt(void *arg)
{
        struct drm_i915_private *i915 = arg;
        static const unsigned long sizes[] = {
                SZ_4K,
                SZ_64K,
                SZ_2M,
                SZ_64M
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(sizes); i++) {
                struct drm_i915_gem_object *obj;
                int err;

                obj = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(obj))
                        return PTR_ERR(obj);

                err = __perf_fill_blt(obj);
                i915_gem_object_put(obj);
                if (err)
                        return err;
        }

        return 0;
}

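/*
 * A note on the reporting above and in __perf_copy_blt() below: each
 * copy engine is timed over five passes, the samples are sorted, and
 * the fastest and slowest are discarded. The middle three are combined
 * with the median weighted double (t[1] + 2 * t[2] + t[3]), which is
 * why the numerator carries 4 * the object size. Multiplying by 10^9
 * and dividing by the summed nanoseconds yields bytes per second, and
 * ">> 20" scales that to MiB/s.
 */
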
static int __perf_copy_blt(struct drm_i915_gem_object *src,
                           struct drm_i915_gem_object *dst)
{
        struct drm_i915_private *i915 = to_i915(src->base.dev);
        int inst = 0;

        do {
                struct intel_engine_cs *engine;
                ktime_t t[5];
                int pass;
                int err = 0;

                engine = intel_engine_lookup_user(i915,
                                                  I915_ENGINE_CLASS_COPY,
                                                  inst++);
                if (!engine)
                        return 0;

                intel_engine_pm_get(engine);
                for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
                        struct intel_context *ce = engine->kernel_context;
                        ktime_t t0, t1;

                        t0 = ktime_get();

                        err = i915_gem_object_copy_blt(src, dst, ce);
                        if (err)
                                break;

                        err = i915_gem_object_wait(dst,
                                                   I915_WAIT_ALL,
                                                   MAX_SCHEDULE_TIMEOUT);
                        if (err)
                                break;

                        t1 = ktime_get();
                        t[pass] = ktime_sub(t1, t0);
                }
                intel_engine_pm_put(engine);
                if (err)
                        return err;

                sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
                pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
                        engine->name,
                        src->base.size >> 10,
                        div64_u64(mul_u32_u32(4 * src->base.size,
                                              1000 * 1000 * 1000),
                                  t[1] + 2 * t[2] + t[3]) >> 20);
        } while (1);
}

static int perf_copy_blt(void *arg)
{
        struct drm_i915_private *i915 = arg;
        static const unsigned long sizes[] = {
                SZ_4K,
                SZ_64K,
                SZ_2M,
                SZ_64M
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(sizes); i++) {
                struct drm_i915_gem_object *src, *dst;
                int err;

                src = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(src))
                        return PTR_ERR(src);

                dst = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(dst)) {
                        err = PTR_ERR(dst);
                        goto err_src;
                }

                err = __perf_copy_blt(src, dst);

                i915_gem_object_put(dst);
err_src:
                i915_gem_object_put(src);
                if (err)
                        return err;
        }

        return 0;
}

struct igt_thread_arg {
        struct intel_engine_cs *engine;
        struct i915_gem_context *ctx;
        struct file *file;
        struct rnd_state prng;
        unsigned int n_cpus;
};

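/*
 * The igt_*_thread() functions below are the worker bodies for the
 * threaded stress tests. igt_threaded_blt() (further below) spawns
 * num_online_cpus() + 1 of them per engine; each worker repeatedly
 * creates a randomly sized huge_gem_object(), blits into it and
 * verifies the result until its IGT_TIMEOUT expires. Without
 * SINGLE_CTX every worker creates its own context with a random
 * scheduling priority; with SINGLE_CTX they all share thread[0].ctx,
 * and (as when working on the shared GGTT) the per-worker
 * address-space budget is divided by n_cpus.
 */
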
static int igt_fill_blt_thread(void *arg)
{
        struct igt_thread_arg *thread = arg;
        struct intel_engine_cs *engine = thread->engine;
        struct rnd_state *prng = &thread->prng;
        struct drm_i915_gem_object *obj;
        struct i915_gem_context *ctx;
        struct intel_context *ce;
        unsigned int prio;
        IGT_TIMEOUT(end);
        u64 total, max;
        int err;

        ctx = thread->ctx;
        if (!ctx) {
                ctx = live_context_for_engine(engine, thread->file);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);

                prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
                ctx->sched.priority = prio;
        }

        ce = i915_gem_context_get_engine(ctx, 0);
        GEM_BUG_ON(IS_ERR(ce));

        /*
         * If we have a tiny shared address space, like for the GGTT
         * then we can't be too greedy.
         */
        max = ce->vm->total;
        if (i915_is_ggtt(ce->vm) || thread->ctx)
                max = div_u64(max, thread->n_cpus);
        max >>= 4;

        total = PAGE_SIZE;
        do {
                /* Aim to keep the runtime under reasonable bounds! */
                const u32 max_phys_size = SZ_64K;
                u32 val = prandom_u32_state(prng);
                u32 phys_sz;
                u32 sz;
                u32 *vaddr;
                u32 i;

                total = min(total, max);
                sz = i915_prandom_u32_max_state(total, prng) + 1;
                phys_sz = sz % max_phys_size + 1;

                sz = round_up(sz, PAGE_SIZE);
                phys_sz = round_up(phys_sz, PAGE_SIZE);
                phys_sz = min(phys_sz, sz);

                pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
                         phys_sz, sz, val);

                obj = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(obj)) {
                        err = PTR_ERR(obj);
                        goto err_flush;
                }

                vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put;
                }

                /*
                 * Make sure the potentially async clflush does its job, if
                 * required.
                 */
                memset32(vaddr, val ^ 0xdeadbeaf,
                         huge_gem_object_phys_size(obj) / sizeof(u32));

                if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                        obj->cache_dirty = true;

                err = i915_gem_object_fill_blt(obj, ce, val);
                if (err)
                        goto err_unpin;

                err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
                if (err)
                        goto err_unpin;

                for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) {
                        if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                                drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

                        if (vaddr[i] != val) {
                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
                                       vaddr[i], val);
                                err = -EINVAL;
                                goto err_unpin;
                        }
                }

                i915_gem_object_unpin_map(obj);
                i915_gem_object_put(obj);

                total <<= 1;
        } while (!time_after(jiffies, end));

        goto err_flush;

err_unpin:
        i915_gem_object_unpin_map(obj);
err_put:
        i915_gem_object_put(obj);
err_flush:
        if (err == -ENOMEM)
                err = 0;

        intel_context_put(ce);
        return err;
}

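/*
 * The copy variant below mirrors igt_fill_blt_thread(): the source is
 * seeded with val, the destination is poisoned with val ^ 0xdeadbeaf,
 * and after i915_gem_object_copy_blt() plus a wait the destination is
 * spot-checked at every 17th u32, flushing the cacheline first when
 * the object is not cache coherent for reads.
 */
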
static int igt_copy_blt_thread(void *arg)
{
        struct igt_thread_arg *thread = arg;
        struct intel_engine_cs *engine = thread->engine;
        struct rnd_state *prng = &thread->prng;
        struct drm_i915_gem_object *src, *dst;
        struct i915_gem_context *ctx;
        struct intel_context *ce;
        unsigned int prio;
        IGT_TIMEOUT(end);
        u64 total, max;
        int err;

        ctx = thread->ctx;
        if (!ctx) {
                ctx = live_context_for_engine(engine, thread->file);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);

                prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
                ctx->sched.priority = prio;
        }

        ce = i915_gem_context_get_engine(ctx, 0);
        GEM_BUG_ON(IS_ERR(ce));

        /*
         * If we have a tiny shared address space, like for the GGTT
         * then we can't be too greedy.
         */
        max = ce->vm->total;
        if (i915_is_ggtt(ce->vm) || thread->ctx)
                max = div_u64(max, thread->n_cpus);
        max >>= 4;

        total = PAGE_SIZE;
        do {
                /* Aim to keep the runtime under reasonable bounds! */
                const u32 max_phys_size = SZ_64K;
                u32 val = prandom_u32_state(prng);
                u32 phys_sz;
                u32 sz;
                u32 *vaddr;
                u32 i;

                total = min(total, max);
                sz = i915_prandom_u32_max_state(total, prng) + 1;
                phys_sz = sz % max_phys_size + 1;

                sz = round_up(sz, PAGE_SIZE);
                phys_sz = round_up(phys_sz, PAGE_SIZE);
                phys_sz = min(phys_sz, sz);

                pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
                         phys_sz, sz, val);

                src = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(src)) {
                        err = PTR_ERR(src);
                        goto err_flush;
                }

                vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put_src;
                }

                memset32(vaddr, val,
                         huge_gem_object_phys_size(src) / sizeof(u32));

                i915_gem_object_unpin_map(src);

                if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                        src->cache_dirty = true;

                dst = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(dst)) {
                        err = PTR_ERR(dst);
                        goto err_put_src;
                }

                vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put_dst;
                }

                memset32(vaddr, val ^ 0xdeadbeaf,
                         huge_gem_object_phys_size(dst) / sizeof(u32));

                if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                        dst->cache_dirty = true;

                err = i915_gem_object_copy_blt(src, dst, ce);
                if (err)
                        goto err_unpin;

                err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT);
                if (err)
                        goto err_unpin;

                for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) {
                        if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                                drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

                        if (vaddr[i] != val) {
                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
                                       vaddr[i], val);
                                err = -EINVAL;
                                goto err_unpin;
                        }
                }

                i915_gem_object_unpin_map(dst);

                i915_gem_object_put(src);
                i915_gem_object_put(dst);

                total <<= 1;
        } while (!time_after(jiffies, end));

        goto err_flush;

err_unpin:
        i915_gem_object_unpin_map(dst);
err_put_dst:
        i915_gem_object_put(dst);
err_put_src:
        i915_gem_object_put(src);
err_flush:
        if (err == -ENOMEM)
                err = 0;

        intel_context_put(ce);
        return err;
}

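/*
 * Thread orchestration for the workers above: kthread_run() starts
 * each worker immediately, yield() lets them get going, and
 * kthread_stop() then waits for each worker's timed loop to finish
 * and collects its exit status (the references taken with
 * get_task_struct() keep that status readable even if a worker has
 * already exited). The first non-zero status wins. The extra worker
 * beyond num_online_cpus() presumably keeps the engine oversubscribed
 * while other workers block in i915_gem_object_wait().
 */
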
static int igt_threaded_blt(struct intel_engine_cs *engine,
                            int (*blt_fn)(void *arg),
                            unsigned int flags)
#define SINGLE_CTX BIT(0)
{
        struct igt_thread_arg *thread;
        struct task_struct **tsk;
        unsigned int n_cpus, i;
        I915_RND_STATE(prng);
        int err = 0;

        n_cpus = num_online_cpus() + 1;

        tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
        if (!tsk)
                return 0;

        thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
        if (!thread)
                goto out_tsk;

        thread[0].file = mock_file(engine->i915);
        if (IS_ERR(thread[0].file)) {
                err = PTR_ERR(thread[0].file);
                goto out_thread;
        }

        if (flags & SINGLE_CTX) {
                thread[0].ctx = live_context_for_engine(engine, thread[0].file);
                if (IS_ERR(thread[0].ctx)) {
                        err = PTR_ERR(thread[0].ctx);
                        goto out_file;
                }
        }

        for (i = 0; i < n_cpus; ++i) {
                thread[i].engine = engine;
                thread[i].file = thread[0].file;
                thread[i].ctx = thread[0].ctx;
                thread[i].n_cpus = n_cpus;
                thread[i].prng =
                        I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

                tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
                if (IS_ERR(tsk[i])) {
                        err = PTR_ERR(tsk[i]);
                        break;
                }

                get_task_struct(tsk[i]);
        }

        yield(); /* start all threads before we kthread_stop() */

        for (i = 0; i < n_cpus; ++i) {
                int status;

                if (IS_ERR_OR_NULL(tsk[i]))
                        continue;

                status = kthread_stop(tsk[i]);
                if (status && !err)
                        err = status;

                put_task_struct(tsk[i]);
        }

out_file:
        fput(thread[0].file);
out_thread:
        kfree(thread);
out_tsk:
        kfree(tsk);
        return err;
}

static int test_copy_engines(struct drm_i915_private *i915,
                             int (*fn)(void *arg),
                             unsigned int flags)
{
        struct intel_engine_cs *engine;
        int ret;

        for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) {
                ret = igt_threaded_blt(engine, fn, flags);
                if (ret)
                        return ret;
        }

        return 0;
}

static int igt_fill_blt(void *arg)
{
        return test_copy_engines(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
        return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
        return test_copy_engines(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
        return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX);
}

int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_fill_blt),
                SUBTEST(igt_fill_blt_ctx0),
                SUBTEST(igt_copy_blt),
                SUBTEST(igt_copy_blt_ctx0),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(perf_fill_blt),
                SUBTEST(perf_copy_blt),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return i915_live_subtests(tests, i915);
}
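
Both entry points bail out early via intel_gt_is_wedged() if the GPU is already unusable. On a kernel built with CONFIG_DRM_I915_SELFTEST these suites are driven by the i915 selftest framework, presumably via module parameters along the lines of i915.live_selftests=-1 and i915.perf_selftests=-1 (parameter names taken from that framework, not from this file).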