// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

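/*
 * Measure the throughput of i915_gem_object_fill_blt() on each user-visible
 * copy engine: time ARRAY_SIZE(t) passes, sort the samples, discard the best
 * and worst, and report a weighted average of the rest in MiB/s.
 */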
static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_fill_blt(obj, ce, 0);
			if (err)
				break;

			err = i915_gem_object_wait(obj,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
			engine->name,
			obj->base.size >> 10,
			div64_u64(mul_u32_u32(4 * obj->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

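/* Time the fill blit over a spread of internal object sizes, 4KiB to 64MiB. */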
static int perf_fill_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *obj;
		int err;

		obj = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = __perf_fill_blt(obj);
		i915_gem_object_put(obj);
		if (err)
			return err;
	}

	return 0;
}

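/*
 * As for __perf_fill_blt(), but measure i915_gem_object_copy_blt() from one
 * object to another of the same size.
 */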
static int __perf_copy_blt(struct drm_i915_gem_object *src,
			   struct drm_i915_gem_object *dst)
{
	struct drm_i915_private *i915 = to_i915(src->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err = 0;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_copy_blt(src, dst, ce);
			if (err)
				break;

			err = i915_gem_object_wait(dst,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
			engine->name,
			src->base.size >> 10,
			div64_u64(mul_u32_u32(4 * src->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

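/* Time the copy blit over the same spread of object sizes as the fill test. */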
static int perf_copy_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(src, dst);

		i915_gem_object_put(dst);
err_src:
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

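/* Per-kthread argument block shared by the threaded blit stress tests. */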
struct igt_thread_arg {
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	struct file *file;
	struct rnd_state prng;
	unsigned int n_cpus;
};

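/*
 * Worker for the fill-blt stress test: repeatedly create a huge_gem_object of
 * randomised size, fill it with a random value using the blitter, then read
 * it back through a CPU mapping and verify every sampled dword, doubling the
 * target size each iteration until the timeout expires.
 */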
static int igt_fill_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct intel_engine_cs *engine = thread->engine;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	u64 total, max;
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context_for_engine(engine, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, 0);
	GEM_BUG_ON(IS_ERR(ce));

	/*
	 * If we have a tiny shared address space, like for the GGTT,
	 * then we can't be too greedy.
	 */
	max = ce->vm->total;
	if (i915_is_ggtt(ce->vm) || thread->ctx)
		max = div_u64(max, thread->n_cpus);
	max >>= 4;

	total = PAGE_SIZE;
	do {
		/* Aim to keep the runtime under reasonable bounds! */
		const u32 max_phys_size = SZ_64K;
		u32 val = prandom_u32_state(prng);
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		total = min(total, max);
		sz = i915_prandom_u32_max_state(total, prng) + 1;
		phys_sz = sz % max_phys_size + 1;

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);
		phys_sz = min(phys_sz, sz);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * Make sure the potentially async clflush does its job, if
		 * required.
		 */
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_object_fill_blt(obj, ce, val);
		if (err)
			goto err_unpin;

		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) {
			if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
				drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);

		total <<= 1;
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

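/*
 * Worker for the copy-blt stress test: prefill a source object with a random
 * value, blit it into a scribbled destination of the same size, then verify
 * the destination contents through a CPU mapping.
 */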
static int igt_copy_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct intel_engine_cs *engine = thread->engine;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *src, *dst;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	u64 total, max;
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context_for_engine(engine, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, 0);
	GEM_BUG_ON(IS_ERR(ce));

	/*
	 * If we have a tiny shared address space, like for the GGTT,
	 * then we can't be too greedy.
	 */
	max = ce->vm->total;
	if (i915_is_ggtt(ce->vm) || thread->ctx)
		max = div_u64(max, thread->n_cpus);
	max >>= 4;

	total = PAGE_SIZE;
	do {
		/* Aim to keep the runtime under reasonable bounds! */
		const u32 max_phys_size = SZ_64K;
		u32 val = prandom_u32_state(prng);
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		total = min(total, max);
		sz = i915_prandom_u32_max_state(total, prng) + 1;
		phys_sz = sz % max_phys_size + 1;

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);
		phys_sz = min(phys_sz, sz);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		src = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(src)) {
			err = PTR_ERR(src);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_src;
		}

		memset32(vaddr, val,
			 huge_gem_object_phys_size(src) / sizeof(u32));

		i915_gem_object_unpin_map(src);

		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
			src->cache_dirty = true;

		dst = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_put_src;
		}

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_dst;
		}

		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(dst) / sizeof(u32));

		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			dst->cache_dirty = true;

		err = i915_gem_object_copy_blt(src, dst, ce);
		if (err)
			goto err_unpin;

		err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) {
			if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
				drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(dst);

		i915_gem_object_put(src);
		i915_gem_object_put(dst);

		total <<= 1;
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(dst);
err_put_dst:
	i915_gem_object_put(dst);
err_put_src:
	i915_gem_object_put(src);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

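/*
 * Spawn one blitter worker per online CPU (plus one) against the given
 * engine. With SINGLE_CTX all workers share a single context; otherwise each
 * worker creates its own context with a randomised priority.
 */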
static int igt_threaded_blt(struct intel_engine_cs *engine,
			    int (*blt_fn)(void *arg),
			    unsigned int flags)
#define SINGLE_CTX BIT(0)
{
	struct igt_thread_arg *thread;
	struct task_struct **tsk;
	unsigned int n_cpus, i;
	I915_RND_STATE(prng);
	int err = 0;

	n_cpus = num_online_cpus() + 1;

	tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
	if (!tsk)
		return 0;

	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
	if (!thread)
		goto out_tsk;

	thread[0].file = mock_file(engine->i915);
	if (IS_ERR(thread[0].file)) {
		err = PTR_ERR(thread[0].file);
		goto out_thread;
	}

	if (flags & SINGLE_CTX) {
		thread[0].ctx = live_context_for_engine(engine, thread[0].file);
		if (IS_ERR(thread[0].ctx)) {
			err = PTR_ERR(thread[0].ctx);
			goto out_file;
		}
	}

	for (i = 0; i < n_cpus; ++i) {
		thread[i].engine = engine;
		thread[i].file = thread[0].file;
		thread[i].ctx = thread[0].ctx;
		thread[i].n_cpus = n_cpus;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
		if (IS_ERR(tsk[i])) {
			err = PTR_ERR(tsk[i]);
			break;
		}

		get_task_struct(tsk[i]);
	}

	yield(); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		int status;

		if (IS_ERR_OR_NULL(tsk[i]))
			continue;

		status = kthread_stop(tsk[i]);
		if (status && !err)
			err = status;

		put_task_struct(tsk[i]);
	}

out_file:
	fput(thread[0].file);
out_thread:
	kfree(thread);
out_tsk:
	kfree(tsk);
	return err;
}

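/* Run the given threaded blit test on every user-visible copy engine. */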
static int test_copy_engines(struct drm_i915_private *i915,
			     int (*fn)(void *arg),
			     unsigned int flags)
{
	struct intel_engine_cs *engine;
	int ret;

	for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) {
		ret = igt_threaded_blt(engine, fn, flags);
		if (ret)
			return ret;
	}

	return 0;
}

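/*
 * Subtest entry points: each pairs a worker function with either per-thread
 * contexts (default) or a single shared context (SINGLE_CTX).
 */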
static int igt_fill_blt(void *arg)
{
	return test_copy_engines(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
	return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
	return test_copy_engines(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
	return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX);
}

int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_fill_blt),
		SUBTEST(igt_fill_blt_ctx0),
		SUBTEST(igt_copy_blt),
		SUBTEST(igt_copy_blt_ctx0),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_fill_blt),
		SUBTEST(perf_copy_blt),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}