// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"

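/* sort() comparator: order the recorded ktime samples ascending. */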
static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

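/*
 * Time repeated blitter fills of @obj on each user-accessible copy engine
 * and report the sustained throughput.
 */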
static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_fill_blt(obj, ce, 0);
			if (err)
				break;

			err = i915_gem_object_wait(obj,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

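		/*
		 * Discard the fastest and slowest of the five passes and
		 * report a weighted mean of the middle three, in MiB/s.
		 */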
		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
			engine->name,
			obj->base.size >> 10,
			div64_u64(mul_u32_u32(4 * obj->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

static int perf_fill_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *obj;
		int err;

		obj = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		err = __perf_fill_blt(obj);
		i915_gem_object_put(obj);
		if (err)
			return err;
	}

	return 0;
}

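/* As __perf_fill_blt(), but timing object-to-object blitter copies. */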
static int __perf_copy_blt(struct drm_i915_gem_object *src,
			   struct drm_i915_gem_object *dst)
{
	struct drm_i915_private *i915 = to_i915(src->base.dev);
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		ktime_t t[5];
		int pass;
		int err = 0;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		intel_engine_pm_get(engine);
		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			struct intel_context *ce = engine->kernel_context;
			ktime_t t0, t1;

			t0 = ktime_get();

			err = i915_gem_object_copy_blt(src, dst, ce);
			if (err)
				break;

			err = i915_gem_object_wait(dst,
						   I915_WAIT_ALL,
						   MAX_SCHEDULE_TIMEOUT);
			if (err)
				break;

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}
		intel_engine_pm_put(engine);
		if (err)
			return err;

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
			engine->name,
			src->base.size >> 10,
			div64_u64(mul_u32_u32(4 * src->base.size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);
	} while (1);
}

static int perf_copy_blt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = i915_gem_object_create_internal(i915, sizes[i]);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(src, dst);

		i915_gem_object_put(dst);
err_src:
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

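/* Per-thread arguments handed to each kthread spawned by igt_threaded_blt(). */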
struct igt_thread_arg {
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	struct file *file;
	struct rnd_state prng;
	unsigned int n_cpus;
};

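/*
 * Fill objects of random (and growing) size via the blitter, then verify
 * the contents from the CPU, until the selftest timeout expires.
 */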
static int igt_fill_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct intel_engine_cs *engine = thread->engine;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	u64 total, max;
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context_for_engine(engine, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, 0);
	GEM_BUG_ON(IS_ERR(ce));

	/*
	 * If we have a tiny shared address space, like for the GGTT
	 * then we can't be too greedy.
	 */
	max = ce->vm->total;
	if (i915_is_ggtt(ce->vm) || thread->ctx)
		max = div_u64(max, thread->n_cpus);
	max >>= 4;

	total = PAGE_SIZE;
	do {
		/* Aim to keep the runtime under reasonable bounds! */
		const u32 max_phys_size = SZ_64K;
		u32 val = prandom_u32_state(prng);
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		total = min(total, max);
		sz = i915_prandom_u32_max_state(total, prng) + 1;
		phys_sz = sz % max_phys_size + 1;

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);
		phys_sz = min(phys_sz, sz);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * Make sure the potentially async clflush does its job, if
		 * required.
		 */
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_object_fill_blt(obj, ce, val);
		if (err)
			goto err_unpin;

		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) {
			if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
				drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);

		total <<= 1;
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

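/*
 * As igt_fill_blt_thread(), but blit between two objects and verify the
 * destination contents from the CPU.
 */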
static int igt_copy_blt_thread(void *arg)
{
	struct igt_thread_arg *thread = arg;
	struct intel_engine_cs *engine = thread->engine;
	struct rnd_state *prng = &thread->prng;
	struct drm_i915_gem_object *src, *dst;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	unsigned int prio;
	IGT_TIMEOUT(end);
	u64 total, max;
	int err;

	ctx = thread->ctx;
	if (!ctx) {
		ctx = live_context_for_engine(engine, thread->file);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
		ctx->sched.priority = I915_USER_PRIORITY(prio);
	}

	ce = i915_gem_context_get_engine(ctx, 0);
	GEM_BUG_ON(IS_ERR(ce));

	/*
	 * If we have a tiny shared address space, like for the GGTT
	 * then we can't be too greedy.
	 */
	max = ce->vm->total;
	if (i915_is_ggtt(ce->vm) || thread->ctx)
		max = div_u64(max, thread->n_cpus);
	max >>= 4;

	total = PAGE_SIZE;
	do {
		/* Aim to keep the runtime under reasonable bounds! */
		const u32 max_phys_size = SZ_64K;
		u32 val = prandom_u32_state(prng);
		u32 phys_sz;
		u32 sz;
		u32 *vaddr;
		u32 i;

		total = min(total, max);
		sz = i915_prandom_u32_max_state(total, prng) + 1;
		phys_sz = sz % max_phys_size + 1;

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);
		phys_sz = min(phys_sz, sz);

		pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		src = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(src)) {
			err = PTR_ERR(src);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_src;
		}

		memset32(vaddr, val,
			 huge_gem_object_phys_size(src) / sizeof(u32));

		i915_gem_object_unpin_map(src);

		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
			src->cache_dirty = true;

		dst = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_put_src;
		}

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put_dst;
		}

		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(dst) / sizeof(u32));

		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			dst->cache_dirty = true;

		err = i915_gem_object_copy_blt(src, dst, ce);
		if (err)
			goto err_unpin;

		err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) {
			if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
				drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(dst);

		i915_gem_object_put(src);
		i915_gem_object_put(dst);

		total <<= 1;
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(dst);
err_put_dst:
	i915_gem_object_put(dst);
err_put_src:
	i915_gem_object_put(src);
err_flush:
	if (err == -ENOMEM)
		err = 0;

	intel_context_put(ce);
	return err;
}

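/*
 * Spawn one kthread per online CPU, plus one to oversubscribe, all running
 * blt_fn against the same engine. With SINGLE_CTX the threads also share a
 * single GEM context.
 */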
static int igt_threaded_blt(struct intel_engine_cs *engine,
			    int (*blt_fn)(void *arg),
			    unsigned int flags)
#define SINGLE_CTX BIT(0)
{
	struct igt_thread_arg *thread;
	struct task_struct **tsk;
	unsigned int n_cpus, i;
	I915_RND_STATE(prng);
	int err = 0;

	n_cpus = num_online_cpus() + 1;

	tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
	if (!tsk)
		return 0;

	thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
	if (!thread)
		goto out_tsk;

	thread[0].file = mock_file(engine->i915);
	if (IS_ERR(thread[0].file)) {
		err = PTR_ERR(thread[0].file);
		goto out_thread;
	}

	if (flags & SINGLE_CTX) {
		thread[0].ctx = live_context_for_engine(engine, thread[0].file);
		if (IS_ERR(thread[0].ctx)) {
			err = PTR_ERR(thread[0].ctx);
			goto out_file;
		}
	}

	for (i = 0; i < n_cpus; ++i) {
		thread[i].engine = engine;
		thread[i].file = thread[0].file;
		thread[i].ctx = thread[0].ctx;
		thread[i].n_cpus = n_cpus;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
		if (IS_ERR(tsk[i])) {
			err = PTR_ERR(tsk[i]);
			break;
		}

		get_task_struct(tsk[i]);
	}

	yield(); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		int status;

		if (IS_ERR_OR_NULL(tsk[i]))
			continue;

		status = kthread_stop(tsk[i]);
		if (status && !err)
			err = status;

		put_task_struct(tsk[i]);
	}

out_file:
	fput(thread[0].file);
out_thread:
	kfree(thread);
out_tsk:
	kfree(tsk);
	return err;
}

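/* Run the given thread function against every user-visible copy engine. */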
static int test_copy_engines(struct drm_i915_private *i915,
			     int (*fn)(void *arg),
			     unsigned int flags)
{
	struct intel_engine_cs *engine;
	int ret;

	for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) {
		ret = igt_threaded_blt(engine, fn, flags);
		if (ret)
			return ret;
	}

	return 0;
}

static int igt_fill_blt(void *arg)
{
	return test_copy_engines(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
	return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
	return test_copy_engines(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
	return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX);
}

int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_fill_blt),
		SUBTEST(igt_fill_blt_ctx0),
		SUBTEST(igt_copy_blt),
		SUBTEST(igt_copy_blt_ctx0),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_fill_blt),
		SUBTEST(perf_copy_blt),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}