1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 #include "gt/selftest_engine_heartbeat.h"
13 
14 #include "i915_selftest.h"
15 #include "selftests/i915_random.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_live_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/lib_sw_fence.h"
20 
21 #include "gem/selftests/igt_gem_utils.h"
22 #include "gem/selftests/mock_context.h"
23 
/* Offset of GPR dword @n: 0x600 past the engine's mmio_base, 4 bytes apart. */
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + 4 * (n))
/* Count of GPRs, and the number of dwords needed to cover all of them. */
#define NUM_GPR 16
#define NUM_GPR_DW (2 * NUM_GPR) /* each GPR is 2 dwords */
27 
create_scratch(struct intel_gt * gt)28 static struct i915_vma *create_scratch(struct intel_gt *gt)
29 {
30 	struct drm_i915_gem_object *obj;
31 	struct i915_vma *vma;
32 	int err;
33 
34 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
35 	if (IS_ERR(obj))
36 		return ERR_CAST(obj);
37 
38 	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
39 
40 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
41 	if (IS_ERR(vma)) {
42 		i915_gem_object_put(obj);
43 		return vma;
44 	}
45 
46 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
47 	if (err) {
48 		i915_gem_object_put(obj);
49 		return ERR_PTR(err);
50 	}
51 
52 	return vma;
53 }
54 
is_active(struct i915_request * rq)55 static bool is_active(struct i915_request *rq)
56 {
57 	if (i915_request_is_active(rq))
58 		return true;
59 
60 	if (i915_request_on_hold(rq))
61 		return true;
62 
63 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
64 		return true;
65 
66 	return false;
67 }
68 
wait_for_submit(struct intel_engine_cs * engine,struct i915_request * rq,unsigned long timeout)69 static int wait_for_submit(struct intel_engine_cs *engine,
70 			   struct i915_request *rq,
71 			   unsigned long timeout)
72 {
73 	timeout += jiffies;
74 	do {
75 		bool done = time_after(jiffies, timeout);
76 
77 		if (i915_request_completed(rq)) /* that was quick! */
78 			return 0;
79 
80 		/* Wait until the HW has acknowleged the submission (or err) */
81 		intel_engine_flush_submission(engine);
82 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
83 			return 0;
84 
85 		if (done)
86 			return -ETIME;
87 
88 		cond_resched();
89 	} while (1);
90 }
91 
wait_for_reset(struct intel_engine_cs * engine,struct i915_request * rq,unsigned long timeout)92 static int wait_for_reset(struct intel_engine_cs *engine,
93 			  struct i915_request *rq,
94 			  unsigned long timeout)
95 {
96 	timeout += jiffies;
97 
98 	do {
99 		cond_resched();
100 		intel_engine_flush_submission(engine);
101 
102 		if (READ_ONCE(engine->execlists.pending[0]))
103 			continue;
104 
105 		if (i915_request_completed(rq))
106 			break;
107 
108 		if (READ_ONCE(rq->fence.error))
109 			break;
110 	} while (time_before(jiffies, timeout));
111 
112 	flush_scheduled_work();
113 
114 	if (rq->fence.error != -EIO) {
115 		pr_err("%s: hanging request %llx:%lld not reset\n",
116 		       engine->name,
117 		       rq->fence.context,
118 		       rq->fence.seqno);
119 		return -EINVAL;
120 	}
121 
122 	/* Give the request a jiffie to complete after flushing the worker */
123 	if (i915_request_wait(rq, 0,
124 			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
125 		pr_err("%s: hanging request %llx:%lld did not complete\n",
126 		       engine->name,
127 		       rq->fence.context,
128 		       rq->fence.seqno);
129 		return -ETIME;
130 	}
131 
132 	return 0;
133 }
134 
live_sanitycheck(void * arg)135 static int live_sanitycheck(void *arg)
136 {
137 	struct intel_gt *gt = arg;
138 	struct intel_engine_cs *engine;
139 	enum intel_engine_id id;
140 	struct igt_spinner spin;
141 	int err = 0;
142 
143 	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
144 		return 0;
145 
146 	if (igt_spinner_init(&spin, gt))
147 		return -ENOMEM;
148 
149 	for_each_engine(engine, gt, id) {
150 		struct intel_context *ce;
151 		struct i915_request *rq;
152 
153 		ce = intel_context_create(engine);
154 		if (IS_ERR(ce)) {
155 			err = PTR_ERR(ce);
156 			break;
157 		}
158 
159 		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
160 		if (IS_ERR(rq)) {
161 			err = PTR_ERR(rq);
162 			goto out_ctx;
163 		}
164 
165 		i915_request_add(rq);
166 		if (!igt_wait_for_spinner(&spin, rq)) {
167 			GEM_TRACE("spinner failed to start\n");
168 			GEM_TRACE_DUMP();
169 			intel_gt_set_wedged(gt);
170 			err = -EIO;
171 			goto out_ctx;
172 		}
173 
174 		igt_spinner_end(&spin);
175 		if (igt_flush_test(gt->i915)) {
176 			err = -EIO;
177 			goto out_ctx;
178 		}
179 
180 out_ctx:
181 		intel_context_put(ce);
182 		if (err)
183 			break;
184 	}
185 
186 	igt_spinner_fini(&spin);
187 	return err;
188 }
189 
live_unlite_restore(struct intel_gt * gt,int prio)190 static int live_unlite_restore(struct intel_gt *gt, int prio)
191 {
192 	struct intel_engine_cs *engine;
193 	enum intel_engine_id id;
194 	struct igt_spinner spin;
195 	int err = -ENOMEM;
196 
197 	/*
198 	 * Check that we can correctly context switch between 2 instances
199 	 * on the same engine from the same parent context.
200 	 */
201 
202 	if (igt_spinner_init(&spin, gt))
203 		return err;
204 
205 	err = 0;
206 	for_each_engine(engine, gt, id) {
207 		struct intel_context *ce[2] = {};
208 		struct i915_request *rq[2];
209 		struct igt_live_test t;
210 		int n;
211 
212 		if (prio && !intel_engine_has_preemption(engine))
213 			continue;
214 
215 		if (!intel_engine_can_store_dword(engine))
216 			continue;
217 
218 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
219 			err = -EIO;
220 			break;
221 		}
222 		st_engine_heartbeat_disable(engine);
223 
224 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
225 			struct intel_context *tmp;
226 
227 			tmp = intel_context_create(engine);
228 			if (IS_ERR(tmp)) {
229 				err = PTR_ERR(tmp);
230 				goto err_ce;
231 			}
232 
233 			err = intel_context_pin(tmp);
234 			if (err) {
235 				intel_context_put(tmp);
236 				goto err_ce;
237 			}
238 
239 			/*
240 			 * Setup the pair of contexts such that if we
241 			 * lite-restore using the RING_TAIL from ce[1] it
242 			 * will execute garbage from ce[0]->ring.
243 			 */
244 			memset(tmp->ring->vaddr,
245 			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
246 			       tmp->ring->vma->size);
247 
248 			ce[n] = tmp;
249 		}
250 		GEM_BUG_ON(!ce[1]->ring->size);
251 		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
252 		__execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
253 
254 		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
255 		if (IS_ERR(rq[0])) {
256 			err = PTR_ERR(rq[0]);
257 			goto err_ce;
258 		}
259 
260 		i915_request_get(rq[0]);
261 		i915_request_add(rq[0]);
262 		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
263 
264 		if (!igt_wait_for_spinner(&spin, rq[0])) {
265 			i915_request_put(rq[0]);
266 			goto err_ce;
267 		}
268 
269 		rq[1] = i915_request_create(ce[1]);
270 		if (IS_ERR(rq[1])) {
271 			err = PTR_ERR(rq[1]);
272 			i915_request_put(rq[0]);
273 			goto err_ce;
274 		}
275 
276 		if (!prio) {
277 			/*
278 			 * Ensure we do the switch to ce[1] on completion.
279 			 *
280 			 * rq[0] is already submitted, so this should reduce
281 			 * to a no-op (a wait on a request on the same engine
282 			 * uses the submit fence, not the completion fence),
283 			 * but it will install a dependency on rq[1] for rq[0]
284 			 * that will prevent the pair being reordered by
285 			 * timeslicing.
286 			 */
287 			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
288 		}
289 
290 		i915_request_get(rq[1]);
291 		i915_request_add(rq[1]);
292 		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
293 		i915_request_put(rq[0]);
294 
295 		if (prio) {
296 			struct i915_sched_attr attr = {
297 				.priority = prio,
298 			};
299 
300 			/* Alternatively preempt the spinner with ce[1] */
301 			engine->schedule(rq[1], &attr);
302 		}
303 
304 		/* And switch back to ce[0] for good measure */
305 		rq[0] = i915_request_create(ce[0]);
306 		if (IS_ERR(rq[0])) {
307 			err = PTR_ERR(rq[0]);
308 			i915_request_put(rq[1]);
309 			goto err_ce;
310 		}
311 
312 		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
313 		i915_request_get(rq[0]);
314 		i915_request_add(rq[0]);
315 		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
316 		i915_request_put(rq[1]);
317 		i915_request_put(rq[0]);
318 
319 err_ce:
320 		intel_engine_flush_submission(engine);
321 		igt_spinner_end(&spin);
322 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
323 			if (IS_ERR_OR_NULL(ce[n]))
324 				break;
325 
326 			intel_context_unpin(ce[n]);
327 			intel_context_put(ce[n]);
328 		}
329 
330 		st_engine_heartbeat_enable(engine);
331 		if (igt_live_test_end(&t))
332 			err = -EIO;
333 		if (err)
334 			break;
335 	}
336 
337 	igt_spinner_fini(&spin);
338 	return err;
339 }
340 
/*
 * Run live_unlite_restore() with no priority boost, so the second context
 * is ordered after the spinner by an awaited fence rather than preemption.
 */
static int live_unlite_switch(void *arg)
{
	struct intel_gt *gt = arg;

	return live_unlite_restore(gt, 0);
}
345 
live_unlite_preempt(void * arg)346 static int live_unlite_preempt(void *arg)
347 {
348 	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
349 }
350 
live_unlite_ring(void * arg)351 static int live_unlite_ring(void *arg)
352 {
353 	struct intel_gt *gt = arg;
354 	struct intel_engine_cs *engine;
355 	struct igt_spinner spin;
356 	enum intel_engine_id id;
357 	int err = 0;
358 
359 	/*
360 	 * Setup a preemption event that will cause almost the entire ring
361 	 * to be unwound, potentially fooling our intel_ring_direction()
362 	 * into emitting a forward lite-restore instead of the rollback.
363 	 */
364 
365 	if (igt_spinner_init(&spin, gt))
366 		return -ENOMEM;
367 
368 	for_each_engine(engine, gt, id) {
369 		struct intel_context *ce[2] = {};
370 		struct i915_request *rq;
371 		struct igt_live_test t;
372 		int n;
373 
374 		if (!intel_engine_has_preemption(engine))
375 			continue;
376 
377 		if (!intel_engine_can_store_dword(engine))
378 			continue;
379 
380 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
381 			err = -EIO;
382 			break;
383 		}
384 		st_engine_heartbeat_disable(engine);
385 
386 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
387 			struct intel_context *tmp;
388 
389 			tmp = intel_context_create(engine);
390 			if (IS_ERR(tmp)) {
391 				err = PTR_ERR(tmp);
392 				goto err_ce;
393 			}
394 
395 			err = intel_context_pin(tmp);
396 			if (err) {
397 				intel_context_put(tmp);
398 				goto err_ce;
399 			}
400 
401 			memset32(tmp->ring->vaddr,
402 				 0xdeadbeef, /* trigger a hang if executed */
403 				 tmp->ring->vma->size / sizeof(u32));
404 
405 			ce[n] = tmp;
406 		}
407 
408 		/* Create max prio spinner, followed by N low prio nops */
409 		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
410 		if (IS_ERR(rq)) {
411 			err = PTR_ERR(rq);
412 			goto err_ce;
413 		}
414 
415 		i915_request_get(rq);
416 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
417 		i915_request_add(rq);
418 
419 		if (!igt_wait_for_spinner(&spin, rq)) {
420 			intel_gt_set_wedged(gt);
421 			i915_request_put(rq);
422 			err = -ETIME;
423 			goto err_ce;
424 		}
425 
426 		/* Fill the ring, until we will cause a wrap */
427 		n = 0;
428 		while (intel_ring_direction(ce[0]->ring,
429 					    rq->wa_tail,
430 					    ce[0]->ring->tail) <= 0) {
431 			struct i915_request *tmp;
432 
433 			tmp = intel_context_create_request(ce[0]);
434 			if (IS_ERR(tmp)) {
435 				err = PTR_ERR(tmp);
436 				i915_request_put(rq);
437 				goto err_ce;
438 			}
439 
440 			i915_request_add(tmp);
441 			intel_engine_flush_submission(engine);
442 			n++;
443 		}
444 		intel_engine_flush_submission(engine);
445 		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
446 			 engine->name, n,
447 			 ce[0]->ring->size,
448 			 ce[0]->ring->tail,
449 			 ce[0]->ring->emit,
450 			 rq->tail);
451 		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
452 						rq->tail,
453 						ce[0]->ring->tail) <= 0);
454 		i915_request_put(rq);
455 
456 		/* Create a second ring to preempt the first ring after rq[0] */
457 		rq = intel_context_create_request(ce[1]);
458 		if (IS_ERR(rq)) {
459 			err = PTR_ERR(rq);
460 			goto err_ce;
461 		}
462 
463 		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
464 		i915_request_get(rq);
465 		i915_request_add(rq);
466 
467 		err = wait_for_submit(engine, rq, HZ / 2);
468 		i915_request_put(rq);
469 		if (err) {
470 			pr_err("%s: preemption request was not submitted\n",
471 			       engine->name);
472 			err = -ETIME;
473 		}
474 
475 		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
476 			 engine->name,
477 			 ce[0]->ring->tail, ce[0]->ring->emit,
478 			 ce[1]->ring->tail, ce[1]->ring->emit);
479 
480 err_ce:
481 		intel_engine_flush_submission(engine);
482 		igt_spinner_end(&spin);
483 		for (n = 0; n < ARRAY_SIZE(ce); n++) {
484 			if (IS_ERR_OR_NULL(ce[n]))
485 				break;
486 
487 			intel_context_unpin(ce[n]);
488 			intel_context_put(ce[n]);
489 		}
490 		st_engine_heartbeat_enable(engine);
491 		if (igt_live_test_end(&t))
492 			err = -EIO;
493 		if (err)
494 			break;
495 	}
496 
497 	igt_spinner_fini(&spin);
498 	return err;
499 }
500 
live_pin_rewind(void * arg)501 static int live_pin_rewind(void *arg)
502 {
503 	struct intel_gt *gt = arg;
504 	struct intel_engine_cs *engine;
505 	enum intel_engine_id id;
506 	int err = 0;
507 
508 	/*
509 	 * We have to be careful not to trust intel_ring too much, for example
510 	 * ring->head is updated upon retire which is out of sync with pinning
511 	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
512 	 * or else we risk writing an older, stale value.
513 	 *
514 	 * To simulate this, let's apply a bit of deliberate sabotague.
515 	 */
516 
517 	for_each_engine(engine, gt, id) {
518 		struct intel_context *ce;
519 		struct i915_request *rq;
520 		struct intel_ring *ring;
521 		struct igt_live_test t;
522 
523 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
524 			err = -EIO;
525 			break;
526 		}
527 
528 		ce = intel_context_create(engine);
529 		if (IS_ERR(ce)) {
530 			err = PTR_ERR(ce);
531 			break;
532 		}
533 
534 		err = intel_context_pin(ce);
535 		if (err) {
536 			intel_context_put(ce);
537 			break;
538 		}
539 
540 		/* Keep the context awake while we play games */
541 		err = i915_active_acquire(&ce->active);
542 		if (err) {
543 			intel_context_unpin(ce);
544 			intel_context_put(ce);
545 			break;
546 		}
547 		ring = ce->ring;
548 
549 		/* Poison the ring, and offset the next request from HEAD */
550 		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
551 		ring->emit = ring->size / 2;
552 		ring->tail = ring->emit;
553 		GEM_BUG_ON(ring->head);
554 
555 		intel_context_unpin(ce);
556 
557 		/* Submit a simple nop request */
558 		GEM_BUG_ON(intel_context_is_pinned(ce));
559 		rq = intel_context_create_request(ce);
560 		i915_active_release(&ce->active); /* e.g. async retire */
561 		intel_context_put(ce);
562 		if (IS_ERR(rq)) {
563 			err = PTR_ERR(rq);
564 			break;
565 		}
566 		GEM_BUG_ON(!rq->head);
567 		i915_request_add(rq);
568 
569 		/* Expect not to hang! */
570 		if (igt_live_test_end(&t)) {
571 			err = -EIO;
572 			break;
573 		}
574 	}
575 
576 	return err;
577 }
578 
live_hold_reset(void * arg)579 static int live_hold_reset(void *arg)
580 {
581 	struct intel_gt *gt = arg;
582 	struct intel_engine_cs *engine;
583 	enum intel_engine_id id;
584 	struct igt_spinner spin;
585 	int err = 0;
586 
587 	/*
588 	 * In order to support offline error capture for fast preempt reset,
589 	 * we need to decouple the guilty request and ensure that it and its
590 	 * descendents are not executed while the capture is in progress.
591 	 */
592 
593 	if (!intel_has_reset_engine(gt))
594 		return 0;
595 
596 	if (igt_spinner_init(&spin, gt))
597 		return -ENOMEM;
598 
599 	for_each_engine(engine, gt, id) {
600 		struct intel_context *ce;
601 		struct i915_request *rq;
602 
603 		ce = intel_context_create(engine);
604 		if (IS_ERR(ce)) {
605 			err = PTR_ERR(ce);
606 			break;
607 		}
608 
609 		st_engine_heartbeat_disable(engine);
610 
611 		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
612 		if (IS_ERR(rq)) {
613 			err = PTR_ERR(rq);
614 			goto out;
615 		}
616 		i915_request_add(rq);
617 
618 		if (!igt_wait_for_spinner(&spin, rq)) {
619 			intel_gt_set_wedged(gt);
620 			err = -ETIME;
621 			goto out;
622 		}
623 
624 		/* We have our request executing, now remove it and reset */
625 
626 		if (test_and_set_bit(I915_RESET_ENGINE + id,
627 				     &gt->reset.flags)) {
628 			intel_gt_set_wedged(gt);
629 			err = -EBUSY;
630 			goto out;
631 		}
632 		tasklet_disable(&engine->execlists.tasklet);
633 
634 		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
635 		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
636 
637 		i915_request_get(rq);
638 		execlists_hold(engine, rq);
639 		GEM_BUG_ON(!i915_request_on_hold(rq));
640 
641 		intel_engine_reset(engine, NULL);
642 		GEM_BUG_ON(rq->fence.error != -EIO);
643 
644 		tasklet_enable(&engine->execlists.tasklet);
645 		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
646 				      &gt->reset.flags);
647 
648 		/* Check that we do not resubmit the held request */
649 		if (!i915_request_wait(rq, 0, HZ / 5)) {
650 			pr_err("%s: on hold request completed!\n",
651 			       engine->name);
652 			i915_request_put(rq);
653 			err = -EIO;
654 			goto out;
655 		}
656 		GEM_BUG_ON(!i915_request_on_hold(rq));
657 
658 		/* But is resubmitted on release */
659 		execlists_unhold(engine, rq);
660 		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
661 			pr_err("%s: held request did not complete!\n",
662 			       engine->name);
663 			intel_gt_set_wedged(gt);
664 			err = -ETIME;
665 		}
666 		i915_request_put(rq);
667 
668 out:
669 		st_engine_heartbeat_enable(engine);
670 		intel_context_put(ce);
671 		if (err)
672 			break;
673 	}
674 
675 	igt_spinner_fini(&spin);
676 	return err;
677 }
678 
/* Map an error code to a label for log messages: "good" for 0, else "bad". */
static const char *error_repr(int err)
{
	if (err)
		return "bad";

	return "good";
}
683 
live_error_interrupt(void * arg)684 static int live_error_interrupt(void *arg)
685 {
686 	static const struct error_phase {
687 		enum { GOOD = 0, BAD = -EIO } error[2];
688 	} phases[] = {
689 		{ { BAD,  GOOD } },
690 		{ { BAD,  BAD  } },
691 		{ { BAD,  GOOD } },
692 		{ { GOOD, GOOD } }, /* sentinel */
693 	};
694 	struct intel_gt *gt = arg;
695 	struct intel_engine_cs *engine;
696 	enum intel_engine_id id;
697 
698 	/*
699 	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
700 	 * of invalid commands in user batches that will cause a GPU hang.
701 	 * This is a faster mechanism than using hangcheck/heartbeats, but
702 	 * only detects problems the HW knows about -- it will not warn when
703 	 * we kill the HW!
704 	 *
705 	 * To verify our detection and reset, we throw some invalid commands
706 	 * at the HW and wait for the interrupt.
707 	 */
708 
709 	if (!intel_has_reset_engine(gt))
710 		return 0;
711 
712 	for_each_engine(engine, gt, id) {
713 		const struct error_phase *p;
714 		int err = 0;
715 
716 		st_engine_heartbeat_disable(engine);
717 
718 		for (p = phases; p->error[0] != GOOD; p++) {
719 			struct i915_request *client[ARRAY_SIZE(phases->error)];
720 			u32 *cs;
721 			int i;
722 
723 			memset(client, 0, sizeof(*client));
724 			for (i = 0; i < ARRAY_SIZE(client); i++) {
725 				struct intel_context *ce;
726 				struct i915_request *rq;
727 
728 				ce = intel_context_create(engine);
729 				if (IS_ERR(ce)) {
730 					err = PTR_ERR(ce);
731 					goto out;
732 				}
733 
734 				rq = intel_context_create_request(ce);
735 				intel_context_put(ce);
736 				if (IS_ERR(rq)) {
737 					err = PTR_ERR(rq);
738 					goto out;
739 				}
740 
741 				if (rq->engine->emit_init_breadcrumb) {
742 					err = rq->engine->emit_init_breadcrumb(rq);
743 					if (err) {
744 						i915_request_add(rq);
745 						goto out;
746 					}
747 				}
748 
749 				cs = intel_ring_begin(rq, 2);
750 				if (IS_ERR(cs)) {
751 					i915_request_add(rq);
752 					err = PTR_ERR(cs);
753 					goto out;
754 				}
755 
756 				if (p->error[i]) {
757 					*cs++ = 0xdeadbeef;
758 					*cs++ = 0xdeadbeef;
759 				} else {
760 					*cs++ = MI_NOOP;
761 					*cs++ = MI_NOOP;
762 				}
763 
764 				client[i] = i915_request_get(rq);
765 				i915_request_add(rq);
766 			}
767 
768 			err = wait_for_submit(engine, client[0], HZ / 2);
769 			if (err) {
770 				pr_err("%s: first request did not start within time!\n",
771 				       engine->name);
772 				err = -ETIME;
773 				goto out;
774 			}
775 
776 			for (i = 0; i < ARRAY_SIZE(client); i++) {
777 				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
778 					pr_debug("%s: %s request incomplete!\n",
779 						 engine->name,
780 						 error_repr(p->error[i]));
781 
782 				if (!i915_request_started(client[i])) {
783 					pr_err("%s: %s request not started!\n",
784 					       engine->name,
785 					       error_repr(p->error[i]));
786 					err = -ETIME;
787 					goto out;
788 				}
789 
790 				/* Kick the tasklet to process the error */
791 				intel_engine_flush_submission(engine);
792 				if (client[i]->fence.error != p->error[i]) {
793 					pr_err("%s: %s request (%s) with wrong error code: %d\n",
794 					       engine->name,
795 					       error_repr(p->error[i]),
796 					       i915_request_completed(client[i]) ? "completed" : "running",
797 					       client[i]->fence.error);
798 					err = -EINVAL;
799 					goto out;
800 				}
801 			}
802 
803 out:
804 			for (i = 0; i < ARRAY_SIZE(client); i++)
805 				if (client[i])
806 					i915_request_put(client[i]);
807 			if (err) {
808 				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
809 				       engine->name, p - phases,
810 				       p->error[0], p->error[1]);
811 				break;
812 			}
813 		}
814 
815 		st_engine_heartbeat_enable(engine);
816 		if (err) {
817 			intel_gt_set_wedged(gt);
818 			return err;
819 		}
820 	}
821 
822 	return 0;
823 }
824 
825 static int
emit_semaphore_chain(struct i915_request * rq,struct i915_vma * vma,int idx)826 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
827 {
828 	u32 *cs;
829 
830 	cs = intel_ring_begin(rq, 10);
831 	if (IS_ERR(cs))
832 		return PTR_ERR(cs);
833 
834 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
835 
836 	*cs++ = MI_SEMAPHORE_WAIT |
837 		MI_SEMAPHORE_GLOBAL_GTT |
838 		MI_SEMAPHORE_POLL |
839 		MI_SEMAPHORE_SAD_NEQ_SDD;
840 	*cs++ = 0;
841 	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
842 	*cs++ = 0;
843 
844 	if (idx > 0) {
845 		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
846 		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
847 		*cs++ = 0;
848 		*cs++ = 1;
849 	} else {
850 		*cs++ = MI_NOOP;
851 		*cs++ = MI_NOOP;
852 		*cs++ = MI_NOOP;
853 		*cs++ = MI_NOOP;
854 	}
855 
856 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
857 
858 	intel_ring_advance(rq, cs);
859 	return 0;
860 }
861 
862 static struct i915_request *
semaphore_queue(struct intel_engine_cs * engine,struct i915_vma * vma,int idx)863 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
864 {
865 	struct intel_context *ce;
866 	struct i915_request *rq;
867 	int err;
868 
869 	ce = intel_context_create(engine);
870 	if (IS_ERR(ce))
871 		return ERR_CAST(ce);
872 
873 	rq = intel_context_create_request(ce);
874 	if (IS_ERR(rq))
875 		goto out_ce;
876 
877 	err = 0;
878 	if (rq->engine->emit_init_breadcrumb)
879 		err = rq->engine->emit_init_breadcrumb(rq);
880 	if (err == 0)
881 		err = emit_semaphore_chain(rq, vma, idx);
882 	if (err == 0)
883 		i915_request_get(rq);
884 	i915_request_add(rq);
885 	if (err)
886 		rq = ERR_PTR(err);
887 
888 out_ce:
889 	intel_context_put(ce);
890 	return rq;
891 }
892 
893 static int
release_queue(struct intel_engine_cs * engine,struct i915_vma * vma,int idx,int prio)894 release_queue(struct intel_engine_cs *engine,
895 	      struct i915_vma *vma,
896 	      int idx, int prio)
897 {
898 	struct i915_sched_attr attr = {
899 		.priority = prio,
900 	};
901 	struct i915_request *rq;
902 	u32 *cs;
903 
904 	rq = intel_engine_create_kernel_request(engine);
905 	if (IS_ERR(rq))
906 		return PTR_ERR(rq);
907 
908 	cs = intel_ring_begin(rq, 4);
909 	if (IS_ERR(cs)) {
910 		i915_request_add(rq);
911 		return PTR_ERR(cs);
912 	}
913 
914 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
915 	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
916 	*cs++ = 0;
917 	*cs++ = 1;
918 
919 	intel_ring_advance(rq, cs);
920 
921 	i915_request_get(rq);
922 	i915_request_add(rq);
923 
924 	local_bh_disable();
925 	engine->schedule(rq, &attr);
926 	local_bh_enable(); /* kick tasklet */
927 
928 	i915_request_put(rq);
929 
930 	return 0;
931 }
932 
933 static int
slice_semaphore_queue(struct intel_engine_cs * outer,struct i915_vma * vma,int count)934 slice_semaphore_queue(struct intel_engine_cs *outer,
935 		      struct i915_vma *vma,
936 		      int count)
937 {
938 	struct intel_engine_cs *engine;
939 	struct i915_request *head;
940 	enum intel_engine_id id;
941 	int err, i, n = 0;
942 
943 	head = semaphore_queue(outer, vma, n++);
944 	if (IS_ERR(head))
945 		return PTR_ERR(head);
946 
947 	for_each_engine(engine, outer->gt, id) {
948 		for (i = 0; i < count; i++) {
949 			struct i915_request *rq;
950 
951 			rq = semaphore_queue(engine, vma, n++);
952 			if (IS_ERR(rq)) {
953 				err = PTR_ERR(rq);
954 				goto out;
955 			}
956 
957 			i915_request_put(rq);
958 		}
959 	}
960 
961 	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
962 	if (err)
963 		goto out;
964 
965 	if (i915_request_wait(head, 0,
966 			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
967 		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
968 		       count, n);
969 		GEM_TRACE_DUMP();
970 		intel_gt_set_wedged(outer->gt);
971 		err = -EIO;
972 	}
973 
974 out:
975 	i915_request_put(head);
976 	return err;
977 }
978 
live_timeslice_preempt(void * arg)979 static int live_timeslice_preempt(void *arg)
980 {
981 	struct intel_gt *gt = arg;
982 	struct drm_i915_gem_object *obj;
983 	struct intel_engine_cs *engine;
984 	enum intel_engine_id id;
985 	struct i915_vma *vma;
986 	void *vaddr;
987 	int err = 0;
988 
989 	/*
990 	 * If a request takes too long, we would like to give other users
991 	 * a fair go on the GPU. In particular, users may create batches
992 	 * that wait upon external input, where that input may even be
993 	 * supplied by another GPU job. To avoid blocking forever, we
994 	 * need to preempt the current task and replace it with another
995 	 * ready task.
996 	 */
997 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
998 		return 0;
999 
1000 	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1001 	if (IS_ERR(obj))
1002 		return PTR_ERR(obj);
1003 
1004 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1005 	if (IS_ERR(vma)) {
1006 		err = PTR_ERR(vma);
1007 		goto err_obj;
1008 	}
1009 
1010 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1011 	if (IS_ERR(vaddr)) {
1012 		err = PTR_ERR(vaddr);
1013 		goto err_obj;
1014 	}
1015 
1016 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1017 	if (err)
1018 		goto err_map;
1019 
1020 	err = i915_vma_sync(vma);
1021 	if (err)
1022 		goto err_pin;
1023 
1024 	for_each_engine(engine, gt, id) {
1025 		if (!intel_engine_has_preemption(engine))
1026 			continue;
1027 
1028 		memset(vaddr, 0, PAGE_SIZE);
1029 
1030 		st_engine_heartbeat_disable(engine);
1031 		err = slice_semaphore_queue(engine, vma, 5);
1032 		st_engine_heartbeat_enable(engine);
1033 		if (err)
1034 			goto err_pin;
1035 
1036 		if (igt_flush_test(gt->i915)) {
1037 			err = -EIO;
1038 			goto err_pin;
1039 		}
1040 	}
1041 
1042 err_pin:
1043 	i915_vma_unpin(vma);
1044 err_map:
1045 	i915_gem_object_unpin_map(obj);
1046 err_obj:
1047 	i915_gem_object_put(obj);
1048 	return err;
1049 }
1050 
1051 static struct i915_request *
create_rewinder(struct intel_context * ce,struct i915_request * wait,void * slot,int idx)1052 create_rewinder(struct intel_context *ce,
1053 		struct i915_request *wait,
1054 		void *slot, int idx)
1055 {
1056 	const u32 offset =
1057 		i915_ggtt_offset(ce->engine->status_page.vma) +
1058 		offset_in_page(slot);
1059 	struct i915_request *rq;
1060 	u32 *cs;
1061 	int err;
1062 
1063 	rq = intel_context_create_request(ce);
1064 	if (IS_ERR(rq))
1065 		return rq;
1066 
1067 	if (wait) {
1068 		err = i915_request_await_dma_fence(rq, &wait->fence);
1069 		if (err)
1070 			goto err;
1071 	}
1072 
1073 	cs = intel_ring_begin(rq, 14);
1074 	if (IS_ERR(cs)) {
1075 		err = PTR_ERR(cs);
1076 		goto err;
1077 	}
1078 
1079 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1080 	*cs++ = MI_NOOP;
1081 
1082 	*cs++ = MI_SEMAPHORE_WAIT |
1083 		MI_SEMAPHORE_GLOBAL_GTT |
1084 		MI_SEMAPHORE_POLL |
1085 		MI_SEMAPHORE_SAD_GTE_SDD;
1086 	*cs++ = idx;
1087 	*cs++ = offset;
1088 	*cs++ = 0;
1089 
1090 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1091 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1092 	*cs++ = offset + idx * sizeof(u32);
1093 	*cs++ = 0;
1094 
1095 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1096 	*cs++ = offset;
1097 	*cs++ = 0;
1098 	*cs++ = idx + 1;
1099 
1100 	intel_ring_advance(rq, cs);
1101 
1102 	rq->sched.attr.priority = I915_PRIORITY_MASK;
1103 	err = 0;
1104 err:
1105 	i915_request_get(rq);
1106 	i915_request_add(rq);
1107 	if (err) {
1108 		i915_request_put(rq);
1109 		return ERR_PTR(err);
1110 	}
1111 
1112 	return rq;
1113 }
1114 
live_timeslice_rewind(void * arg)1115 static int live_timeslice_rewind(void *arg)
1116 {
1117 	struct intel_gt *gt = arg;
1118 	struct intel_engine_cs *engine;
1119 	enum intel_engine_id id;
1120 
1121 	/*
1122 	 * The usual presumption on timeslice expiration is that we replace
1123 	 * the active context with another. However, given a chain of
1124 	 * dependencies we may end up with replacing the context with itself,
1125 	 * but only a few of those requests, forcing us to rewind the
1126 	 * RING_TAIL of the original request.
1127 	 */
1128 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1129 		return 0;
1130 
1131 	for_each_engine(engine, gt, id) {
1132 		enum { A1, A2, B1 };
1133 		enum { X = 1, Z, Y };
1134 		struct i915_request *rq[3] = {};
1135 		struct intel_context *ce;
1136 		unsigned long timeslice;
1137 		int i, err = 0;
1138 		u32 *slot;
1139 
1140 		if (!intel_engine_has_timeslices(engine))
1141 			continue;
1142 
1143 		/*
1144 		 * A:rq1 -- semaphore wait, timestamp X
1145 		 * A:rq2 -- write timestamp Y
1146 		 *
1147 		 * B:rq1 [await A:rq1] -- write timestamp Z
1148 		 *
1149 		 * Force timeslice, release semaphore.
1150 		 *
1151 		 * Expect execution/evaluation order XZY
1152 		 */
1153 
1154 		st_engine_heartbeat_disable(engine);
1155 		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1156 
1157 		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1158 
1159 		ce = intel_context_create(engine);
1160 		if (IS_ERR(ce)) {
1161 			err = PTR_ERR(ce);
1162 			goto err;
1163 		}
1164 
1165 		rq[A1] = create_rewinder(ce, NULL, slot, X);
1166 		if (IS_ERR(rq[A1])) {
1167 			intel_context_put(ce);
1168 			goto err;
1169 		}
1170 
1171 		rq[A2] = create_rewinder(ce, NULL, slot, Y);
1172 		intel_context_put(ce);
1173 		if (IS_ERR(rq[A2]))
1174 			goto err;
1175 
1176 		err = wait_for_submit(engine, rq[A2], HZ / 2);
1177 		if (err) {
1178 			pr_err("%s: failed to submit first context\n",
1179 			       engine->name);
1180 			goto err;
1181 		}
1182 
1183 		ce = intel_context_create(engine);
1184 		if (IS_ERR(ce)) {
1185 			err = PTR_ERR(ce);
1186 			goto err;
1187 		}
1188 
1189 		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1190 		intel_context_put(ce);
1191 		if (IS_ERR(rq[2]))
1192 			goto err;
1193 
1194 		err = wait_for_submit(engine, rq[B1], HZ / 2);
1195 		if (err) {
1196 			pr_err("%s: failed to submit second context\n",
1197 			       engine->name);
1198 			goto err;
1199 		}
1200 
1201 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1202 		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1203 		if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1204 			/* Wait for the timeslice to kick in */
1205 			del_timer(&engine->execlists.timer);
1206 			tasklet_hi_schedule(&engine->execlists.tasklet);
1207 			intel_engine_flush_submission(engine);
1208 		}
1209 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1210 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1211 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1212 		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1213 
1214 		/* Release the hounds! */
1215 		slot[0] = 1;
1216 		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1217 
1218 		for (i = 1; i <= 3; i++) {
1219 			unsigned long timeout = jiffies + HZ / 2;
1220 
1221 			while (!READ_ONCE(slot[i]) &&
1222 			       time_before(jiffies, timeout))
1223 				;
1224 
1225 			if (!time_before(jiffies, timeout)) {
1226 				pr_err("%s: rq[%d] timed out\n",
1227 				       engine->name, i - 1);
1228 				err = -ETIME;
1229 				goto err;
1230 			}
1231 
1232 			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1233 		}
1234 
1235 		/* XZY: XZ < XY */
1236 		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1237 			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1238 			       engine->name,
1239 			       slot[Z] - slot[X],
1240 			       slot[Y] - slot[X]);
1241 			err = -EINVAL;
1242 		}
1243 
1244 err:
1245 		memset32(&slot[0], -1, 4);
1246 		wmb();
1247 
1248 		engine->props.timeslice_duration_ms = timeslice;
1249 		st_engine_heartbeat_enable(engine);
1250 		for (i = 0; i < 3; i++)
1251 			i915_request_put(rq[i]);
1252 		if (igt_flush_test(gt->i915))
1253 			err = -EIO;
1254 		if (err)
1255 			return err;
1256 	}
1257 
1258 	return 0;
1259 }
1260 
/*
 * Create a kernel request on @engine and submit it straight away.
 *
 * A reference is taken before the request is added, so on success the
 * caller receives a request it must release with i915_request_put().
 * If intel_engine_create_kernel_request() fails, its ERR_PTR is returned
 * unchanged and nothing is submitted.
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *nop = intel_engine_create_kernel_request(engine);

	if (!IS_ERR(nop)) {
		/* Take the caller's reference before handing the request over */
		i915_request_get(nop);
		i915_request_add(nop);
	}

	return nop;
}
1274 
slice_timeout(struct intel_engine_cs * engine)1275 static long slice_timeout(struct intel_engine_cs *engine)
1276 {
1277 	long timeout;
1278 
1279 	/* Enough time for a timeslice to kick in, and kick out */
1280 	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1281 
1282 	/* Enough time for the nop request to complete */
1283 	timeout += HZ / 5;
1284 
1285 	return timeout + 1;
1286 }
1287 
/*
 * Selftest: a request waiting in the queue must be able to force
 * timeslicing even though both ELSP ports are already occupied.
 *
 * @arg is the struct intel_gt under test. A single page is allocated,
 * CPU-mapped and pinned into the GGTT to back the semaphores. For each
 * engine with preemption, a semaphore_queue() request is boosted to
 * maximum priority and submitted, followed by a nop request; then
 * release_queue() queues the signal at the same effective priority. The
 * first request must complete within slice_timeout().
 *
 * Returns 0 on success, or when CONFIG_DRM_I915_TIMESLICE_DURATION is
 * disabled; a negative error code otherwise (-EIO if the wait times out).
 */
static int live_timeslice_queue(void *arg)
{
	struct intel_gt *gt = arg;
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;

	/*
	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
	 * timeslicing between them disabled, we *do* enable timeslicing
	 * if the queue demands it. (Normally, we do not submit if
	 * ELSP[1] is already occupied, so must rely on timeslicing to
	 * eject ELSP[0] in favour of the queue.)
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	/* CPU view of the page, used below to reset and to flood the page */
	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = {
			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
		};
		struct i915_request *rq, *nop;

		if (!intel_engine_has_preemption(engine))
			continue;

		st_engine_heartbeat_disable(engine);
		/* Start every engine from a zeroed page */
		memset(vaddr, 0, PAGE_SIZE);

		/* ELSP[0]: semaphore wait */
		rq = semaphore_queue(engine, vma, 0);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_heartbeat;
		}
		engine->schedule(rq, &attr);
		err = wait_for_submit(engine, rq, HZ / 2);
		if (err) {
			pr_err("%s: Timed out trying to submit semaphores\n",
			       engine->name);
			goto err_rq;
		}

		/* ELSP[1]: nop request */
		nop = nop_request(engine);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			goto err_rq;
		}
		err = wait_for_submit(engine, nop, HZ / 2);
		/* The nop is not used again; drop its reference before checking err */
		i915_request_put(nop);
		if (err) {
			pr_err("%s: Timed out trying to submit nop\n",
			       engine->name);
			goto err_rq;
		}

		GEM_BUG_ON(i915_request_completed(rq));
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		/* Queue: semaphore signal, matching priority as semaphore */
		err = release_queue(engine, vma, 1, effective_prio(rq));
		if (err)
			goto err_rq;

		/* Wait until we ack the release_queue and start timeslicing */
		do {
			cond_resched();
			intel_engine_flush_submission(engine);
		} while (READ_ONCE(engine->execlists.pending[0]));

		/* Timeslice every jiffy, so within 2 we should signal */
		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
			struct drm_printer p =
				drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to timeslice into queue\n",
			       engine->name);
			intel_engine_dump(engine, &p,
					  "%s\n", engine->name);

			/*
			 * NOTE(review): presumably floods the semaphore page
			 * so the stuck waiter can terminate - confirm against
			 * semaphore_queue().
			 */
			memset(vaddr, 0xff, PAGE_SIZE);
			err = -EIO;
		}
err_rq:
		i915_request_put(rq);
err_heartbeat:
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	/* Unwind in reverse order of acquisition */
err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return err;
}
1415 
/*
 * Selftest: timeslicing must not switch away from a request flagged
 * I915_FENCE_FLAG_NOPREEMPT.
 *
 * @arg is the struct intel_gt under test. For each engine with
 * preemption, the timeslice duration is set to 1ms, a spinner is started
 * and marked no-preempt, and a request at I915_PRIORITY_BARRIER is
 * submitted from a second context. The barrier request must *not*
 * complete within slice_timeout(); if it does, -EINVAL is returned.
 *
 * Returns 0 on success, or when CONFIG_DRM_I915_TIMESLICE_DURATION is
 * disabled; a negative error code otherwise.
 */
static int live_timeslice_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We should not timeslice into a request that is marked with
	 * I915_REQUEST_NOPREEMPT.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;
		unsigned long timeslice;

		if (!intel_engine_has_preemption(engine))
			continue;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		st_engine_heartbeat_disable(engine);
		/* Remember the previous duration; it is restored at out_heartbeat */
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		/* Create an unpreemptible spinner */

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_heartbeat;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			i915_request_put(rq);
			err = -ETIME;
			goto out_spin;
		}

		/* The flag is only set once the spinner is known to be running */
		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
		i915_request_put(rq);

		/* Followed by a maximum priority barrier (heartbeat) */

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out_spin;
		}

		rq = intel_context_create_request(ce);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_spin;
		}

		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
		i915_request_get(rq);
		i915_request_add(rq);

		/*
		 * Wait until the barrier is in ELSP, and we know timeslicing
		 * will have been activated.
		 */
		if (wait_for_submit(engine, rq, HZ / 2)) {
			i915_request_put(rq);
			err = -ETIME;
			goto out_spin;
		}

		/*
		 * Since the ELSP[0] request is unpreemptible, it should not
		 * allow the maximum priority barrier through. Wait long
		 * enough to see if it is timesliced in by mistake.
		 */
		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
			       engine->name);
			err = -EINVAL;
		}
		i915_request_put(rq);

out_spin:
		/* Stop the spinner on both the success and the failure paths */
		igt_spinner_end(&spin);
out_heartbeat:
		xchg(&engine->props.timeslice_duration_ms, timeslice);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);
	return err;
}
1530 
/*
 * Selftest: a semaphore busywait emitted into the ring must be
 * preemptible by a higher priority request.
 *
 * @arg is the struct intel_gt under test. Two contexts are created, one
 * at the maximum and one at the minimum user priority, together with a
 * single GGTT-pinned page that is also CPU-mapped through @map. For each
 * engine that has preemption and can store dwords, the low priority
 * request writes 1 to the page and then polls it with MI_SEMAPHORE_WAIT;
 * the high priority request writes 0 back. The low priority request must
 * then complete within HZ / 5.
 *
 * Returns 0 on success or a negative error code; on a failed preemption
 * the GT is wedged and -EIO is returned.
 */
static int live_busywait_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	enum intel_engine_id id;
	int err = -ENOMEM;
	u32 *map;

	/*
	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
	 * preempt the busywaits used to synchronise between rings.
	 */

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	/* err is still -ENOMEM here, which is what the bare goto reports */
	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_vma;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * is should be preemptible) and the high priority requests
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			/* The request is still added even though nothing was emitted */
			i915_request_add(lo);
			goto err_vma;
		}

		/* Store 1 at the start of the page; the CPU polls *map for it */
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here? */

		/* Poll the same GGTT address, comparing against a data value of 0 */
		*cs++ = MI_SEMAPHORE_WAIT |
			MI_SEMAPHORE_GLOBAL_GTT |
			MI_SEMAPHORE_POLL |
			MI_SEMAPHORE_SAD_EQ_SDD;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;

		intel_ring_advance(lo, cs);

		i915_request_get(lo);
		i915_request_add(lo);

		/* Wait for the store above to become visible through the CPU map */
		if (wait_for(READ_ONCE(*map), 10)) {
			i915_request_put(lo);
			err = -ETIMEDOUT;
			goto err_vma;
		}

		/* Low priority request should be busywaiting now */
		if (i915_request_wait(lo, 0, 1) != -ETIME) {
			i915_request_put(lo);
			pr_err("%s: Busywaiting request did not!\n",
			       engine->name);
			err = -EIO;
			goto err_vma;
		}

		hi = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(hi)) {
			err = PTR_ERR(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(hi, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(hi);
			i915_request_put(lo);
			goto err_vma;
		}

		/* Write 0 back to the polled dword, the value lo compares against */
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 0;

		intel_ring_advance(hi, cs);
		/* No extra reference is taken on hi; only lo is waited upon */
		i915_request_add(hi);

		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

			pr_err("%s: Failed to preempt semaphore busywait!\n",
			       engine->name);

			intel_engine_dump(engine, &p, "%s\n", engine->name);
			GEM_TRACE_DUMP();

			i915_request_put(lo);
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_vma;
		}
		GEM_BUG_ON(READ_ONCE(*map));
		i915_request_put(lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_vma;
		}
	}

	err = 0;
	/* Unwind in reverse order of acquisition */
err_vma:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
	return err;
}
1717 
1718 static struct i915_request *
spinner_create_request(struct igt_spinner * spin,struct i915_gem_context * ctx,struct intel_engine_cs * engine,u32 arb)1719 spinner_create_request(struct igt_spinner *spin,
1720 		       struct i915_gem_context *ctx,
1721 		       struct intel_engine_cs *engine,
1722 		       u32 arb)
1723 {
1724 	struct intel_context *ce;
1725 	struct i915_request *rq;
1726 
1727 	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1728 	if (IS_ERR(ce))
1729 		return ERR_CAST(ce);
1730 
1731 	rq = igt_spinner_create_request(spin, ce, arb);
1732 	intel_context_put(ce);
1733 	return rq;
1734 }
1735 
/*
 * Selftest: a spinner from a high priority context must start running
 * while a spinner from a low priority context still occupies the engine.
 *
 * @arg is the struct intel_gt under test. Two spinners and two contexts
 * are created, the contexts at the maximum and minimum user priority.
 * For each engine with preemption, the low priority spinner is started
 * first; the high priority spinner is then submitted and must be seen
 * running by igt_wait_for_spinner() before either spinner is ended.
 *
 * Returns 0 on success, or when HAS_LOGICAL_RING_PREEMPTION is false;
 * a negative error code otherwise. If a spinner fails to start the GT
 * is wedged and -EIO is returned.
 */
static int live_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	/* Reported as an error, but the test carries on regardless */
	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
		pr_err("Logical preemption supported, but not exposed\n");

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	/* The bare gotos below rely on err still being -ENOMEM */
	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		/* Occupy the engine with the low priority spinner first */
		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			GEM_TRACE("lo spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		/* The high priority spinner must start while lo is still spinning */
		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			GEM_TRACE("hi spinner failed to start\n");
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
	/* Unwind in reverse order of acquisition */
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;
}
1834 
/*
 * Selftest: raising the priority of an already queued request must
 * trigger preemption of the request currently running.
 *
 * @arg is the struct intel_gt under test. ctx_lo is given
 * I915_USER_PRIORITY(1) and ctx_hi is left at the priority
 * kernel_context() gave it. For each engine with preemption, the ctx_lo
 * spinner is started, the ctx_hi spinner is submitted and must *not*
 * start, and then engine->schedule() bumps the ctx_hi request to maximum
 * priority, after which it must start.
 *
 * Returns 0 on success, or when HAS_LOGICAL_RING_PREEMPTION is false;
 * a negative error code otherwise. Any unexpected spinner state ends
 * both spinners, wedges the GT and returns -EIO.
 */
static int live_late_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_hi, spin_lo;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {};
	enum intel_engine_id id;
	int err = -ENOMEM;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (igt_spinner_init(&spin_hi, gt))
		return -ENOMEM;

	/* The bare gotos below rely on err still being -ENOMEM */
	if (igt_spinner_init(&spin_lo, gt))
		goto err_spin_hi;

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;

	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
	ctx_lo->sched.priority = I915_USER_PRIORITY(1);

	for_each_engine(engine, gt, id) {
		struct igt_live_test t;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			pr_err("First context failed to start\n");
			goto err_wedged;
		}

		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
					    MI_NOOP);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		/* Before the priority bump, seeing this spinner run is the failure */
		if (igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("Second context overtook first?\n");
			goto err_wedged;
		}

		/* Raise the queued request to maximum priority after the fact */
		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
		engine->schedule(rq, &attr);

		if (!igt_wait_for_spinner(&spin_hi, rq)) {
			pr_err("High priority context failed to preempt the low priority context\n");
			GEM_TRACE_DUMP();
			goto err_wedged;
		}

		igt_spinner_end(&spin_hi);
		igt_spinner_end(&spin_lo);

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_ctx_lo;
		}
	}

	err = 0;
	/* Unwind in reverse order of acquisition */
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
err_spin_hi:
	igt_spinner_fini(&spin_hi);
	return err;

	/* Out-of-line failure path: stop both spinners, wedge, then unwind */
err_wedged:
	igt_spinner_end(&spin_hi);
	igt_spinner_end(&spin_lo);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_ctx_lo;
}
1940 
/*
 * One party in the preemption tests: a GEM context together with its own
 * spinner. Set up by preempt_client_init(), torn down by
 * preempt_client_fini().
 */
struct preempt_client {
	struct igt_spinner spin; /* initialised with igt_spinner_init() */
	struct i915_gem_context *ctx; /* obtained from kernel_context() */
};
1945 
preempt_client_init(struct intel_gt * gt,struct preempt_client * c)1946 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1947 {
1948 	c->ctx = kernel_context(gt->i915);
1949 	if (!c->ctx)
1950 		return -ENOMEM;
1951 
1952 	if (igt_spinner_init(&c->spin, gt))
1953 		goto err_ctx;
1954 
1955 	return 0;
1956 
1957 err_ctx:
1958 	kernel_context_close(c->ctx);
1959 	return -ENOMEM;
1960 }
1961 
/*
 * Tear down a client set up by preempt_client_init(): finalise the
 * spinner, then close the context.
 */
static void preempt_client_fini(struct preempt_client *c)
{
	igt_spinner_fini(&c->spin);
	kernel_context_close(c->ctx);
}
1967 
/*
 * Selftest: a request flagged I915_FENCE_FLAG_NOPREEMPT must not be
 * preempted, even by a request of maximum priority.
 *
 * @arg is the struct intel_gt under test. Client b's context is given
 * maximum priority. For each engine with preemption, client a's spinner
 * is flagged no-preempt and started, then client b's spinner is
 * submitted. b must not start until a's spinner has been ended, and
 * engine->execlists.preempt_hang.count must still be zero afterwards.
 *
 * Returns 0 on success, or when HAS_LOGICAL_RING_PREEMPTION is false;
 * a negative error code otherwise. Every path through err_wedged ends
 * both spinners, wedges the GT and returns -EIO.
 */
static int live_nopreempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that we can disable preemption for an individual request
	 * that may be being observed and not want to be interrupted.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	/* err is still -ENOMEM here, which is what the bare goto reports */
	if (preempt_client_init(gt, &b))
		goto err_client_a;
	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;

		if (!intel_engine_has_preemption(engine))
			continue;

		/* Reset the counter that is checked at the end of this iteration */
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			goto err_client_b;
		}

		/* Low priority client, but unpreemptable! */
		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			goto err_wedged;
		}

		rq_b = spinner_create_request(&b.spin,
					      b.ctx, engine,
					      MI_ARB_CHECK);
		if (IS_ERR(rq_b)) {
			err = PTR_ERR(rq_b);
			goto err_client_b;
		}

		i915_request_add(rq_b);

		/* B is much more important than A! (But A is unpreemptable.) */
		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));

		/* Wait long enough for preemption and timeslicing */
		if (igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client started too early!\n");
			goto err_wedged;
		}

		/* Only once A is stopped is B expected to start */
		igt_spinner_end(&a.spin);

		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
			pr_err("Second client failed to start\n");
			goto err_wedged;
		}

		igt_spinner_end(&b.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption recorded x%d; should have been suppressed!\n",
			       engine->execlists.preempt_hang.count);
			/* NOTE: err_wedged overwrites this value with -EIO */
			err = -EINVAL;
			goto err_wedged;
		}

		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

	/* Out-of-line failure path: stop both spinners, wedge, then unwind */
err_wedged:
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}
2068 
/* State shared between live_preempt_cancel() and its __cancel_*() subtests. */
struct live_preempt_cancel {
	struct intel_engine_cs *engine; /* engine under test; the for_each_engine() cursor */
	struct preempt_client a, b; /* two context + spinner pairs used by the subtests */
};
2073 
__cancel_active0(struct live_preempt_cancel * arg)2074 static int __cancel_active0(struct live_preempt_cancel *arg)
2075 {
2076 	struct i915_request *rq;
2077 	struct igt_live_test t;
2078 	int err;
2079 
2080 	/* Preempt cancel of ELSP0 */
2081 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2082 	if (igt_live_test_begin(&t, arg->engine->i915,
2083 				__func__, arg->engine->name))
2084 		return -EIO;
2085 
2086 	rq = spinner_create_request(&arg->a.spin,
2087 				    arg->a.ctx, arg->engine,
2088 				    MI_ARB_CHECK);
2089 	if (IS_ERR(rq))
2090 		return PTR_ERR(rq);
2091 
2092 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2093 	i915_request_get(rq);
2094 	i915_request_add(rq);
2095 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2096 		err = -EIO;
2097 		goto out;
2098 	}
2099 
2100 	intel_context_set_banned(rq->context);
2101 	err = intel_engine_pulse(arg->engine);
2102 	if (err)
2103 		goto out;
2104 
2105 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2106 	if (err) {
2107 		pr_err("Cancelled inflight0 request did not reset\n");
2108 		goto out;
2109 	}
2110 
2111 out:
2112 	i915_request_put(rq);
2113 	if (igt_live_test_end(&t))
2114 		err = -EIO;
2115 	return err;
2116 }
2117 
__cancel_active1(struct live_preempt_cancel * arg)2118 static int __cancel_active1(struct live_preempt_cancel *arg)
2119 {
2120 	struct i915_request *rq[2] = {};
2121 	struct igt_live_test t;
2122 	int err;
2123 
2124 	/* Preempt cancel of ELSP1 */
2125 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2126 	if (igt_live_test_begin(&t, arg->engine->i915,
2127 				__func__, arg->engine->name))
2128 		return -EIO;
2129 
2130 	rq[0] = spinner_create_request(&arg->a.spin,
2131 				       arg->a.ctx, arg->engine,
2132 				       MI_NOOP); /* no preemption */
2133 	if (IS_ERR(rq[0]))
2134 		return PTR_ERR(rq[0]);
2135 
2136 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2137 	i915_request_get(rq[0]);
2138 	i915_request_add(rq[0]);
2139 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2140 		err = -EIO;
2141 		goto out;
2142 	}
2143 
2144 	rq[1] = spinner_create_request(&arg->b.spin,
2145 				       arg->b.ctx, arg->engine,
2146 				       MI_ARB_CHECK);
2147 	if (IS_ERR(rq[1])) {
2148 		err = PTR_ERR(rq[1]);
2149 		goto out;
2150 	}
2151 
2152 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2153 	i915_request_get(rq[1]);
2154 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2155 	i915_request_add(rq[1]);
2156 	if (err)
2157 		goto out;
2158 
2159 	intel_context_set_banned(rq[1]->context);
2160 	err = intel_engine_pulse(arg->engine);
2161 	if (err)
2162 		goto out;
2163 
2164 	igt_spinner_end(&arg->a.spin);
2165 	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2166 	if (err)
2167 		goto out;
2168 
2169 	if (rq[0]->fence.error != 0) {
2170 		pr_err("Normal inflight0 request did not complete\n");
2171 		err = -EINVAL;
2172 		goto out;
2173 	}
2174 
2175 	if (rq[1]->fence.error != -EIO) {
2176 		pr_err("Cancelled inflight1 request did not report -EIO\n");
2177 		err = -EINVAL;
2178 		goto out;
2179 	}
2180 
2181 out:
2182 	i915_request_put(rq[1]);
2183 	i915_request_put(rq[0]);
2184 	if (igt_live_test_end(&t))
2185 		err = -EIO;
2186 	return err;
2187 }
2188 
__cancel_queued(struct live_preempt_cancel * arg)2189 static int __cancel_queued(struct live_preempt_cancel *arg)
2190 {
2191 	struct i915_request *rq[3] = {};
2192 	struct igt_live_test t;
2193 	int err;
2194 
2195 	/* Full ELSP and one in the wings */
2196 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2197 	if (igt_live_test_begin(&t, arg->engine->i915,
2198 				__func__, arg->engine->name))
2199 		return -EIO;
2200 
2201 	rq[0] = spinner_create_request(&arg->a.spin,
2202 				       arg->a.ctx, arg->engine,
2203 				       MI_ARB_CHECK);
2204 	if (IS_ERR(rq[0]))
2205 		return PTR_ERR(rq[0]);
2206 
2207 	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2208 	i915_request_get(rq[0]);
2209 	i915_request_add(rq[0]);
2210 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2211 		err = -EIO;
2212 		goto out;
2213 	}
2214 
2215 	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2216 	if (IS_ERR(rq[1])) {
2217 		err = PTR_ERR(rq[1]);
2218 		goto out;
2219 	}
2220 
2221 	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2222 	i915_request_get(rq[1]);
2223 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2224 	i915_request_add(rq[1]);
2225 	if (err)
2226 		goto out;
2227 
2228 	rq[2] = spinner_create_request(&arg->b.spin,
2229 				       arg->a.ctx, arg->engine,
2230 				       MI_ARB_CHECK);
2231 	if (IS_ERR(rq[2])) {
2232 		err = PTR_ERR(rq[2]);
2233 		goto out;
2234 	}
2235 
2236 	i915_request_get(rq[2]);
2237 	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2238 	i915_request_add(rq[2]);
2239 	if (err)
2240 		goto out;
2241 
2242 	intel_context_set_banned(rq[2]->context);
2243 	err = intel_engine_pulse(arg->engine);
2244 	if (err)
2245 		goto out;
2246 
2247 	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2248 	if (err)
2249 		goto out;
2250 
2251 	if (rq[0]->fence.error != -EIO) {
2252 		pr_err("Cancelled inflight0 request did not report -EIO\n");
2253 		err = -EINVAL;
2254 		goto out;
2255 	}
2256 
2257 	if (rq[1]->fence.error != 0) {
2258 		pr_err("Normal inflight1 request did not complete\n");
2259 		err = -EINVAL;
2260 		goto out;
2261 	}
2262 
2263 	if (rq[2]->fence.error != -EIO) {
2264 		pr_err("Cancelled queued request did not report -EIO\n");
2265 		err = -EINVAL;
2266 		goto out;
2267 	}
2268 
2269 out:
2270 	i915_request_put(rq[2]);
2271 	i915_request_put(rq[1]);
2272 	i915_request_put(rq[0]);
2273 	if (igt_live_test_end(&t))
2274 		err = -EIO;
2275 	return err;
2276 }
2277 
__cancel_hostile(struct live_preempt_cancel * arg)2278 static int __cancel_hostile(struct live_preempt_cancel *arg)
2279 {
2280 	struct i915_request *rq;
2281 	int err;
2282 
2283 	/* Preempt cancel non-preemptible spinner in ELSP0 */
2284 	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2285 		return 0;
2286 
2287 	if (!intel_has_reset_engine(arg->engine->gt))
2288 		return 0;
2289 
2290 	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2291 	rq = spinner_create_request(&arg->a.spin,
2292 				    arg->a.ctx, arg->engine,
2293 				    MI_NOOP); /* preemption disabled */
2294 	if (IS_ERR(rq))
2295 		return PTR_ERR(rq);
2296 
2297 	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2298 	i915_request_get(rq);
2299 	i915_request_add(rq);
2300 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2301 		err = -EIO;
2302 		goto out;
2303 	}
2304 
2305 	intel_context_set_banned(rq->context);
2306 	err = intel_engine_pulse(arg->engine); /* force reset */
2307 	if (err)
2308 		goto out;
2309 
2310 	err = wait_for_reset(arg->engine, rq, HZ / 2);
2311 	if (err) {
2312 		pr_err("Cancelled inflight0 request did not reset\n");
2313 		goto out;
2314 	}
2315 
2316 out:
2317 	i915_request_put(rq);
2318 	if (igt_flush_test(arg->engine->i915))
2319 		err = -EIO;
2320 	return err;
2321 }
2322 
/*
 * Selftest driver for the preempt-cancel subtests.
 *
 * @arg is the struct intel_gt under test. Two preempt clients are
 * created and, for each engine with preemption, __cancel_active0(),
 * __cancel_active1(), __cancel_queued() and __cancel_hostile() are run
 * in that order. The first subtest to fail stops the run: both spinners
 * are ended, the GT is wedged and the subtest's error code is returned.
 *
 * Returns 0 on success, or when HAS_LOGICAL_RING_PREEMPTION is false;
 * a negative error code otherwise.
 */
static int live_preempt_cancel(void *arg)
{
	struct intel_gt *gt = arg;
	struct live_preempt_cancel data;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * To cancel an inflight context, we need to first remove it from the
	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &data.a))
		return -ENOMEM;
	/* err is still -ENOMEM here, which is what the bare goto reports */
	if (preempt_client_init(gt, &data.b))
		goto err_client_a;

	/* data.engine doubles as the loop cursor read by each subtest */
	for_each_engine(data.engine, gt, id) {
		if (!intel_engine_has_preemption(data.engine))
			continue;

		err = __cancel_active0(&data);
		if (err)
			goto err_wedged;

		err = __cancel_active1(&data);
		if (err)
			goto err_wedged;

		err = __cancel_queued(&data);
		if (err)
			goto err_wedged;

		err = __cancel_hostile(&data);
		if (err)
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&data.b);
err_client_a:
	preempt_client_fini(&data.a);
	return err;

	/* Out-of-line failure path; err keeps the value set by the subtest */
err_wedged:
	GEM_TRACE_DUMP();
	igt_spinner_end(&data.b.spin);
	igt_spinner_end(&data.a.spin);
	intel_gt_set_wedged(gt);
	goto err_client_b;
}
2378 
/*
 * Selftest entry point; arg is the struct intel_gt under test.
 *
 * On each engine with preemption, two clients alternate spinners: while one
 * spinner runs, the next is queued behind it and the running request is
 * bumped to maximum priority. The engine's preempt_hang.count is cleared
 * beforehand and must still be zero after eight such hand-overs.
 *
 * Returns 0 on success or when the test is skipped, -ENOMEM if a client
 * cannot be initialised, the PTR_ERR() of a failed spinner request, -EINVAL
 * if a preemption was recorded, or -EIO after wedging the GT.
 */
static int live_suppress_self_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_sched_attr attr = {
		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
	};
	struct preempt_client a, b;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Verify that if a preemption request does not cause a change in
	 * the current execution order, the preempt-to-idle injection is
	 * skipped and that we do not accidentally apply it after the CS
	 * completion event.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0; /* presume black box */

	if (intel_vgpu_active(gt->i915))
		return 0; /* GVT forces single port & request submission */

	if (preempt_client_init(gt, &a))
		return -ENOMEM;
	if (preempt_client_init(gt, &b))
		goto err_client_a;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq_a, *rq_b;
		int depth;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_flush_test(gt->i915))
			goto err_wedged;

		/*
		 * The heartbeat is disabled for the duration of the test on
		 * this engine; every exit path below re-enables it.
		 */
		st_engine_heartbeat_disable(engine);
		/* Clear the counter that is inspected after the chain */
		engine->execlists.preempt_hang.count = 0;

		rq_a = spinner_create_request(&a.spin,
					      a.ctx, engine,
					      MI_NOOP);
		if (IS_ERR(rq_a)) {
			err = PTR_ERR(rq_a);
			st_engine_heartbeat_enable(engine);
			goto err_client_b;
		}

		i915_request_add(rq_a);
		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
			pr_err("First client failed to start\n");
			st_engine_heartbeat_enable(engine);
			goto err_wedged;
		}

		/* Keep postponing the timer to avoid premature slicing */
		mod_timer(&engine->execlists.timer, jiffies + HZ);
		for (depth = 0; depth < 8; depth++) {
			/* Queue the other client's spinner behind rq_a */
			rq_b = spinner_create_request(&b.spin,
						      b.ctx, engine,
						      MI_NOOP);
			if (IS_ERR(rq_b)) {
				err = PTR_ERR(rq_b);
				st_engine_heartbeat_enable(engine);
				goto err_client_b;
			}
			i915_request_add(rq_b);

			/*
			 * Raise the priority of the request that is already
			 * running, then let it finish so rq_b can start.
			 */
			GEM_BUG_ON(i915_request_completed(rq_a));
			engine->schedule(rq_a, &attr);
			igt_spinner_end(&a.spin);

			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
				pr_err("Second client failed to start\n");
				st_engine_heartbeat_enable(engine);
				goto err_wedged;
			}

			/* The queued client is the running one next time */
			swap(a, b);
			rq_a = rq_b;
		}
		igt_spinner_end(&a.spin);

		if (engine->execlists.preempt_hang.count) {
			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
			       engine->name,
			       engine->execlists.preempt_hang.count,
			       depth);
			st_engine_heartbeat_enable(engine);
			err = -EINVAL;
			goto err_client_b;
		}

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			goto err_wedged;
	}

	err = 0;
err_client_b:
	preempt_client_fini(&b);
err_client_a:
	preempt_client_fini(&a);
	return err;

err_wedged:
	/* Stop any spinner still running before wedging the GT */
	igt_spinner_end(&b.spin);
	igt_spinner_end(&a.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_b;
}
2497 
/*
 * Selftest entry point; arg is the struct intel_gt under test.
 *
 * For each engine with preemption, and for a growing (prime) number of
 * filler requests, queue a low priority chain behind a running high
 * priority spinner, then submit one more high priority request at maximum
 * priority and check that it completes ahead of the low priority spinner.
 *
 * Returns 0 on success or when the device lacks logical ring preemption and
 * -ENOMEM if a client cannot be initialised. Every other failure, including
 * a failed request allocation, takes the err_wedged path: the spinners are
 * ended, the GT is wedged and -EIO is returned.
 */
static int live_chain_preempt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct preempt_client hi, lo;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Build a chain AB...BA between two contexts (A, B) and request
	 * preemption of the last request. It should then complete before
	 * the previously submitted spinner in B.
	 */

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (preempt_client_init(gt, &hi))
		return -ENOMEM;

	if (preempt_client_init(gt, &lo))
		goto err_client_hi;

	for_each_engine(engine, gt, id) {
		struct i915_sched_attr attr = {
			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
		};
		struct igt_live_test t;
		struct i915_request *rq;
		int ring_size, count, i;

		if (!intel_engine_has_preemption(engine))
			continue;

		/* Submit one spinner purely to measure its footprint */
		rq = spinner_create_request(&lo.spin,
					    lo.ctx, engine,
					    MI_ARB_CHECK);
		if (IS_ERR(rq))
			goto err_wedged;

		i915_request_get(rq);
		i915_request_add(rq);

		/*
		 * Span of this request in the ring (head to wa_tail,
		 * corrected for wrap-around), then the number of such
		 * requests that fit into the whole ring.
		 */
		ring_size = rq->wa_tail - rq->head;
		if (ring_size < 0)
			ring_size += rq->ring->size;
		ring_size = rq->ring->size / ring_size;
		pr_debug("%s(%s): Using maximum of %d requests\n",
			 __func__, engine->name, ring_size);

		igt_spinner_end(&lo.spin);
		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
			pr_err("Timed out waiting to flush %s\n", engine->name);
			i915_request_put(rq);
			goto err_wedged;
		}
		i915_request_put(rq);

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_wedged;
		}

		for_each_prime_number_from(count, 1, ring_size) {
			/* High priority spinner, and wait until it runs */
			rq = spinner_create_request(&hi.spin,
						    hi.ctx, engine,
						    MI_ARB_CHECK);
			if (IS_ERR(rq))
				goto err_wedged;
			i915_request_add(rq);
			if (!igt_wait_for_spinner(&hi.spin, rq))
				goto err_wedged;

			/* Low priority spinner queued behind it ... */
			rq = spinner_create_request(&lo.spin,
						    lo.ctx, engine,
						    MI_ARB_CHECK);
			if (IS_ERR(rq))
				goto err_wedged;
			i915_request_add(rq);

			/* ... followed by count more low priority requests */
			for (i = 0; i < count; i++) {
				rq = igt_request_alloc(lo.ctx, engine);
				if (IS_ERR(rq))
					goto err_wedged;
				i915_request_add(rq);
			}

			/* Final high priority request, bumped to max priority */
			rq = igt_request_alloc(hi.ctx, engine);
			if (IS_ERR(rq))
				goto err_wedged;

			i915_request_get(rq);
			i915_request_add(rq);
			engine->schedule(rq, &attr);

			/*
			 * Only the hi spinner is released here; the lo
			 * spinner is still spinning while we wait for rq.
			 */
			igt_spinner_end(&hi.spin);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("Failed to preempt over chain of %d\n",
				       count);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);
				i915_request_put(rq);
				goto err_wedged;
			}
			igt_spinner_end(&lo.spin);
			i915_request_put(rq);

			/* One more lo request to flush the rest of the chain */
			rq = igt_request_alloc(lo.ctx, engine);
			if (IS_ERR(rq))
				goto err_wedged;

			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("Failed to flush low priority chain of %d requests\n",
				       count);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				i915_request_put(rq);
				goto err_wedged;
			}
			i915_request_put(rq);
		}

		if (igt_live_test_end(&t)) {
			err = -EIO;
			goto err_wedged;
		}
	}

	err = 0;
err_client_lo:
	preempt_client_fini(&lo);
err_client_hi:
	preempt_client_fini(&hi);
	return err;

err_wedged:
	/* Release both spinners, wedge the GT and report -EIO */
	igt_spinner_end(&hi.spin);
	igt_spinner_end(&lo.spin);
	intel_gt_set_wedged(gt);
	err = -EIO;
	goto err_client_lo;
}
2650 
create_gang(struct intel_engine_cs * engine,struct i915_request ** prev)2651 static int create_gang(struct intel_engine_cs *engine,
2652 		       struct i915_request **prev)
2653 {
2654 	struct drm_i915_gem_object *obj;
2655 	struct intel_context *ce;
2656 	struct i915_request *rq;
2657 	struct i915_vma *vma;
2658 	u32 *cs;
2659 	int err;
2660 
2661 	ce = intel_context_create(engine);
2662 	if (IS_ERR(ce))
2663 		return PTR_ERR(ce);
2664 
2665 	obj = i915_gem_object_create_internal(engine->i915, 4096);
2666 	if (IS_ERR(obj)) {
2667 		err = PTR_ERR(obj);
2668 		goto err_ce;
2669 	}
2670 
2671 	vma = i915_vma_instance(obj, ce->vm, NULL);
2672 	if (IS_ERR(vma)) {
2673 		err = PTR_ERR(vma);
2674 		goto err_obj;
2675 	}
2676 
2677 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2678 	if (err)
2679 		goto err_obj;
2680 
2681 	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2682 	if (IS_ERR(cs))
2683 		goto err_obj;
2684 
2685 	/* Semaphore target: spin until zero */
2686 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2687 
2688 	*cs++ = MI_SEMAPHORE_WAIT |
2689 		MI_SEMAPHORE_POLL |
2690 		MI_SEMAPHORE_SAD_EQ_SDD;
2691 	*cs++ = 0;
2692 	*cs++ = lower_32_bits(vma->node.start);
2693 	*cs++ = upper_32_bits(vma->node.start);
2694 
2695 	if (*prev) {
2696 		u64 offset = (*prev)->batch->node.start;
2697 
2698 		/* Terminate the spinner in the next lower priority batch. */
2699 		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2700 		*cs++ = lower_32_bits(offset);
2701 		*cs++ = upper_32_bits(offset);
2702 		*cs++ = 0;
2703 	}
2704 
2705 	*cs++ = MI_BATCH_BUFFER_END;
2706 	i915_gem_object_flush_map(obj);
2707 	i915_gem_object_unpin_map(obj);
2708 
2709 	rq = intel_context_create_request(ce);
2710 	if (IS_ERR(rq))
2711 		goto err_obj;
2712 
2713 	rq->batch = i915_vma_get(vma);
2714 	i915_request_get(rq);
2715 
2716 	i915_vma_lock(vma);
2717 	err = i915_request_await_object(rq, vma->obj, false);
2718 	if (!err)
2719 		err = i915_vma_move_to_active(vma, rq, 0);
2720 	if (!err)
2721 		err = rq->engine->emit_bb_start(rq,
2722 						vma->node.start,
2723 						PAGE_SIZE, 0);
2724 	i915_vma_unlock(vma);
2725 	i915_request_add(rq);
2726 	if (err)
2727 		goto err_rq;
2728 
2729 	i915_gem_object_put(obj);
2730 	intel_context_put(ce);
2731 
2732 	rq->mock.link.next = &(*prev)->mock.link;
2733 	*prev = rq;
2734 	return 0;
2735 
2736 err_rq:
2737 	i915_vma_put(rq->batch);
2738 	i915_request_put(rq);
2739 err_obj:
2740 	i915_gem_object_put(obj);
2741 err_ce:
2742 	intel_context_put(ce);
2743 	return err;
2744 }
2745 
/*
 * __live_preempt_ring - preempt a context whose ring has been filled
 * @engine: engine to test
 * @spin: spinner used to keep the first context busy
 * @queue_sz: how far ring->tail of the first context must advance past the
 *            spinner's wa_tail before filling stops
 * @ring_sz: value passed to __intel_context_ring_size() for both contexts
 *
 * Starts a spinner on the first context, queues empty requests behind it
 * until the ring has advanced by more than @queue_sz, then submits a request
 * on the second context and checks that it gets submitted.
 *
 * Returns 0 on success, -ETIME if the spinner does not start (the GT is
 * wedged) or the preempting request is not submitted, -EIO if the live test
 * begin/end check fails, or another negative error code from setup.
 */
static int __live_preempt_ring(struct intel_engine_cs *engine,
			       struct igt_spinner *spin,
			       int queue_sz, int ring_sz)
{
	struct intel_context *ce[2] = {};
	struct i915_request *rq;
	struct igt_live_test t;
	int err = 0;
	int n;

	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
		return -EIO;

	for (n = 0; n < ARRAY_SIZE(ce); n++) {
		struct intel_context *tmp;

		tmp = intel_context_create(engine);
		if (IS_ERR(tmp)) {
			err = PTR_ERR(tmp);
			goto err_ce;
		}

		/* Set before pinning; presumably selects the ring size used */
		tmp->ring = __intel_context_ring_size(ring_sz);

		err = intel_context_pin(tmp);
		if (err) {
			intel_context_put(tmp);
			goto err_ce;
		}

		memset32(tmp->ring->vaddr,
			 0xdeadbeef, /* trigger a hang if executed */
			 tmp->ring->vma->size / sizeof(u32));

		/* Only fully set up contexts are recorded for cleanup */
		ce[n] = tmp;
	}

	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ce;
	}

	i915_request_get(rq);
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		intel_gt_set_wedged(engine->gt);
		i915_request_put(rq);
		err = -ETIME;
		goto err_ce;
	}

	/* Fill the ring, until we will cause a wrap */
	n = 0;
	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
		struct i915_request *tmp;

		tmp = intel_context_create_request(ce[0]);
		if (IS_ERR(tmp)) {
			err = PTR_ERR(tmp);
			i915_request_put(rq);
			goto err_ce;
		}

		i915_request_add(tmp);
		intel_engine_flush_submission(engine);
		n++;
	}
	intel_engine_flush_submission(engine);
	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
		 engine->name, queue_sz, n,
		 ce[0]->ring->size,
		 ce[0]->ring->tail,
		 ce[0]->ring->emit,
		 rq->tail);
	i915_request_put(rq);

	/* Create a second request to preempt the first ring */
	rq = intel_context_create_request(ce[1]);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ce;
	}

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_get(rq);
	i915_request_add(rq);

	err = wait_for_submit(engine, rq, HZ / 2);
	i915_request_put(rq);
	if (err) {
		pr_err("%s: preemption request was not submited\n",
		       engine->name);
		err = -ETIME;
	}

	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
		 engine->name,
		 ce[0]->ring->tail, ce[0]->ring->emit,
		 ce[1]->ring->tail, ce[1]->ring->emit);

	/* Reached on success as well as on error */
err_ce:
	intel_engine_flush_submission(engine);
	igt_spinner_end(spin);
	for (n = 0; n < ARRAY_SIZE(ce); n++) {
		/* ce[] is filled in order, so stop at the first empty slot */
		if (IS_ERR_OR_NULL(ce[n]))
			break;

		intel_context_unpin(ce[n]);
		intel_context_put(ce[n]);
	}
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
2863 
live_preempt_ring(void * arg)2864 static int live_preempt_ring(void *arg)
2865 {
2866 	struct intel_gt *gt = arg;
2867 	struct intel_engine_cs *engine;
2868 	struct igt_spinner spin;
2869 	enum intel_engine_id id;
2870 	int err = 0;
2871 
2872 	/*
2873 	 * Check that we rollback large chunks of a ring in order to do a
2874 	 * preemption event. Similar to live_unlite_ring, but looking at
2875 	 * ring size rather than the impact of intel_ring_direction().
2876 	 */
2877 
2878 	if (igt_spinner_init(&spin, gt))
2879 		return -ENOMEM;
2880 
2881 	for_each_engine(engine, gt, id) {
2882 		int n;
2883 
2884 		if (!intel_engine_has_preemption(engine))
2885 			continue;
2886 
2887 		if (!intel_engine_can_store_dword(engine))
2888 			continue;
2889 
2890 		st_engine_heartbeat_disable(engine);
2891 
2892 		for (n = 0; n <= 3; n++) {
2893 			err = __live_preempt_ring(engine, &spin,
2894 						  n * SZ_4K / 4, SZ_4K);
2895 			if (err)
2896 				break;
2897 		}
2898 
2899 		st_engine_heartbeat_enable(engine);
2900 		if (err)
2901 			break;
2902 	}
2903 
2904 	igt_spinner_fini(&spin);
2905 	return err;
2906 }
2907 
live_preempt_gang(void * arg)2908 static int live_preempt_gang(void *arg)
2909 {
2910 	struct intel_gt *gt = arg;
2911 	struct intel_engine_cs *engine;
2912 	enum intel_engine_id id;
2913 
2914 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2915 		return 0;
2916 
2917 	/*
2918 	 * Build as long a chain of preempters as we can, with each
2919 	 * request higher priority than the last. Once we are ready, we release
2920 	 * the last batch which then precolates down the chain, each releasing
2921 	 * the next oldest in turn. The intent is to simply push as hard as we
2922 	 * can with the number of preemptions, trying to exceed narrow HW
2923 	 * limits. At a minimum, we insist that we can sort all the user
2924 	 * high priority levels into execution order.
2925 	 */
2926 
2927 	for_each_engine(engine, gt, id) {
2928 		struct i915_request *rq = NULL;
2929 		struct igt_live_test t;
2930 		IGT_TIMEOUT(end_time);
2931 		int prio = 0;
2932 		int err = 0;
2933 		u32 *cs;
2934 
2935 		if (!intel_engine_has_preemption(engine))
2936 			continue;
2937 
2938 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2939 			return -EIO;
2940 
2941 		do {
2942 			struct i915_sched_attr attr = {
2943 				.priority = I915_USER_PRIORITY(prio++),
2944 			};
2945 
2946 			err = create_gang(engine, &rq);
2947 			if (err)
2948 				break;
2949 
2950 			/* Submit each spinner at increasing priority */
2951 			engine->schedule(rq, &attr);
2952 		} while (prio <= I915_PRIORITY_MAX &&
2953 			 !__igt_timeout(end_time, NULL));
2954 		pr_debug("%s: Preempt chain of %d requests\n",
2955 			 engine->name, prio);
2956 
2957 		/*
2958 		 * Such that the last spinner is the highest priority and
2959 		 * should execute first. When that spinner completes,
2960 		 * it will terminate the next lowest spinner until there
2961 		 * are no more spinners and the gang is complete.
2962 		 */
2963 		cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2964 		if (!IS_ERR(cs)) {
2965 			*cs = 0;
2966 			i915_gem_object_unpin_map(rq->batch->obj);
2967 		} else {
2968 			err = PTR_ERR(cs);
2969 			intel_gt_set_wedged(gt);
2970 		}
2971 
2972 		while (rq) { /* wait for each rq from highest to lowest prio */
2973 			struct i915_request *n = list_next_entry(rq, mock.link);
2974 
2975 			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2976 				struct drm_printer p =
2977 					drm_info_printer(engine->i915->drm.dev);
2978 
2979 				pr_err("Failed to flush chain of %d requests, at %d\n",
2980 				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2981 				intel_engine_dump(engine, &p,
2982 						  "%s\n", engine->name);
2983 
2984 				err = -ETIME;
2985 			}
2986 
2987 			i915_vma_put(rq->batch);
2988 			i915_request_put(rq);
2989 			rq = n;
2990 		}
2991 
2992 		if (igt_live_test_end(&t))
2993 			err = -EIO;
2994 		if (err)
2995 			return err;
2996 	}
2997 
2998 	return 0;
2999 }
3000 
/*
 * create_gpr_user - build a batch that increments and reports each GPR
 * @engine: engine whose GPR registers are addressed (via CS_GPR())
 * @result: vma that receives the stored values and whose first dword is
 *          polled by the semaphores; the batch is bound into result->vm
 * @offset: byte offset into @result at which this batch stores its values
 *
 * The batch loads 1 into GPR(0) and then, for each i in 1..NUM_GPR-1, adds
 * GPR(0) to GPR(i), stores the first dword of GPR(i) to
 * @result + @offset + i * 4 and waits on a semaphore at the start of
 * @result with i as the semaphore data.
 *
 * Returns the batch vma, still pinned with PIN_USER, or an ERR_PTR().
 */
static struct i915_vma *
create_gpr_user(struct intel_engine_cs *engine,
		struct i915_vma *result,
		unsigned int offset)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;
	int i;

	obj = i915_gem_object_create_internal(engine->i915, 4096);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, result->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(vma);
		return ERR_CAST(cs);
	}

	/* All GPR are clear for new contexts. We use GPR(0) as a constant */
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = CS_GPR(engine, 0);
	*cs++ = 1;

	for (i = 1; i < NUM_GPR; i++) {
		u64 addr;

		/*
		 * Perform: GPR[i]++
		 *
		 * As we read and write into the context saved GPR[i], if
		 * we restart this batch buffer from an earlier point, we
		 * will repeat the increment and store a value > 1.
		 */
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);

		/*
		 * Report GPR[i]: each GPR spans two dwords of register
		 * space, hence the 2 * i, while the result slots are one
		 * dword apart.
		 */
		addr = result->node.start + offset + i * sizeof(*cs);
		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
		*cs++ = CS_GPR(engine, 2 * i);
		*cs++ = lower_32_bits(addr);
		*cs++ = upper_32_bits(addr);

		/*
		 * Semaphore on the first dword of result with data i;
		 * presumably stalls until that dword is >= i - TODO confirm
		 * the operand order of SAD_GTE_SDD.
		 */
		*cs++ = MI_SEMAPHORE_WAIT |
			MI_SEMAPHORE_POLL |
			MI_SEMAPHORE_SAD_GTE_SDD;
		*cs++ = i;
		*cs++ = lower_32_bits(result->node.start);
		*cs++ = upper_32_bits(result->node.start);
	}

	*cs++ = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	return vma;
}
3075 
/*
 * Allocate an internal object of sz bytes and pin it into the GT's global
 * GTT.
 *
 * Returns the pinned vma, or an ERR_PTR() if creating the object, looking
 * up the vma or pinning it fails.
 */
static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
{
	struct drm_i915_gem_object *bo;
	struct i915_vma *ggtt_vma;
	int ret;

	bo = i915_gem_object_create_internal(gt->i915, sz);
	if (IS_ERR(bo))
		return ERR_CAST(bo);

	ggtt_vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
	if (IS_ERR(ggtt_vma)) {
		i915_gem_object_put(bo);
		return ggtt_vma;
	}

	ret = i915_ggtt_pin(ggtt_vma, NULL, 0, 0);
	if (!ret)
		return ggtt_vma;

	/* Pinning failed: drop the vma and hand back the error */
	i915_vma_put(ggtt_vma);
	return ERR_PTR(ret);
}
3100 
/*
 * create_gpr_client - submit one GPR-incrementing batch in a new context
 * @engine: engine to run on
 * @global: vma whose backing object holds the results; it is bound a second
 *          time into the new context's address space
 * @offset: byte offset into the results for this client, passed on to
 *          create_gpr_user()
 *
 * Returns the submitted request with an extra reference held for the
 * caller, or an ERR_PTR(). The out_* labels are executed on the success
 * path too: the context, the batch reference and the pin on the results vma
 * are all released before returning.
 */
static struct i915_request *
create_gpr_client(struct intel_engine_cs *engine,
		  struct i915_vma *global,
		  unsigned int offset)
{
	struct i915_vma *batch, *vma;
	struct intel_context *ce;
	struct i915_request *rq;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	/* View of the shared results object in this context's vm */
	vma = i915_vma_instance(global->obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_ce;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_ce;

	batch = create_gpr_user(engine, vma, offset);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_vma;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);

	/* Each later step is skipped once an earlier one has failed */
	i915_vma_lock(batch);
	if (!err)
		err = i915_request_await_object(rq, batch->obj, false);
	if (!err)
		err = i915_vma_move_to_active(batch, rq, 0);
	if (!err)
		err = rq->engine->emit_bb_start(rq,
						batch->node.start,
						PAGE_SIZE, 0);
	i915_vma_unlock(batch);
	i915_vma_unpin(batch);

	/* Only take the caller's reference if the request is returned */
	if (!err)
		i915_request_get(rq);
	i915_request_add(rq);

out_batch:
	i915_vma_put(batch);
out_vma:
	i915_vma_unpin(vma);
out_ce:
	intel_context_put(ce);
	return err ? ERR_PTR(err) : rq;
}
3167 
preempt_user(struct intel_engine_cs * engine,struct i915_vma * global,int id)3168 static int preempt_user(struct intel_engine_cs *engine,
3169 			struct i915_vma *global,
3170 			int id)
3171 {
3172 	struct i915_sched_attr attr = {
3173 		.priority = I915_PRIORITY_MAX
3174 	};
3175 	struct i915_request *rq;
3176 	int err = 0;
3177 	u32 *cs;
3178 
3179 	rq = intel_engine_create_kernel_request(engine);
3180 	if (IS_ERR(rq))
3181 		return PTR_ERR(rq);
3182 
3183 	cs = intel_ring_begin(rq, 4);
3184 	if (IS_ERR(cs)) {
3185 		i915_request_add(rq);
3186 		return PTR_ERR(cs);
3187 	}
3188 
3189 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3190 	*cs++ = i915_ggtt_offset(global);
3191 	*cs++ = 0;
3192 	*cs++ = id;
3193 
3194 	intel_ring_advance(rq, cs);
3195 
3196 	i915_request_get(rq);
3197 	i915_request_add(rq);
3198 
3199 	engine->schedule(rq, &attr);
3200 
3201 	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3202 		err = -ETIME;
3203 	i915_request_put(rq);
3204 
3205 	return err;
3206 }
3207 
live_preempt_user(void * arg)3208 static int live_preempt_user(void *arg)
3209 {
3210 	struct intel_gt *gt = arg;
3211 	struct intel_engine_cs *engine;
3212 	struct i915_vma *global;
3213 	enum intel_engine_id id;
3214 	u32 *result;
3215 	int err = 0;
3216 
3217 	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3218 		return 0;
3219 
3220 	/*
3221 	 * In our other tests, we look at preemption in carefully
3222 	 * controlled conditions in the ringbuffer. Since most of the
3223 	 * time is spent in user batches, most of our preemptions naturally
3224 	 * occur there. We want to verify that when we preempt inside a batch
3225 	 * we continue on from the current instruction and do not roll back
3226 	 * to the start, or another earlier arbitration point.
3227 	 *
3228 	 * To verify this, we create a batch which is a mixture of
3229 	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3230 	 * a few preempting contexts thrown into the mix, we look for any
3231 	 * repeated instructions (which show up as incorrect values).
3232 	 */
3233 
3234 	global = create_global(gt, 4096);
3235 	if (IS_ERR(global))
3236 		return PTR_ERR(global);
3237 
3238 	result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3239 	if (IS_ERR(result)) {
3240 		i915_vma_unpin_and_release(&global, 0);
3241 		return PTR_ERR(result);
3242 	}
3243 
3244 	for_each_engine(engine, gt, id) {
3245 		struct i915_request *client[3] = {};
3246 		struct igt_live_test t;
3247 		int i;
3248 
3249 		if (!intel_engine_has_preemption(engine))
3250 			continue;
3251 
3252 		if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3253 			continue; /* we need per-context GPR */
3254 
3255 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3256 			err = -EIO;
3257 			break;
3258 		}
3259 
3260 		memset(result, 0, 4096);
3261 
3262 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3263 			struct i915_request *rq;
3264 
3265 			rq = create_gpr_client(engine, global,
3266 					       NUM_GPR * i * sizeof(u32));
3267 			if (IS_ERR(rq))
3268 				goto end_test;
3269 
3270 			client[i] = rq;
3271 		}
3272 
3273 		/* Continuously preempt the set of 3 running contexts */
3274 		for (i = 1; i <= NUM_GPR; i++) {
3275 			err = preempt_user(engine, global, i);
3276 			if (err)
3277 				goto end_test;
3278 		}
3279 
3280 		if (READ_ONCE(result[0]) != NUM_GPR) {
3281 			pr_err("%s: Failed to release semaphore\n",
3282 			       engine->name);
3283 			err = -EIO;
3284 			goto end_test;
3285 		}
3286 
3287 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3288 			int gpr;
3289 
3290 			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3291 				err = -ETIME;
3292 				goto end_test;
3293 			}
3294 
3295 			for (gpr = 1; gpr < NUM_GPR; gpr++) {
3296 				if (result[NUM_GPR * i + gpr] != 1) {
3297 					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3298 					       engine->name,
3299 					       i, gpr, result[NUM_GPR * i + gpr]);
3300 					err = -EINVAL;
3301 					goto end_test;
3302 				}
3303 			}
3304 		}
3305 
3306 end_test:
3307 		for (i = 0; i < ARRAY_SIZE(client); i++) {
3308 			if (!client[i])
3309 				break;
3310 
3311 			i915_request_put(client[i]);
3312 		}
3313 
3314 		/* Flush the semaphores on error */
3315 		smp_store_mb(result[0], -1);
3316 		if (igt_live_test_end(&t))
3317 			err = -EIO;
3318 		if (err)
3319 			break;
3320 	}
3321 
3322 	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3323 	return err;
3324 }
3325 
/*
 * Selftest entry point; arg is the struct intel_gt under test.
 *
 * On each engine with preemption, start a low priority spinner without
 * arbitration points, then submit a high priority request while the
 * engine's preempt timeout is temporarily set to 1ms and check that the
 * high priority request completes within HZ / 10.
 *
 * Returns 0 on success or when the test is skipped, -ENOMEM if the spinner
 * or a context cannot be created, -EIO if the spinner does not start,
 * -ETIME if the high priority request does not complete (the GT is wedged
 * in both of those cases), or the PTR_ERR() of a failed request.
 */
static int live_preempt_timeout(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gem_context *ctx_hi, *ctx_lo;
	struct igt_spinner spin_lo;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENOMEM;

	/*
	 * Check that we force preemption to occur by cancelling the previous
	 * context if it refuses to yield the GPU.
	 */
	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
		return 0;

	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
		return 0;

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin_lo, gt))
		return -ENOMEM;

	/* err is still -ENOMEM for the two context allocations below */
	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		goto err_spin_lo;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	for_each_engine(engine, gt, id) {
		unsigned long saved_timeout;
		struct i915_request *rq;

		if (!intel_engine_has_preemption(engine))
			continue;

		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
					    MI_NOOP); /* preemption disabled */
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		i915_request_add(rq);
		if (!igt_wait_for_spinner(&spin_lo, rq)) {
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto err_ctx_lo;
		}

		rq = igt_request_alloc(ctx_hi, engine);
		if (IS_ERR(rq)) {
			igt_spinner_end(&spin_lo);
			err = PTR_ERR(rq);
			goto err_ctx_lo;
		}

		/* Flush the previous CS ack before changing timeouts */
		while (READ_ONCE(engine->execlists.pending[0]))
			cpu_relax();

		/*
		 * The shortened timeout is only in place while the high
		 * priority request is submitted; it is restored right after
		 * the submission has been flushed.
		 */
		saved_timeout = engine->props.preempt_timeout_ms;
		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */

		i915_request_get(rq);
		i915_request_add(rq);

		intel_engine_flush_submission(engine);
		engine->props.preempt_timeout_ms = saved_timeout;

		/* The spinner is still running while we wait for rq */
		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
			intel_gt_set_wedged(gt);
			i915_request_put(rq);
			err = -ETIME;
			goto err_ctx_lo;
		}

		igt_spinner_end(&spin_lo);
		i915_request_put(rq);
	}

	err = 0;
err_ctx_lo:
	kernel_context_close(ctx_lo);
err_ctx_hi:
	kernel_context_close(ctx_hi);
err_spin_lo:
	igt_spinner_fini(&spin_lo);
	return err;
}
3424 
/*
 * Draw a value from rnd scaled to the span (max - min) and offset by min.
 * NOTE(review): presumably yields a value in [min, max) - confirm against
 * i915_prandom_u32_max_state().
 */
static int random_range(struct rnd_state *rnd, int min, int max)
{
	const int span = max - min;

	return min + i915_prandom_u32_max_state(span, rnd);
}
3429 
random_priority(struct rnd_state * rnd)3430 static int random_priority(struct rnd_state *rnd)
3431 {
3432 	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3433 }
3434 
/* State shared by the preemption smoke tests and their submission threads */
struct preempt_smoke {
	/* GT whose engines are iterated by smoke_crescendo() */
	struct intel_gt *gt;
	/* Array of contexts that smoke_context() picks from at random */
	struct i915_gem_context **contexts;
	/* Engine that smoke_submit() creates its requests on */
	struct intel_engine_cs *engine;
	/* Batch object handed to smoke_submit(); NULL for no batch */
	struct drm_i915_gem_object *batch;
	/* Upper bound for the random index into contexts */
	unsigned int ncontext;
	/* Random state used when choosing a context */
	struct rnd_state prng;
	/* Number of submissions, written by smoke_crescendo_thread() */
	unsigned long count;
};
3444 
smoke_context(struct preempt_smoke * smoke)3445 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3446 {
3447 	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3448 							  &smoke->prng)];
3449 }
3450 
/*
 * Submit one request for @ctx on smoke->engine at priority @prio.
 *
 * If @batch is non-NULL it is looked up in the context's address space,
 * pinned for the duration of this call and started from the request via
 * the engine's emit_bb_start(). With a NULL @batch the request is
 * submitted without any batch.
 *
 * Returns 0 on success or a negative error code.
 */
static int smoke_submit(struct preempt_smoke *smoke,
			struct i915_gem_context *ctx, int prio,
			struct drm_i915_gem_object *batch)
{
	struct i915_request *rq;
	struct i915_vma *vma = NULL;
	int err = 0;

	if (batch) {
		struct i915_address_space *vm;

		/* Release the vm reference as soon as the vma is looked up */
		vm = i915_gem_context_get_vm_rcu(ctx);
		vma = i915_vma_instance(batch, vm, NULL);
		i915_vm_put(vm);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		err = i915_vma_pin(vma, 0, 0, PIN_USER);
		if (err)
			return err;
	}

	/* The priority is stored on the context itself, not the request */
	ctx->sched.priority = prio;

	rq = igt_request_alloc(ctx, smoke->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	if (vma) {
		/* Each step only runs if the previous one succeeded */
		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_vma_unlock(vma);
	}

	/* The request is added even if the steps above failed */
	i915_request_add(rq);

unpin:
	if (vma)
		i915_vma_unpin(vma);

	return err;
}
3501 
smoke_crescendo_thread(void * arg)3502 static int smoke_crescendo_thread(void *arg)
3503 {
3504 	struct preempt_smoke *smoke = arg;
3505 	IGT_TIMEOUT(end_time);
3506 	unsigned long count;
3507 
3508 	count = 0;
3509 	do {
3510 		struct i915_gem_context *ctx = smoke_context(smoke);
3511 		int err;
3512 
3513 		err = smoke_submit(smoke,
3514 				   ctx, count % I915_PRIORITY_MAX,
3515 				   smoke->batch);
3516 		if (err)
3517 			return err;
3518 
3519 		count++;
3520 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3521 
3522 	smoke->count = count;
3523 	return 0;
3524 }
3525 
smoke_crescendo(struct preempt_smoke * smoke,unsigned int flags)3526 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3527 #define BATCH BIT(0)
3528 {
3529 	struct task_struct *tsk[I915_NUM_ENGINES] = {};
3530 	struct preempt_smoke arg[I915_NUM_ENGINES];
3531 	struct intel_engine_cs *engine;
3532 	enum intel_engine_id id;
3533 	unsigned long count;
3534 	int err = 0;
3535 
3536 	for_each_engine(engine, smoke->gt, id) {
3537 		arg[id] = *smoke;
3538 		arg[id].engine = engine;
3539 		if (!(flags & BATCH))
3540 			arg[id].batch = NULL;
3541 		arg[id].count = 0;
3542 
3543 		tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
3544 				      "igt/smoke:%d", id);
3545 		if (IS_ERR(tsk[id])) {
3546 			err = PTR_ERR(tsk[id]);
3547 			break;
3548 		}
3549 		get_task_struct(tsk[id]);
3550 	}
3551 
3552 	yield(); /* start all threads before we kthread_stop() */
3553 
3554 	count = 0;
3555 	for_each_engine(engine, smoke->gt, id) {
3556 		int status;
3557 
3558 		if (IS_ERR_OR_NULL(tsk[id]))
3559 			continue;
3560 
3561 		status = kthread_stop(tsk[id]);
3562 		if (status && !err)
3563 			err = status;
3564 
3565 		count += arg[id].count;
3566 
3567 		put_task_struct(tsk[id]);
3568 	}
3569 
3570 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3571 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3572 	return 0;
3573 }
3574 
smoke_random(struct preempt_smoke * smoke,unsigned int flags)3575 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3576 {
3577 	enum intel_engine_id id;
3578 	IGT_TIMEOUT(end_time);
3579 	unsigned long count;
3580 
3581 	count = 0;
3582 	do {
3583 		for_each_engine(smoke->engine, smoke->gt, id) {
3584 			struct i915_gem_context *ctx = smoke_context(smoke);
3585 			int err;
3586 
3587 			err = smoke_submit(smoke,
3588 					   ctx, random_priority(&smoke->prng),
3589 					   flags & BATCH ? smoke->batch : NULL);
3590 			if (err)
3591 				return err;
3592 
3593 			count++;
3594 		}
3595 	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3596 
3597 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3598 		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3599 	return 0;
3600 }
3601 
/*
 * Preemption smoke test entry point (@arg is the struct intel_gt).
 *
 * Builds a one-page batch of MI_ARB_CHECK terminated by
 * MI_BATCH_BUFFER_END, creates 256 kernel contexts and then runs
 * smoke_crescendo() and smoke_random() twice: once without and once with
 * the batch. Returns 0 immediately if the device does not report
 * HAS_LOGICAL_RING_PREEMPTION.
 *
 * Returns 0 on success or a negative error code.
 */
static int live_preempt_smoke(void *arg)
{
	struct preempt_smoke smoke = {
		.gt = arg,
		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
		.ncontext = 256,
	};
	const unsigned int phase[] = { 0, BATCH };
	struct igt_live_test t;
	int err = -ENOMEM;
	u32 *cs;
	int n;

	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
		return 0;

	smoke.contexts = kmalloc_array(smoke.ncontext,
				       sizeof(*smoke.contexts),
				       GFP_KERNEL);
	if (!smoke.contexts)
		return -ENOMEM;

	smoke.batch =
		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
	if (IS_ERR(smoke.batch)) {
		err = PTR_ERR(smoke.batch);
		goto err_free;
	}

	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_batch;
	}
	/* Fill all but the last dword with MI_ARB_CHECK, then end the batch */
	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
		cs[n] = MI_ARB_CHECK;
	cs[n] = MI_BATCH_BUFFER_END;
	i915_gem_object_flush_map(smoke.batch);
	i915_gem_object_unpin_map(smoke.batch);

	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
		err = -EIO;
		goto err_batch;
	}

	for (n = 0; n < smoke.ncontext; n++) {
		smoke.contexts[n] = kernel_context(smoke.gt->i915);
		/* err still holds its initial -ENOMEM on this path */
		if (!smoke.contexts[n])
			goto err_ctx;
	}

	for (n = 0; n < ARRAY_SIZE(phase); n++) {
		err = smoke_crescendo(&smoke, phase[n]);
		if (err)
			goto err_ctx;

		err = smoke_random(&smoke, phase[n]);
		if (err)
			goto err_ctx;
	}

	/* Success falls through into the same teardown as the error paths */
err_ctx:
	if (igt_live_test_end(&t))
		err = -EIO;

	/*
	 * The array is not zeroed on allocation; this loop relies on the
	 * slot of a failed kernel_context() having been assigned NULL, so
	 * that it stops before any uninitialised entry.
	 */
	for (n = 0; n < smoke.ncontext; n++) {
		if (!smoke.contexts[n])
			break;
		kernel_context_close(smoke.contexts[n]);
	}

err_batch:
	i915_gem_object_put(smoke.batch);
err_free:
	kfree(smoke.contexts);

	return err;
}
3680 
/*
 * Measure request latency through @nctx virtual engines built over the
 * @nsibling engines in @siblings.
 *
 * For each prime number up to 8192 (or until the selftest timeout),
 * that many requests are created on every virtual context and the last
 * request of each context is waited upon. With CHAIN set in @flags all
 * requests for one context are queued before moving to the next context;
 * otherwise the contexts are interleaved request by request. No commands
 * are emitted into the requests by this function.
 *
 * Returns 0 on success or a negative error code.
 */
static int nop_virtual_engine(struct intel_gt *gt,
			      struct intel_engine_cs **siblings,
			      unsigned int nsibling,
			      unsigned int nctx,
			      unsigned int flags)
#define CHAIN BIT(0)
{
	IGT_TIMEOUT(end_time);
	struct i915_request *request[16] = {};
	struct intel_context *ve[16];
	unsigned long n, prime, nc;
	struct igt_live_test t;
	ktime_t times[2] = {};
	int err;

	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));

	for (n = 0; n < nctx; n++) {
		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
		if (IS_ERR(ve[n])) {
			err = PTR_ERR(ve[n]);
			/* Only clean up the contexts fully set up so far */
			nctx = n;
			goto out;
		}

		err = intel_context_pin(ve[n]);
		if (err) {
			intel_context_put(ve[n]);
			nctx = n;
			goto out;
		}
	}

	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
	if (err)
		goto out;

	for_each_prime_number_from(prime, 1, 8192) {
		times[1] = ktime_get_raw();

		if (flags & CHAIN) {
			/* Queue the whole run on one context at a time */
			for (nc = 0; nc < nctx; nc++) {
				for (n = 0; n < prime; n++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					/* Track only the newest request per context */
					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		} else {
			/* Interleave: one request per context per pass */
			for (n = 0; n < prime; n++) {
				for (nc = 0; nc < nctx; nc++) {
					struct i915_request *rq;

					rq = i915_request_create(ve[nc]);
					if (IS_ERR(rq)) {
						err = PTR_ERR(rq);
						goto out;
					}

					if (request[nc])
						i915_request_put(request[nc]);
					request[nc] = i915_request_get(rq);
					i915_request_add(rq);
				}
			}
		}

		for (nc = 0; nc < nctx; nc++) {
			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
				pr_err("%s(%s): wait for %llx:%lld timed out\n",
				       __func__, ve[0]->engine->name,
				       request[nc]->fence.context,
				       request[nc]->fence.seqno);

				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
					  __func__, ve[0]->engine->name,
					  request[nc]->fence.context,
					  request[nc]->fence.seqno);
				GEM_TRACE_DUMP();
				/*
				 * err is not set here; the GT is wedged and
				 * only the wait loop is left.
				 */
				intel_gt_set_wedged(gt);
				break;
			}
		}

		/* times[0] keeps the single-request latency for the report */
		times[1] = ktime_sub(ktime_get_raw(), times[1]);
		if (prime == 1)
			times[0] = times[1];

		for (nc = 0; nc < nctx; nc++) {
			i915_request_put(request[nc]);
			request[nc] = NULL;
		}

		if (__igt_timeout(end_time, NULL))
			break;
	}

	err = igt_live_test_end(&t);
	if (err)
		goto out;

	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
		prime, div64_u64(ktime_to_ns(times[1]), prime));

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (nc = 0; nc < nctx; nc++) {
		/*
		 * request[nc] may be NULL here; presumably
		 * i915_request_put() tolerates that - TODO confirm.
		 */
		i915_request_put(request[nc]);
		intel_context_unpin(ve[nc]);
		intel_context_put(ve[nc]);
	}
	return err;
}
3806 
3807 static unsigned int
__select_siblings(struct intel_gt * gt,unsigned int class,struct intel_engine_cs ** siblings,bool (* filter)(const struct intel_engine_cs *))3808 __select_siblings(struct intel_gt *gt,
3809 		  unsigned int class,
3810 		  struct intel_engine_cs **siblings,
3811 		  bool (*filter)(const struct intel_engine_cs *))
3812 {
3813 	unsigned int n = 0;
3814 	unsigned int inst;
3815 
3816 	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3817 		if (!gt->engine_class[class][inst])
3818 			continue;
3819 
3820 		if (filter && !filter(gt->engine_class[class][inst]))
3821 			continue;
3822 
3823 		siblings[n++] = gt->engine_class[class][inst];
3824 	}
3825 
3826 	return n;
3827 }
3828 
3829 static unsigned int
select_siblings(struct intel_gt * gt,unsigned int class,struct intel_engine_cs ** siblings)3830 select_siblings(struct intel_gt *gt,
3831 		unsigned int class,
3832 		struct intel_engine_cs **siblings)
3833 {
3834 	return __select_siblings(gt, class, siblings, NULL);
3835 }
3836 
live_virtual_engine(void * arg)3837 static int live_virtual_engine(void *arg)
3838 {
3839 	struct intel_gt *gt = arg;
3840 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3841 	struct intel_engine_cs *engine;
3842 	enum intel_engine_id id;
3843 	unsigned int class;
3844 	int err;
3845 
3846 	if (intel_uc_uses_guc_submission(&gt->uc))
3847 		return 0;
3848 
3849 	for_each_engine(engine, gt, id) {
3850 		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3851 		if (err) {
3852 			pr_err("Failed to wrap engine %s: err=%d\n",
3853 			       engine->name, err);
3854 			return err;
3855 		}
3856 	}
3857 
3858 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3859 		int nsibling, n;
3860 
3861 		nsibling = select_siblings(gt, class, siblings);
3862 		if (nsibling < 2)
3863 			continue;
3864 
3865 		for (n = 1; n <= nsibling + 1; n++) {
3866 			err = nop_virtual_engine(gt, siblings, nsibling,
3867 						 n, 0);
3868 			if (err)
3869 				return err;
3870 		}
3871 
3872 		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3873 		if (err)
3874 			return err;
3875 	}
3876 
3877 	return 0;
3878 }
3879 
/*
 * Submit one request per sibling on a single virtual engine, each
 * restricted by its execution_mask to a specific sibling (assigned in
 * reverse order), and verify that every request ran on the engine it was
 * restricted to.
 *
 * Returns 0 on success or a negative error code.
 */
static int mask_virtual_engine(struct intel_gt *gt,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling)
{
	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
	struct intel_context *ve;
	struct igt_live_test t;
	unsigned int n;
	int err;

	/*
	 * Check that by setting the execution mask on a request, we can
	 * restrict it to our desired engine within the virtual engine.
	 */

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_close;
	}

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

	for (n = 0; n < nsibling; n++) {
		request[n] = i915_request_create(ve);
		if (IS_ERR(request[n])) {
			err = PTR_ERR(request[n]);
			/* Limit the cleanup loop to the requests we hold */
			nsibling = n;
			goto out;
		}

		/* Reverse order as it's more likely to be unnatural */
		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;

		i915_request_get(request[n]);
		i915_request_add(request[n]);
	}

	for (n = 0; n < nsibling; n++) {
		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
			pr_err("%s(%s): wait for %llx:%lld timed out\n",
			       __func__, ve->engine->name,
			       request[n]->fence.context,
			       request[n]->fence.seqno);

			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
				  __func__, ve->engine->name,
				  request[n]->fence.context,
				  request[n]->fence.seqno);
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
			goto out;
		}

		/* Must match the sibling chosen via execution_mask above */
		if (request[n]->engine != siblings[nsibling - n - 1]) {
			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
			       request[n]->engine->name,
			       siblings[nsibling - n - 1]->name);
			err = -EINVAL;
			goto out;
		}
	}

	err = igt_live_test_end(&t);
out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < nsibling; n++)
		i915_request_put(request[n]);

out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
out_close:
	return err;
}
3965 
live_virtual_mask(void * arg)3966 static int live_virtual_mask(void *arg)
3967 {
3968 	struct intel_gt *gt = arg;
3969 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3970 	unsigned int class;
3971 	int err;
3972 
3973 	if (intel_uc_uses_guc_submission(&gt->uc))
3974 		return 0;
3975 
3976 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3977 		unsigned int nsibling;
3978 
3979 		nsibling = select_siblings(gt, class, siblings);
3980 		if (nsibling < 2)
3981 			continue;
3982 
3983 		err = mask_virtual_engine(gt, siblings, nsibling);
3984 		if (err)
3985 			return err;
3986 	}
3987 
3988 	return 0;
3989 }
3990 
/*
 * Occupy every sibling with a spinner submitted on an ordinary context,
 * then submit a request on a virtual engine spanning the siblings and
 * expect it to complete within slice_timeout(siblings[0]).
 *
 * Returns 0 on success or a negative error code.
 */
static int slicein_virtual_engine(struct intel_gt *gt,
				  struct intel_engine_cs **siblings,
				  unsigned int nsibling)
{
	const long timeout = slice_timeout(siblings[0]);
	struct intel_context *ce;
	struct i915_request *rq;
	struct igt_spinner spin;
	unsigned int n;
	int err = 0;

	/*
	 * Virtual requests must take part in timeslicing on the target engines.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* One spinner request per sibling, all built from the same spinner */
	for (n = 0; n < nsibling; n++) {
		ce = intel_context_create(siblings[n]);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		i915_request_add(rq);
	}

	ce = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out;
	}

	rq = intel_context_create_request(ce);
	intel_context_put(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	/* Hold a reference so the request can be inspected after the wait */
	i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, timeout) < 0) {
		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
			      __func__, rq->engine->name);
		GEM_TRACE_DUMP();
		intel_gt_set_wedged(gt);
		err = -EIO;
	}
	i915_request_put(rq);

out:
	/* Stop the spinners before flushing, on success and failure alike */
	igt_spinner_end(&spin);
	if (igt_flush_test(gt->i915))
		err = -EIO;
	igt_spinner_fini(&spin);
	return err;
}
4057 
/*
 * Submit one spinner per sibling, each on its own virtual engine
 * spanning all siblings, then submit an ordinary request directly on
 * each sibling and expect each to complete within
 * slice_timeout(siblings[0]).
 *
 * Returns 0 on success or a negative error code.
 */
static int sliceout_virtual_engine(struct intel_gt *gt,
				   struct intel_engine_cs **siblings,
				   unsigned int nsibling)
{
	const long timeout = slice_timeout(siblings[0]);
	struct intel_context *ce;
	struct i915_request *rq;
	struct igt_spinner spin;
	unsigned int n;
	int err = 0;

	/*
	 * Virtual requests must allow others a fair timeslice.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* XXX We do not handle oversubscription and fairness with normal rq */
	for (n = 0; n < nsibling; n++) {
		ce = intel_execlists_create_virtual(siblings, nsibling);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		i915_request_add(rq);
	}

	/* Stop at the first sibling whose request fails to complete in time */
	for (n = 0; !err && n < nsibling; n++) {
		ce = intel_context_create(siblings[n]);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		rq = intel_context_create_request(ce);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		i915_request_get(rq);
		i915_request_add(rq);
		if (i915_request_wait(rq, 0, timeout) < 0) {
			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
				      __func__, siblings[n]->name);
			GEM_TRACE_DUMP();
			intel_gt_set_wedged(gt);
			err = -EIO;
		}
		i915_request_put(rq);
	}

out:
	/* Stop the spinners before flushing, on success and failure alike */
	igt_spinner_end(&spin);
	if (igt_flush_test(gt->i915))
		err = -EIO;
	igt_spinner_fini(&spin);
	return err;
}
4127 
live_virtual_slice(void * arg)4128 static int live_virtual_slice(void *arg)
4129 {
4130 	struct intel_gt *gt = arg;
4131 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4132 	unsigned int class;
4133 	int err;
4134 
4135 	if (intel_uc_uses_guc_submission(&gt->uc))
4136 		return 0;
4137 
4138 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4139 		unsigned int nsibling;
4140 
4141 		nsibling = __select_siblings(gt, class, siblings,
4142 					     intel_engine_has_timeslices);
4143 		if (nsibling < 2)
4144 			continue;
4145 
4146 		err = slicein_virtual_engine(gt, siblings, nsibling);
4147 		if (err)
4148 			return err;
4149 
4150 		err = sliceout_virtual_engine(gt, siblings, nsibling);
4151 		if (err)
4152 			return err;
4153 	}
4154 
4155 	return 0;
4156 }
4157 
/*
 * Submit NUM_GPR_DW requests on one virtual engine, alternating the
 * sibling each request is restricted to. Request n stores GPR dword n to
 * the scratch page and then loads the value n + 1 into the next GPR
 * dword, so each stored value is the one written by the previous request
 * on a different sibling. Finally the scratch page is checked to hold
 * scratch[n] == n.
 *
 * Returns 0 on success or a negative error code.
 */
static int preserved_virtual_engine(struct intel_gt *gt,
				    struct intel_engine_cs **siblings,
				    unsigned int nsibling)
{
	struct i915_request *last = NULL;
	struct intel_context *ve;
	struct i915_vma *scratch;
	struct igt_live_test t;
	unsigned int n;
	int err = 0;
	u32 *cs;

	scratch = create_scratch(siblings[0]->gt);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	err = i915_vma_sync(scratch);
	if (err)
		goto out_scratch;

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_scratch;
	}

	err = intel_context_pin(ve);
	if (err)
		goto out_put;

	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
	if (err)
		goto out_unpin;

	for (n = 0; n < NUM_GPR_DW; n++) {
		/* Rotate through the siblings, one per request */
		struct intel_engine_cs *engine = siblings[n % nsibling];
		struct i915_request *rq;

		rq = i915_request_create(ve);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_end;
		}

		/* Only keep a reference to the most recent request */
		i915_request_put(last);
		last = i915_request_get(rq);

		/* 4 dwords for the store, 3 for the load, 1 of padding */
		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto out_end;
		}

		/* Save GPR dword n into scratch[n] */
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(engine, n);
		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
		*cs++ = 0;

		/* Seed the next GPR dword with n + 1 for the next request */
		*cs++ = MI_LOAD_REGISTER_IMM(1);
		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
		*cs++ = n + 1;

		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* Restrict this request to run on a particular engine */
		rq->execution_mask = engine->mask;
		i915_request_add(rq);
	}

	if (i915_request_wait(last, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto out_end;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_end;
	}

	/*
	 * scratch[0] is expected to be 0 - presumably the GPR starts out
	 * zeroed in a new context; TODO confirm.
	 */
	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n] != n) {
			pr_err("Incorrect value[%d] found for GPR[%d]\n",
			       cs[n], n);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

out_end:
	if (igt_live_test_end(&t))
		err = -EIO;
	i915_request_put(last);
out_unpin:
	intel_context_unpin(ve);
out_put:
	intel_context_put(ve);
out_scratch:
	i915_vma_unpin_and_release(&scratch, 0);
	return err;
}
4263 
live_virtual_preserved(void * arg)4264 static int live_virtual_preserved(void *arg)
4265 {
4266 	struct intel_gt *gt = arg;
4267 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4268 	unsigned int class;
4269 
4270 	/*
4271 	 * Check that the context image retains non-privileged (user) registers
4272 	 * from one engine to the next. For this we check that the CS_GPR
4273 	 * are preserved.
4274 	 */
4275 
4276 	if (intel_uc_uses_guc_submission(&gt->uc))
4277 		return 0;
4278 
4279 	/* As we use CS_GPR we cannot run before they existed on all engines. */
4280 	if (INTEL_GEN(gt->i915) < 9)
4281 		return 0;
4282 
4283 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4284 		int nsibling, err;
4285 
4286 		nsibling = select_siblings(gt, class, siblings);
4287 		if (nsibling < 2)
4288 			continue;
4289 
4290 		err = preserved_virtual_engine(gt, siblings, nsibling);
4291 		if (err)
4292 			return err;
4293 	}
4294 
4295 	return 0;
4296 }
4297 
/*
 * For every engine on @gt that is not of @class, submit a spinner on it
 * as the master request and then, for each sibling, a request on a fresh
 * virtual engine that is bonded to that sibling and awaits execution of
 * the master. Once the master has finished, every bonded request must
 * have run on its designated sibling.
 *
 * With BOND_SCHEDULE set in @flags the master is held back behind an
 * on-stack fence until all bonded requests have been queued; otherwise
 * the master spinner is confirmed running before they are created.
 *
 * Returns 0 on success or a negative error code.
 */
static int bond_virtual_engine(struct intel_gt *gt,
			       unsigned int class,
			       struct intel_engine_cs **siblings,
			       unsigned int nsibling,
			       unsigned int flags)
#define BOND_SCHEDULE BIT(0)
{
	struct intel_engine_cs *master;
	struct i915_request *rq[16];
	enum intel_engine_id id;
	struct igt_spinner spin;
	unsigned long n;
	int err;

	/*
	 * A set of bonded requests is intended to be run concurrently
	 * across a number of engines. We use one request per-engine
	 * and a magic fence to schedule each of the bonded requests
	 * at the same time. A consequence of our current scheduler is that
	 * we only move requests to the HW ready queue when the request
	 * becomes ready, that is when all of its prerequisite fences have
	 * been signaled. As one of those fences is the master submit fence,
	 * there is a delay on all secondary fences as the HW may be
	 * currently busy. Equally, as all the requests are independent,
	 * they may have other fences that delay individual request
	 * submission to HW. Ergo, we do not guarantee that all requests are
	 * immediately submitted to HW at the same time, just that if the
	 * rules are abided by, they are ready at the same time as the
	 * first is submitted. Userspace can embed semaphores in its batch
	 * to ensure parallel execution of its phases as it requires.
	 * Though naturally it gets requested that perhaps the scheduler should
	 * take care of parallel execution, even across preemption events on
	 * different HW. (The proper answer is of course "lalalala".)
	 *
	 * With the submit-fence, we have identified three possible phases
	 * of synchronisation depending on the master fence: queued (not
	 * ready), executing, and signaled. The first two are quite simple
	 * and checked below. However, the signaled master fence handling is
	 * contentious. Currently we do not distinguish between a signaled
	 * fence and an expired fence, as once signaled it does not convey
	 * any information about the previous execution. It may even be freed
	 * and hence checking later it may not exist at all. Ergo we currently
	 * do not apply the bonding constraint for an already signaled fence,
	 * as our expectation is that it should not constrain the secondaries
	 * and is outside of the scope of the bonded request API (i.e. all
	 * userspace requests are meant to be running in parallel). As
	 * it imposes no constraint, and is effectively a no-op, we do not
	 * check below as normal execution flows are checked extensively above.
	 *
	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
	 */

	/*
	 * rq[] holds the master plus one request per sibling, and the put
	 * loops below need at least one trailing error-pointer slot to stop.
	 */
	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	err = 0;
	/* Error pointer so the cleanup at out: finds nothing to release */
	rq[0] = ERR_PTR(-ENOMEM);
	for_each_engine(master, gt, id) {
		struct i915_sw_fence fence = {};
		struct intel_context *ce;

		/* The master must be of a different class to the siblings */
		if (master->class == class)
			continue;

		ce = intel_context_create(master);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		/* Mark every slot unused; valid requests overwrite these */
		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));

		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
		intel_context_put(ce);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto out;
		}
		i915_request_get(rq[0]);

		if (flags & BOND_SCHEDULE) {
			/* Hold the master's submission until we release fence */
			onstack_fence_init(&fence);
			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
							       &fence,
							       GFP_KERNEL);
		}

		i915_request_add(rq[0]);
		/*
		 * NOTE(review): with BOND_SCHEDULE this exit skips
		 * onstack_fence_fini(), unlike the later error paths -
		 * confirm whether that is intended.
		 */
		if (err < 0)
			goto out;

		if (!(flags & BOND_SCHEDULE) &&
		    !igt_wait_for_spinner(&spin, rq[0])) {
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			struct intel_context *ve;

			ve = intel_execlists_create_virtual(siblings, nsibling);
			if (IS_ERR(ve)) {
				err = PTR_ERR(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			/* When bonded to this master, use siblings[n] */
			err = intel_virtual_engine_attach_bond(ve->engine,
							       master,
							       siblings[n]);
			if (err) {
				intel_context_put(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			/*
			 * ve is still used after this put; presumably the
			 * pin and then the request keep it alive - TODO
			 * confirm.
			 */
			err = intel_context_pin(ve);
			intel_context_put(ve);
			if (err) {
				onstack_fence_fini(&fence);
				goto out;
			}

			rq[n + 1] = i915_request_create(ve);
			intel_context_unpin(ve);
			if (IS_ERR(rq[n + 1])) {
				err = PTR_ERR(rq[n + 1]);
				onstack_fence_fini(&fence);
				goto out;
			}
			i915_request_get(rq[n + 1]);

			err = i915_request_await_execution(rq[n + 1],
							   &rq[0]->fence,
							   ve->engine->bond_execute);
			i915_request_add(rq[n + 1]);
			if (err < 0) {
				onstack_fence_fini(&fence);
				goto out;
			}
		}
		/* Called for both flag settings; fence is zeroed when unused */
		onstack_fence_fini(&fence);
		intel_engine_flush_submission(master);
		igt_spinner_end(&spin);

		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
			pr_err("Master request did not execute (on %s)!\n",
			       rq[0]->engine->name);
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			if (i915_request_wait(rq[n + 1], 0,
					      MAX_SCHEDULE_TIMEOUT) < 0) {
				err = -EIO;
				goto out;
			}

			if (rq[n + 1]->engine != siblings[n]) {
				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
				       siblings[n]->name,
				       rq[n + 1]->engine->name,
				       rq[0]->engine->name);
				err = -EINVAL;
				goto out;
			}
		}

		/* Release this round's requests, up to the first unused slot */
		for (n = 0; !IS_ERR(rq[n]); n++)
			i915_request_put(rq[n]);
		rq[0] = ERR_PTR(-ENOMEM);
	}

out:
	for (n = 0; !IS_ERR(rq[n]); n++)
		i915_request_put(rq[n]);
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);
	return err;
}
4484 
live_virtual_bond(void * arg)4485 static int live_virtual_bond(void *arg)
4486 {
4487 	static const struct phase {
4488 		const char *name;
4489 		unsigned int flags;
4490 	} phases[] = {
4491 		{ "", 0 },
4492 		{ "schedule", BOND_SCHEDULE },
4493 		{ },
4494 	};
4495 	struct intel_gt *gt = arg;
4496 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4497 	unsigned int class;
4498 	int err;
4499 
4500 	if (intel_uc_uses_guc_submission(&gt->uc))
4501 		return 0;
4502 
4503 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4504 		const struct phase *p;
4505 		int nsibling;
4506 
4507 		nsibling = select_siblings(gt, class, siblings);
4508 		if (nsibling < 2)
4509 			continue;
4510 
4511 		for (p = phases; p->name; p++) {
4512 			err = bond_virtual_engine(gt,
4513 						  class, siblings, nsibling,
4514 						  p->flags);
4515 			if (err) {
4516 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4517 				       __func__, p->name, class, nsibling, err);
4518 				return err;
4519 			}
4520 		}
4521 	}
4522 
4523 	return 0;
4524 }
4525 
/*
 * Start a spinner on a virtual engine, then on the physical engine it
 * landed on: unwind it as if preempted, put it on hold, reset the
 * engine, and verify that the held request is not resubmitted until it
 * is explicitly released. Heartbeats are disabled on all siblings for
 * the duration.
 *
 * Returns 0 on success or a negative error code.
 */
static int reset_virtual_engine(struct intel_gt *gt,
				struct intel_engine_cs **siblings,
				unsigned int nsibling)
{
	struct intel_engine_cs *engine;
	struct intel_context *ve;
	struct igt_spinner spin;
	struct i915_request *rq;
	unsigned int n;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_spin;
	}

	for (n = 0; n < nsibling; n++)
		st_engine_heartbeat_disable(siblings[n]);

	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_heartbeat;
	}
	i915_request_add(rq);

	if (!igt_wait_for_spinner(&spin, rq)) {
		intel_gt_set_wedged(gt);
		err = -ETIME;
		goto out_heartbeat;
	}

	/* Once running, the request must sit on a physical engine */
	engine = rq->engine;
	GEM_BUG_ON(engine == ve->engine);

	/* Take ownership of the reset and tasklet */
	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
			     &gt->reset.flags)) {
		intel_gt_set_wedged(gt);
		err = -EBUSY;
		goto out_heartbeat;
	}
	tasklet_disable(&engine->execlists.tasklet);

	/* Run the tasklet body by hand now that it is disabled */
	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

	/* Fake a preemption event; failed of course */
	spin_lock_irq(&engine->active.lock);
	__unwind_incomplete_requests(engine);
	spin_unlock_irq(&engine->active.lock);
	/* After the unwind the request is back on the virtual engine */
	GEM_BUG_ON(rq->engine != ve->engine);

	/* Reset the engine while keeping our active request on hold */
	execlists_hold(engine, rq);
	GEM_BUG_ON(!i915_request_on_hold(rq));

	intel_engine_reset(engine, NULL);
	GEM_BUG_ON(rq->fence.error != -EIO);

	/* Release our grasp on the engine, letting CS flow again */
	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);

	/* Check that we do not resubmit the held request */
	i915_request_get(rq);
	/* A zero return from the wait is treated as "request completed" */
	if (!i915_request_wait(rq, 0, HZ / 5)) {
		pr_err("%s: on hold request completed!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -EIO;
		goto out_rq;
	}
	GEM_BUG_ON(!i915_request_on_hold(rq));

	/* But is resubmitted on release */
	execlists_unhold(engine, rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		pr_err("%s: held request did not complete!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -ETIME;
	}

out_rq:
	i915_request_put(rq);
out_heartbeat:
	for (n = 0; n < nsibling; n++)
		st_engine_heartbeat_enable(siblings[n]);

	intel_context_put(ve);
out_spin:
	igt_spinner_fini(&spin);
	return err;
}
4631 
live_virtual_reset(void * arg)4632 static int live_virtual_reset(void *arg)
4633 {
4634 	struct intel_gt *gt = arg;
4635 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4636 	unsigned int class;
4637 
4638 	/*
4639 	 * Check that we handle a reset event within a virtual engine.
4640 	 * Only the physical engine is reset, but we have to check the flow
4641 	 * of the virtual requests around the reset, and make sure it is not
4642 	 * forgotten.
4643 	 */
4644 
4645 	if (intel_uc_uses_guc_submission(&gt->uc))
4646 		return 0;
4647 
4648 	if (!intel_has_reset_engine(gt))
4649 		return 0;
4650 
4651 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4652 		int nsibling, err;
4653 
4654 		nsibling = select_siblings(gt, class, siblings);
4655 		if (nsibling < 2)
4656 			continue;
4657 
4658 		err = reset_virtual_engine(gt, siblings, nsibling);
4659 		if (err)
4660 			return err;
4661 	}
4662 
4663 	return 0;
4664 }
4665 
intel_execlists_live_selftests(struct drm_i915_private * i915)4666 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4667 {
4668 	static const struct i915_subtest tests[] = {
4669 		SUBTEST(live_sanitycheck),
4670 		SUBTEST(live_unlite_switch),
4671 		SUBTEST(live_unlite_preempt),
4672 		SUBTEST(live_unlite_ring),
4673 		SUBTEST(live_pin_rewind),
4674 		SUBTEST(live_hold_reset),
4675 		SUBTEST(live_error_interrupt),
4676 		SUBTEST(live_timeslice_preempt),
4677 		SUBTEST(live_timeslice_rewind),
4678 		SUBTEST(live_timeslice_queue),
4679 		SUBTEST(live_timeslice_nopreempt),
4680 		SUBTEST(live_busywait_preempt),
4681 		SUBTEST(live_preempt),
4682 		SUBTEST(live_late_preempt),
4683 		SUBTEST(live_nopreempt),
4684 		SUBTEST(live_preempt_cancel),
4685 		SUBTEST(live_suppress_self_preempt),
4686 		SUBTEST(live_chain_preempt),
4687 		SUBTEST(live_preempt_ring),
4688 		SUBTEST(live_preempt_gang),
4689 		SUBTEST(live_preempt_timeout),
4690 		SUBTEST(live_preempt_user),
4691 		SUBTEST(live_preempt_smoke),
4692 		SUBTEST(live_virtual_engine),
4693 		SUBTEST(live_virtual_mask),
4694 		SUBTEST(live_virtual_preserved),
4695 		SUBTEST(live_virtual_slice),
4696 		SUBTEST(live_virtual_bond),
4697 		SUBTEST(live_virtual_reset),
4698 	};
4699 
4700 	if (!HAS_EXECLISTS(i915))
4701 		return 0;
4702 
4703 	if (intel_gt_is_wedged(&i915->gt))
4704 		return 0;
4705 
4706 	return intel_gt_live_subtests(tests, &i915->gt);
4707 }
4708 
emit_semaphore_signal(struct intel_context * ce,void * slot)4709 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4710 {
4711 	const u32 offset =
4712 		i915_ggtt_offset(ce->engine->status_page.vma) +
4713 		offset_in_page(slot);
4714 	struct i915_request *rq;
4715 	u32 *cs;
4716 
4717 	rq = intel_context_create_request(ce);
4718 	if (IS_ERR(rq))
4719 		return PTR_ERR(rq);
4720 
4721 	cs = intel_ring_begin(rq, 4);
4722 	if (IS_ERR(cs)) {
4723 		i915_request_add(rq);
4724 		return PTR_ERR(cs);
4725 	}
4726 
4727 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4728 	*cs++ = offset;
4729 	*cs++ = 0;
4730 	*cs++ = 1;
4731 
4732 	intel_ring_advance(rq, cs);
4733 
4734 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4735 	i915_request_add(rq);
4736 	return 0;
4737 }
4738 
context_flush(struct intel_context * ce,long timeout)4739 static int context_flush(struct intel_context *ce, long timeout)
4740 {
4741 	struct i915_request *rq;
4742 	struct dma_fence *fence;
4743 	int err = 0;
4744 
4745 	rq = intel_engine_create_kernel_request(ce->engine);
4746 	if (IS_ERR(rq))
4747 		return PTR_ERR(rq);
4748 
4749 	fence = i915_active_fence_get(&ce->timeline->last_request);
4750 	if (fence) {
4751 		i915_request_await_dma_fence(rq, fence);
4752 		dma_fence_put(fence);
4753 	}
4754 
4755 	rq = i915_request_get(rq);
4756 	i915_request_add(rq);
4757 	if (i915_request_wait(rq, 0, timeout) < 0)
4758 		err = -ETIME;
4759 	i915_request_put(rq);
4760 
4761 	rmb(); /* We know the request is written, make sure all state is too! */
4762 	return err;
4763 }
4764 
live_lrc_layout(void * arg)4765 static int live_lrc_layout(void *arg)
4766 {
4767 	struct intel_gt *gt = arg;
4768 	struct intel_engine_cs *engine;
4769 	enum intel_engine_id id;
4770 	u32 *lrc;
4771 	int err;
4772 
4773 	/*
4774 	 * Check the registers offsets we use to create the initial reg state
4775 	 * match the layout saved by HW.
4776 	 */
4777 
4778 	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4779 	if (!lrc)
4780 		return -ENOMEM;
4781 
4782 	err = 0;
4783 	for_each_engine(engine, gt, id) {
4784 		u32 *hw;
4785 		int dw;
4786 
4787 		if (!engine->default_state)
4788 			continue;
4789 
4790 		hw = shmem_pin_map(engine->default_state);
4791 		if (IS_ERR(hw)) {
4792 			err = PTR_ERR(hw);
4793 			break;
4794 		}
4795 		hw += LRC_STATE_OFFSET / sizeof(*hw);
4796 
4797 		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4798 					 engine->kernel_context,
4799 					 engine,
4800 					 engine->kernel_context->ring,
4801 					 true);
4802 
4803 		dw = 0;
4804 		do {
4805 			u32 lri = hw[dw];
4806 
4807 			if (lri == 0) {
4808 				dw++;
4809 				continue;
4810 			}
4811 
4812 			if (lrc[dw] == 0) {
4813 				pr_debug("%s: skipped instruction %x at dword %d\n",
4814 					 engine->name, lri, dw);
4815 				dw++;
4816 				continue;
4817 			}
4818 
4819 			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4820 				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4821 				       engine->name, dw, lri);
4822 				err = -EINVAL;
4823 				break;
4824 			}
4825 
4826 			if (lrc[dw] != lri) {
4827 				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4828 				       engine->name, dw, lri, lrc[dw]);
4829 				err = -EINVAL;
4830 				break;
4831 			}
4832 
4833 			lri &= 0x7f;
4834 			lri++;
4835 			dw++;
4836 
4837 			while (lri) {
4838 				if (hw[dw] != lrc[dw]) {
4839 					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4840 					       engine->name, dw, hw[dw], lrc[dw]);
4841 					err = -EINVAL;
4842 					break;
4843 				}
4844 
4845 				/*
4846 				 * Skip over the actual register value as we
4847 				 * expect that to differ.
4848 				 */
4849 				dw += 2;
4850 				lri -= 2;
4851 			}
4852 		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4853 
4854 		if (err) {
4855 			pr_info("%s: HW register image:\n", engine->name);
4856 			igt_hexdump(hw, PAGE_SIZE);
4857 
4858 			pr_info("%s: SW register image:\n", engine->name);
4859 			igt_hexdump(lrc, PAGE_SIZE);
4860 		}
4861 
4862 		shmem_unpin_map(engine->default_state, hw);
4863 		if (err)
4864 			break;
4865 	}
4866 
4867 	kfree(lrc);
4868 	return err;
4869 }
4870 
/*
 * Scan one page worth of dwords starting at @lri for the value @offset.
 * Returns the dword index of the first match, or -1 if there is none.
 */
static int find_offset(const u32 *lri, u32 offset)
{
	const u32 *end = lri + PAGE_SIZE / sizeof(u32);
	const u32 *pos;

	for (pos = lri; pos < end; pos++) {
		if (*pos == offset)
			return pos - lri;
	}

	return -1;
}
4881 
live_lrc_fixed(void * arg)4882 static int live_lrc_fixed(void *arg)
4883 {
4884 	struct intel_gt *gt = arg;
4885 	struct intel_engine_cs *engine;
4886 	enum intel_engine_id id;
4887 	int err = 0;
4888 
4889 	/*
4890 	 * Check the assumed register offsets match the actual locations in
4891 	 * the context image.
4892 	 */
4893 
4894 	for_each_engine(engine, gt, id) {
4895 		const struct {
4896 			u32 reg;
4897 			u32 offset;
4898 			const char *name;
4899 		} tbl[] = {
4900 			{
4901 				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4902 				CTX_RING_START - 1,
4903 				"RING_START"
4904 			},
4905 			{
4906 				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4907 				CTX_RING_CTL - 1,
4908 				"RING_CTL"
4909 			},
4910 			{
4911 				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4912 				CTX_RING_HEAD - 1,
4913 				"RING_HEAD"
4914 			},
4915 			{
4916 				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4917 				CTX_RING_TAIL - 1,
4918 				"RING_TAIL"
4919 			},
4920 			{
4921 				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4922 				lrc_ring_mi_mode(engine),
4923 				"RING_MI_MODE"
4924 			},
4925 			{
4926 				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4927 				CTX_BB_STATE - 1,
4928 				"BB_STATE"
4929 			},
4930 			{
4931 				i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
4932 				lrc_ring_wa_bb_per_ctx(engine),
4933 				"RING_BB_PER_CTX_PTR"
4934 			},
4935 			{
4936 				i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
4937 				lrc_ring_indirect_ptr(engine),
4938 				"RING_INDIRECT_CTX_PTR"
4939 			},
4940 			{
4941 				i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
4942 				lrc_ring_indirect_offset(engine),
4943 				"RING_INDIRECT_CTX_OFFSET"
4944 			},
4945 			{
4946 				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4947 				CTX_TIMESTAMP - 1,
4948 				"RING_CTX_TIMESTAMP"
4949 			},
4950 			{
4951 				i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
4952 				lrc_ring_gpr0(engine),
4953 				"RING_CS_GPR0"
4954 			},
4955 			{
4956 				i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
4957 				lrc_ring_cmd_buf_cctl(engine),
4958 				"RING_CMD_BUF_CCTL"
4959 			},
4960 			{ },
4961 		}, *t;
4962 		u32 *hw;
4963 
4964 		if (!engine->default_state)
4965 			continue;
4966 
4967 		hw = shmem_pin_map(engine->default_state);
4968 		if (IS_ERR(hw)) {
4969 			err = PTR_ERR(hw);
4970 			break;
4971 		}
4972 		hw += LRC_STATE_OFFSET / sizeof(*hw);
4973 
4974 		for (t = tbl; t->name; t++) {
4975 			int dw = find_offset(hw, t->reg);
4976 
4977 			if (dw != t->offset) {
4978 				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4979 				       engine->name,
4980 				       t->name,
4981 				       t->reg,
4982 				       dw,
4983 				       t->offset);
4984 				err = -EINVAL;
4985 			}
4986 		}
4987 
4988 		shmem_unpin_map(engine->default_state, hw);
4989 	}
4990 
4991 	return err;
4992 }
4993 
/*
 * Check RING_START and RING_TAIL as seen by a request running on a fresh
 * context of @engine.
 *
 * The request stores both registers into @scratch; the values read back
 * are compared with the GGTT offset of the context's ring and with
 * ce->ring->tail sampled after the submission has been flushed.
 *
 * Returns 0 on success, -ETIME if the request does not complete within
 * HZ / 5, -EINVAL on a mismatch, or another negative error code.
 */
static int __live_lrc_state(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	/* Index of each sampled register in both @scratch and expected[] */
	enum {
		RING_START_IDX = 0,
		RING_TAIL_IDX,
		MAX_IDX
	};
	u32 expected[MAX_IDX];
	u32 *cs;
	int err;
	int n;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	i915_gem_ww_ctx_init(&ww, false);
retry:
	/* Re-entered from err_put after a successful backoff on -EDEADLK */
	err = i915_gem_object_lock(scratch->obj, &ww);
	if (!err)
		err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto err_put;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	/* Four dwords per stored register */
	cs = intel_ring_begin(rq, 4 * MAX_IDX);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto err_unpin;
	}

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
	*cs++ = 0;

	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
	*cs++ = 0;

	err = i915_request_await_object(rq, scratch->obj, true);
	if (!err)
		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);

	/* The request is always added, even on error; keep a reference for the wait */
	i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		goto err_rq;

	/* The expected tail is sampled only after submission has been flushed */
	intel_engine_flush_submission(engine);
	expected[RING_TAIL_IDX] = ce->ring->tail;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < MAX_IDX; n++) {
		if (cs[n] != expected[n]) {
			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
			       engine->name, n, cs[n], expected[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	i915_request_put(rq);
err_unpin:
	intel_context_unpin(ce);
err_put:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_context_put(ce);
	return err;
}
5095 
live_lrc_state(void * arg)5096 static int live_lrc_state(void *arg)
5097 {
5098 	struct intel_gt *gt = arg;
5099 	struct intel_engine_cs *engine;
5100 	struct i915_vma *scratch;
5101 	enum intel_engine_id id;
5102 	int err = 0;
5103 
5104 	/*
5105 	 * Check the live register state matches what we expect for this
5106 	 * intel_context.
5107 	 */
5108 
5109 	scratch = create_scratch(gt);
5110 	if (IS_ERR(scratch))
5111 		return PTR_ERR(scratch);
5112 
5113 	for_each_engine(engine, gt, id) {
5114 		err = __live_lrc_state(engine, scratch);
5115 		if (err)
5116 			break;
5117 	}
5118 
5119 	if (igt_flush_test(gt->i915))
5120 		err = -EIO;
5121 
5122 	i915_vma_unpin_and_release(&scratch, 0);
5123 	return err;
5124 }
5125 
gpr_make_dirty(struct intel_context * ce)5126 static int gpr_make_dirty(struct intel_context *ce)
5127 {
5128 	struct i915_request *rq;
5129 	u32 *cs;
5130 	int n;
5131 
5132 	rq = intel_context_create_request(ce);
5133 	if (IS_ERR(rq))
5134 		return PTR_ERR(rq);
5135 
5136 	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
5137 	if (IS_ERR(cs)) {
5138 		i915_request_add(rq);
5139 		return PTR_ERR(cs);
5140 	}
5141 
5142 	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
5143 	for (n = 0; n < NUM_GPR_DW; n++) {
5144 		*cs++ = CS_GPR(ce->engine, n);
5145 		*cs++ = STACK_MAGIC;
5146 	}
5147 	*cs++ = MI_NOOP;
5148 
5149 	intel_ring_advance(rq, cs);
5150 
5151 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5152 	i915_request_add(rq);
5153 
5154 	return 0;
5155 }
5156 
5157 static struct i915_request *
__gpr_read(struct intel_context * ce,struct i915_vma * scratch,u32 * slot)5158 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
5159 {
5160 	const u32 offset =
5161 		i915_ggtt_offset(ce->engine->status_page.vma) +
5162 		offset_in_page(slot);
5163 	struct i915_request *rq;
5164 	u32 *cs;
5165 	int err;
5166 	int n;
5167 
5168 	rq = intel_context_create_request(ce);
5169 	if (IS_ERR(rq))
5170 		return rq;
5171 
5172 	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
5173 	if (IS_ERR(cs)) {
5174 		i915_request_add(rq);
5175 		return ERR_CAST(cs);
5176 	}
5177 
5178 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5179 	*cs++ = MI_NOOP;
5180 
5181 	*cs++ = MI_SEMAPHORE_WAIT |
5182 		MI_SEMAPHORE_GLOBAL_GTT |
5183 		MI_SEMAPHORE_POLL |
5184 		MI_SEMAPHORE_SAD_NEQ_SDD;
5185 	*cs++ = 0;
5186 	*cs++ = offset;
5187 	*cs++ = 0;
5188 
5189 	for (n = 0; n < NUM_GPR_DW; n++) {
5190 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5191 		*cs++ = CS_GPR(ce->engine, n);
5192 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
5193 		*cs++ = 0;
5194 	}
5195 
5196 	i915_vma_lock(scratch);
5197 	err = i915_request_await_object(rq, scratch->obj, true);
5198 	if (!err)
5199 		err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
5200 	i915_vma_unlock(scratch);
5201 
5202 	i915_request_get(rq);
5203 	i915_request_add(rq);
5204 	if (err) {
5205 		i915_request_put(rq);
5206 		rq = ERR_PTR(err);
5207 	}
5208 
5209 	return rq;
5210 }
5211 
/*
 * Dirty the GPRs from the kernel context, then run __gpr_read() on a newly
 * created context and check that every dword it stored in @scratch is zero.
 *
 * With @preempt set, the GPRs are dirtied a second time while the reader is
 * waiting and the semaphore is released by a request on the kernel context;
 * otherwise the semaphore is released by a direct CPU write.
 *
 * Returns 0 on success (or when skipped), -ETIME if the reader does not
 * complete within HZ / 5, -EINVAL if a GPR dword is non-zero, or another
 * negative error code.
 */
static int __live_lrc_gpr(struct intel_engine_cs *engine,
			  struct i915_vma *scratch,
			  bool preempt)
{
	/* Semaphore words in the engine's status page, cleared before use */
	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
		return 0; /* GPR only on rcs0 for gen8 */

	err = gpr_make_dirty(engine->kernel_context);
	if (err)
		return err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	rq = __gpr_read(ce, scratch, slot);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_put;
	}

	err = wait_for_submit(engine, rq, HZ / 2);
	if (err)
		goto err_rq;

	if (preempt) {
		err = gpr_make_dirty(engine->kernel_context);
		if (err)
			goto err_rq;

		err = emit_semaphore_signal(engine->kernel_context, slot);
		if (err)
			goto err_rq;
	} else {
		/* Release the semaphore from the CPU */
		slot[0] = 1;
		wmb();
	}

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n]) {
			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
			       engine->name,
			       n / 2, n & 1 ? "udw" : "ldw",
			       cs[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(scratch->obj);

err_rq:
	/* Set the semaphore words to non-zero so a still-waiting reader is released */
	memset32(&slot[0], -1, 4);
	wmb();
	i915_request_put(rq);
err_put:
	intel_context_put(ce);
	return err;
}
5289 
live_lrc_gpr(void * arg)5290 static int live_lrc_gpr(void *arg)
5291 {
5292 	struct intel_gt *gt = arg;
5293 	struct intel_engine_cs *engine;
5294 	struct i915_vma *scratch;
5295 	enum intel_engine_id id;
5296 	int err = 0;
5297 
5298 	/*
5299 	 * Check that GPR registers are cleared in new contexts as we need
5300 	 * to avoid leaking any information from previous contexts.
5301 	 */
5302 
5303 	scratch = create_scratch(gt);
5304 	if (IS_ERR(scratch))
5305 		return PTR_ERR(scratch);
5306 
5307 	for_each_engine(engine, gt, id) {
5308 		st_engine_heartbeat_disable(engine);
5309 
5310 		err = __live_lrc_gpr(engine, scratch, false);
5311 		if (err)
5312 			goto err;
5313 
5314 		err = __live_lrc_gpr(engine, scratch, true);
5315 		if (err)
5316 			goto err;
5317 
5318 err:
5319 		st_engine_heartbeat_enable(engine);
5320 		if (igt_flush_test(gt->i915))
5321 			err = -EIO;
5322 		if (err)
5323 			break;
5324 	}
5325 
5326 	i915_vma_unpin_and_release(&scratch, 0);
5327 	return err;
5328 }
5329 
5330 static struct i915_request *
create_timestamp(struct intel_context * ce,void * slot,int idx)5331 create_timestamp(struct intel_context *ce, void *slot, int idx)
5332 {
5333 	const u32 offset =
5334 		i915_ggtt_offset(ce->engine->status_page.vma) +
5335 		offset_in_page(slot);
5336 	struct i915_request *rq;
5337 	u32 *cs;
5338 	int err;
5339 
5340 	rq = intel_context_create_request(ce);
5341 	if (IS_ERR(rq))
5342 		return rq;
5343 
5344 	cs = intel_ring_begin(rq, 10);
5345 	if (IS_ERR(cs)) {
5346 		err = PTR_ERR(cs);
5347 		goto err;
5348 	}
5349 
5350 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5351 	*cs++ = MI_NOOP;
5352 
5353 	*cs++ = MI_SEMAPHORE_WAIT |
5354 		MI_SEMAPHORE_GLOBAL_GTT |
5355 		MI_SEMAPHORE_POLL |
5356 		MI_SEMAPHORE_SAD_NEQ_SDD;
5357 	*cs++ = 0;
5358 	*cs++ = offset;
5359 	*cs++ = 0;
5360 
5361 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5362 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
5363 	*cs++ = offset + idx * sizeof(u32);
5364 	*cs++ = 0;
5365 
5366 	intel_ring_advance(rq, cs);
5367 
5368 	rq->sched.attr.priority = I915_PRIORITY_MASK;
5369 	err = 0;
5370 err:
5371 	i915_request_get(rq);
5372 	i915_request_add(rq);
5373 	if (err) {
5374 		i915_request_put(rq);
5375 		return ERR_PTR(err);
5376 	}
5377 
5378 	return rq;
5379 }
5380 
/* State shared by the CTX_TIMESTAMP save/restore checks for one engine. */
struct lrc_timestamp {
	struct intel_engine_cs *engine; /* engine under test */
	struct intel_context *ce[2]; /* ce[0] is measured; ce[1] sends the preempting signal */
	u32 poison; /* value written to ce[0]'s CTX_TIMESTAMP before each run */
};
5386 
timestamp_advanced(u32 start,u32 end)5387 static bool timestamp_advanced(u32 start, u32 end)
5388 {
5389 	return (s32)(end - start) > 0;
5390 }
5391 
/*
 * Poison CTX_TIMESTAMP in ce[0]'s register state, run a create_timestamp()
 * request on it that stores the timestamp into slot[1] once the semaphore
 * is released, then flush the context and check that time only moved
 * forwards: poison -> slot[1] -> value saved back into the context image.
 *
 * With @preempt set, the semaphore is released by a request on ce[1] (whose
 * own CTX_TIMESTAMP is poisoned with 0xdeadbeef); otherwise by a CPU write.
 *
 * Returns 0 on success, -EINVAL if either comparison fails, or another
 * negative error code.
 */
static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
{
	/* Semaphore/result words in the engine's status page, cleared before use */
	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
	struct i915_request *rq;
	u32 timestamp;
	int err = 0;

	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
	rq = create_timestamp(arg->ce[0], slot, 1);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	err = wait_for_submit(rq->engine, rq, HZ / 2);
	if (err)
		goto err;

	if (preempt) {
		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
		err = emit_semaphore_signal(arg->ce[1], slot);
		if (err)
			goto err;
	} else {
		/* Release the semaphore from the CPU */
		slot[0] = 1;
		wmb();
	}

	/* And wait for switch to kernel (to save our context to memory) */
	err = context_flush(arg->ce[0], HZ / 2);
	if (err)
		goto err;

	/* Restore check: the value sampled by the request must be past the poison */
	if (!timestamp_advanced(arg->poison, slot[1])) {
		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       arg->poison, slot[1]);
		err = -EINVAL;
	}

	/* Save check: the value in the context image must be past the sample */
	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
	if (!timestamp_advanced(slot[1], timestamp)) {
		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       slot[1], timestamp);
		err = -EINVAL;
	}

err:
	/* Set the slot words to non-zero so a still-waiting request is released */
	memset32(slot, -1, 4);
	i915_request_put(rq);
	return err;
}
5443 
live_lrc_timestamp(void * arg)5444 static int live_lrc_timestamp(void *arg)
5445 {
5446 	struct lrc_timestamp data = {};
5447 	struct intel_gt *gt = arg;
5448 	enum intel_engine_id id;
5449 	const u32 poison[] = {
5450 		0,
5451 		S32_MAX,
5452 		(u32)S32_MAX + 1,
5453 		U32_MAX,
5454 	};
5455 
5456 	/*
5457 	 * We want to verify that the timestamp is saved and restore across
5458 	 * context switches and is monotonic.
5459 	 *
5460 	 * So we do this with a little bit of LRC poisoning to check various
5461 	 * boundary conditions, and see what happens if we preempt the context
5462 	 * with a second request (carrying more poison into the timestamp).
5463 	 */
5464 
5465 	for_each_engine(data.engine, gt, id) {
5466 		int i, err = 0;
5467 
5468 		st_engine_heartbeat_disable(data.engine);
5469 
5470 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5471 			struct intel_context *tmp;
5472 
5473 			tmp = intel_context_create(data.engine);
5474 			if (IS_ERR(tmp)) {
5475 				err = PTR_ERR(tmp);
5476 				goto err;
5477 			}
5478 
5479 			err = intel_context_pin(tmp);
5480 			if (err) {
5481 				intel_context_put(tmp);
5482 				goto err;
5483 			}
5484 
5485 			data.ce[i] = tmp;
5486 		}
5487 
5488 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
5489 			data.poison = poison[i];
5490 
5491 			err = __lrc_timestamp(&data, false);
5492 			if (err)
5493 				break;
5494 
5495 			err = __lrc_timestamp(&data, true);
5496 			if (err)
5497 				break;
5498 		}
5499 
5500 err:
5501 		st_engine_heartbeat_enable(data.engine);
5502 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5503 			if (!data.ce[i])
5504 				break;
5505 
5506 			intel_context_unpin(data.ce[i]);
5507 			intel_context_put(data.ce[i]);
5508 		}
5509 
5510 		if (igt_flush_test(gt->i915))
5511 			err = -EIO;
5512 		if (err)
5513 			return err;
5514 	}
5515 
5516 	return 0;
5517 }
5518 
5519 static struct i915_vma *
create_user_vma(struct i915_address_space * vm,unsigned long size)5520 create_user_vma(struct i915_address_space *vm, unsigned long size)
5521 {
5522 	struct drm_i915_gem_object *obj;
5523 	struct i915_vma *vma;
5524 	int err;
5525 
5526 	obj = i915_gem_object_create_internal(vm->i915, size);
5527 	if (IS_ERR(obj))
5528 		return ERR_CAST(obj);
5529 
5530 	vma = i915_vma_instance(obj, vm, NULL);
5531 	if (IS_ERR(vma)) {
5532 		i915_gem_object_put(obj);
5533 		return vma;
5534 	}
5535 
5536 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
5537 	if (err) {
5538 		i915_gem_object_put(obj);
5539 		return ERR_PTR(err);
5540 	}
5541 
5542 	return vma;
5543 }
5544 
/*
 * Build a batch in @ce's address space that stores, to consecutive dwords
 * of @scratch, every register named by an LRI command in the engine's
 * default context image.
 *
 * Returns the batch vma, or an ERR_PTR on failure.
 */
static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
	struct i915_vma *batch;
	u32 dw, x, *cs, *hw;
	u32 *defaults;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(batch->obj);
		i915_vma_put(batch);
		return ERR_PTR(-ENOMEM);
	}

	x = 0; /* byte offset of the next result within @scratch */
	dw = 0; /* dword index into the register state */
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		/* Zero dwords are stepped over one at a time */
		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		/* Not an LRI: skip the whole command using its length field */
		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2; /* number of (register, value) pairs */
		while (len--) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = hw[dw];
			*cs++ = lower_32_bits(scratch->node.start + x);
			*cs++ = upper_32_bits(scratch->node.start + x);

			dw += 2;
			x += 4;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(ce->engine->default_state, defaults);

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}
5609 
move_to_active(struct i915_request * rq,struct i915_vma * vma,unsigned int flags)5610 static int move_to_active(struct i915_request *rq,
5611 			  struct i915_vma *vma,
5612 			  unsigned int flags)
5613 {
5614 	int err;
5615 
5616 	i915_vma_lock(vma);
5617 	err = i915_request_await_object(rq, vma->obj, flags);
5618 	if (!err)
5619 		err = i915_vma_move_to_active(vma, rq, flags);
5620 	i915_vma_unlock(vma);
5621 
5622 	return err;
5623 }
5624 
/*
 * Build and submit a request on @ce that runs a store_context() batch
 * writing into @before, waits on the semaphore at @sema's page offset in
 * the engine's status page, and then runs a second store_context() batch
 * writing into @after. *@sema is zeroed just before submission.
 *
 * Returns the request with a reference held for the caller, or an ERR_PTR.
 * The local references on both batches are dropped before returning.
 */
static struct i915_request *
record_registers(struct intel_context *ce,
		 struct i915_vma *before,
		 struct i915_vma *after,
		 u32 *sema)
{
	struct i915_vma *b_before, *b_after;
	struct i915_request *rq;
	u32 *cs;
	int err;

	b_before = store_context(ce, before);
	if (IS_ERR(b_before))
		return ERR_CAST(b_before);

	b_after = store_context(ce, after);
	if (IS_ERR(b_after)) {
		rq = ERR_CAST(b_after);
		goto err_before;
	}

	/* On failure rq already holds the ERR_PTR that is returned below */
	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto err_after;

	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_before, 0);
	if (err)
		goto err_rq;

	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_after, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	/* First snapshot, run with arbitration disabled */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_before->node.start);
	*cs++ = upper_32_bits(b_before->node.start);

	/* Arbitration is re-enabled for the duration of the semaphore wait */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	/* Second snapshot, again with arbitration disabled */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_after->node.start);
	*cs++ = upper_32_bits(b_after->node.start);

	intel_ring_advance(rq, cs);

	WRITE_ONCE(*sema, 0);
	i915_request_get(rq);
	i915_request_add(rq);
err_after:
	i915_vma_put(b_after);
err_before:
	i915_vma_put(b_before);
	return rq;

err_rq:
	/* The request was created, so add it before reporting the error */
	i915_request_add(rq);
	rq = ERR_PTR(err);
	goto err_after;
}
5709 
/*
 * Build a batch in @ce's address space that, for every LRI command found in
 * the engine's default context image, emits an LRI loading @poison into the
 * same registers.
 *
 * Returns the batch vma, or an ERR_PTR on failure.
 */
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
	struct i915_vma *batch;
	u32 dw, *cs, *hw;
	u32 *defaults;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	defaults = shmem_pin_map(ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(batch->obj);
		i915_vma_put(batch);
		return ERR_PTR(-ENOMEM);
	}

	dw = 0; /* dword index into the register state */
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		/* Zero dwords are stepped over one at a time */
		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		/* Not an LRI: skip the whole command using its length field */
		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2; /* number of (register, value) pairs */
		*cs++ = MI_LOAD_REGISTER_IMM(len);
		while (len--) {
			*cs++ = hw[dw];
			*cs++ = poison;
			dw += 2;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(ce->engine->default_state, defaults);

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}
5769 
/*
 * Submit a request on @ce that runs a load_context() batch filling the
 * context registers with @poison and then writes 1 to the dword at @sema's
 * page offset in the engine's status page. The request is queued at
 * I915_PRIORITY_BARRIER.
 *
 * Returns 0 on success or a negative error code.
 */
static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
	struct i915_vma *batch;
	struct i915_request *rq;
	u32 *cs;
	int err;

	batch = load_context(ce, poison);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = move_to_active(rq, batch, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 8);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	/* Run the poisoning batch with arbitration disabled */
	cs[0] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	cs[1] = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	cs[2] = lower_32_bits(batch->node.start);
	cs[3] = upper_32_bits(batch->node.start);

	/* Then write 1 to the semaphore dword */
	cs[4] = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	cs[5] = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	cs[6] = 0;
	cs[7] = 1;

	intel_ring_advance(rq, cs + 8);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
	/* The request is added on both the success and the error path */
	i915_request_add(rq);
err_batch:
	i915_vma_put(batch);
	return err;
}
5817 
is_moving(u32 a,u32 b)5818 static bool is_moving(u32 a, u32 b)
5819 {
5820 	return a != b;
5821 }
5822 
/*
 * Compare the register snapshots in @ref against those in @result.
 *
 * The LRI commands in the engine's default context image are walked in the
 * same order as in load_context(), so entry x of each snapshot is matched to
 * the x'th register named by those LRIs. A register is only checked when its
 * two reference samples agree with each other (see is_moving()); it is then
 * reported if either result sample differs from the matching reference
 * sample. RING_HEAD and RING_TAIL (low 12 bits 0x30 / 0x34) are exempt.
 *
 * The register state of @ce is mapped only to include the context's own
 * value in the error message.
 *
 * Returns 0 if nothing leaked, -EINVAL if any register mismatched, or
 * another negative error code if a mapping could not be obtained.
 */
static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 *before[2], *after[2];
	u32 *image, *regs, *ctx;
	u32 slot = 0, pos = 0;
	int ret = 0;

	before[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
	if (IS_ERR(before[0]))
		return PTR_ERR(before[0]);

	before[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
	if (IS_ERR(before[1])) {
		ret = PTR_ERR(before[1]);
		goto unmap_ref0;
	}

	after[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
	if (IS_ERR(after[0])) {
		ret = PTR_ERR(after[0]);
		goto unmap_ref1;
	}

	after[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
	if (IS_ERR(after[1])) {
		ret = PTR_ERR(after[1]);
		goto unmap_result0;
	}

	ctx = i915_gem_object_pin_map(ce->state->obj,
				      i915_coherent_map_type(engine->i915));
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto unmap_result1;
	}
	ctx += LRC_STATE_OFFSET / sizeof(*ctx);

	image = shmem_pin_map(ce->engine->default_state);
	if (!image) {
		ret = -ENOMEM;
		goto unmap_ctx;
	}

	/* Register state starts LRC_STATE_OFFSET bytes into the image */
	regs = image + LRC_STATE_OFFSET / sizeof(*regs);
	do {
		const u32 header = regs[pos];
		u32 count = header & 0x7f;

		if (header == 0) {
			/* Zero dword: step over it */
			pos++;
			continue;
		}

		if ((header & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			/* Not an LRI: skip the command using its length bits */
			pos += count + 2;
			continue;
		}

		pos++;
		for (count = (count + 1) / 2; count; count--, pos += 2, slot++) {
			u32 reg;

			/* Ignore registers that change on their own */
			if (is_moving(before[0][slot], before[1][slot]))
				continue;

			if (before[0][slot] == after[0][slot] &&
			    before[1][slot] == after[1][slot])
				continue;

			reg = regs[pos] & 4095;
			if (reg == 0x30 /* RING_HEAD */ ||
			    reg == 0x34 /* RING_TAIL */)
				continue;

			pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
			       engine->name, pos,
			       regs[pos], regs[pos + 1],
			       before[0][slot], after[0][slot], after[1][slot],
			       poison, ctx[pos + 1]);
			ret = -EINVAL;
		}
	} while (pos < PAGE_SIZE / sizeof(u32) &&
		 (regs[pos] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	shmem_unpin_map(ce->engine->default_state, image);
unmap_ctx:
	i915_gem_object_unpin_map(ce->state->obj);
unmap_result1:
	i915_gem_object_unpin_map(result[1]->obj);
unmap_result0:
	i915_gem_object_unpin_map(result[0]->obj);
unmap_ref1:
	i915_gem_object_unpin_map(ref[1]->obj);
unmap_ref0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return ret;
}
5925 
/*
 * Run one isolation pass on @engine with the given @poison value.
 *
 * Two contexts are created. The registers of the first are recorded once
 * undisturbed (ref[]) and once while the second context runs
 * poison_registers() (result[]); compare_isolation() then checks that the
 * two recordings agree.
 *
 * A dword in the engine's status page is cleared and used as the semaphore
 * shared with the recording and poisoning requests.
 *
 * Returns 0 on success, -ETIME if a recording request did not complete
 * within HZ / 2, or another negative error code.
 */
static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
	struct i915_vma *ref[2], *result[2];
	struct intel_context *victim, *attacker;
	struct i915_request *rq;
	int ret;

	victim = intel_context_create(engine);
	if (IS_ERR(victim))
		return PTR_ERR(victim);

	attacker = intel_context_create(engine);
	if (IS_ERR(attacker)) {
		ret = PTR_ERR(attacker);
		goto put_victim;
	}

	ref[0] = create_user_vma(victim->vm, SZ_64K);
	if (IS_ERR(ref[0])) {
		ret = PTR_ERR(ref[0]);
		goto put_attacker;
	}

	ref[1] = create_user_vma(victim->vm, SZ_64K);
	if (IS_ERR(ref[1])) {
		ret = PTR_ERR(ref[1]);
		goto put_ref0;
	}

	/* Reference run: release the semaphore ourselves */
	rq = record_registers(victim, ref[0], ref[1], sema);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto put_ref1;
	}

	WRITE_ONCE(*sema, 1);
	wmb();

	ret = i915_request_wait(rq, 0, HZ / 2) < 0 ? -ETIME : 0;
	i915_request_put(rq);
	if (ret)
		goto put_ref1;

	result[0] = create_user_vma(victim->vm, SZ_64K);
	if (IS_ERR(result[0])) {
		ret = PTR_ERR(result[0]);
		goto put_ref1;
	}

	result[1] = create_user_vma(victim->vm, SZ_64K);
	if (IS_ERR(result[1])) {
		ret = PTR_ERR(result[1]);
		goto put_result0;
	}

	/* Poisoned run: the other context's request writes the semaphore */
	rq = record_registers(victim, result[0], result[1], sema);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto put_result1;
	}

	ret = poison_registers(attacker, poison, sema);
	if (ret)
		WRITE_ONCE(*sema, -1);
	else if (i915_request_wait(rq, 0, HZ / 2) < 0)
		ret = -ETIME;
	i915_request_put(rq);

	if (!ret)
		ret = compare_isolation(engine, ref, result, victim, poison);

put_result1:
	i915_vma_put(result[1]);
put_result0:
	i915_vma_put(result[0]);
put_ref1:
	i915_vma_put(ref[1]);
put_ref0:
	i915_vma_put(ref[0]);
put_attacker:
	intel_context_put(attacker);
put_victim:
	intel_context_put(victim);
	return ret;
}
6020 
skip_isolation(const struct intel_engine_cs * engine)6021 static bool skip_isolation(const struct intel_engine_cs *engine)
6022 {
6023 	if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
6024 		return true;
6025 
6026 	if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
6027 		return true;
6028 
6029 	return false;
6030 }
6031 
live_lrc_isolation(void * arg)6032 static int live_lrc_isolation(void *arg)
6033 {
6034 	struct intel_gt *gt = arg;
6035 	struct intel_engine_cs *engine;
6036 	enum intel_engine_id id;
6037 	const u32 poison[] = {
6038 		STACK_MAGIC,
6039 		0x3a3a3a3a,
6040 		0x5c5c5c5c,
6041 		0xffffffff,
6042 		0xffff0000,
6043 	};
6044 	int err = 0;
6045 
6046 	/*
6047 	 * Our goal is try and verify that per-context state cannot be
6048 	 * tampered with by another non-privileged client.
6049 	 *
6050 	 * We take the list of context registers from the LRI in the default
6051 	 * context image and attempt to modify that list from a remote context.
6052 	 */
6053 
6054 	for_each_engine(engine, gt, id) {
6055 		int i;
6056 
6057 		/* Just don't even ask */
6058 		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
6059 		    skip_isolation(engine))
6060 			continue;
6061 
6062 		intel_engine_pm_get(engine);
6063 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
6064 			int result;
6065 
6066 			result = __lrc_isolation(engine, poison[i]);
6067 			if (result && !err)
6068 				err = result;
6069 
6070 			result = __lrc_isolation(engine, ~poison[i]);
6071 			if (result && !err)
6072 				err = result;
6073 		}
6074 		intel_engine_pm_put(engine);
6075 		if (igt_flush_test(gt->i915)) {
6076 			err = -EIO;
6077 			break;
6078 		}
6079 	}
6080 
6081 	return err;
6082 }
6083 
indirect_ctx_submit_req(struct intel_context * ce)6084 static int indirect_ctx_submit_req(struct intel_context *ce)
6085 {
6086 	struct i915_request *rq;
6087 	int err = 0;
6088 
6089 	rq = intel_context_create_request(ce);
6090 	if (IS_ERR(rq))
6091 		return PTR_ERR(rq);
6092 
6093 	i915_request_get(rq);
6094 	i915_request_add(rq);
6095 
6096 	if (i915_request_wait(rq, 0, HZ / 5) < 0)
6097 		err = -ETIME;
6098 
6099 	i915_request_put(rq);
6100 
6101 	return err;
6102 }
6103 
6104 #define CTX_BB_CANARY_OFFSET (3 * 1024)
6105 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
6106 
6107 static u32 *
emit_indirect_ctx_bb_canary(const struct intel_context * ce,u32 * cs)6108 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
6109 {
6110 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
6111 		MI_SRM_LRM_GLOBAL_GTT |
6112 		MI_LRI_LRM_CS_MMIO;
6113 	*cs++ = i915_mmio_reg_offset(RING_START(0));
6114 	*cs++ = i915_ggtt_offset(ce->state) +
6115 		context_wa_bb_offset(ce) +
6116 		CTX_BB_CANARY_OFFSET;
6117 	*cs++ = 0;
6118 
6119 	return cs;
6120 }
6121 
6122 static void
indirect_ctx_bb_setup(struct intel_context * ce)6123 indirect_ctx_bb_setup(struct intel_context *ce)
6124 {
6125 	u32 *cs = context_indirect_bb(ce);
6126 
6127 	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
6128 
6129 	setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
6130 }
6131 
check_ring_start(struct intel_context * ce)6132 static bool check_ring_start(struct intel_context *ce)
6133 {
6134 	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
6135 		LRC_STATE_OFFSET + context_wa_bb_offset(ce);
6136 
6137 	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
6138 		return true;
6139 
6140 	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
6141 	       ctx_bb[CTX_BB_CANARY_INDEX],
6142 	       ce->lrc_reg_state[CTX_RING_START]);
6143 
6144 	return false;
6145 }
6146 
indirect_ctx_bb_check(struct intel_context * ce)6147 static int indirect_ctx_bb_check(struct intel_context *ce)
6148 {
6149 	int err;
6150 
6151 	err = indirect_ctx_submit_req(ce);
6152 	if (err)
6153 		return err;
6154 
6155 	if (!check_ring_start(ce))
6156 		return -EINVAL;
6157 
6158 	return 0;
6159 }
6160 
__live_lrc_indirect_ctx_bb(struct intel_engine_cs * engine)6161 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
6162 {
6163 	struct intel_context *a, *b;
6164 	int err;
6165 
6166 	a = intel_context_create(engine);
6167 	if (IS_ERR(a))
6168 		return PTR_ERR(a);
6169 	err = intel_context_pin(a);
6170 	if (err)
6171 		goto put_a;
6172 
6173 	b = intel_context_create(engine);
6174 	if (IS_ERR(b)) {
6175 		err = PTR_ERR(b);
6176 		goto unpin_a;
6177 	}
6178 	err = intel_context_pin(b);
6179 	if (err)
6180 		goto put_b;
6181 
6182 	/* We use the already reserved extra page in context state */
6183 	if (!a->wa_bb_page) {
6184 		GEM_BUG_ON(b->wa_bb_page);
6185 		GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
6186 		goto unpin_b;
6187 	}
6188 
6189 	/*
6190 	 * In order to test that our per context bb is truly per context,
6191 	 * and executes at the intended spot on context restoring process,
6192 	 * make the batch store the ring start value to memory.
6193 	 * As ring start is restored apriori of starting the indirect ctx bb and
6194 	 * as it will be different for each context, it fits to this purpose.
6195 	 */
6196 	indirect_ctx_bb_setup(a);
6197 	indirect_ctx_bb_setup(b);
6198 
6199 	err = indirect_ctx_bb_check(a);
6200 	if (err)
6201 		goto unpin_b;
6202 
6203 	err = indirect_ctx_bb_check(b);
6204 
6205 unpin_b:
6206 	intel_context_unpin(b);
6207 put_b:
6208 	intel_context_put(b);
6209 unpin_a:
6210 	intel_context_unpin(a);
6211 put_a:
6212 	intel_context_put(a);
6213 
6214 	return err;
6215 }
6216 
live_lrc_indirect_ctx_bb(void * arg)6217 static int live_lrc_indirect_ctx_bb(void *arg)
6218 {
6219 	struct intel_gt *gt = arg;
6220 	struct intel_engine_cs *engine;
6221 	enum intel_engine_id id;
6222 	int err = 0;
6223 
6224 	for_each_engine(engine, gt, id) {
6225 		intel_engine_pm_get(engine);
6226 		err = __live_lrc_indirect_ctx_bb(engine);
6227 		intel_engine_pm_put(engine);
6228 
6229 		if (igt_flush_test(gt->i915))
6230 			err = -EIO;
6231 
6232 		if (err)
6233 			break;
6234 	}
6235 
6236 	return err;
6237 }
6238 
garbage_reset(struct intel_engine_cs * engine,struct i915_request * rq)6239 static void garbage_reset(struct intel_engine_cs *engine,
6240 			  struct i915_request *rq)
6241 {
6242 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
6243 	unsigned long *lock = &engine->gt->reset.flags;
6244 
6245 	if (test_and_set_bit(bit, lock))
6246 		return;
6247 
6248 	tasklet_disable(&engine->execlists.tasklet);
6249 
6250 	if (!rq->fence.error)
6251 		intel_engine_reset(engine, NULL);
6252 
6253 	tasklet_enable(&engine->execlists.tasklet);
6254 	clear_and_wake_up_bit(bit, lock);
6255 }
6256 
garbage(struct intel_context * ce,struct rnd_state * prng)6257 static struct i915_request *garbage(struct intel_context *ce,
6258 				    struct rnd_state *prng)
6259 {
6260 	struct i915_request *rq;
6261 	int err;
6262 
6263 	err = intel_context_pin(ce);
6264 	if (err)
6265 		return ERR_PTR(err);
6266 
6267 	prandom_bytes_state(prng,
6268 			    ce->lrc_reg_state,
6269 			    ce->engine->context_size -
6270 			    LRC_STATE_OFFSET);
6271 
6272 	rq = intel_context_create_request(ce);
6273 	if (IS_ERR(rq)) {
6274 		err = PTR_ERR(rq);
6275 		goto err_unpin;
6276 	}
6277 
6278 	i915_request_get(rq);
6279 	i915_request_add(rq);
6280 	return rq;
6281 
6282 err_unpin:
6283 	intel_context_unpin(ce);
6284 	return ERR_PTR(err);
6285 }
6286 
__lrc_garbage(struct intel_engine_cs * engine,struct rnd_state * prng)6287 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
6288 {
6289 	struct intel_context *ce;
6290 	struct i915_request *hang;
6291 	int err = 0;
6292 
6293 	ce = intel_context_create(engine);
6294 	if (IS_ERR(ce))
6295 		return PTR_ERR(ce);
6296 
6297 	hang = garbage(ce, prng);
6298 	if (IS_ERR(hang)) {
6299 		err = PTR_ERR(hang);
6300 		goto err_ce;
6301 	}
6302 
6303 	if (wait_for_submit(engine, hang, HZ / 2)) {
6304 		i915_request_put(hang);
6305 		err = -ETIME;
6306 		goto err_ce;
6307 	}
6308 
6309 	intel_context_set_banned(ce);
6310 	garbage_reset(engine, hang);
6311 
6312 	intel_engine_flush_submission(engine);
6313 	if (!hang->fence.error) {
6314 		i915_request_put(hang);
6315 		pr_err("%s: corrupted context was not reset\n",
6316 		       engine->name);
6317 		err = -EINVAL;
6318 		goto err_ce;
6319 	}
6320 
6321 	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
6322 		pr_err("%s: corrupted context did not recover\n",
6323 		       engine->name);
6324 		i915_request_put(hang);
6325 		err = -EIO;
6326 		goto err_ce;
6327 	}
6328 	i915_request_put(hang);
6329 
6330 err_ce:
6331 	intel_context_put(ce);
6332 	return err;
6333 }
6334 
live_lrc_garbage(void * arg)6335 static int live_lrc_garbage(void *arg)
6336 {
6337 	struct intel_gt *gt = arg;
6338 	struct intel_engine_cs *engine;
6339 	enum intel_engine_id id;
6340 
6341 	/*
6342 	 * Verify that we can recover if one context state is completely
6343 	 * corrupted.
6344 	 */
6345 
6346 	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
6347 		return 0;
6348 
6349 	for_each_engine(engine, gt, id) {
6350 		I915_RND_STATE(prng);
6351 		int err = 0, i;
6352 
6353 		if (!intel_has_reset_engine(engine->gt))
6354 			continue;
6355 
6356 		intel_engine_pm_get(engine);
6357 		for (i = 0; i < 3; i++) {
6358 			err = __lrc_garbage(engine, &prng);
6359 			if (err)
6360 				break;
6361 		}
6362 		intel_engine_pm_put(engine);
6363 
6364 		if (igt_flush_test(gt->i915))
6365 			err = -EIO;
6366 		if (err)
6367 			return err;
6368 	}
6369 
6370 	return 0;
6371 }
6372 
__live_pphwsp_runtime(struct intel_engine_cs * engine)6373 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
6374 {
6375 	struct intel_context *ce;
6376 	struct i915_request *rq;
6377 	IGT_TIMEOUT(end_time);
6378 	int err;
6379 
6380 	ce = intel_context_create(engine);
6381 	if (IS_ERR(ce))
6382 		return PTR_ERR(ce);
6383 
6384 	ce->runtime.num_underflow = 0;
6385 	ce->runtime.max_underflow = 0;
6386 
6387 	do {
6388 		unsigned int loop = 1024;
6389 
6390 		while (loop) {
6391 			rq = intel_context_create_request(ce);
6392 			if (IS_ERR(rq)) {
6393 				err = PTR_ERR(rq);
6394 				goto err_rq;
6395 			}
6396 
6397 			if (--loop == 0)
6398 				i915_request_get(rq);
6399 
6400 			i915_request_add(rq);
6401 		}
6402 
6403 		if (__igt_timeout(end_time, NULL))
6404 			break;
6405 
6406 		i915_request_put(rq);
6407 	} while (1);
6408 
6409 	err = i915_request_wait(rq, 0, HZ / 5);
6410 	if (err < 0) {
6411 		pr_err("%s: request not completed!\n", engine->name);
6412 		goto err_wait;
6413 	}
6414 
6415 	igt_flush_test(engine->i915);
6416 
6417 	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
6418 		engine->name,
6419 		intel_context_get_total_runtime_ns(ce),
6420 		intel_context_get_avg_runtime_ns(ce));
6421 
6422 	err = 0;
6423 	if (ce->runtime.num_underflow) {
6424 		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
6425 		       engine->name,
6426 		       ce->runtime.num_underflow,
6427 		       ce->runtime.max_underflow);
6428 		GEM_TRACE_DUMP();
6429 		err = -EOVERFLOW;
6430 	}
6431 
6432 err_wait:
6433 	i915_request_put(rq);
6434 err_rq:
6435 	intel_context_put(ce);
6436 	return err;
6437 }
6438 
live_pphwsp_runtime(void * arg)6439 static int live_pphwsp_runtime(void *arg)
6440 {
6441 	struct intel_gt *gt = arg;
6442 	struct intel_engine_cs *engine;
6443 	enum intel_engine_id id;
6444 	int err = 0;
6445 
6446 	/*
6447 	 * Check that cumulative context runtime as stored in the pphwsp[16]
6448 	 * is monotonic.
6449 	 */
6450 
6451 	for_each_engine(engine, gt, id) {
6452 		err = __live_pphwsp_runtime(engine);
6453 		if (err)
6454 			break;
6455 	}
6456 
6457 	if (igt_flush_test(gt->i915))
6458 		err = -EIO;
6459 
6460 	return err;
6461 }
6462 
intel_lrc_live_selftests(struct drm_i915_private * i915)6463 int intel_lrc_live_selftests(struct drm_i915_private *i915)
6464 {
6465 	static const struct i915_subtest tests[] = {
6466 		SUBTEST(live_lrc_layout),
6467 		SUBTEST(live_lrc_fixed),
6468 		SUBTEST(live_lrc_state),
6469 		SUBTEST(live_lrc_gpr),
6470 		SUBTEST(live_lrc_isolation),
6471 		SUBTEST(live_lrc_timestamp),
6472 		SUBTEST(live_lrc_garbage),
6473 		SUBTEST(live_pphwsp_runtime),
6474 		SUBTEST(live_lrc_indirect_ctx_bb),
6475 	};
6476 
6477 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
6478 		return 0;
6479 
6480 	return intel_gt_live_subtests(tests, &i915->gt);
6481 }
6482