1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6 
7 #include <linux/sort.h>
8 
9 #include "i915_drv.h"
10 
11 #include "intel_gt_requests.h"
12 #include "i915_selftest.h"
13 #include "selftest_engine_heartbeat.h"
14 
timeline_sync(struct intel_timeline * tl)15 static int timeline_sync(struct intel_timeline *tl)
16 {
17 	struct dma_fence *fence;
18 	long timeout;
19 
20 	fence = i915_active_fence_get(&tl->last_request);
21 	if (!fence)
22 		return 0;
23 
24 	timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
25 	dma_fence_put(fence);
26 	if (timeout < 0)
27 		return timeout;
28 
29 	return 0;
30 }
31 
engine_sync_barrier(struct intel_engine_cs * engine)32 static int engine_sync_barrier(struct intel_engine_cs *engine)
33 {
34 	return timeline_sync(engine->kernel_context->timeline);
35 }
36 
/*
 * Test fixture pairing an i915_active tracker with a reference count.
 * The allocation is released by pulse_free() when the last reference
 * is dropped via pulse_put().
 */
struct pulse {
	/* Initialised in pulse_create() with pulse_active()/pulse_retire(). */
	struct i915_active active;
	/* pulse_create() holds the initial reference. */
	struct kref kref;
};
41 
pulse_active(struct i915_active * active)42 static int pulse_active(struct i915_active *active)
43 {
44 	kref_get(&container_of(active, struct pulse, active)->kref);
45 	return 0;
46 }
47 
pulse_free(struct kref * kref)48 static void pulse_free(struct kref *kref)
49 {
50 	struct pulse *p = container_of(kref, typeof(*p), kref);
51 
52 	i915_active_fini(&p->active);
53 	kfree(p);
54 }
55 
pulse_put(struct pulse * p)56 static void pulse_put(struct pulse *p)
57 {
58 	kref_put(&p->kref, pulse_free);
59 }
60 
pulse_retire(struct i915_active * active)61 static void pulse_retire(struct i915_active *active)
62 {
63 	pulse_put(container_of(active, struct pulse, active));
64 }
65 
pulse_create(void)66 static struct pulse *pulse_create(void)
67 {
68 	struct pulse *p;
69 
70 	p = kmalloc(sizeof(*p), GFP_KERNEL);
71 	if (!p)
72 		return p;
73 
74 	kref_init(&p->kref);
75 	i915_active_init(&p->active, pulse_active, pulse_retire);
76 
77 	return p;
78 }
79 
pulse_unlock_wait(struct pulse * p)80 static void pulse_unlock_wait(struct pulse *p)
81 {
82 	i915_active_unlock_wait(&p->active);
83 }
84 
/*
 * Check that @fn flushes the idle barriers queued on @engine.
 *
 * A temporary struct pulse is created and its i915_active is attached to
 * @engine as a barrier. @fn is then invoked and is expected to leave
 * engine->barrier_tasks empty; afterwards we wait on the kernel context
 * timeline and verify that the tracker has become idle.
 *
 * @engine must be awake on entry (asserted below).
 *
 * Returns 0 on success, -ENOMEM if the pulse cannot be allocated, -ETIME
 * if waiting on the kernel context fails, -EINVAL if the tracker is still
 * busy afterwards, or the error returned by the i915_active helpers or @fn.
 */
static int __live_idle_pulse(struct intel_engine_cs *engine,
			     int (*fn)(struct intel_engine_cs *cs))
{
	struct pulse *p;
	int err;

	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));

	p = pulse_create();
	if (!p)
		return -ENOMEM;

	err = i915_active_acquire(&p->active);
	if (err)
		goto out;

	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
	if (err) {
		/* Balance the successful i915_active_acquire() above. */
		i915_active_release(&p->active);
		goto out;
	}

	i915_active_acquire_barrier(&p->active);
	i915_active_release(&p->active);

	/*
	 * Even after dropping our own acquire, the tracker must still be
	 * busy and the engine must now have barrier tasks pending.
	 */
	GEM_BUG_ON(i915_active_is_idle(&p->active));
	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));

	err = fn(engine);
	if (err)
		goto out;

	/* @fn is expected to have consumed every pending barrier task. */
	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));

	if (engine_sync_barrier(engine)) {
		struct drm_printer m = drm_err_printer("pulse");

		pr_err("%s: no heartbeat pulse?\n", engine->name);
		intel_engine_dump(engine, &m, "%s", engine->name);

		err = -ETIME;
		goto out;
	}

	GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);

	pulse_unlock_wait(p); /* synchronize with the retirement callback */

	if (!i915_active_is_idle(&p->active)) {
		struct drm_printer m = drm_err_printer("pulse");

		pr_err("%s: heartbeat pulse did not flush idle tasks\n",
		       engine->name);
		i915_active_print(&p->active, &m);

		err = -EINVAL;
		goto out;
	}

out:
	/* Drop the reference obtained from pulse_create(). */
	pulse_put(p);
	return err;
}
148 
live_idle_flush(void * arg)149 static int live_idle_flush(void *arg)
150 {
151 	struct intel_gt *gt = arg;
152 	struct intel_engine_cs *engine;
153 	enum intel_engine_id id;
154 	int err = 0;
155 
156 	/* Check that we can flush the idle barriers */
157 
158 	for_each_engine(engine, gt, id) {
159 		st_engine_heartbeat_disable(engine);
160 		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
161 		st_engine_heartbeat_enable(engine);
162 		if (err)
163 			break;
164 	}
165 
166 	return err;
167 }
168 
live_idle_pulse(void * arg)169 static int live_idle_pulse(void *arg)
170 {
171 	struct intel_gt *gt = arg;
172 	struct intel_engine_cs *engine;
173 	enum intel_engine_id id;
174 	int err = 0;
175 
176 	/* Check that heartbeat pulses flush the idle barriers */
177 
178 	for_each_engine(engine, gt, id) {
179 		st_engine_heartbeat_disable(engine);
180 		err = __live_idle_pulse(engine, intel_engine_pulse);
181 		st_engine_heartbeat_enable(engine);
182 		if (err && err != -ENODEV)
183 			break;
184 
185 		err = 0;
186 	}
187 
188 	return err;
189 }
190 
/*
 * sort() comparison callback ordering two u32 values ascending.
 *
 * Returns a negative, zero or positive value when *_a is respectively
 * less than, equal to or greater than *_b.
 *
 * The values are compared rather than subtracted: a u32 difference
 * converted to int has the wrong sign whenever the operands are more
 * than INT_MAX apart.
 */
static int cmp_u32(const void *_a, const void *_b)
{
	const u32 *a = _a, *b = _b;

	return (*a > *b) - (*a < *b);
}
197 
__live_heartbeat_fast(struct intel_engine_cs * engine)198 static int __live_heartbeat_fast(struct intel_engine_cs *engine)
199 {
200 	struct intel_context *ce;
201 	struct i915_request *rq;
202 	ktime_t t0, t1;
203 	u32 times[5];
204 	int err;
205 	int i;
206 
207 	ce = intel_context_create(engine);
208 	if (IS_ERR(ce))
209 		return PTR_ERR(ce);
210 
211 	intel_engine_pm_get(engine);
212 
213 	err = intel_engine_set_heartbeat(engine, 1);
214 	if (err)
215 		goto err_pm;
216 
217 	for (i = 0; i < ARRAY_SIZE(times); i++) {
218 		/* Manufacture a tick */
219 		do {
220 			while (READ_ONCE(engine->heartbeat.systole))
221 				flush_delayed_work(&engine->heartbeat.work);
222 
223 			engine->serial++; /* quick, pretend we are not idle! */
224 			flush_delayed_work(&engine->heartbeat.work);
225 			if (!delayed_work_pending(&engine->heartbeat.work)) {
226 				pr_err("%s: heartbeat did not start\n",
227 				       engine->name);
228 				err = -EINVAL;
229 				goto err_pm;
230 			}
231 
232 			rcu_read_lock();
233 			rq = READ_ONCE(engine->heartbeat.systole);
234 			if (rq)
235 				rq = i915_request_get_rcu(rq);
236 			rcu_read_unlock();
237 		} while (!rq);
238 
239 		t0 = ktime_get();
240 		while (rq == READ_ONCE(engine->heartbeat.systole))
241 			yield(); /* work is on the local cpu! */
242 		t1 = ktime_get();
243 
244 		i915_request_put(rq);
245 		times[i] = ktime_us_delta(t1, t0);
246 	}
247 
248 	sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
249 
250 	pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
251 		engine->name,
252 		times[ARRAY_SIZE(times) / 2],
253 		times[0],
254 		times[ARRAY_SIZE(times) - 1]);
255 
256 	/* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */
257 	if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) {
258 		pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
259 		       engine->name,
260 		       times[ARRAY_SIZE(times) / 2],
261 		       jiffies_to_usecs(6));
262 		err = -EINVAL;
263 	}
264 
265 	intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
266 err_pm:
267 	intel_engine_pm_put(engine);
268 	intel_context_put(ce);
269 	return err;
270 }
271 
live_heartbeat_fast(void * arg)272 static int live_heartbeat_fast(void *arg)
273 {
274 	struct intel_gt *gt = arg;
275 	struct intel_engine_cs *engine;
276 	enum intel_engine_id id;
277 	int err = 0;
278 
279 	/* Check that the heartbeat ticks at the desired rate. */
280 	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
281 		return 0;
282 
283 	for_each_engine(engine, gt, id) {
284 		err = __live_heartbeat_fast(engine);
285 		if (err)
286 			break;
287 	}
288 
289 	return err;
290 }
291 
__live_heartbeat_off(struct intel_engine_cs * engine)292 static int __live_heartbeat_off(struct intel_engine_cs *engine)
293 {
294 	int err;
295 
296 	intel_engine_pm_get(engine);
297 
298 	engine->serial++;
299 	flush_delayed_work(&engine->heartbeat.work);
300 	if (!delayed_work_pending(&engine->heartbeat.work)) {
301 		pr_err("%s: heartbeat not running\n",
302 		       engine->name);
303 		err = -EINVAL;
304 		goto err_pm;
305 	}
306 
307 	err = intel_engine_set_heartbeat(engine, 0);
308 	if (err)
309 		goto err_pm;
310 
311 	engine->serial++;
312 	flush_delayed_work(&engine->heartbeat.work);
313 	if (delayed_work_pending(&engine->heartbeat.work)) {
314 		pr_err("%s: heartbeat still running\n",
315 		       engine->name);
316 		err = -EINVAL;
317 		goto err_beat;
318 	}
319 
320 	if (READ_ONCE(engine->heartbeat.systole)) {
321 		pr_err("%s: heartbeat still allocated\n",
322 		       engine->name);
323 		err = -EINVAL;
324 		goto err_beat;
325 	}
326 
327 err_beat:
328 	intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
329 err_pm:
330 	intel_engine_pm_put(engine);
331 	return err;
332 }
333 
live_heartbeat_off(void * arg)334 static int live_heartbeat_off(void *arg)
335 {
336 	struct intel_gt *gt = arg;
337 	struct intel_engine_cs *engine;
338 	enum intel_engine_id id;
339 	int err = 0;
340 
341 	/* Check that we can turn off heartbeat and not interrupt VIP */
342 	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
343 		return 0;
344 
345 	for_each_engine(engine, gt, id) {
346 		if (!intel_engine_has_preemption(engine))
347 			continue;
348 
349 		err = __live_heartbeat_off(engine);
350 		if (err)
351 			break;
352 	}
353 
354 	return err;
355 }
356 
intel_heartbeat_live_selftests(struct drm_i915_private * i915)357 int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
358 {
359 	static const struct i915_subtest tests[] = {
360 		SUBTEST(live_idle_flush),
361 		SUBTEST(live_idle_pulse),
362 		SUBTEST(live_heartbeat_fast),
363 		SUBTEST(live_heartbeat_off),
364 	};
365 	int saved_hangcheck;
366 	int err;
367 
368 	if (intel_gt_is_wedged(&i915->gt))
369 		return 0;
370 
371 	saved_hangcheck = i915->params.enable_hangcheck;
372 	i915->params.enable_hangcheck = INT_MAX;
373 
374 	err = intel_gt_live_subtests(tests, &i915->gt);
375 
376 	i915->params.enable_hangcheck = saved_hangcheck;
377 	return err;
378 }
379 
/*
 * Selftest helper: zero the heartbeat interval of @engine, take an
 * engine-pm reference and park the heartbeat.
 *
 * Pair with st_engine_heartbeat_enable(), which drops the reference and
 * restores the interval.
 */
void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
{
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}
387 
/*
 * Selftest helper: drop the engine-pm reference taken by
 * st_engine_heartbeat_disable() and reset the heartbeat interval of
 * @engine to the value stored in engine->defaults.
 */
void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}
395