1 /*
2  * Copyright 2021 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 #include "runl.h"
23 #include "cgrp.h"
24 #include "chan.h"
25 #include "chid.h"
26 #include "priv.h"
27 #include "runq.h"
28 
29 #include <core/gpuobj.h>
30 #include <subdev/timer.h>
31 #include <subdev/top.h>
32 
33 static struct nvkm_cgrp *
nvkm_engn_cgrp_get(struct nvkm_engn * engn,unsigned long * pirqflags)34 nvkm_engn_cgrp_get(struct nvkm_engn *engn, unsigned long *pirqflags)
35 {
36 	struct nvkm_cgrp *cgrp = NULL;
37 	struct nvkm_chan *chan;
38 	bool cgid;
39 	int id;
40 
41 	id = engn->func->cxid(engn, &cgid);
42 	if (id < 0)
43 		return NULL;
44 
45 	if (!cgid) {
46 		chan = nvkm_runl_chan_get_chid(engn->runl, id, pirqflags);
47 		if (chan)
48 			cgrp = chan->cgrp;
49 	} else {
50 		cgrp = nvkm_runl_cgrp_get_cgid(engn->runl, id, pirqflags);
51 	}
52 
53 	WARN_ON(!cgrp);
54 	return cgrp;
55 }
56 
57 static void
nvkm_runl_rc(struct nvkm_runl * runl)58 nvkm_runl_rc(struct nvkm_runl *runl)
59 {
60 	struct nvkm_fifo *fifo = runl->fifo;
61 	struct nvkm_cgrp *cgrp, *gtmp;
62 	struct nvkm_chan *chan, *ctmp;
63 	struct nvkm_engn *engn;
64 	unsigned long flags;
65 	int rc, state, i;
66 	bool reset;
67 
68 	/* Runlist is blocked before scheduling recovery - fetch count. */
69 	BUG_ON(!mutex_is_locked(&runl->mutex));
70 	rc = atomic_xchg(&runl->rc_pending, 0);
71 	if (!rc)
72 		return;
73 
74 	/* Look for channel groups flagged for RC. */
75 	nvkm_runl_foreach_cgrp_safe(cgrp, gtmp, runl) {
76 		state = atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_PENDING, NVKM_CGRP_RC_RUNNING);
77 		if (state == NVKM_CGRP_RC_PENDING) {
78 			/* Disable all channels in them, and remove from runlist. */
79 			nvkm_cgrp_foreach_chan_safe(chan, ctmp, cgrp) {
80 				nvkm_chan_error(chan, false);
81 				nvkm_chan_remove_locked(chan);
82 			}
83 		}
84 	}
85 
86 	/* On GPUs with runlist preempt, wait for PBDMA(s) servicing runlist to go idle. */
87 	if (runl->func->preempt) {
88 		for (i = 0; i < runl->runq_nr; i++) {
89 			struct nvkm_runq *runq = runl->runq[i];
90 
91 			if (runq) {
92 				nvkm_msec(fifo->engine.subdev.device, 2000,
93 					if (runq->func->idle(runq))
94 						break;
95 				);
96 			}
97 		}
98 	}
99 
100 	/* Look for engines that are still on flagged channel groups - reset them. */
101 	nvkm_runl_foreach_engn_cond(engn, runl, engn->func->cxid) {
102 		cgrp = nvkm_engn_cgrp_get(engn, &flags);
103 		if (!cgrp) {
104 			ENGN_DEBUG(engn, "cxid not valid");
105 			continue;
106 		}
107 
108 		reset = atomic_read(&cgrp->rc) == NVKM_CGRP_RC_RUNNING;
109 		nvkm_cgrp_put(&cgrp, flags);
110 		if (!reset) {
111 			ENGN_DEBUG(engn, "cxid not in recovery");
112 			continue;
113 		}
114 
115 		ENGN_DEBUG(engn, "resetting...");
116 		/*TODO: can we do something less of a potential catastrophe on failure? */
117 		WARN_ON(nvkm_engine_reset(engn->engine));
118 	}
119 
120 	/* Submit runlist update, and clear any remaining exception state. */
121 	runl->func->update(runl);
122 	if (runl->func->fault_clear)
123 		runl->func->fault_clear(runl);
124 
125 	/* Unblock runlist processing. */
126 	while (rc--)
127 		nvkm_runl_allow(runl);
128 	runl->func->wait(runl);
129 }
130 
131 static void
nvkm_runl_rc_runl(struct nvkm_runl * runl)132 nvkm_runl_rc_runl(struct nvkm_runl *runl)
133 {
134 	RUNL_ERROR(runl, "rc scheduled");
135 
136 	nvkm_runl_block(runl);
137 	if (runl->func->preempt)
138 		runl->func->preempt(runl);
139 
140 	atomic_inc(&runl->rc_pending);
141 	schedule_work(&runl->work);
142 }
143 
144 void
nvkm_runl_rc_cgrp(struct nvkm_cgrp * cgrp)145 nvkm_runl_rc_cgrp(struct nvkm_cgrp *cgrp)
146 {
147 	if (atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_NONE, NVKM_CGRP_RC_PENDING) != NVKM_CGRP_RC_NONE)
148 		return;
149 
150 	CGRP_ERROR(cgrp, "rc scheduled");
151 	nvkm_runl_rc_runl(cgrp->runl);
152 }
153 
/*
 * Schedule recovery for whichever channel group is currently on an engine.
 *
 * If the engine is not on any resolvable channel group, the request is
 * skipped with a debug message.  The runl argument is not used here.
 */
void
nvkm_runl_rc_engn(struct nvkm_runl *runl, struct nvkm_engn *engn)
{
	unsigned long irqflags;
	struct nvkm_cgrp *cgrp;

	/* Lookup channel group currently on engine. */
	cgrp = nvkm_engn_cgrp_get(engn, &irqflags);
	if (cgrp) {
		nvkm_runl_rc_cgrp(cgrp);
		nvkm_cgrp_put(&cgrp, irqflags);
		return;
	}

	ENGN_DEBUG(engn, "rc skipped, not on channel");
}
170 
171 static void
nvkm_runl_work(struct work_struct * work)172 nvkm_runl_work(struct work_struct *work)
173 {
174 	struct nvkm_runl *runl = container_of(work, typeof(*runl), work);
175 
176 	mutex_lock(&runl->mutex);
177 	nvkm_runl_rc(runl);
178 	mutex_unlock(&runl->mutex);
179 
180 }
181 
182 struct nvkm_chan *
nvkm_runl_chan_get_inst(struct nvkm_runl * runl,u64 inst,unsigned long * pirqflags)183 nvkm_runl_chan_get_inst(struct nvkm_runl *runl, u64 inst, unsigned long *pirqflags)
184 {
185 	struct nvkm_chid *chid = runl->chid;
186 	struct nvkm_chan *chan;
187 	unsigned long flags;
188 	int id;
189 
190 	spin_lock_irqsave(&chid->lock, flags);
191 	for_each_set_bit(id, chid->used, chid->nr) {
192 		chan = chid->data[id];
193 		if (likely(chan)) {
194 			if (chan->inst->addr == inst) {
195 				spin_lock(&chan->cgrp->lock);
196 				*pirqflags = flags;
197 				spin_unlock(&chid->lock);
198 				return chan;
199 			}
200 		}
201 	}
202 	spin_unlock_irqrestore(&chid->lock, flags);
203 	return NULL;
204 }
205 
206 struct nvkm_chan *
nvkm_runl_chan_get_chid(struct nvkm_runl * runl,int id,unsigned long * pirqflags)207 nvkm_runl_chan_get_chid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
208 {
209 	struct nvkm_chid *chid = runl->chid;
210 	struct nvkm_chan *chan;
211 	unsigned long flags;
212 
213 	spin_lock_irqsave(&chid->lock, flags);
214 	if (!WARN_ON(id >= chid->nr)) {
215 		chan = chid->data[id];
216 		if (likely(chan)) {
217 			spin_lock(&chan->cgrp->lock);
218 			*pirqflags = flags;
219 			spin_unlock(&chid->lock);
220 			return chan;
221 		}
222 	}
223 	spin_unlock_irqrestore(&chid->lock, flags);
224 	return NULL;
225 }
226 
227 struct nvkm_cgrp *
nvkm_runl_cgrp_get_cgid(struct nvkm_runl * runl,int id,unsigned long * pirqflags)228 nvkm_runl_cgrp_get_cgid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
229 {
230 	struct nvkm_chid *cgid = runl->cgid;
231 	struct nvkm_cgrp *cgrp;
232 	unsigned long flags;
233 
234 	spin_lock_irqsave(&cgid->lock, flags);
235 	if (!WARN_ON(id >= cgid->nr)) {
236 		cgrp = cgid->data[id];
237 		if (likely(cgrp)) {
238 			spin_lock(&cgrp->lock);
239 			*pirqflags = flags;
240 			spin_unlock(&cgid->lock);
241 			return cgrp;
242 		}
243 	}
244 	spin_unlock_irqrestore(&cgid->lock, flags);
245 	return NULL;
246 }
247 
248 int
nvkm_runl_preempt_wait(struct nvkm_runl * runl)249 nvkm_runl_preempt_wait(struct nvkm_runl *runl)
250 {
251 	return nvkm_msec(runl->fifo->engine.subdev.device, runl->fifo->timeout.chan_msec,
252 		if (!runl->func->preempt_pending(runl))
253 			break;
254 
255 		nvkm_runl_rc(runl);
256 		usleep_range(1, 2);
257 	) < 0 ? -ETIMEDOUT : 0;
258 }
259 
260 bool
nvkm_runl_update_pending(struct nvkm_runl * runl)261 nvkm_runl_update_pending(struct nvkm_runl *runl)
262 {
263 	if (!runl->func->pending(runl))
264 		return false;
265 
266 	nvkm_runl_rc(runl);
267 	return true;
268 }
269 
270 void
nvkm_runl_update_locked(struct nvkm_runl * runl,bool wait)271 nvkm_runl_update_locked(struct nvkm_runl *runl, bool wait)
272 {
273 	if (atomic_xchg(&runl->changed, 0) && runl->func->update) {
274 		runl->func->update(runl);
275 		if (wait)
276 			runl->func->wait(runl);
277 	}
278 }
279 
280 void
nvkm_runl_allow(struct nvkm_runl * runl)281 nvkm_runl_allow(struct nvkm_runl *runl)
282 {
283 	struct nvkm_fifo *fifo = runl->fifo;
284 	unsigned long flags;
285 
286 	spin_lock_irqsave(&fifo->lock, flags);
287 	if (!--runl->blocked) {
288 		RUNL_TRACE(runl, "running");
289 		runl->func->allow(runl, ~0);
290 	}
291 	spin_unlock_irqrestore(&fifo->lock, flags);
292 }
293 
294 void
nvkm_runl_block(struct nvkm_runl * runl)295 nvkm_runl_block(struct nvkm_runl *runl)
296 {
297 	struct nvkm_fifo *fifo = runl->fifo;
298 	unsigned long flags;
299 
300 	spin_lock_irqsave(&fifo->lock, flags);
301 	if (!runl->blocked++) {
302 		RUNL_TRACE(runl, "stopped");
303 		runl->func->block(runl, ~0);
304 	}
305 	spin_unlock_irqrestore(&fifo->lock, flags);
306 }
307 
308 void
nvkm_runl_fini(struct nvkm_runl * runl)309 nvkm_runl_fini(struct nvkm_runl *runl)
310 {
311 	if (runl->func->fini)
312 		runl->func->fini(runl);
313 
314 	flush_work(&runl->work);
315 }
316 
317 void
nvkm_runl_del(struct nvkm_runl * runl)318 nvkm_runl_del(struct nvkm_runl *runl)
319 {
320 	struct nvkm_engn *engn, *engt;
321 
322 	nvkm_memory_unref(&runl->mem);
323 
324 	list_for_each_entry_safe(engn, engt, &runl->engns, head) {
325 		list_del(&engn->head);
326 		kfree(engn);
327 	}
328 
329 	nvkm_chid_unref(&runl->chid);
330 	nvkm_chid_unref(&runl->cgid);
331 
332 	list_del(&runl->head);
333 	mutex_destroy(&runl->mutex);
334 	kfree(runl);
335 }
336 
337 struct nvkm_engn *
nvkm_runl_add(struct nvkm_runl * runl,int engi,const struct nvkm_engn_func * func,enum nvkm_subdev_type type,int inst)338 nvkm_runl_add(struct nvkm_runl *runl, int engi, const struct nvkm_engn_func *func,
339 	      enum nvkm_subdev_type type, int inst)
340 {
341 	struct nvkm_fifo *fifo = runl->fifo;
342 	struct nvkm_device *device = fifo->engine.subdev.device;
343 	struct nvkm_engine *engine;
344 	struct nvkm_engn *engn;
345 
346 	engine = nvkm_device_engine(device, type, inst);
347 	if (!engine) {
348 		RUNL_DEBUG(runl, "engn %d.%d[%s] not found", engi, inst, nvkm_subdev_type[type]);
349 		return NULL;
350 	}
351 
352 	if (!(engn = kzalloc(sizeof(*engn), GFP_KERNEL)))
353 		return NULL;
354 
355 	engn->func = func;
356 	engn->runl = runl;
357 	engn->id = engi;
358 	engn->engine = engine;
359 	engn->fault = -1;
360 	list_add_tail(&engn->head, &runl->engns);
361 
362 	/* Lookup MMU engine ID for fault handling. */
363 	if (device->top)
364 		engn->fault = nvkm_top_fault_id(device, engine->subdev.type, engine->subdev.inst);
365 
366 	if (engn->fault < 0 && fifo->func->mmu_fault) {
367 		const struct nvkm_enum *map = fifo->func->mmu_fault->engine;
368 
369 		while (map->name) {
370 			if (map->data2 == engine->subdev.type && map->inst == engine->subdev.inst) {
371 				engn->fault = map->value;
372 				break;
373 			}
374 			map++;
375 		}
376 	}
377 
378 	return engn;
379 }
380 
381 struct nvkm_runl *
nvkm_runl_get(struct nvkm_fifo * fifo,int runi,u32 addr)382 nvkm_runl_get(struct nvkm_fifo *fifo, int runi, u32 addr)
383 {
384 	struct nvkm_runl *runl;
385 
386 	nvkm_runl_foreach(runl, fifo) {
387 		if ((runi >= 0 && runl->id == runi) || (runi < 0 && runl->addr == addr))
388 			return runl;
389 	}
390 
391 	return NULL;
392 }
393 
394 struct nvkm_runl *
nvkm_runl_new(struct nvkm_fifo * fifo,int runi,u32 addr,int id_nr)395 nvkm_runl_new(struct nvkm_fifo *fifo, int runi, u32 addr, int id_nr)
396 {
397 	struct nvkm_subdev *subdev = &fifo->engine.subdev;
398 	struct nvkm_runl *runl;
399 	int ret;
400 
401 	if (!(runl = kzalloc(sizeof(*runl), GFP_KERNEL)))
402 		return ERR_PTR(-ENOMEM);
403 
404 	runl->func = fifo->func->runl;
405 	runl->fifo = fifo;
406 	runl->id = runi;
407 	runl->addr = addr;
408 	INIT_LIST_HEAD(&runl->engns);
409 	INIT_LIST_HEAD(&runl->cgrps);
410 	atomic_set(&runl->changed, 0);
411 	mutex_init(&runl->mutex);
412 	INIT_WORK(&runl->work, nvkm_runl_work);
413 	atomic_set(&runl->rc_triggered, 0);
414 	atomic_set(&runl->rc_pending, 0);
415 	list_add_tail(&runl->head, &fifo->runls);
416 
417 	if (!fifo->chid) {
418 		if ((ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->cgid)) ||
419 		    (ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->chid))) {
420 			RUNL_ERROR(runl, "cgid/chid: %d", ret);
421 			nvkm_runl_del(runl);
422 			return ERR_PTR(ret);
423 		}
424 	} else {
425 		runl->cgid = nvkm_chid_ref(fifo->cgid);
426 		runl->chid = nvkm_chid_ref(fifo->chid);
427 	}
428 
429 	return runl;
430 }
431