/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_ENGINE_TYPES__
#define __INTEL_ENGINE_TYPES__

#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>

#include "i915_gem.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_engine_pool_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"

/* Legacy HW Engine ID */

#define RCS0_HW		0
#define VCS0_HW		1
#define BCS0_HW		2
#define VECS0_HW	3
#define VCS1_HW		4
#define VCS2_HW		6
#define VCS3_HW		7
#define VECS1_HW	12

/* Gen11+ HW Engine class + instance */
#define RENDER_CLASS		0
#define VIDEO_DECODE_CLASS	1
#define VIDEO_ENHANCEMENT_CLASS	2
#define COPY_ENGINE_CLASS	3
#define OTHER_CLASS		4
#define MAX_ENGINE_CLASS	4
#define MAX_ENGINE_INSTANCE	3

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 8

#define I915_CMD_HASH_ORDER 9

struct dma_fence;
struct drm_i915_gem_object;
struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
struct intel_gt;
struct intel_uncore;

typedef u8 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *addr;
};

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

struct intel_engine_hangcheck {
	u64 acthd;
	u32 last_ring;
	u32 last_head;
	unsigned long action_timestamp;
	struct intel_instdone instdone;
};

struct intel_ring {
	struct kref ref;
	struct i915_vma *vma;
	void *vaddr;

	/*
	 * As we have two types of rings, one global to the engine used
	 * by ringbuffer submission and those that are exclusive to a
	 * context used by execlists, we have to play safe and allow
	 * atomic updates to the pin_count. However, the actual pinning
	 * of the context is either done during initialisation for
	 * ringbuffer submission or serialised as part of the context
	 * pinning for execlists, and so we do not need a mutex ourselves
	 * to serialise intel_ring_pin/intel_ring_unpin.
	 */
	atomic_t pin_count;

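	/*
	 * Ring bookkeeping (roughly speaking, all byte offsets from the
	 * start of the ring): @head follows the hardware read pointer,
	 * @tail is the last offset submitted to the hardware, @emit is
	 * where the next command dword will be written, and @space is how
	 * much room remains before we would catch up with @head.
	 */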
	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are expressed in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position, also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine IDs definitions.
 * Keep instances of the same engine type together.
 */
enum intel_engine_id {
	RCS0 = 0,
	BCS0,
	VCS0,
	VCS1,
	VCS2,
	VCS3,
#define _VCS(n) (VCS0 + (n))
	VECS0,
	VECS1,
#define _VECS(n) (VECS0 + (n))
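	/* e.g. _VCS(2) == VCS0 + 2 == VCS2, relying on the ordering above */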
	I915_NUM_ENGINES
};

struct st_preempt_hang {
	struct completion completion;
	unsigned int count;
	bool inject_hang;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

#define EXECLIST_MAX_PORTS 2
	/**
	 * @active: the currently known context executing on HW
	 */
	struct i915_request * const *active;
	/**
	 * @inflight: the set of contexts submitted and acknowledged by HW
	 *
	 * The set of inflight contexts is managed by reading CS events
	 * from the HW. On a context-switch event (not preemption), we
	 * know the HW has transitioned from port0 to port1, and we
	 * advance our inflight/active tracking accordingly.
	 */
	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
	/**
	 * @pending: the next set of contexts submitted to ELSP
	 *
	 * We store the array of contexts that we submit to HW (via ELSP) and
	 * promote them to the inflight array once HW has signaled the
	 * preemption or idle-to-active event.
	 */
	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
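
	/*
	 * A sketch of the promotion flow described above:
	 *
	 *	write to ELSP                    -> pending[] is populated
	 *	idle-to-active/preemption event  -> pending[] becomes inflight[]
	 *	context-switch event             -> active advances along inflight[]
	 */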

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @switch_priority_hint: Second context priority.
	 *
	 * We submit multiple contexts to the HW simultaneously and would
	 * like to occasionally switch between them to emulate timeslicing.
	 * To know when timeslicing is suitable, we track the priority of
	 * the context submitted second.
	 */
	int switch_priority_hint;

	/**
	 * @queue_priority_hint: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 * However, since we may have recorded the priority of an inflight
	 * request that we wanted to preempt but that has since completed, at
	 * the time of dequeuing the priority hint may no longer match the
	 * highest available request priority.
	 */
	int queue_priority_hint;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;
	struct rb_root_cached virtual;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u32 *csb_status;

	/**
	 * @csb_size: context status buffer FIFO size
	 */
	u8 csb_size;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	struct intel_uncore *uncore;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	enum intel_engine_id legacy_idx;

	unsigned int hw_id;
	unsigned int guc_id;

	intel_engine_mask_t mask;

	u8 class;
	u8 instance;

	u8 uabi_class;
	u8 uabi_instance;

	u32 context_size;
	u32 mmio_base;

	u32 uabi_capabilities;

	struct rb_node uabi_node;

	struct intel_sseu sseu;

	struct {
		spinlock_t lock;
		struct list_head requests;
	} active;

	struct llist_head barrier_tasks;

	struct intel_context *kernel_context; /* pinned */

	intel_engine_mask_t saturated; /* submitting semaphores too late? */

	unsigned long serial;

	unsigned long wakeref_serial;
	struct intel_wakeref wakeref;
	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	struct {
		struct intel_ring *ring;
		struct intel_timeline *timeline;
	} legacy;

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock;
		struct list_head signalers;

		struct irq_work irq_work; /* for use from inside irq_lock */

		unsigned int irq_enabled;

		bool irq_armed;
	} breadcrumbs;

	struct intel_engine_pmu {
		/**
		 * @enable: Bitmask of enabled sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 *
		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
		 */
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
	} pmu;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct intel_engine_pool pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_wa_list ctx_wa_list;
	struct i915_wa_list wa_list;
	struct i915_wa_list whitelist;

	u32             irq_keep_mask; /* always keep these interrupts */
	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
	void		(*irq_enable)(struct intel_engine_cs *engine);
	void		(*irq_disable)(struct intel_engine_cs *engine);

	int		(*resume)(struct intel_engine_cs *engine);

	struct {
		void (*prepare)(struct intel_engine_cs *engine);
		void (*reset)(struct intel_engine_cs *engine, bool stalled);
		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void		(*park)(struct intel_engine_cs *engine);
	void		(*unpark)(struct intel_engine_cs *engine);

	void		(*set_default_submission)(struct intel_engine_cs *engine);

	const struct intel_context_ops *cops;

	int		(*request_alloc)(struct i915_request *rq);

	int		(*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
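	/*
	 * Example (a sketch): callers pass a mask of the EMIT_* flags above
	 * in @mode, e.g.
	 *
	 *	err = engine->emit_flush(rq, EMIT_FLUSH | EMIT_INVALIDATE);
	 */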
	int		(*emit_bb_start)(struct i915_request *rq,
					 u64 offset, u32 length,
					 unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
	int		 (*emit_init_breadcrumb)(struct i915_request *rq);
	u32		*(*emit_fini_breadcrumb)(struct i915_request *rq,
						 u32 *cs);
	unsigned int	emit_fini_breadcrumb_dw;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void		(*submit_request)(struct i915_request *rq);

	/*
	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
	 * request down to the bonded pairs.
	 */
	void            (*bond_execute)(struct i915_request *rq,
					struct dma_fence *signal);

	/*
	 * Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 */
	void		(*schedule)(struct i915_request *request,
				    const struct i915_sched_attr *attr);

	/*
	 * Cancel all requests on the hardware, or queued for execution.
	 * This should only cancel the ready requests that have been
	 * submitted to the engine (via the engine->submit_request callback).
	 * This is called when marking the device as wedged.
	 */
	void		(*cancel_requests)(struct intel_engine_cs *engine);

	void		(*destroy)(struct intel_engine_cs *engine);

	struct intel_engine_execlists execlists;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL       BIT(5)
#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);
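	/*
	 * A sketch of how the parser uses this (the actual decoding lives in
	 * i915_cmd_parser.c): for a command without a table entry,
	 *
	 *	mask = engine->get_cmd_length_mask(cmd_header);
	 *	if (!mask)
	 *		reject the batch;	(unrecognised command)
	 *	else
	 *		decode the length from (cmd_header & mask);
	 */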

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, active as active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where the engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;
};
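
/*
 * A sketch of how the busy stats above are consumed (reads are made under
 * stats.lock):
 *
 *	busy = engine->stats.total;
 *	if (engine->stats.active)
 *		busy += ktime_sub(ktime_get(), engine->stats.start);
 */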

static inline bool
intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_USING_CMD_PARSER;
}

static inline bool
intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool
intel_engine_has_semaphores(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
}

static inline bool
intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
}

static inline bool
intel_engine_is_virtual(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_IS_VIRTUAL;
}


#define instdone_slice_mask(dev_priv__) \
	(IS_GEN(dev_priv__, 7) ? \
	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(IS_GEN(dev_priv__, 7) ? \
	 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
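
/*
 * Example use of the iterator above (a sketch; the real consumer is
 * intel_engine_get_instdone(), where read_subslice_reg() is a helper that
 * performs a steered per-subslice MMIO read):
 *
 *	for_each_instdone_slice_subslice(i915, slice, subslice)
 *		instdone->sampler[slice][subslice] =
 *			read_subslice_reg(i915, slice, subslice,
 *					  GEN7_SAMPLER_INSTDONE);
 */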

#endif /* __INTEL_ENGINE_TYPES__ */