1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2014 Intel Corporation
4 */
5
6 #include <linux/circ_buf.h>
7
8 #include "gem/i915_gem_context.h"
9 #include "gt/gen8_engine_cs.h"
10 #include "gt/intel_breadcrumbs.h"
11 #include "gt/intel_context.h"
12 #include "gt/intel_engine_heartbeat.h"
13 #include "gt/intel_engine_pm.h"
14 #include "gt/intel_engine_regs.h"
15 #include "gt/intel_gpu_commands.h"
16 #include "gt/intel_gt.h"
17 #include "gt/intel_gt_clock_utils.h"
18 #include "gt/intel_gt_irq.h"
19 #include "gt/intel_gt_pm.h"
20 #include "gt/intel_gt_regs.h"
21 #include "gt/intel_gt_requests.h"
22 #include "gt/intel_lrc.h"
23 #include "gt/intel_lrc_reg.h"
24 #include "gt/intel_mocs.h"
25 #include "gt/intel_ring.h"
26
27 #include "intel_guc_ads.h"
28 #include "intel_guc_capture.h"
29 #include "intel_guc_submission.h"
30
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33
34 /**
35 * DOC: GuC-based command submission
36 *
37 * The Scratch registers:
38 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver writes
39 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
40 * triggers an interrupt on the GuC via another register write (0xC4C8).
41 * Firmware writes a success/fail code back to the action register after it
42 * processes the request. The kernel driver polls waiting for this update and
43 * then proceeds.
44 *
45 * Command Transport buffers (CTBs):
46 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
47 * - G2H) are a message interface between the i915 and GuC.
48 *
49 * Context registration:
50 * Before a context can be submitted it must be registered with the GuC via a
51 * H2G. A unique guc_id is associated with each context. The context is either
52 * registered at request creation time (normal operation) or at submission time
53 * (abnormal operation, e.g. after a reset).
54 *
55 * Context submission:
56 * The i915 updates the LRC tail value in memory. The i915 must enable the
57 * scheduling of the context within the GuC for the GuC to actually consider it.
58 * Therefore, the first time a disabled context is submitted we use a schedule
59 * enable H2G, while follow up submissions are done via the context submit H2G,
60 * which informs the GuC that a previously enabled context has new work
61 * available.
62 *
63 * Context unpin:
64 * To unpin a context a H2G is used to disable scheduling. When the
65 * corresponding G2H returns indicating the scheduling disable operation has
66 * completed it is safe to unpin the context. While a disable is in flight it
67 * isn't safe to resubmit the context so a fence is used to stall all future
68 * requests of that context until the G2H is returned.
69 *
70 * Context deregistration:
71 * Before a context can be destroyed or if we steal its guc_id we must
72 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
73 * safe to submit anything to this guc_id until the deregister completes so a
74 * fence is used to stall all requests associated with this guc_id until the
75 * corresponding G2H returns indicating the guc_id has been deregistered.
76 *
77 * submission_state.guc_ids:
78 * Unique number associated with private GuC context data passed in during
79 * context registration / submission / deregistration. 64k available. Simple ida
80 * is used for allocation.
81 *
82 * Stealing guc_ids:
83 * If no guc_ids are available they can be stolen from another context at
84 * request creation time if that context is unpinned. If a guc_id can't be found
85 * we punt this problem to the user as we believe this is near impossible to hit
86 * during normal use cases.
87 *
88 * Locking:
89 * In the GuC submission code we have 3 basic spin locks which protect
90 * everything. Details about each below.
91 *
92 * sched_engine->lock
93 * This is the submission lock for all contexts that share an i915 schedule
94 * engine (sched_engine), thus only one of the contexts which share a
95 * sched_engine can be submitting at a time. Currently only one sched_engine is
96 * used for all of GuC submission but that could change in the future.
97 *
98 * guc->submission_state.lock
99 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
100 * list.
101 *
102 * ce->guc_state.lock
103 * Protects everything under ce->guc_state. Ensures that a context is in the
104 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
105 * on a disabled context (bad idea), we don't issue a schedule enable when a
106 * schedule disable is in flight, etc... Also protects list of inflight requests
107 * on the context and the priority management state. Lock is individual to each
108 * context.
109 *
110 * Lock ordering rules:
111 * sched_engine->lock -> ce->guc_state.lock
112 * guc->submission_state.lock -> ce->guc_state.lock
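 *
 * As a purely illustrative sketch (not lifted from any one function), a path
 * that needs both the submission lock and a context's GuC state lock nests
 * them as:
 *
 *	spin_lock_irqsave(&sched_engine->lock, flags);
 *	spin_lock(&ce->guc_state.lock);
 *	... update ce->guc_state.sched_state, queue the H2G, etc. ...
 *	spin_unlock(&ce->guc_state.lock);
 *	spin_unlock_irqrestore(&sched_engine->lock, flags);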
113 *
114 * Reset races:
115 * When a full GT reset is triggered it is assumed that some G2H responses to
116 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
117 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
118 * contexts, release guc_ids, etc...). When this occurs we can scrub the
119 * context state and clean up appropriately; however, this is quite racy.
120 * To avoid races, the reset code must disable submission before scrubbing for
121 * the missing G2H, while the submission code must check for submission being
122 * disabled and skip sending H2Gs and updating context states when it is. Both
123 * sides must also make sure to hold the relevant locks.
124 */
125
126 /* GuC Virtual Engine */
127 struct guc_virtual_engine {
128 struct intel_engine_cs base;
129 struct intel_context context;
130 };
131
132 static struct intel_context *
133 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
134 unsigned long flags);
135
136 static struct intel_context *
137 guc_create_parallel(struct intel_engine_cs **engines,
138 unsigned int num_siblings,
139 unsigned int width);
140
141 #define GUC_REQUEST_SIZE 64 /* bytes */
142
143 /*
144 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
145 * per the GuC submission interface. A different allocation algorithm is used
146 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
147 * partition the guc_id space. We believe the number of multi-lrc contexts in
148 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
149 * multi-lrc.
150 */
151 #define NUMBER_MULTI_LRC_GUC_ID(guc) \
152 ((guc)->submission_state.num_guc_ids / 16)
153
154 /*
155 * Below is a set of functions which control the GuC scheduling state which
156 * require a lock.
157 */
158 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0)
159 #define SCHED_STATE_DESTROYED BIT(1)
160 #define SCHED_STATE_PENDING_DISABLE BIT(2)
161 #define SCHED_STATE_BANNED BIT(3)
162 #define SCHED_STATE_ENABLED BIT(4)
163 #define SCHED_STATE_PENDING_ENABLE BIT(5)
164 #define SCHED_STATE_REGISTERED BIT(6)
165 #define SCHED_STATE_POLICY_REQUIRED BIT(7)
166 #define SCHED_STATE_BLOCKED_SHIFT 8
167 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
168 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
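/*
 * Note that SCHED_STATE_BLOCKED is not a single flag: bits 8..19 of
 * sched_state form a counter of nested blocks, manipulated via
 * incr_context_blocked() / decr_context_blocked() below.
 */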
169
170 static inline void init_sched_state(struct intel_context *ce)
171 {
172 lockdep_assert_held(&ce->guc_state.lock);
173 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
174 }
175
176 __maybe_unused
177 static bool sched_state_is_init(struct intel_context *ce)
178 {
179 /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
180 return !(ce->guc_state.sched_state &
181 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
182 }
183
184 static inline bool
185 context_wait_for_deregister_to_register(struct intel_context *ce)
186 {
187 return ce->guc_state.sched_state &
188 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
189 }
190
191 static inline void
192 set_context_wait_for_deregister_to_register(struct intel_context *ce)
193 {
194 lockdep_assert_held(&ce->guc_state.lock);
195 ce->guc_state.sched_state |=
196 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
197 }
198
199 static inline void
200 clr_context_wait_for_deregister_to_register(struct intel_context *ce)
201 {
202 lockdep_assert_held(&ce->guc_state.lock);
203 ce->guc_state.sched_state &=
204 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
205 }
206
207 static inline bool
208 context_destroyed(struct intel_context *ce)
209 {
210 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
211 }
212
213 static inline void
214 set_context_destroyed(struct intel_context *ce)
215 {
216 lockdep_assert_held(&ce->guc_state.lock);
217 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
218 }
219
220 static inline bool context_pending_disable(struct intel_context *ce)
221 {
222 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
223 }
224
225 static inline void set_context_pending_disable(struct intel_context *ce)
226 {
227 lockdep_assert_held(&ce->guc_state.lock);
228 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
229 }
230
231 static inline void clr_context_pending_disable(struct intel_context *ce)
232 {
233 lockdep_assert_held(&ce->guc_state.lock);
234 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
235 }
236
237 static inline bool context_banned(struct intel_context *ce)
238 {
239 return ce->guc_state.sched_state & SCHED_STATE_BANNED;
240 }
241
242 static inline void set_context_banned(struct intel_context *ce)
243 {
244 lockdep_assert_held(&ce->guc_state.lock);
245 ce->guc_state.sched_state |= SCHED_STATE_BANNED;
246 }
247
248 static inline void clr_context_banned(struct intel_context *ce)
249 {
250 lockdep_assert_held(&ce->guc_state.lock);
251 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
252 }
253
254 static inline bool context_enabled(struct intel_context *ce)
255 {
256 return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
257 }
258
259 static inline void set_context_enabled(struct intel_context *ce)
260 {
261 lockdep_assert_held(&ce->guc_state.lock);
262 ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
263 }
264
265 static inline void clr_context_enabled(struct intel_context *ce)
266 {
267 lockdep_assert_held(&ce->guc_state.lock);
268 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
269 }
270
271 static inline bool context_pending_enable(struct intel_context *ce)
272 {
273 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
274 }
275
276 static inline void set_context_pending_enable(struct intel_context *ce)
277 {
278 lockdep_assert_held(&ce->guc_state.lock);
279 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
280 }
281
282 static inline void clr_context_pending_enable(struct intel_context *ce)
283 {
284 lockdep_assert_held(&ce->guc_state.lock);
285 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
286 }
287
288 static inline bool context_registered(struct intel_context *ce)
289 {
290 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
291 }
292
293 static inline void set_context_registered(struct intel_context *ce)
294 {
295 lockdep_assert_held(&ce->guc_state.lock);
296 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
297 }
298
299 static inline void clr_context_registered(struct intel_context *ce)
300 {
301 lockdep_assert_held(&ce->guc_state.lock);
302 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
303 }
304
305 static inline bool context_policy_required(struct intel_context *ce)
306 {
307 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
308 }
309
310 static inline void set_context_policy_required(struct intel_context *ce)
311 {
312 lockdep_assert_held(&ce->guc_state.lock);
313 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
314 }
315
316 static inline void clr_context_policy_required(struct intel_context *ce)
317 {
318 lockdep_assert_held(&ce->guc_state.lock);
319 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
320 }
321
322 static inline u32 context_blocked(struct intel_context *ce)
323 {
324 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
325 SCHED_STATE_BLOCKED_SHIFT;
326 }
327
328 static inline void incr_context_blocked(struct intel_context *ce)
329 {
330 lockdep_assert_held(&ce->guc_state.lock);
331
332 ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
333
334 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */
335 }
336
337 static inline void decr_context_blocked(struct intel_context *ce)
338 {
339 lockdep_assert_held(&ce->guc_state.lock);
340
341 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
342
343 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
344 }
345
346 static inline bool context_has_committed_requests(struct intel_context *ce)
347 {
348 return !!ce->guc_state.number_committed_requests;
349 }
350
351 static inline void incr_context_committed_requests(struct intel_context *ce)
352 {
353 lockdep_assert_held(&ce->guc_state.lock);
354 ++ce->guc_state.number_committed_requests;
355 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
356 }
357
358 static inline void decr_context_committed_requests(struct intel_context *ce)
359 {
360 lockdep_assert_held(&ce->guc_state.lock);
361 --ce->guc_state.number_committed_requests;
362 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
363 }
364
365 static struct intel_context *
366 request_to_scheduling_context(struct i915_request *rq)
367 {
368 return intel_context_to_parent(rq->context);
369 }
370
371 static inline bool context_guc_id_invalid(struct intel_context *ce)
372 {
373 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
374 }
375
376 static inline void set_context_guc_id_invalid(struct intel_context *ce)
377 {
378 ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
379 }
380
381 static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
382 {
383 return &ce->engine->gt->uc.guc;
384 }
385
386 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
387 {
388 return rb_entry(rb, struct i915_priolist, node);
389 }
390
391 /*
392 * When using multi-lrc submission a scratch memory area is reserved in the
393 * parent's context state for the process descriptor, work queue, and handshake
394 * between the parent + children contexts to insert safe preemption points
395 * between each of the BBs. Currently the scratch area is sized to a page.
396 *
397 * The layout of this scratch area is below:
398 * 0 guc_process_desc
399 * + sizeof(struct guc_process_desc) child go
400 * + CACHELINE_BYTES child join[0]
401 * ...
402 * + CACHELINE_BYTES child join[n - 1]
403 * ... unused
404 * PARENT_SCRATCH_SIZE / 2 work queue start
405 * ... work queue
406 * PARENT_SCRATCH_SIZE - 1 work queue end
407 */
408 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2)
409 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)
410
411 struct sync_semaphore {
412 u32 semaphore;
413 u8 unused[CACHELINE_BYTES - sizeof(u32)];
414 };
415
416 struct parent_scratch {
417 union guc_descs {
418 struct guc_sched_wq_desc wq_desc;
419 struct guc_process_desc_v69 pdesc;
420 } descs;
421
422 struct sync_semaphore go;
423 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
424
425 u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
426 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
427
428 u32 wq[WQ_SIZE / sizeof(u32)];
429 };
430
431 static u32 __get_parent_scratch_offset(struct intel_context *ce)
432 {
433 GEM_BUG_ON(!ce->parallel.guc.parent_page);
434
435 return ce->parallel.guc.parent_page * PAGE_SIZE;
436 }
437
438 static u32 __get_wq_offset(struct intel_context *ce)
439 {
440 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
441
442 return __get_parent_scratch_offset(ce) + WQ_OFFSET;
443 }
444
445 static struct parent_scratch *
446 __get_parent_scratch(struct intel_context *ce)
447 {
448 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
449 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
450
451 /*
452 * Need to subtract LRC_STATE_OFFSET here as the
453 * parallel.guc.parent_page is the offset into ce->state while
454 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
455 */
456 return (struct parent_scratch *)
457 (ce->lrc_reg_state +
458 ((__get_parent_scratch_offset(ce) -
459 LRC_STATE_OFFSET) / sizeof(u32)));
460 }
461
462 static struct guc_process_desc_v69 *
463 __get_process_desc_v69(struct intel_context *ce)
464 {
465 struct parent_scratch *ps = __get_parent_scratch(ce);
466
467 return &ps->descs.pdesc;
468 }
469
470 static struct guc_sched_wq_desc *
471 __get_wq_desc_v70(struct intel_context *ce)
472 {
473 struct parent_scratch *ps = __get_parent_scratch(ce);
474
475 return &ps->descs.wq_desc;
476 }
477
478 static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
479 {
480 /*
481 * Check for space in the work queue. We cache the head pointer in the
482 * intel_context structure in order to reduce the number of accesses to
483 * shared GPU memory, which may be across a PCIe bus.
484 */
485 #define AVAILABLE_SPACE \
486 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
487 if (wqi_size > AVAILABLE_SPACE) {
488 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
489
490 if (wqi_size > AVAILABLE_SPACE)
491 return NULL;
492 }
493 #undef AVAILABLE_SPACE
494
495 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
496 }
497
498 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
499 {
500 struct intel_context *ce = xa_load(&guc->context_lookup, id);
501
502 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);
503
504 return ce;
505 }
506
507 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
508 {
509 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
510
511 if (!base)
512 return NULL;
513
514 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
515
516 return &base[index];
517 }
518
519 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
520 {
521 u32 size;
522 int ret;
523
524 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
525 GUC_MAX_CONTEXT_ID);
526 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
527 (void **)&guc->lrc_desc_pool_vaddr_v69);
528 if (ret)
529 return ret;
530
531 return 0;
532 }
533
534 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
535 {
536 if (!guc->lrc_desc_pool_vaddr_v69)
537 return;
538
539 guc->lrc_desc_pool_vaddr_v69 = NULL;
540 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
541 }
542
543 static inline bool guc_submission_initialized(struct intel_guc *guc)
544 {
545 return guc->submission_initialized;
546 }
547
548 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
549 {
550 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
551
552 if (desc)
553 memset(desc, 0, sizeof(*desc));
554 }
555
556 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
557 {
558 return __get_context(guc, id);
559 }
560
561 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
562 struct intel_context *ce)
563 {
564 unsigned long flags;
565
566 /*
567 * xarray API doesn't have an xa_store_irqsave wrapper, so call the
568 * lower level functions directly.
569 */
570 xa_lock_irqsave(&guc->context_lookup, flags);
571 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
572 xa_unlock_irqrestore(&guc->context_lookup, flags);
573 }
574
575 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
576 {
577 unsigned long flags;
578
579 if (unlikely(!guc_submission_initialized(guc)))
580 return;
581
582 _reset_lrc_desc_v69(guc, id);
583
584 /*
585 * xarray API doesn't have an xa_erase_irqsave wrapper, so call
586 * the lower level functions directly.
587 */
588 xa_lock_irqsave(&guc->context_lookup, flags);
589 __xa_erase(&guc->context_lookup, id);
590 xa_unlock_irqrestore(&guc->context_lookup, flags);
591 }
592
593 static void decr_outstanding_submission_g2h(struct intel_guc *guc)
594 {
595 if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
596 wake_up_all(&guc->ct.wq);
597 }
598
599 static int guc_submission_send_busy_loop(struct intel_guc *guc,
600 const u32 *action,
601 u32 len,
602 u32 g2h_len_dw,
603 bool loop)
604 {
605 /*
606 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
607 * so we don't handle the case where we don't get a reply because we
608 * aborted the send due to the channel being busy.
609 */
610 GEM_BUG_ON(g2h_len_dw && !loop);
611
612 if (g2h_len_dw)
613 atomic_inc(&guc->outstanding_submission_g2h);
614
615 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
616 }
617
618 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
619 atomic_t *wait_var,
620 bool interruptible,
621 long timeout)
622 {
623 const int state = interruptible ?
624 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
625 DEFINE_WAIT(wait);
626
627 might_sleep();
628 GEM_BUG_ON(timeout < 0);
629
630 if (!atomic_read(wait_var))
631 return 0;
632
633 if (!timeout)
634 return -ETIME;
635
636 for (;;) {
637 prepare_to_wait(&guc->ct.wq, &wait, state);
638
639 if (!atomic_read(wait_var))
640 break;
641
642 if (signal_pending_state(state, current)) {
643 timeout = -EINTR;
644 break;
645 }
646
647 if (!timeout) {
648 timeout = -ETIME;
649 break;
650 }
651
652 timeout = io_schedule_timeout(timeout);
653 }
654 finish_wait(&guc->ct.wq, &wait);
655
656 return (timeout < 0) ? timeout : 0;
657 }
658
659 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
660 {
661 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
662 return 0;
663
664 return intel_guc_wait_for_pending_msg(guc,
665 &guc->outstanding_submission_g2h,
666 true, timeout);
667 }
668
669 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
670 static int try_context_registration(struct intel_context *ce, bool loop);
671
672 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
673 {
674 int err = 0;
675 struct intel_context *ce = request_to_scheduling_context(rq);
676 u32 action[3];
677 int len = 0;
678 u32 g2h_len_dw = 0;
679 bool enabled;
680
681 lockdep_assert_held(&rq->engine->sched_engine->lock);
682
683 /*
684 * Corner case where requests were sitting in the priority list or a
685 * request was resubmitted after the context was banned.
686 */
687 if (unlikely(!intel_context_is_schedulable(ce))) {
688 i915_request_put(i915_request_mark_eio(rq));
689 intel_engine_signal_breadcrumbs(ce->engine);
690 return 0;
691 }
692
693 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
694 GEM_BUG_ON(context_guc_id_invalid(ce));
695
696 if (context_policy_required(ce)) {
697 err = guc_context_policy_init_v70(ce, false);
698 if (err)
699 return err;
700 }
701
702 spin_lock(&ce->guc_state.lock);
703
704 /*
705 * The request / context will be run on the hardware when scheduling
706 * gets enabled in the unblock. For multi-lrc we still submit the
707 * context to move the LRC tails.
708 */
709 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
710 goto out;
711
712 enabled = context_enabled(ce) || context_blocked(ce);
713
714 if (!enabled) {
715 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
716 action[len++] = ce->guc_id.id;
717 action[len++] = GUC_CONTEXT_ENABLE;
718 set_context_pending_enable(ce);
719 intel_context_get(ce);
720 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
721 } else {
722 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
723 action[len++] = ce->guc_id.id;
724 }
725
726 err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
727 if (!enabled && !err) {
728 trace_intel_context_sched_enable(ce);
729 atomic_inc(&guc->outstanding_submission_g2h);
730 set_context_enabled(ce);
731
732 /*
733 * Without multi-lrc KMD does the submission step (moving the
734 * lrc tail) so enabling scheduling is sufficient to submit the
735 * context. This isn't the case in multi-lrc submission as the
736 * GuC needs to move the tails, hence the need for another H2G
737 * to submit a multi-lrc context after enabling scheduling.
738 */
739 if (intel_context_is_parent(ce)) {
740 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
741 err = intel_guc_send_nb(guc, action, len - 1, 0);
742 }
743 } else if (!enabled) {
744 clr_context_pending_enable(ce);
745 intel_context_put(ce);
746 }
747 if (likely(!err))
748 trace_i915_request_guc_submit(rq);
749
750 out:
751 spin_unlock(&ce->guc_state.lock);
752 return err;
753 }
754
755 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
756 {
757 int ret = __guc_add_request(guc, rq);
758
759 if (unlikely(ret == -EBUSY)) {
760 guc->stalled_request = rq;
761 guc->submission_stall_reason = STALL_ADD_REQUEST;
762 }
763
764 return ret;
765 }
766
767 static inline void guc_set_lrc_tail(struct i915_request *rq)
768 {
769 rq->context->lrc_reg_state[CTX_RING_TAIL] =
770 intel_ring_set_tail(rq->ring, rq->tail);
771 }
772
773 static inline int rq_prio(const struct i915_request *rq)
774 {
775 return rq->sched.attr.priority;
776 }
777
778 static bool is_multi_lrc_rq(struct i915_request *rq)
779 {
780 return intel_context_is_parallel(rq->context);
781 }
782
783 static bool can_merge_rq(struct i915_request *rq,
784 struct i915_request *last)
785 {
786 return request_to_scheduling_context(rq) ==
787 request_to_scheduling_context(last);
788 }
789
790 static u32 wq_space_until_wrap(struct intel_context *ce)
791 {
792 return (WQ_SIZE - ce->parallel.guc.wqi_tail);
793 }
794
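/*
 * Publish a previously written work queue item: make its contents visible,
 * advance the cached software tail (with wrap-around) and mirror it into the
 * shared tail pointer consumed by the GuC.
 */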
795 static void write_wqi(struct intel_context *ce, u32 wqi_size)
796 {
797 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
798
799 /*
800 * Ensure WQI are visible before updating tail
801 */
802 intel_guc_write_barrier(ce_to_guc(ce));
803
804 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
805 (WQ_SIZE - 1);
806 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
807 }
808
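/*
 * Pad the remainder of the work queue with a single NOOP item so that the
 * next WQI can start again at offset 0 instead of wrapping mid-item.
 */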
809 static int guc_wq_noop_append(struct intel_context *ce)
810 {
811 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
812 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
813
814 if (!wqi)
815 return -EBUSY;
816
817 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
818
819 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
820 FIELD_PREP(WQ_LEN_MASK, len_dw);
821 ce->parallel.guc.wqi_tail = 0;
822
823 return 0;
824 }
825
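/*
 * A multi-LRC work queue item consists of a header dword, the parent's LRCA,
 * the guc_id + parent ring tail, a fence_id, and then one ring tail per
 * child, hence the (number_children + 4) dword sizing below.
 */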
826 static int __guc_wq_item_append(struct i915_request *rq)
827 {
828 struct intel_context *ce = request_to_scheduling_context(rq);
829 struct intel_context *child;
830 unsigned int wqi_size = (ce->parallel.number_children + 4) *
831 sizeof(u32);
832 u32 *wqi;
833 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
834 int ret;
835
836 /* Ensure the context is in the correct state before updating the work queue */
837 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
838 GEM_BUG_ON(context_guc_id_invalid(ce));
839 GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
840 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));
841
842 /* Insert NOOP if this work queue item will wrap the tail pointer. */
843 if (wqi_size > wq_space_until_wrap(ce)) {
844 ret = guc_wq_noop_append(ce);
845 if (ret)
846 return ret;
847 }
848
849 wqi = get_wq_pointer(ce, wqi_size);
850 if (!wqi)
851 return -EBUSY;
852
853 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
854
855 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
856 FIELD_PREP(WQ_LEN_MASK, len_dw);
857 *wqi++ = ce->lrc.lrca;
858 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
859 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
860 *wqi++ = 0; /* fence_id */
861 for_each_child(ce, child)
862 *wqi++ = child->ring->tail / sizeof(u64);
863
864 write_wqi(ce, wqi_size);
865
866 return 0;
867 }
868
869 static int guc_wq_item_append(struct intel_guc *guc,
870 struct i915_request *rq)
871 {
872 struct intel_context *ce = request_to_scheduling_context(rq);
873 int ret;
874
875 if (unlikely(!intel_context_is_schedulable(ce)))
876 return 0;
877
878 ret = __guc_wq_item_append(rq);
879 if (unlikely(ret == -EBUSY)) {
880 guc->stalled_request = rq;
881 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
882 }
883
884 return ret;
885 }
886
887 static bool multi_lrc_submit(struct i915_request *rq)
888 {
889 struct intel_context *ce = request_to_scheduling_context(rq);
890
891 intel_ring_set_tail(rq->ring, rq->tail);
892
893 /*
894 * We expect the front end (execbuf IOCTL) to set this flag on the last
895 * request generated from a multi-BB submission. This indicates to the
896 * backend (GuC interface) that we should submit this context thus
897 * submitting all the requests generated in parallel.
898 */
899 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
900 !intel_context_is_schedulable(ce);
901 }
902
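/*
 * Pull requests off the priority queue and hand them to the GuC one
 * scheduling context at a time: register the context if needed, move the
 * LRC tail (or append a WQI for multi-LRC), then send the schedule enable /
 * context submit H2G. Returns non-zero if something was submitted so the
 * tasklet keeps draining the queue.
 */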
903 static int guc_dequeue_one_context(struct intel_guc *guc)
904 {
905 struct i915_sched_engine * const sched_engine = guc->sched_engine;
906 struct i915_request *last = NULL;
907 bool submit = false;
908 struct rb_node *rb;
909 int ret;
910
911 lockdep_assert_held(&sched_engine->lock);
912
913 if (guc->stalled_request) {
914 submit = true;
915 last = guc->stalled_request;
916
917 switch (guc->submission_stall_reason) {
918 case STALL_REGISTER_CONTEXT:
919 goto register_context;
920 case STALL_MOVE_LRC_TAIL:
921 goto move_lrc_tail;
922 case STALL_ADD_REQUEST:
923 goto add_request;
924 default:
925 MISSING_CASE(guc->submission_stall_reason);
926 }
927 }
928
929 while ((rb = rb_first_cached(&sched_engine->queue))) {
930 struct i915_priolist *p = to_priolist(rb);
931 struct i915_request *rq, *rn;
932
933 priolist_for_each_request_consume(rq, rn, p) {
934 if (last && !can_merge_rq(rq, last))
935 goto register_context;
936
937 list_del_init(&rq->sched.link);
938
939 __i915_request_submit(rq);
940
941 trace_i915_request_in(rq, 0);
942 last = rq;
943
944 if (is_multi_lrc_rq(rq)) {
945 /*
946 * We need to coalesce all multi-lrc requests in
947 * a relationship into a single H2G. We are
948 * guaranteed that all of these requests will be
949 * submitted sequentially.
950 */
951 if (multi_lrc_submit(rq)) {
952 submit = true;
953 goto register_context;
954 }
955 } else {
956 submit = true;
957 }
958 }
959
960 rb_erase_cached(&p->node, &sched_engine->queue);
961 i915_priolist_free(p);
962 }
963
964 register_context:
965 if (submit) {
966 struct intel_context *ce = request_to_scheduling_context(last);
967
968 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
969 intel_context_is_schedulable(ce))) {
970 ret = try_context_registration(ce, false);
971 if (unlikely(ret == -EPIPE)) {
972 goto deadlk;
973 } else if (ret == -EBUSY) {
974 guc->stalled_request = last;
975 guc->submission_stall_reason =
976 STALL_REGISTER_CONTEXT;
977 goto schedule_tasklet;
978 } else if (ret != 0) {
979 GEM_WARN_ON(ret); /* Unexpected */
980 goto deadlk;
981 }
982 }
983
984 move_lrc_tail:
985 if (is_multi_lrc_rq(last)) {
986 ret = guc_wq_item_append(guc, last);
987 if (ret == -EBUSY) {
988 goto schedule_tasklet;
989 } else if (ret != 0) {
990 GEM_WARN_ON(ret); /* Unexpected */
991 goto deadlk;
992 }
993 } else {
994 guc_set_lrc_tail(last);
995 }
996
997 add_request:
998 ret = guc_add_request(guc, last);
999 if (unlikely(ret == -EPIPE)) {
1000 goto deadlk;
1001 } else if (ret == -EBUSY) {
1002 goto schedule_tasklet;
1003 } else if (ret != 0) {
1004 GEM_WARN_ON(ret); /* Unexpected */
1005 goto deadlk;
1006 }
1007 }
1008
1009 guc->stalled_request = NULL;
1010 guc->submission_stall_reason = STALL_NONE;
1011 return submit;
1012
1013 deadlk:
1014 sched_engine->tasklet.callback = NULL;
1015 tasklet_disable_nosync(&sched_engine->tasklet);
1016 return false;
1017
1018 schedule_tasklet:
1019 tasklet_schedule(&sched_engine->tasklet);
1020 return false;
1021 }
1022
1023 static void guc_submission_tasklet(struct tasklet_struct *t)
1024 {
1025 struct i915_sched_engine *sched_engine =
1026 from_tasklet(sched_engine, t, tasklet);
1027 unsigned long flags;
1028 bool loop;
1029
1030 spin_lock_irqsave(&sched_engine->lock, flags);
1031
1032 do {
1033 loop = guc_dequeue_one_context(sched_engine->private_data);
1034 } while (loop);
1035
1036 i915_sched_engine_reset_on_empty(sched_engine);
1037
1038 spin_unlock_irqrestore(&sched_engine->lock, flags);
1039 }
1040
1041 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
1042 {
1043 if (iir & GT_RENDER_USER_INTERRUPT)
1044 intel_engine_signal_breadcrumbs(engine);
1045 }
1046
1047 static void __guc_context_destroy(struct intel_context *ce);
1048 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
1049 static void guc_signal_context_fence(struct intel_context *ce);
1050 static void guc_cancel_context_requests(struct intel_context *ce);
1051 static void guc_blocked_fence_complete(struct intel_context *ce);
1052
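/*
 * After a reset, walk every context known to the GuC and clear out any state
 * that was waiting on a G2H which will now never arrive (pending enable /
 * disable, deregister, destroy), dropping the references those G2Hs would
 * have released.
 */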
1053 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
1054 {
1055 struct intel_context *ce;
1056 unsigned long index, flags;
1057 bool pending_disable, pending_enable, deregister, destroyed, banned;
1058
1059 xa_lock_irqsave(&guc->context_lookup, flags);
1060 xa_for_each(&guc->context_lookup, index, ce) {
1061 /*
1062 * Corner case where the ref count on the object is zero but a
1063 * deregister G2H was lost. In this case we don't touch the ref
1064 * count and finish the destroy of the context.
1065 */
1066 bool do_put = kref_get_unless_zero(&ce->ref);
1067
1068 xa_unlock(&guc->context_lookup);
1069
1070 spin_lock(&ce->guc_state.lock);
1071
1072 /*
1073 * Once we are at this point submission_disabled() is guaranteed
1074 * to be visible to all callers who set the below flags (see above
1075 * flush and flushes in reset_prepare). If submission_disabled()
1076 * is set, the caller shouldn't set these flags.
1077 */
1078
1079 destroyed = context_destroyed(ce);
1080 pending_enable = context_pending_enable(ce);
1081 pending_disable = context_pending_disable(ce);
1082 deregister = context_wait_for_deregister_to_register(ce);
1083 banned = context_banned(ce);
1084 init_sched_state(ce);
1085
1086 spin_unlock(&ce->guc_state.lock);
1087
1088 if (pending_enable || destroyed || deregister) {
1089 decr_outstanding_submission_g2h(guc);
1090 if (deregister)
1091 guc_signal_context_fence(ce);
1092 if (destroyed) {
1093 intel_gt_pm_put_async(guc_to_gt(guc));
1094 release_guc_id(guc, ce);
1095 __guc_context_destroy(ce);
1096 }
1097 if (pending_enable || deregister)
1098 intel_context_put(ce);
1099 }
1100
1101 /* Not mutually exclusive with the above if statement. */
1102 if (pending_disable) {
1103 guc_signal_context_fence(ce);
1104 if (banned) {
1105 guc_cancel_context_requests(ce);
1106 intel_engine_signal_breadcrumbs(ce->engine);
1107 }
1108 intel_context_sched_disable_unpin(ce);
1109 decr_outstanding_submission_g2h(guc);
1110
1111 spin_lock(&ce->guc_state.lock);
1112 guc_blocked_fence_complete(ce);
1113 spin_unlock(&ce->guc_state.lock);
1114
1115 intel_context_put(ce);
1116 }
1117
1118 if (do_put)
1119 intel_context_put(ce);
1120 xa_lock(&guc->context_lookup);
1121 }
1122 xa_unlock_irqrestore(&guc->context_lookup, flags);
1123 }
1124
1125 /*
1126 * GuC stores busyness stats for each engine at context in/out boundaries. A
1127 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
1128 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
1129 * GuC.
1130 *
1131 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
1132 * is valid (!= ~0) and start is non-zero, the engine is considered to be
1133 * active. For an active engine total busyness = total + (now - start), where
1134 * 'now' is the time at which the busyness is sampled. For inactive engine,
1135 * total busyness = total.
1136 *
1137 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
1138 *
1139 * The start and total values provided by GuC are 32 bits and wrap around in a
1140 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
1141 * increasing ns values, there is a need for this implementation to account for
1142 * overflows and extend the GuC provided values to 64 bits before returning
1143 * busyness to the user. In order to do that, a worker runs periodically with
1144 * a period of 1/8th the time it takes for the timestamp to wrap (i.e. once in
1145 * 27 seconds for a gt clock frequency of 19.2 MHz).
1146 */
1147
1148 #define WRAP_TIME_CLKS U32_MAX
1149 #define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
1150
1151 static void
1152 __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1153 {
1154 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1155 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
1156
1157 if (new_start == lower_32_bits(*prev_start))
1158 return;
1159
1160 /*
1161 * When gt is unparked, we update the gt timestamp and start the ping
1162 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1163 * is unparked, all switched in contexts will have a start time that is
1164 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1165 *
1166 * If neither gt_stamp nor new_start has rolled over, then the
1167 * gt_stamp_hi does not need to be adjusted, however if one of them has
1168 * rolled over, we need to adjust gt_stamp_hi accordingly.
1169 *
1170 * The below conditions address the cases of new_start rollover and
1171 * gt_stamp_last rollover respectively.
1172 */
1173 if (new_start < gt_stamp_last &&
1174 (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
1175 gt_stamp_hi++;
1176
1177 if (new_start > gt_stamp_last &&
1178 (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
1179 gt_stamp_hi--;
1180
1181 *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
1182 }
1183
1184 #define record_read(map_, field_) \
1185 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
1186
1187 /*
1188 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1189 * we run into a race where the value read is inconsistent. Sometimes the
1190 * inconsistency is in reading the upper MSB bytes of the last_in value when
1191 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1192 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
1193 * determine validity of these values. Instead we read the values multiple times
1194 * until they are consistent. In test runs, 3 attempts resulted in consistent
1195 * values. The upper bound is set to 6 attempts and may need to be tuned for
1196 * any new occurrences.
1197 */
1198 static void __get_engine_usage_record(struct intel_engine_cs *engine,
1199 u32 *last_in, u32 *id, u32 *total)
1200 {
1201 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
1202 int i = 0;
1203
1204 do {
1205 *last_in = record_read(&rec_map, last_switch_in_stamp);
1206 *id = record_read(&rec_map, current_context_index);
1207 *total = record_read(&rec_map, total_runtime);
1208
1209 if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
1210 record_read(&rec_map, current_context_index) == *id &&
1211 record_read(&rec_map, total_runtime) == *total)
1212 break;
1213 } while (++i < 6);
1214 }
1215
1216 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1217 {
1218 struct intel_engine_guc_stats *stats = &engine->stats.guc;
1219 struct intel_guc *guc = &engine->gt->uc.guc;
1220 u32 last_switch, ctx_id, total;
1221
1222 lockdep_assert_held(&guc->timestamp.lock);
1223
1224 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
1225
1226 stats->running = ctx_id != ~0U && last_switch;
1227 if (stats->running)
1228 __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
1229
1230 /*
1231 * Instead of adjusting the total for overflow, just add the
1232 * difference from the previous sample to stats->total_gt_clks.
1233 */
1234 if (total && total != ~0U) {
1235 stats->total_gt_clks += (u32)(total - stats->prev_total);
1236 stats->prev_total = total;
1237 }
1238 }
1239
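/*
 * Number of bits to right-shift the raw GPM timestamp (MISC_STATUS0/1) so
 * that it matches the GT clock domain used for the busyness stats, derived
 * from the CTC shift parameter in RPM_CONFIG0.
 */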
1240 static u32 gpm_timestamp_shift(struct intel_gt *gt)
1241 {
1242 intel_wakeref_t wakeref;
1243 u32 reg, shift;
1244
1245 with_intel_runtime_pm(gt->uncore->rpm, wakeref)
1246 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
1247
1248 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
1249 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
1250
1251 return 3 - shift;
1252 }
1253
1254 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
1255 {
1256 struct intel_gt *gt = guc_to_gt(guc);
1257 u32 gt_stamp_lo, gt_stamp_hi;
1258 u64 gpm_ts;
1259
1260 lockdep_assert_held(&guc->timestamp.lock);
1261
1262 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1263 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
1264 MISC_STATUS1) >> guc->timestamp.shift;
1265 gt_stamp_lo = lower_32_bits(gpm_ts);
1266 *now = ktime_get();
1267
1268 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
1269 gt_stamp_hi++;
1270
1271 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
1272 }
1273
1274 /*
1275 * Unlike the execlist mode of submission total and active times are in terms of
1276 * gt clocks. The *now parameter is retained to return the cpu time at which the
1277 * busyness was sampled.
1278 */
1279 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
1280 {
1281 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
1282 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
1283 struct intel_gt *gt = engine->gt;
1284 struct intel_guc *guc = &gt->uc.guc;
1285 u64 total, gt_stamp_saved;
1286 unsigned long flags;
1287 u32 reset_count;
1288 bool in_reset;
1289
1290 spin_lock_irqsave(&guc->timestamp.lock, flags);
1291
1292 /*
1293 * If a reset happened, we risk reading partially updated engine
1294 * busyness from GuC, so we just use the driver stored copy of busyness.
1295 * Synchronize with gt reset using reset_count and the
1296 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
1297 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
1298 * usable by checking the flag afterwards.
1299 */
1300 reset_count = i915_reset_count(gpu_error);
1301 in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);
1302
1303 *now = ktime_get();
1304
1305 /*
1306 * The active busyness depends on start_gt_clk and gt_stamp.
1307 * gt_stamp is updated by i915 only when gt is awake and the
1308 * start_gt_clk is derived from GuC state. To get a consistent
1309 * view of activity, we query the GuC state only if gt is awake.
1310 */
1311 if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
1312 stats_saved = *stats;
1313 gt_stamp_saved = guc->timestamp.gt_stamp;
1314 /*
1315 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1316 * start_gt_clk' calculation below for active engines.
1317 */
1318 guc_update_engine_gt_clks(engine);
1319 guc_update_pm_timestamp(guc, now);
1320 intel_gt_pm_put_async(gt);
1321 if (i915_reset_count(gpu_error) != reset_count) {
1322 *stats = stats_saved;
1323 guc->timestamp.gt_stamp = gt_stamp_saved;
1324 }
1325 }
1326
1327 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
1328 if (stats->running) {
1329 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
1330
1331 total += intel_gt_clock_interval_to_ns(gt, clk);
1332 }
1333
1334 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1335
1336 return ns_to_ktime(total);
1337 }
1338
1339 static void __reset_guc_busyness_stats(struct intel_guc *guc)
1340 {
1341 struct intel_gt *gt = guc_to_gt(guc);
1342 struct intel_engine_cs *engine;
1343 enum intel_engine_id id;
1344 unsigned long flags;
1345 ktime_t unused;
1346
1347 cancel_delayed_work_sync(&guc->timestamp.work);
1348
1349 spin_lock_irqsave(&guc->timestamp.lock, flags);
1350
1351 guc_update_pm_timestamp(guc, &unused);
1352 for_each_engine(engine, gt, id) {
1353 guc_update_engine_gt_clks(engine);
1354 engine->stats.guc.prev_total = 0;
1355 }
1356
1357 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1358 }
1359
1360 static void __update_guc_busyness_stats(struct intel_guc *guc)
1361 {
1362 struct intel_gt *gt = guc_to_gt(guc);
1363 struct intel_engine_cs *engine;
1364 enum intel_engine_id id;
1365 unsigned long flags;
1366 ktime_t unused;
1367
1368 guc->timestamp.last_stat_jiffies = jiffies;
1369
1370 spin_lock_irqsave(&guc->timestamp.lock, flags);
1371
1372 guc_update_pm_timestamp(guc, &unused);
1373 for_each_engine(engine, gt, id)
1374 guc_update_engine_gt_clks(engine);
1375
1376 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1377 }
1378
1379 static void guc_timestamp_ping(struct work_struct *wrk)
1380 {
1381 struct intel_guc *guc = container_of(wrk, typeof(*guc),
1382 timestamp.work.work);
1383 struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
1384 struct intel_gt *gt = guc_to_gt(guc);
1385 intel_wakeref_t wakeref;
1386 int srcu, ret;
1387
1388 /*
1389 * Synchronize with gt reset to make sure the worker does not
1390 * corrupt the engine/guc stats.
1391 */
1392 ret = intel_gt_reset_trylock(gt, &srcu);
1393 if (ret)
1394 return;
1395
1396 with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
1397 __update_guc_busyness_stats(guc);
1398
1399 intel_gt_reset_unlock(gt, srcu);
1400
1401 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1402 guc->timestamp.ping_delay);
1403 }
1404
1405 static int guc_action_enable_usage_stats(struct intel_guc *guc)
1406 {
1407 u32 offset = intel_guc_engine_usage_offset(guc);
1408 u32 action[] = {
1409 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
1410 offset,
1411 0,
1412 };
1413
1414 return intel_guc_send(guc, action, ARRAY_SIZE(action));
1415 }
1416
1417 static void guc_init_engine_stats(struct intel_guc *guc)
1418 {
1419 struct intel_gt *gt = guc_to_gt(guc);
1420 intel_wakeref_t wakeref;
1421
1422 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1423 guc->timestamp.ping_delay);
1424
1425 with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
1426 int ret = guc_action_enable_usage_stats(guc);
1427
1428 if (ret)
1429 drm_err(&gt->i915->drm,
1430 "Failed to enable usage stats: %d!\n", ret);
1431 }
1432 }
1433
1434 void intel_guc_busyness_park(struct intel_gt *gt)
1435 {
1436 struct intel_guc *guc = &gt->uc.guc;
1437
1438 if (!guc_submission_initialized(guc))
1439 return;
1440
1441 /*
1442 * There is a race with suspend flow where the worker runs after suspend
1443 * and causes an unclaimed register access warning. Cancel the worker
1444 * synchronously here.
1445 */
1446 cancel_delayed_work_sync(&guc->timestamp.work);
1447
1448 /*
1449 * Before parking, we should sample engine busyness stats if we need to.
1450 * We can skip it if we are less than half a ping from the last time we
1451 * sampled the busyness stats.
1452 */
1453 if (guc->timestamp.last_stat_jiffies &&
1454 !time_after(jiffies, guc->timestamp.last_stat_jiffies +
1455 (guc->timestamp.ping_delay / 2)))
1456 return;
1457
1458 __update_guc_busyness_stats(guc);
1459 }
1460
1461 void intel_guc_busyness_unpark(struct intel_gt *gt)
1462 {
1463 struct intel_guc *guc = &gt->uc.guc;
1464 unsigned long flags;
1465 ktime_t unused;
1466
1467 if (!guc_submission_initialized(guc))
1468 return;
1469
1470 spin_lock_irqsave(&guc->timestamp.lock, flags);
1471 guc_update_pm_timestamp(guc, &unused);
1472 spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1473 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1474 guc->timestamp.ping_delay);
1475 }
1476
1477 static inline bool
1478 submission_disabled(struct intel_guc *guc)
1479 {
1480 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1481
1482 return unlikely(!sched_engine ||
1483 !__tasklet_is_enabled(&sched_engine->tasklet) ||
1484 intel_gt_is_wedged(guc_to_gt(guc)));
1485 }
1486
1487 static void disable_submission(struct intel_guc *guc)
1488 {
1489 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1490
1491 if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1492 GEM_BUG_ON(!guc->ct.enabled);
1493 __tasklet_disable_sync_once(&sched_engine->tasklet);
1494 sched_engine->tasklet.callback = NULL;
1495 }
1496 }
1497
1498 static void enable_submission(struct intel_guc *guc)
1499 {
1500 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1501 unsigned long flags;
1502
1503 spin_lock_irqsave(&guc->sched_engine->lock, flags);
1504 sched_engine->tasklet.callback = guc_submission_tasklet;
1505 wmb(); /* Make sure callback visible */
1506 if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1507 __tasklet_enable(&sched_engine->tasklet)) {
1508 GEM_BUG_ON(!guc->ct.enabled);
1509
1510 /* And kick in case we missed a new request submission. */
1511 tasklet_hi_schedule(&sched_engine->tasklet);
1512 }
1513 spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
1514 }
1515
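/*
 * An empty lock/unlock cycle of the submission lock: any submitter currently
 * holding sched_engine->lock is guaranteed to have dropped it before the
 * reset path continues scrubbing state.
 */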
1516 static void guc_flush_submissions(struct intel_guc *guc)
1517 {
1518 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1519 unsigned long flags;
1520
1521 spin_lock_irqsave(&sched_engine->lock, flags);
1522 spin_unlock_irqrestore(&sched_engine->lock, flags);
1523 }
1524
1525 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
1526
1527 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1528 {
1529 if (unlikely(!guc_submission_initialized(guc))) {
1530 /* Reset called during driver load? GuC not yet initialised! */
1531 return;
1532 }
1533
1534 intel_gt_park_heartbeats(guc_to_gt(guc));
1535 disable_submission(guc);
1536 guc->interrupts.disable(guc);
1537 __reset_guc_busyness_stats(guc);
1538
1539 /* Flush IRQ handler */
1540 spin_lock_irq(guc_to_gt(guc)->irq_lock);
1541 spin_unlock_irq(guc_to_gt(guc)->irq_lock);
1542
1543 guc_flush_submissions(guc);
1544 guc_flush_destroyed_contexts(guc);
1545 flush_work(&guc->ct.requests.worker);
1546
1547 scrub_guc_desc_for_outstanding_g2h(guc);
1548 }
1549
1550 static struct intel_engine_cs *
1551 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1552 {
1553 struct intel_engine_cs *engine;
1554 intel_engine_mask_t tmp, mask = ve->mask;
1555 unsigned int num_siblings = 0;
1556
1557 for_each_engine_masked(engine, ve->gt, mask, tmp)
1558 if (num_siblings++ == sibling)
1559 return engine;
1560
1561 return NULL;
1562 }
1563
1564 static inline struct intel_engine_cs *
1565 __context_to_physical_engine(struct intel_context *ce)
1566 {
1567 struct intel_engine_cs *engine = ce->engine;
1568
1569 if (intel_engine_is_virtual(engine))
1570 engine = guc_virtual_get_sibling(engine, 0);
1571
1572 return engine;
1573 }
1574
1575 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
1576 {
1577 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
1578
1579 if (!intel_context_is_schedulable(ce))
1580 return;
1581
1582 GEM_BUG_ON(!intel_context_is_pinned(ce));
1583
1584 /*
1585 * We want a simple context + ring to execute the breadcrumb update.
1586 * We cannot rely on the context being intact across the GPU hang,
1587 * so clear it and rebuild just what we need for the breadcrumb.
1588 * All pending requests for this context will be zapped, and any
1589 * future request will be after userspace has had the opportunity
1590 * to recreate its own state.
1591 */
1592 if (scrub)
1593 lrc_init_regs(ce, engine, true);
1594
1595 /* Rerun the request; its payload has been neutered (if guilty). */
1596 lrc_update_regs(ce, engine, head);
1597 }
1598
1599 static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
1600 {
1601 if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
1602 return;
1603
1604 intel_engine_stop_cs(engine);
1605
1606 /*
1607 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
1608 * to wait for any pending mi force wakeups
1609 */
1610 intel_engine_wait_for_pending_mi_fw(engine);
1611 }
1612
1613 static void guc_reset_nop(struct intel_engine_cs *engine)
1614 {
1615 }
1616
1617 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled)
1618 {
1619 }
1620
1621 static void
1622 __unwind_incomplete_requests(struct intel_context *ce)
1623 {
1624 struct i915_request *rq, *rn;
1625 struct list_head *pl;
1626 int prio = I915_PRIORITY_INVALID;
1627 struct i915_sched_engine * const sched_engine =
1628 ce->engine->sched_engine;
1629 unsigned long flags;
1630
1631 spin_lock_irqsave(&sched_engine->lock, flags);
1632 spin_lock(&ce->guc_state.lock);
1633 list_for_each_entry_safe_reverse(rq, rn,
1634 &ce->guc_state.requests,
1635 sched.link) {
1636 if (i915_request_completed(rq))
1637 continue;
1638
1639 list_del_init(&rq->sched.link);
1640 __i915_request_unsubmit(rq);
1641
1642 /* Push the request back into the queue for later resubmission. */
1643 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1644 if (rq_prio(rq) != prio) {
1645 prio = rq_prio(rq);
1646 pl = i915_sched_lookup_priolist(sched_engine, prio);
1647 }
1648 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
1649
1650 list_add(&rq->sched.link, pl);
1651 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1652 }
1653 spin_unlock(&ce->guc_state.lock);
1654 spin_unlock_irqrestore(&sched_engine->lock, flags);
1655 }
1656
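/*
 * Reset a parent context and all of its children: for each context find
 * the active request, rewind the ring head to it and, if the request is
 * guilty, scrub the register state before it is replayed.
 */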
1657 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
1658 {
1659 bool guilty;
1660 struct i915_request *rq;
1661 unsigned long flags;
1662 u32 head;
1663 int i, number_children = ce->parallel.number_children;
1664 struct intel_context *parent = ce;
1665
1666 GEM_BUG_ON(intel_context_is_child(ce));
1667
1668 intel_context_get(ce);
1669
1670 /*
1671 * GuC will implicitly mark the context as non-schedulable when it sends
1672 * the reset notification. Make sure our state reflects this change. The
1673 * context will be marked enabled on resubmission.
1674 */
1675 spin_lock_irqsave(&ce->guc_state.lock, flags);
1676 clr_context_enabled(ce);
1677 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1678
1679 /*
1680 * For each context in the relationship, find the hanging request and
1681 * reset each context / request as needed.
1682 */
1683 for (i = 0; i < number_children + 1; ++i) {
1684 if (!intel_context_is_pinned(ce))
1685 goto next_context;
1686
1687 guilty = false;
1688 rq = intel_context_find_active_request(ce);
1689 if (!rq) {
1690 head = ce->ring->tail;
1691 goto out_replay;
1692 }
1693
1694 if (i915_request_started(rq))
1695 guilty = stalled & ce->engine->mask;
1696
1697 GEM_BUG_ON(i915_active_is_idle(&ce->active));
1698 head = intel_ring_wrap(ce->ring, rq->head);
1699
1700 __i915_request_reset(rq, guilty);
1701 out_replay:
1702 guc_reset_state(ce, head, guilty);
1703 next_context:
1704 if (i != number_children)
1705 ce = list_next_entry(ce, parallel.child_link);
1706 }
1707
1708 __unwind_incomplete_requests(parent);
1709 intel_context_put(parent);
1710 }
1711
1712 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
1713 {
1714 struct intel_context *ce;
1715 unsigned long index;
1716 unsigned long flags;
1717
1718 if (unlikely(!guc_submission_initialized(guc))) {
1719 /* Reset called during driver load? GuC not yet initialised! */
1720 return;
1721 }
1722
1723 xa_lock_irqsave(&guc->context_lookup, flags);
1724 xa_for_each(&guc->context_lookup, index, ce) {
1725 if (!kref_get_unless_zero(&ce->ref))
1726 continue;
1727
1728 xa_unlock(&guc->context_lookup);
1729
1730 if (intel_context_is_pinned(ce) &&
1731 !intel_context_is_child(ce))
1732 __guc_reset_context(ce, stalled);
1733
1734 intel_context_put(ce);
1735
1736 xa_lock(&guc->context_lookup);
1737 }
1738 xa_unlock_irqrestore(&guc->context_lookup, flags);
1739
1740 /* GuC is blown away, drop all references to contexts */
1741 xa_destroy(&guc->context_lookup);
1742 }
1743
1744 static void guc_cancel_context_requests(struct intel_context *ce)
1745 {
1746 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1747 struct i915_request *rq;
1748 unsigned long flags;
1749
1750 /* Mark all executing requests as skipped. */
1751 spin_lock_irqsave(&sched_engine->lock, flags);
1752 spin_lock(&ce->guc_state.lock);
1753 list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1754 i915_request_put(i915_request_mark_eio(rq));
1755 spin_unlock(&ce->guc_state.lock);
1756 spin_unlock_irqrestore(&sched_engine->lock, flags);
1757 }
1758
1759 static void
1760 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
1761 {
1762 struct i915_request *rq, *rn;
1763 struct rb_node *rb;
1764 unsigned long flags;
1765
1766 /* Can be called during boot if GuC fails to load */
1767 if (!sched_engine)
1768 return;
1769
1770 /*
1771 * Before we call engine->cancel_requests(), we should have exclusive
1772 * access to the submission state. This is arranged for us by the
1773 * caller disabling the interrupt generation, the tasklet and other
1774 * threads that may then access the same state, giving us a free hand
1775 * to reset state. However, we still need to let lockdep be aware that
1776 * we know this state may be accessed in hardirq context, so we
1777 * disable the irq around this manipulation and we want to keep
1778 * the spinlock focused on its duties and not accidentally conflate
1779 * coverage to the submission's irq state. (Similarly, although we
1780 * shouldn't need to disable irq around the manipulation of the
1781 * submission's irq state, we also wish to remind ourselves that
1782 * it is irq state.)
1783 */
1784 spin_lock_irqsave(&sched_engine->lock, flags);
1785
1786 /* Flush the queued requests to the timeline list (for retiring). */
1787 while ((rb = rb_first_cached(&sched_engine->queue))) {
1788 struct i915_priolist *p = to_priolist(rb);
1789
1790 priolist_for_each_request_consume(rq, rn, p) {
1791 list_del_init(&rq->sched.link);
1792
1793 __i915_request_submit(rq);
1794
1795 i915_request_put(i915_request_mark_eio(rq));
1796 }
1797
1798 rb_erase_cached(&p->node, &sched_engine->queue);
1799 i915_priolist_free(p);
1800 }
1801
1802 /* Remaining _unready_ requests will be nop'ed when submitted */
1803
1804 sched_engine->queue_priority_hint = INT_MIN;
1805 sched_engine->queue = RB_ROOT_CACHED;
1806
1807 spin_unlock_irqrestore(&sched_engine->lock, flags);
1808 }
1809
1810 void intel_guc_submission_cancel_requests(struct intel_guc *guc)
1811 {
1812 struct intel_context *ce;
1813 unsigned long index;
1814 unsigned long flags;
1815
1816 xa_lock_irqsave(&guc->context_lookup, flags);
1817 xa_for_each(&guc->context_lookup, index, ce) {
1818 if (!kref_get_unless_zero(&ce->ref))
1819 continue;
1820
1821 xa_unlock(&guc->context_lookup);
1822
1823 if (intel_context_is_pinned(ce) &&
1824 !intel_context_is_child(ce))
1825 guc_cancel_context_requests(ce);
1826
1827 intel_context_put(ce);
1828
1829 xa_lock(&guc->context_lookup);
1830 }
1831 xa_unlock_irqrestore(&guc->context_lookup, flags);
1832
1833 guc_cancel_sched_engine_requests(guc->sched_engine);
1834
1835 /* GuC is blown away, drop all references to contexts */
1836 xa_destroy(&guc->context_lookup);
1837 }
1838
1839 void intel_guc_submission_reset_finish(struct intel_guc *guc)
1840 {
1841 /* Reset called during driver load or during wedge? */
1842 if (unlikely(!guc_submission_initialized(guc) ||
1843 intel_gt_is_wedged(guc_to_gt(guc)))) {
1844 return;
1845 }
1846
1847 /*
1848 * Technically possible for either of these values to be non-zero here,
1849 * but very unlikely + harmless. Regardless, let's add a warn so we can
1850 * see in CI if this happens frequently / as a precursor to taking down the
1851 * machine.
1852 */
1853 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1854 atomic_set(&guc->outstanding_submission_g2h, 0);
1855
1856 intel_guc_global_policies_update(guc);
1857 enable_submission(guc);
1858 intel_gt_unpark_heartbeats(guc_to_gt(guc));
1859 }
1860
1861 static void destroyed_worker_func(struct work_struct *w);
1862 static void reset_fail_worker_func(struct work_struct *w);
1863
1864 /*
1865 * Set up the memory resources to be shared with the GuC (via the GGTT)
1866 * at firmware loading time.
1867 */
1868 int intel_guc_submission_init(struct intel_guc *guc)
1869 {
1870 struct intel_gt *gt = guc_to_gt(guc);
1871 int ret;
1872
1873 if (guc->submission_initialized)
1874 return 0;
1875
1876 if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) {
1877 ret = guc_lrc_desc_pool_create_v69(guc);
1878 if (ret)
1879 return ret;
1880 }
1881
1882 guc->submission_state.guc_ids_bitmap =
1883 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
1884 if (!guc->submission_state.guc_ids_bitmap) {
1885 ret = -ENOMEM;
1886 goto destroy_pool;
1887 }
1888
1889 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
1890 guc->timestamp.shift = gpm_timestamp_shift(gt);
1891 guc->submission_initialized = true;
1892
1893 return 0;
1894
1895 destroy_pool:
1896 guc_lrc_desc_pool_destroy_v69(guc);
1897
1898 return ret;
1899 }
1900
1901 void intel_guc_submission_fini(struct intel_guc *guc)
1902 {
1903 if (!guc->submission_initialized)
1904 return;
1905
1906 guc_flush_destroyed_contexts(guc);
1907 guc_lrc_desc_pool_destroy_v69(guc);
1908 i915_sched_engine_put(guc->sched_engine);
1909 bitmap_free(guc->submission_state.guc_ids_bitmap);
1910 guc->submission_initialized = false;
1911 }
1912
1913 static inline void queue_request(struct i915_sched_engine *sched_engine,
1914 struct i915_request *rq,
1915 int prio)
1916 {
1917 GEM_BUG_ON(!list_empty(&rq->sched.link));
1918 list_add_tail(&rq->sched.link,
1919 i915_sched_lookup_priolist(sched_engine, prio));
1920 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1921 tasklet_hi_schedule(&sched_engine->tasklet);
1922 }
1923
1924 static int guc_bypass_tasklet_submit(struct intel_guc *guc,
1925 struct i915_request *rq)
1926 {
1927 int ret = 0;
1928
1929 __i915_request_submit(rq);
1930
1931 trace_i915_request_in(rq, 0);
1932
1933 if (is_multi_lrc_rq(rq)) {
1934 if (multi_lrc_submit(rq)) {
1935 ret = guc_wq_item_append(guc, rq);
1936 if (!ret)
1937 ret = guc_add_request(guc, rq);
1938 }
1939 } else {
1940 guc_set_lrc_tail(rq);
1941 ret = guc_add_request(guc, rq);
1942 }
1943
1944 if (unlikely(ret == -EPIPE))
1945 disable_submission(guc);
1946
1947 return ret;
1948 }
1949
1950 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
1951 {
1952 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1953 struct intel_context *ce = request_to_scheduling_context(rq);
1954
1955 return submission_disabled(guc) || guc->stalled_request ||
1956 !i915_sched_engine_is_empty(sched_engine) ||
1957 !ctx_id_mapped(guc, ce->guc_id.id);
1958 }
1959
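/*
 * Submission entry point: the request is queued for the tasklet if
 * submission is disabled, another request is stalled, the scheduler
 * queue is non-empty or the context has no guc_id mapping yet;
 * otherwise the tasklet is bypassed and the request goes straight to
 * the GuC.
 */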
1960 static void guc_submit_request(struct i915_request *rq)
1961 {
1962 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1963 struct intel_guc *guc = &rq->engine->gt->uc.guc;
1964 unsigned long flags;
1965
1966 /* Will be called from irq-context when using foreign fences. */
1967 spin_lock_irqsave(&sched_engine->lock, flags);
1968
1969 if (need_tasklet(guc, rq))
1970 queue_request(sched_engine, rq, rq_prio(rq));
1971 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
1972 tasklet_hi_schedule(&sched_engine->tasklet);
1973
1974 spin_unlock_irqrestore(&sched_engine->lock, flags);
1975 }
1976
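/*
 * Parent (multi-LRC) contexts need a contiguous block of guc_ids, one
 * for the parent plus one per child, so they allocate a region from a
 * dedicated bitmap; single-LRC contexts take a single id from the ida.
 */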
1977 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
1978 {
1979 int ret;
1980
1981 GEM_BUG_ON(intel_context_is_child(ce));
1982
1983 if (intel_context_is_parent(ce))
1984 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
1985 NUMBER_MULTI_LRC_GUC_ID(guc),
1986 order_base_2(ce->parallel.number_children
1987 + 1));
1988 else
1989 ret = ida_simple_get(&guc->submission_state.guc_ids,
1990 NUMBER_MULTI_LRC_GUC_ID(guc),
1991 guc->submission_state.num_guc_ids,
1992 GFP_KERNEL | __GFP_RETRY_MAYFAIL |
1993 __GFP_NOWARN);
1994 if (unlikely(ret < 0))
1995 return ret;
1996
1997 ce->guc_id.id = ret;
1998 return 0;
1999 }
2000
2001 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2002 {
2003 GEM_BUG_ON(intel_context_is_child(ce));
2004
2005 if (!context_guc_id_invalid(ce)) {
2006 if (intel_context_is_parent(ce))
2007 bitmap_release_region(guc->submission_state.guc_ids_bitmap,
2008 ce->guc_id.id,
2009 order_base_2(ce->parallel.number_children
2010 + 1));
2011 else
2012 ida_simple_remove(&guc->submission_state.guc_ids,
2013 ce->guc_id.id);
2014 clr_ctx_id_mapping(guc, ce->guc_id.id);
2015 set_context_guc_id_invalid(ce);
2016 }
2017 if (!list_empty(&ce->guc_id.link))
2018 list_del_init(&ce->guc_id.link);
2019 }
2020
2021 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2022 {
2023 unsigned long flags;
2024
2025 spin_lock_irqsave(&guc->submission_state.lock, flags);
2026 __release_guc_id(guc, ce);
2027 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2028 }
2029
2030 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
2031 {
2032 struct intel_context *cn;
2033
2034 lockdep_assert_held(&guc->submission_state.lock);
2035 GEM_BUG_ON(intel_context_is_child(ce));
2036 GEM_BUG_ON(intel_context_is_parent(ce));
2037
2038 if (!list_empty(&guc->submission_state.guc_id_list)) {
2039 cn = list_first_entry(&guc->submission_state.guc_id_list,
2040 struct intel_context,
2041 guc_id.link);
2042
2043 GEM_BUG_ON(atomic_read(&cn->guc_id.ref));
2044 GEM_BUG_ON(context_guc_id_invalid(cn));
2045 GEM_BUG_ON(intel_context_is_child(cn));
2046 GEM_BUG_ON(intel_context_is_parent(cn));
2047
2048 list_del_init(&cn->guc_id.link);
2049 ce->guc_id.id = cn->guc_id.id;
2050
2051 spin_lock(&cn->guc_state.lock);
2052 clr_context_registered(cn);
2053 spin_unlock(&cn->guc_state.lock);
2054
2055 set_context_guc_id_invalid(cn);
2056
2057 #ifdef CONFIG_DRM_I915_SELFTEST
2058 guc->number_guc_id_stolen++;
2059 #endif
2060
2061 return 0;
2062 } else {
2063 return -EAGAIN;
2064 }
2065 }
2066
2067 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
2068 {
2069 int ret;
2070
2071 lockdep_assert_held(&guc->submission_state.lock);
2072 GEM_BUG_ON(intel_context_is_child(ce));
2073
2074 ret = new_guc_id(guc, ce);
2075 if (unlikely(ret < 0)) {
2076 if (intel_context_is_parent(ce))
2077 return -ENOSPC;
2078
2079 ret = steal_guc_id(guc, ce);
2080 if (ret < 0)
2081 return ret;
2082 }
2083
2084 if (intel_context_is_parent(ce)) {
2085 struct intel_context *child;
2086 int i = 1;
2087
2088 for_each_child(ce, child)
2089 child->guc_id.id = ce->guc_id.id + i++;
2090 }
2091
2092 return 0;
2093 }
2094
2095 #define PIN_GUC_ID_TRIES 4
2096 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2097 {
2098 int ret = 0;
2099 unsigned long flags, tries = PIN_GUC_ID_TRIES;
2100
2101 GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
2102
2103 try_again:
2104 spin_lock_irqsave(&guc->submission_state.lock, flags);
2105
2106 might_lock(&ce->guc_state.lock);
2107
2108 if (context_guc_id_invalid(ce)) {
2109 ret = assign_guc_id(guc, ce);
2110 if (ret)
2111 goto out_unlock;
2112 ret = 1; /* Indicates newly assigned guc_id */
2113 }
2114 if (!list_empty(&ce->guc_id.link))
2115 list_del_init(&ce->guc_id.link);
2116 atomic_inc(&ce->guc_id.ref);
2117
2118 out_unlock:
2119 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2120
2121 /*
2122 * -EAGAIN indicates no guc_ids are available, let's retire any
2123 * outstanding requests to see if that frees up a guc_id. If the first
2124 * retire didn't help, insert a sleep with the timeslice duration before
2125 * attempting to retire more requests. Double the sleep period each
2126 * subsequent pass before finally giving up. The sleep period has a max
2127 * of 100ms and a minimum of 1ms.
2128 */
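/*
 * For example, assuming a 5ms timeslice: the first retry only retires
 * requests, the second sleeps 5ms and the third sleeps 10ms before the
 * final attempt; if that still fails, -EAGAIN is returned to the caller.
 */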
2129 if (ret == -EAGAIN && --tries) {
2130 if (PIN_GUC_ID_TRIES - tries > 1) {
2131 unsigned int timeslice_shifted =
2132 ce->engine->props.timeslice_duration_ms <<
2133 (PIN_GUC_ID_TRIES - tries - 2);
2134 unsigned int max = min_t(unsigned int, 100,
2135 timeslice_shifted);
2136
2137 msleep(max_t(unsigned int, max, 1));
2138 }
2139 intel_gt_retire_requests(guc_to_gt(guc));
2140 goto try_again;
2141 }
2142
2143 return ret;
2144 }
2145
2146 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2147 {
2148 unsigned long flags;
2149
2150 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
2151 GEM_BUG_ON(intel_context_is_child(ce));
2152
2153 if (unlikely(context_guc_id_invalid(ce) ||
2154 intel_context_is_parent(ce)))
2155 return;
2156
2157 spin_lock_irqsave(&guc->submission_state.lock, flags);
2158 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2159 !atomic_read(&ce->guc_id.ref))
2160 list_add_tail(&ce->guc_id.link,
2161 &guc->submission_state.guc_id_list);
2162 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2163 }
2164
2165 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
2166 struct intel_context *ce,
2167 u32 guc_id,
2168 u32 offset,
2169 bool loop)
2170 {
2171 struct intel_context *child;
2172 u32 action[4 + MAX_ENGINE_INSTANCE];
2173 int len = 0;
2174
2175 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2176
2177 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2178 action[len++] = guc_id;
2179 action[len++] = ce->parallel.number_children + 1;
2180 action[len++] = offset;
2181 for_each_child(ce, child) {
2182 offset += sizeof(struct guc_lrc_desc_v69);
2183 action[len++] = offset;
2184 }
2185
2186 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2187 }
2188
2189 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
2190 struct intel_context *ce,
2191 struct guc_ctxt_registration_info *info,
2192 bool loop)
2193 {
2194 struct intel_context *child;
2195 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
2196 int len = 0;
2197 u32 next_id;
2198
2199 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
2200
2201 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2202 action[len++] = info->flags;
2203 action[len++] = info->context_idx;
2204 action[len++] = info->engine_class;
2205 action[len++] = info->engine_submit_mask;
2206 action[len++] = info->wq_desc_lo;
2207 action[len++] = info->wq_desc_hi;
2208 action[len++] = info->wq_base_lo;
2209 action[len++] = info->wq_base_hi;
2210 action[len++] = info->wq_size;
2211 action[len++] = ce->parallel.number_children + 1;
2212 action[len++] = info->hwlrca_lo;
2213 action[len++] = info->hwlrca_hi;
2214
2215 next_id = info->context_idx + 1;
2216 for_each_child(ce, child) {
2217 GEM_BUG_ON(next_id++ != child->guc_id.id);
2218
2219 /*
2220 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2221 * only supports 32 bit currently.
2222 */
2223 action[len++] = lower_32_bits(child->lrc.lrca);
2224 action[len++] = upper_32_bits(child->lrc.lrca);
2225 }
2226
2227 GEM_BUG_ON(len > ARRAY_SIZE(action));
2228
2229 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2230 }
2231
2232 static int __guc_action_register_context_v69(struct intel_guc *guc,
2233 u32 guc_id,
2234 u32 offset,
2235 bool loop)
2236 {
2237 u32 action[] = {
2238 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2239 guc_id,
2240 offset,
2241 };
2242
2243 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2244 0, loop);
2245 }
2246
2247 static int __guc_action_register_context_v70(struct intel_guc *guc,
2248 struct guc_ctxt_registration_info *info,
2249 bool loop)
2250 {
2251 u32 action[] = {
2252 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2253 info->flags,
2254 info->context_idx,
2255 info->engine_class,
2256 info->engine_submit_mask,
2257 info->wq_desc_lo,
2258 info->wq_desc_hi,
2259 info->wq_base_lo,
2260 info->wq_base_hi,
2261 info->wq_size,
2262 info->hwlrca_lo,
2263 info->hwlrca_hi,
2264 };
2265
2266 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2267 0, loop);
2268 }
2269
2270 static void prepare_context_registration_info_v69(struct intel_context *ce);
2271 static void prepare_context_registration_info_v70(struct intel_context *ce,
2272 struct guc_ctxt_registration_info *info);
2273
2274 static int
2275 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
2276 {
2277 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
2278 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
2279
2280 prepare_context_registration_info_v69(ce);
2281
2282 if (intel_context_is_parent(ce))
2283 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
2284 offset, loop);
2285 else
2286 return __guc_action_register_context_v69(guc, ce->guc_id.id,
2287 offset, loop);
2288 }
2289
2290 static int
2291 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
2292 {
2293 struct guc_ctxt_registration_info info;
2294
2295 prepare_context_registration_info_v70(ce, &info);
2296
2297 if (intel_context_is_parent(ce))
2298 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
2299 else
2300 return __guc_action_register_context_v70(guc, &info, loop);
2301 }
2302
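/*
 * GuC firmware 70+ carries the full registration info in the H2G itself;
 * v69 firmware instead reads a guc_lrc_desc out of the descriptor pool,
 * so only the pool offset of the descriptor is passed.
 */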
2303 static int register_context(struct intel_context *ce, bool loop)
2304 {
2305 struct intel_guc *guc = ce_to_guc(ce);
2306 int ret;
2307
2308 GEM_BUG_ON(intel_context_is_child(ce));
2309 trace_intel_context_register(ce);
2310
2311 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
2312 ret = register_context_v70(guc, ce, loop);
2313 else
2314 ret = register_context_v69(guc, ce, loop);
2315
2316 if (likely(!ret)) {
2317 unsigned long flags;
2318
2319 spin_lock_irqsave(&ce->guc_state.lock, flags);
2320 set_context_registered(ce);
2321 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2322
2323 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
2324 guc_context_policy_init_v70(ce, loop);
2325 }
2326
2327 return ret;
2328 }
2329
2330 static int __guc_action_deregister_context(struct intel_guc *guc,
2331 u32 guc_id)
2332 {
2333 u32 action[] = {
2334 INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
2335 guc_id,
2336 };
2337
2338 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2339 G2H_LEN_DW_DEREGISTER_CONTEXT,
2340 true);
2341 }
2342
2343 static int deregister_context(struct intel_context *ce, u32 guc_id)
2344 {
2345 struct intel_guc *guc = ce_to_guc(ce);
2346
2347 GEM_BUG_ON(intel_context_is_child(ce));
2348 trace_intel_context_deregister(ce);
2349
2350 return __guc_action_deregister_context(guc, guc_id);
2351 }
2352
2353 static inline void clear_children_join_go_memory(struct intel_context *ce)
2354 {
2355 struct parent_scratch *ps = __get_parent_scratch(ce);
2356 int i;
2357
2358 ps->go.semaphore = 0;
2359 for (i = 0; i < ce->parallel.number_children + 1; ++i)
2360 ps->join[i].semaphore = 0;
2361 }
2362
2363 static inline u32 get_children_go_value(struct intel_context *ce)
2364 {
2365 return __get_parent_scratch(ce)->go.semaphore;
2366 }
2367
2368 static inline u32 get_children_join_value(struct intel_context *ce,
2369 u8 child_index)
2370 {
2371 return __get_parent_scratch(ce)->join[child_index].semaphore;
2372 }
2373
2374 struct context_policy {
2375 u32 count;
2376 struct guc_update_context_policy h2g;
2377 };
2378
2379 static u32 __guc_context_policy_action_size(struct context_policy *policy)
2380 {
2381 size_t bytes = sizeof(policy->h2g.header) +
2382 (sizeof(policy->h2g.klv[0]) * policy->count);
2383
2384 return bytes / sizeof(u32);
2385 }
2386
2387 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
2388 {
2389 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
2390 policy->h2g.header.ctx_id = guc_id;
2391 policy->count = 0;
2392 }
2393
2394 #define MAKE_CONTEXT_POLICY_ADD(func, id) \
2395 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
2396 { \
2397 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
2398 policy->h2g.klv[policy->count].kl = \
2399 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
2400 FIELD_PREP(GUC_KLV_0_LEN, 1); \
2401 policy->h2g.klv[policy->count].value = data; \
2402 policy->count++; \
2403 }
2404
2405 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
2406 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
2407 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
2408 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
2409
2410 #undef MAKE_CONTEXT_POLICY_ADD
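/*
 * The macro above generates helpers such as
 * __guc_context_policy_add_execution_quantum(&policy, quantum), each of
 * which appends one KLV (key/length/value) entry to the H2G policy
 * message built in struct context_policy.
 */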
2411
2412 static int __guc_context_set_context_policies(struct intel_guc *guc,
2413 struct context_policy *policy,
2414 bool loop)
2415 {
2416 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
2417 __guc_context_policy_action_size(policy),
2418 0, loop);
2419 }
2420
2421 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
2422 {
2423 struct intel_engine_cs *engine = ce->engine;
2424 struct intel_guc *guc = &engine->gt->uc.guc;
2425 struct context_policy policy;
2426 u32 execution_quantum;
2427 u32 preemption_timeout;
2428 unsigned long flags;
2429 int ret;
2430
2431 /* NB: For both of these, zero means disabled. */
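/* GuC policy values are in microseconds; engine->props stores milliseconds. */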
2432 execution_quantum = engine->props.timeslice_duration_ms * 1000;
2433 preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2434
2435 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
2436
2437 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2438 __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2439 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2440
2441 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2442 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2443
2444 ret = __guc_context_set_context_policies(guc, &policy, loop);
2445
2446 spin_lock_irqsave(&ce->guc_state.lock, flags);
2447 if (ret != 0)
2448 set_context_policy_required(ce);
2449 else
2450 clr_context_policy_required(ce);
2451 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2452
2453 return ret;
2454 }
2455
2456 static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2457 struct guc_lrc_desc_v69 *desc)
2458 {
2459 desc->policy_flags = 0;
2460
2461 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
2462 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
2463
2464 /* NB: For both of these, zero means disabled. */
2465 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2466 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2467 }
2468
2469 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2470 {
2471 /*
2472 * this matches the mapping we do in map_i915_prio_to_guc_prio()
2473 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2474 */
2475 switch (prio) {
2476 default:
2477 MISSING_CASE(prio);
2478 fallthrough;
2479 case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2480 return GEN12_CTX_PRIORITY_NORMAL;
2481 case GUC_CLIENT_PRIORITY_NORMAL:
2482 return GEN12_CTX_PRIORITY_LOW;
2483 case GUC_CLIENT_PRIORITY_HIGH:
2484 case GUC_CLIENT_PRIORITY_KMD_HIGH:
2485 return GEN12_CTX_PRIORITY_HIGH;
2486 }
2487 }
2488
2489 static void prepare_context_registration_info_v69(struct intel_context *ce)
2490 {
2491 struct intel_engine_cs *engine = ce->engine;
2492 struct intel_guc *guc = &engine->gt->uc.guc;
2493 u32 ctx_id = ce->guc_id.id;
2494 struct guc_lrc_desc_v69 *desc;
2495 struct intel_context *child;
2496
2497 GEM_BUG_ON(!engine->mask);
2498
2499 /*
2500 * Ensure the LRC + CT vmas are in the same region, as the write
2501 * barrier is done based on the CT vma region.
2502 */
2503 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2504 i915_gem_object_is_lmem(ce->ring->vma->obj));
2505
2506 desc = __get_lrc_desc_v69(guc, ctx_id);
2507 desc->engine_class = engine_class_to_guc_class(engine->class);
2508 desc->engine_submit_mask = engine->logical_mask;
2509 desc->hw_context_desc = ce->lrc.lrca;
2510 desc->priority = ce->guc_state.prio;
2511 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2512 guc_context_policy_init_v69(engine, desc);
2513
2514 /*
2515 * If context is a parent, we need to register a process descriptor
2516 * describing a work queue and register all child contexts.
2517 */
2518 if (intel_context_is_parent(ce)) {
2519 struct guc_process_desc_v69 *pdesc;
2520
2521 ce->parallel.guc.wqi_tail = 0;
2522 ce->parallel.guc.wqi_head = 0;
2523
2524 desc->process_desc = i915_ggtt_offset(ce->state) +
2525 __get_parent_scratch_offset(ce);
2526 desc->wq_addr = i915_ggtt_offset(ce->state) +
2527 __get_wq_offset(ce);
2528 desc->wq_size = WQ_SIZE;
2529
2530 pdesc = __get_process_desc_v69(ce);
2531 memset(pdesc, 0, sizeof(*(pdesc)));
2532 pdesc->stage_id = ce->guc_id.id;
2533 pdesc->wq_base_addr = desc->wq_addr;
2534 pdesc->wq_size_bytes = desc->wq_size;
2535 pdesc->wq_status = WQ_STATUS_ACTIVE;
2536
2537 ce->parallel.guc.wq_head = &pdesc->head;
2538 ce->parallel.guc.wq_tail = &pdesc->tail;
2539 ce->parallel.guc.wq_status = &pdesc->wq_status;
2540
2541 for_each_child(ce, child) {
2542 desc = __get_lrc_desc_v69(guc, child->guc_id.id);
2543
2544 desc->engine_class =
2545 engine_class_to_guc_class(engine->class);
2546 desc->hw_context_desc = child->lrc.lrca;
2547 desc->priority = ce->guc_state.prio;
2548 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
2549 guc_context_policy_init_v69(engine, desc);
2550 }
2551
2552 clear_children_join_go_memory(ce);
2553 }
2554 }
2555
2556 static void prepare_context_registration_info_v70(struct intel_context *ce,
2557 struct guc_ctxt_registration_info *info)
2558 {
2559 struct intel_engine_cs *engine = ce->engine;
2560 struct intel_guc *guc = &engine->gt->uc.guc;
2561 u32 ctx_id = ce->guc_id.id;
2562
2563 GEM_BUG_ON(!engine->mask);
2564
2565 /*
2566 * Ensure the LRC + CT vmas are in the same region, as the write
2567 * barrier is done based on the CT vma region.
2568 */
2569 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
2570 i915_gem_object_is_lmem(ce->ring->vma->obj));
2571
2572 memset(info, 0, sizeof(*info));
2573 info->context_idx = ctx_id;
2574 info->engine_class = engine_class_to_guc_class(engine->class);
2575 info->engine_submit_mask = engine->logical_mask;
2576 /*
2577 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2578 * only supports 32 bit currently.
2579 */
2580 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
2581 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
2582 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
2583 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2584 info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
2585
2586 /*
2587 * If context is a parent, we need to register a process descriptor
2588 * describing a work queue and register all child contexts.
2589 */
2590 if (intel_context_is_parent(ce)) {
2591 struct guc_sched_wq_desc *wq_desc;
2592 u64 wq_desc_offset, wq_base_offset;
2593
2594 ce->parallel.guc.wqi_tail = 0;
2595 ce->parallel.guc.wqi_head = 0;
2596
2597 wq_desc_offset = i915_ggtt_offset(ce->state) +
2598 __get_parent_scratch_offset(ce);
2599 wq_base_offset = i915_ggtt_offset(ce->state) +
2600 __get_wq_offset(ce);
2601 info->wq_desc_lo = lower_32_bits(wq_desc_offset);
2602 info->wq_desc_hi = upper_32_bits(wq_desc_offset);
2603 info->wq_base_lo = lower_32_bits(wq_base_offset);
2604 info->wq_base_hi = upper_32_bits(wq_base_offset);
2605 info->wq_size = WQ_SIZE;
2606
2607 wq_desc = __get_wq_desc_v70(ce);
2608 memset(wq_desc, 0, sizeof(*wq_desc));
2609 wq_desc->wq_status = WQ_STATUS_ACTIVE;
2610
2611 ce->parallel.guc.wq_head = &wq_desc->head;
2612 ce->parallel.guc.wq_tail = &wq_desc->tail;
2613 ce->parallel.guc.wq_status = &wq_desc->wq_status;
2614
2615 clear_children_join_go_memory(ce);
2616 }
2617 }
2618
2619 static int try_context_registration(struct intel_context *ce, bool loop)
2620 {
2621 struct intel_engine_cs *engine = ce->engine;
2622 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
2623 struct intel_guc *guc = &engine->gt->uc.guc;
2624 intel_wakeref_t wakeref;
2625 u32 ctx_id = ce->guc_id.id;
2626 bool context_registered;
2627 int ret = 0;
2628
2629 GEM_BUG_ON(!sched_state_is_init(ce));
2630
2631 context_registered = ctx_id_mapped(guc, ctx_id);
2632
2633 clr_ctx_id_mapping(guc, ctx_id);
2634 set_ctx_id_mapping(guc, ctx_id, ce);
2635
2636 /*
2637 * The context_lookup xarray is used to determine if the hardware
2638 * context is currently registered. There are two cases in which it
2639 * could be registered: either the guc_id has been stolen from another
2640 * context or the lrc descriptor address of this context has changed. In
2641 * either case the context needs to be deregistered with the GuC before
2642 * registering this context.
2643 */
2644 if (context_registered) {
2645 bool disabled;
2646 unsigned long flags;
2647
2648 trace_intel_context_steal_guc_id(ce);
2649 GEM_BUG_ON(!loop);
2650
2651 /* Seal race with Reset */
2652 spin_lock_irqsave(&ce->guc_state.lock, flags);
2653 disabled = submission_disabled(guc);
2654 if (likely(!disabled)) {
2655 set_context_wait_for_deregister_to_register(ce);
2656 intel_context_get(ce);
2657 }
2658 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2659 if (unlikely(disabled)) {
2660 clr_ctx_id_mapping(guc, ctx_id);
2661 return 0; /* Will get registered later */
2662 }
2663
2664 /*
2665 * If stealing the guc_id, this ce has the same guc_id as the
2666 * context whose guc_id was stolen.
2667 */
2668 with_intel_runtime_pm(runtime_pm, wakeref)
2669 ret = deregister_context(ce, ce->guc_id.id);
2670 if (unlikely(ret == -ENODEV))
2671 ret = 0; /* Will get registered later */
2672 } else {
2673 with_intel_runtime_pm(runtime_pm, wakeref)
2674 ret = register_context(ce, loop);
2675 if (unlikely(ret == -EBUSY)) {
2676 clr_ctx_id_mapping(guc, ctx_id);
2677 } else if (unlikely(ret == -ENODEV)) {
2678 clr_ctx_id_mapping(guc, ctx_id);
2679 ret = 0; /* Will get registered later */
2680 }
2681 }
2682
2683 return ret;
2684 }
2685
2686 static int __guc_context_pre_pin(struct intel_context *ce,
2687 struct intel_engine_cs *engine,
2688 struct i915_gem_ww_ctx *ww,
2689 void **vaddr)
2690 {
2691 return lrc_pre_pin(ce, engine, ww, vaddr);
2692 }
2693
2694 static int __guc_context_pin(struct intel_context *ce,
2695 struct intel_engine_cs *engine,
2696 void *vaddr)
2697 {
2698 if (i915_ggtt_offset(ce->state) !=
2699 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
2700 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
2701
2702 /*
2703 * GuC context gets pinned in guc_request_alloc. See that function for
2704 * an explanation of why.
2705 */
2706
2707 return lrc_pin(ce, engine, vaddr);
2708 }
2709
2710 static int guc_context_pre_pin(struct intel_context *ce,
2711 struct i915_gem_ww_ctx *ww,
2712 void **vaddr)
2713 {
2714 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2715 }
2716
2717 static int guc_context_pin(struct intel_context *ce, void *vaddr)
2718 {
2719 int ret = __guc_context_pin(ce, ce->engine, vaddr);
2720
2721 if (likely(!ret && !intel_context_is_barrier(ce)))
2722 intel_engine_pm_get(ce->engine);
2723
2724 return ret;
2725 }
2726
2727 static void guc_context_unpin(struct intel_context *ce)
2728 {
2729 struct intel_guc *guc = ce_to_guc(ce);
2730
2731 unpin_guc_id(guc, ce);
2732 lrc_unpin(ce);
2733
2734 if (likely(!intel_context_is_barrier(ce)))
2735 intel_engine_pm_put_async(ce->engine);
2736 }
2737
2738 static void guc_context_post_unpin(struct intel_context *ce)
2739 {
2740 lrc_post_unpin(ce);
2741 }
2742
2743 static void __guc_context_sched_enable(struct intel_guc *guc,
2744 struct intel_context *ce)
2745 {
2746 u32 action[] = {
2747 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2748 ce->guc_id.id,
2749 GUC_CONTEXT_ENABLE
2750 };
2751
2752 trace_intel_context_sched_enable(ce);
2753
2754 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2755 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2756 }
2757
2758 static void __guc_context_sched_disable(struct intel_guc *guc,
2759 struct intel_context *ce,
2760 u16 guc_id)
2761 {
2762 u32 action[] = {
2763 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2764 guc_id, /* ce->guc_id.id not stable */
2765 GUC_CONTEXT_DISABLE
2766 };
2767
2768 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);
2769
2770 GEM_BUG_ON(intel_context_is_child(ce));
2771 trace_intel_context_sched_disable(ce);
2772
2773 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
2774 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
2775 }
2776
2777 static void guc_blocked_fence_complete(struct intel_context *ce)
2778 {
2779 lockdep_assert_held(&ce->guc_state.lock);
2780
2781 if (!i915_sw_fence_done(&ce->guc_state.blocked))
2782 i915_sw_fence_complete(&ce->guc_state.blocked);
2783 }
2784
2785 static void guc_blocked_fence_reinit(struct intel_context *ce)
2786 {
2787 lockdep_assert_held(&ce->guc_state.lock);
2788 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
2789
2790 /*
2791 * This fence is always complete unless a pending schedule disable is
2792 * outstanding. We arm the fence here and complete it when we receive
2793 * the pending schedule disable complete message.
2794 */
2795 i915_sw_fence_fini(&ce->guc_state.blocked);
2796 i915_sw_fence_reinit(&ce->guc_state.blocked);
2797 i915_sw_fence_await(&ce->guc_state.blocked);
2798 i915_sw_fence_commit(&ce->guc_state.blocked);
2799 }
2800
2801 static u16 prep_context_pending_disable(struct intel_context *ce)
2802 {
2803 lockdep_assert_held(&ce->guc_state.lock);
2804
2805 set_context_pending_disable(ce);
2806 clr_context_enabled(ce);
2807 guc_blocked_fence_reinit(ce);
2808 intel_context_get(ce);
2809
2810 return ce->guc_id.id;
2811 }
2812
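/*
 * Block a context: send a schedule disable to the GuC and return the
 * blocked fence, which is only signalled once the corresponding G2H
 * schedule-disable-complete arrives. Callers wait on the fence before
 * touching the context (see guc_context_cancel_request()).
 */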
2813 static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2814 {
2815 struct intel_guc *guc = ce_to_guc(ce);
2816 unsigned long flags;
2817 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2818 intel_wakeref_t wakeref;
2819 u16 guc_id;
2820 bool enabled;
2821
2822 GEM_BUG_ON(intel_context_is_child(ce));
2823
2824 spin_lock_irqsave(&ce->guc_state.lock, flags);
2825
2826 incr_context_blocked(ce);
2827
2828 enabled = context_enabled(ce);
2829 if (unlikely(!enabled || submission_disabled(guc))) {
2830 if (enabled)
2831 clr_context_enabled(ce);
2832 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2833 return &ce->guc_state.blocked;
2834 }
2835
2836 /*
2837 * We add +2 here as the schedule disable complete CTB handler calls
2838 * intel_context_sched_disable_unpin (-2 to pin_count).
2839 */
2840 atomic_add(2, &ce->pin_count);
2841
2842 guc_id = prep_context_pending_disable(ce);
2843
2844 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2845
2846 with_intel_runtime_pm(runtime_pm, wakeref)
2847 __guc_context_sched_disable(guc, ce, guc_id);
2848
2849 return &ce->guc_state.blocked;
2850 }
2851
2852 #define SCHED_STATE_MULTI_BLOCKED_MASK \
2853 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
2854 #define SCHED_STATE_NO_UNBLOCK \
2855 (SCHED_STATE_MULTI_BLOCKED_MASK | \
2856 SCHED_STATE_PENDING_DISABLE | \
2857 SCHED_STATE_BANNED)
2858
2859 static bool context_cant_unblock(struct intel_context *ce)
2860 {
2861 lockdep_assert_held(&ce->guc_state.lock);
2862
2863 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
2864 context_guc_id_invalid(ce) ||
2865 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
2866 !intel_context_is_pinned(ce);
2867 }
2868
2869 static void guc_context_unblock(struct intel_context *ce)
2870 {
2871 struct intel_guc *guc = ce_to_guc(ce);
2872 unsigned long flags;
2873 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2874 intel_wakeref_t wakeref;
2875 bool enable;
2876
2877 GEM_BUG_ON(context_enabled(ce));
2878 GEM_BUG_ON(intel_context_is_child(ce));
2879
2880 spin_lock_irqsave(&ce->guc_state.lock, flags);
2881
2882 if (unlikely(submission_disabled(guc) ||
2883 context_cant_unblock(ce))) {
2884 enable = false;
2885 } else {
2886 enable = true;
2887 set_context_pending_enable(ce);
2888 set_context_enabled(ce);
2889 intel_context_get(ce);
2890 }
2891
2892 decr_context_blocked(ce);
2893
2894 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2895
2896 if (enable) {
2897 with_intel_runtime_pm(runtime_pm, wakeref)
2898 __guc_context_sched_enable(guc, ce);
2899 }
2900 }
2901
2902 static void guc_context_cancel_request(struct intel_context *ce,
2903 struct i915_request *rq)
2904 {
2905 struct intel_context *block_context =
2906 request_to_scheduling_context(rq);
2907
2908 if (i915_sw_fence_signaled(&rq->submit)) {
2909 struct i915_sw_fence *fence;
2910
2911 intel_context_get(ce);
2912 fence = guc_context_block(block_context);
2913 i915_sw_fence_wait(fence);
2914 if (!i915_request_completed(rq)) {
2915 __i915_request_skip(rq);
2916 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
2917 true);
2918 }
2919
2920 guc_context_unblock(block_context);
2921 intel_context_put(ce);
2922 }
2923 }
2924
2925 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
2926 u16 guc_id,
2927 u32 preemption_timeout)
2928 {
2929 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) {
2930 struct context_policy policy;
2931
2932 __guc_context_policy_start_klv(&policy, guc_id);
2933 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2934 __guc_context_set_context_policies(guc, &policy, true);
2935 } else {
2936 u32 action[] = {
2937 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
2938 guc_id,
2939 preemption_timeout
2940 };
2941
2942 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
2943 }
2944 }
2945
2946 static void
2947 guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
2948 unsigned int preempt_timeout_ms)
2949 {
2950 struct intel_guc *guc = ce_to_guc(ce);
2951 struct intel_runtime_pm *runtime_pm =
2952 &ce->engine->gt->i915->runtime_pm;
2953 intel_wakeref_t wakeref;
2954 unsigned long flags;
2955
2956 GEM_BUG_ON(intel_context_is_child(ce));
2957
2958 guc_flush_submissions(guc);
2959
2960 spin_lock_irqsave(&ce->guc_state.lock, flags);
2961 set_context_banned(ce);
2962
2963 if (submission_disabled(guc) ||
2964 (!context_enabled(ce) && !context_pending_disable(ce))) {
2965 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2966
2967 guc_cancel_context_requests(ce);
2968 intel_engine_signal_breadcrumbs(ce->engine);
2969 } else if (!context_pending_disable(ce)) {
2970 u16 guc_id;
2971
2972 /*
2973 * We add +2 here as the schedule disable complete CTB handler
2974 * calls intel_context_sched_disable_unpin (-2 to pin_count).
2975 */
2976 atomic_add(2, &ce->pin_count);
2977
2978 guc_id = prep_context_pending_disable(ce);
2979 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2980
2981 /*
2982 * In addition to disabling scheduling, set the preemption
2983 * timeout to the minimum value (1 us) so the banned context
2984 * gets kicked off the HW ASAP.
2985 */
2986 with_intel_runtime_pm(runtime_pm, wakeref) {
2987 __guc_context_set_preemption_timeout(guc, guc_id,
2988 preempt_timeout_ms);
2989 __guc_context_sched_disable(guc, ce, guc_id);
2990 }
2991 } else {
2992 if (!context_guc_id_invalid(ce))
2993 with_intel_runtime_pm(runtime_pm, wakeref)
2994 __guc_context_set_preemption_timeout(guc,
2995 ce->guc_id.id,
2996 preempt_timeout_ms);
2997 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2998 }
2999 }
3000
3001 static void guc_context_sched_disable(struct intel_context *ce)
3002 {
3003 struct intel_guc *guc = ce_to_guc(ce);
3004 unsigned long flags;
3005 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
3006 intel_wakeref_t wakeref;
3007 u16 guc_id;
3008
3009 GEM_BUG_ON(intel_context_is_child(ce));
3010
3011 spin_lock_irqsave(&ce->guc_state.lock, flags);
3012
3013 /*
3014 * We have to check if the context has been disabled by another thread,
3015 * check if submission has been disabled to seal a race with reset and
3016 * finally check if any more requests have been committed to the
3017 * context, ensuring that a request doesn't slip through the
3018 * 'context_pending_disable' fence.
3019 */
3020 if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
3021 context_has_committed_requests(ce))) {
3022 clr_context_enabled(ce);
3023 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3024 goto unpin;
3025 }
3026 guc_id = prep_context_pending_disable(ce);
3027
3028 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3029
3030 with_intel_runtime_pm(runtime_pm, wakeref)
3031 __guc_context_sched_disable(guc, ce, guc_id);
3032
3033 return;
3034 unpin:
3035 intel_context_sched_disable_unpin(ce);
3036 }
3037
3038 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
3039 {
3040 struct intel_guc *guc = ce_to_guc(ce);
3041 struct intel_gt *gt = guc_to_gt(guc);
3042 unsigned long flags;
3043 bool disabled;
3044
3045 GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
3046 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
3047 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
3048 GEM_BUG_ON(context_enabled(ce));
3049
3050 /* Seal race with Reset */
3051 spin_lock_irqsave(&ce->guc_state.lock, flags);
3052 disabled = submission_disabled(guc);
3053 if (likely(!disabled)) {
3054 __intel_gt_pm_get(gt);
3055 set_context_destroyed(ce);
3056 clr_context_registered(ce);
3057 }
3058 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3059 if (unlikely(disabled)) {
3060 release_guc_id(guc, ce);
3061 __guc_context_destroy(ce);
3062 return;
3063 }
3064
3065 deregister_context(ce, ce->guc_id.id);
3066 }
3067
3068 static void __guc_context_destroy(struct intel_context *ce)
3069 {
3070 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
3071 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
3072 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
3073 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
3074 GEM_BUG_ON(ce->guc_state.number_committed_requests);
3075
3076 lrc_fini(ce);
3077 intel_context_fini(ce);
3078
3079 if (intel_engine_is_virtual(ce->engine)) {
3080 struct guc_virtual_engine *ve =
3081 container_of(ce, typeof(*ve), context);
3082
3083 if (ve->base.breadcrumbs)
3084 intel_breadcrumbs_put(ve->base.breadcrumbs);
3085
3086 kfree(ve);
3087 } else {
3088 intel_context_free(ce);
3089 }
3090 }
3091
3092 static void guc_flush_destroyed_contexts(struct intel_guc *guc)
3093 {
3094 struct intel_context *ce;
3095 unsigned long flags;
3096
3097 GEM_BUG_ON(!submission_disabled(guc) &&
3098 guc_submission_initialized(guc));
3099
3100 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3101 spin_lock_irqsave(&guc->submission_state.lock, flags);
3102 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3103 struct intel_context,
3104 destroyed_link);
3105 if (ce)
3106 list_del_init(&ce->destroyed_link);
3107 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3108
3109 if (!ce)
3110 break;
3111
3112 release_guc_id(guc, ce);
3113 __guc_context_destroy(ce);
3114 }
3115 }
3116
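/*
 * Unlike guc_flush_destroyed_contexts() above, which frees destroyed
 * contexts without talking to the GuC (reset / shutdown paths), this
 * sends a deregister H2G for each context; the context is freed later,
 * once the deregister-done G2H is received.
 */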
3117 static void deregister_destroyed_contexts(struct intel_guc *guc)
3118 {
3119 struct intel_context *ce;
3120 unsigned long flags;
3121
3122 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3123 spin_lock_irqsave(&guc->submission_state.lock, flags);
3124 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
3125 struct intel_context,
3126 destroyed_link);
3127 if (ce)
3128 list_del_init(&ce->destroyed_link);
3129 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3130
3131 if (!ce)
3132 break;
3133
3134 guc_lrc_desc_unpin(ce);
3135 }
3136 }
3137
3138 static void destroyed_worker_func(struct work_struct *w)
3139 {
3140 struct intel_guc *guc = container_of(w, struct intel_guc,
3141 submission_state.destroyed_worker);
3142 struct intel_gt *gt = guc_to_gt(guc);
3143 int tmp;
3144
3145 with_intel_gt_pm(gt, tmp)
3146 deregister_destroyed_contexts(guc);
3147 }
3148
3149 static void guc_context_destroy(struct kref *kref)
3150 {
3151 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3152 struct intel_guc *guc = ce_to_guc(ce);
3153 unsigned long flags;
3154 bool destroy;
3155
3156 /*
3157 * If the guc_id is invalid this context has been stolen and we can free
3158 * it immediately. It can also be freed immediately if the context is not
3159 * registered with the GuC or the GuC is in the middle of a reset.
3160 */
3161 spin_lock_irqsave(&guc->submission_state.lock, flags);
3162 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
3163 !ctx_id_mapped(guc, ce->guc_id.id);
3164 if (likely(!destroy)) {
3165 if (!list_empty(&ce->guc_id.link))
3166 list_del_init(&ce->guc_id.link);
3167 list_add_tail(&ce->destroyed_link,
3168 &guc->submission_state.destroyed_contexts);
3169 } else {
3170 __release_guc_id(guc, ce);
3171 }
3172 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
3173 if (unlikely(destroy)) {
3174 __guc_context_destroy(ce);
3175 return;
3176 }
3177
3178 /*
3179 * We use a worker to issue the H2G to deregister the context as we can
3180 * take the GT PM for the first time, which isn't allowed from an atomic
3181 * context.
3182 */
3183 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
3184 }
3185
3186 static int guc_context_alloc(struct intel_context *ce)
3187 {
3188 return lrc_alloc(ce, ce->engine);
3189 }
3190
3191 static void __guc_context_set_prio(struct intel_guc *guc,
3192 struct intel_context *ce)
3193 {
3194 if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) {
3195 struct context_policy policy;
3196
3197 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
3198 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
3199 __guc_context_set_context_policies(guc, &policy, true);
3200 } else {
3201 u32 action[] = {
3202 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
3203 ce->guc_id.id,
3204 ce->guc_state.prio,
3205 };
3206
3207 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
3208 }
3209 }
3210
3211 static void guc_context_set_prio(struct intel_guc *guc,
3212 struct intel_context *ce,
3213 u8 prio)
3214 {
3215 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
3216 prio > GUC_CLIENT_PRIORITY_NORMAL);
3217 lockdep_assert_held(&ce->guc_state.lock);
3218
3219 if (ce->guc_state.prio == prio || submission_disabled(guc) ||
3220 !context_registered(ce)) {
3221 ce->guc_state.prio = prio;
3222 return;
3223 }
3224
3225 ce->guc_state.prio = prio;
3226 __guc_context_set_prio(guc, ce);
3227
3228 trace_intel_context_set_prio(ce);
3229 }
3230
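/*
 * i915 -> GuC priority mapping (a lower GuC value means a higher
 * priority):
 *   prio <  NORMAL            -> GUC_CLIENT_PRIORITY_NORMAL
 *   prio == NORMAL            -> GUC_CLIENT_PRIORITY_KMD_NORMAL
 *   NORMAL < prio < DISPLAY   -> GUC_CLIENT_PRIORITY_HIGH
 *   prio >= DISPLAY           -> GUC_CLIENT_PRIORITY_KMD_HIGH
 */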
3231 static inline u8 map_i915_prio_to_guc_prio(int prio)
3232 {
3233 if (prio == I915_PRIORITY_NORMAL)
3234 return GUC_CLIENT_PRIORITY_KMD_NORMAL;
3235 else if (prio < I915_PRIORITY_NORMAL)
3236 return GUC_CLIENT_PRIORITY_NORMAL;
3237 else if (prio < I915_PRIORITY_DISPLAY)
3238 return GUC_CLIENT_PRIORITY_HIGH;
3239 else
3240 return GUC_CLIENT_PRIORITY_KMD_HIGH;
3241 }
3242
3243 static inline void add_context_inflight_prio(struct intel_context *ce,
3244 u8 guc_prio)
3245 {
3246 lockdep_assert_held(&ce->guc_state.lock);
3247 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3248
3249 ++ce->guc_state.prio_count[guc_prio];
3250
3251 /* Overflow protection */
3252 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3253 }
3254
3255 static inline void sub_context_inflight_prio(struct intel_context *ce,
3256 u8 guc_prio)
3257 {
3258 lockdep_assert_held(&ce->guc_state.lock);
3259 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
3260
3261 /* Underflow protection */
3262 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
3263
3264 --ce->guc_state.prio_count[guc_prio];
3265 }
3266
3267 static inline void update_context_prio(struct intel_context *ce)
3268 {
3269 struct intel_guc *guc = &ce->engine->gt->uc.guc;
3270 int i;
3271
3272 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
3273 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
3274
3275 lockdep_assert_held(&ce->guc_state.lock);
3276
3277 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
3278 if (ce->guc_state.prio_count[i]) {
3279 guc_context_set_prio(guc, ce, i);
3280 break;
3281 }
3282 }
3283 }
3284
3285 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
3286 {
3287 /* Lower value is higher priority */
3288 return new_guc_prio < old_guc_prio;
3289 }
3290
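/*
 * A context runs at the highest (numerically lowest) GuC priority of any
 * of its in-flight requests; prio_count[] tracks how many requests sit at
 * each level and update_context_prio() picks the first non-zero entry.
 */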
3291 static void add_to_context(struct i915_request *rq)
3292 {
3293 struct intel_context *ce = request_to_scheduling_context(rq);
3294 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
3295
3296 GEM_BUG_ON(intel_context_is_child(ce));
3297 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
3298
3299 spin_lock(&ce->guc_state.lock);
3300 list_move_tail(&rq->sched.link, &ce->guc_state.requests);
3301
3302 if (rq->guc_prio == GUC_PRIO_INIT) {
3303 rq->guc_prio = new_guc_prio;
3304 add_context_inflight_prio(ce, rq->guc_prio);
3305 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
3306 sub_context_inflight_prio(ce, rq->guc_prio);
3307 rq->guc_prio = new_guc_prio;
3308 add_context_inflight_prio(ce, rq->guc_prio);
3309 }
3310 update_context_prio(ce);
3311
3312 spin_unlock(&ce->guc_state.lock);
3313 }
3314
3315 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
3316 {
3317 lockdep_assert_held(&ce->guc_state.lock);
3318
3319 if (rq->guc_prio != GUC_PRIO_INIT &&
3320 rq->guc_prio != GUC_PRIO_FINI) {
3321 sub_context_inflight_prio(ce, rq->guc_prio);
3322 update_context_prio(ce);
3323 }
3324 rq->guc_prio = GUC_PRIO_FINI;
3325 }
3326
3327 static void remove_from_context(struct i915_request *rq)
3328 {
3329 struct intel_context *ce = request_to_scheduling_context(rq);
3330
3331 GEM_BUG_ON(intel_context_is_child(ce));
3332
3333 spin_lock_irq(&ce->guc_state.lock);
3334
3335 list_del_init(&rq->sched.link);
3336 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3337
3338 /* Prevent further __await_execution() registering a cb, then flush */
3339 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
3340
3341 guc_prio_fini(rq, ce);
3342
3343 decr_context_committed_requests(ce);
3344
3345 spin_unlock_irq(&ce->guc_state.lock);
3346
3347 atomic_dec(&ce->guc_id.ref);
3348 i915_request_notify_execute_cb_imm(rq);
3349 }
3350
3351 static const struct intel_context_ops guc_context_ops = {
3352 .alloc = guc_context_alloc,
3353
3354 .pre_pin = guc_context_pre_pin,
3355 .pin = guc_context_pin,
3356 .unpin = guc_context_unpin,
3357 .post_unpin = guc_context_post_unpin,
3358
3359 .revoke = guc_context_revoke,
3360
3361 .cancel_request = guc_context_cancel_request,
3362
3363 .enter = intel_context_enter_engine,
3364 .exit = intel_context_exit_engine,
3365
3366 .sched_disable = guc_context_sched_disable,
3367
3368 .reset = lrc_reset,
3369 .destroy = guc_context_destroy,
3370
3371 .create_virtual = guc_create_virtual,
3372 .create_parallel = guc_create_parallel,
3373 };
3374
3375 static void submit_work_cb(struct irq_work *wrk)
3376 {
3377 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
3378
3379 might_lock(&rq->engine->sched_engine->lock);
3380 i915_sw_fence_complete(&rq->submit);
3381 }
3382
3383 static void __guc_signal_context_fence(struct intel_context *ce)
3384 {
3385 struct i915_request *rq, *rn;
3386
3387 lockdep_assert_held(&ce->guc_state.lock);
3388
3389 if (!list_empty(&ce->guc_state.fences))
3390 trace_intel_context_fence_release(ce);
3391
3392 /*
3393 * Use an IRQ to ensure locking order of sched_engine->lock ->
3394 * ce->guc_state.lock is preserved.
3395 */
3396 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
3397 guc_fence_link) {
3398 list_del(&rq->guc_fence_link);
3399 irq_work_queue(&rq->submit_work);
3400 }
3401
3402 INIT_LIST_HEAD(&ce->guc_state.fences);
3403 }
3404
3405 static void guc_signal_context_fence(struct intel_context *ce)
3406 {
3407 unsigned long flags;
3408
3409 GEM_BUG_ON(intel_context_is_child(ce));
3410
3411 spin_lock_irqsave(&ce->guc_state.lock, flags);
3412 clr_context_wait_for_deregister_to_register(ce);
3413 __guc_signal_context_fence(ce);
3414 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3415 }
3416
3417 static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
3418 {
3419 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
3420 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
3421 !submission_disabled(ce_to_guc(ce));
3422 }
3423
3424 static void guc_context_init(struct intel_context *ce)
3425 {
3426 const struct i915_gem_context *ctx;
3427 int prio = I915_CONTEXT_DEFAULT_PRIORITY;
3428
3429 rcu_read_lock();
3430 ctx = rcu_dereference(ce->gem_context);
3431 if (ctx)
3432 prio = ctx->sched.priority;
3433 rcu_read_unlock();
3434
3435 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
3436 set_bit(CONTEXT_GUC_INIT, &ce->flags);
3437 }
3438
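/*
 * guc_request_alloc() prepares a request for GuC submission: it reserves
 * ring space for the breadcrumb, emits a cache/TLB invalidation, lazily
 * pins a guc_id for the context (registering the context with the GuC if
 * needed) and, if a schedule-disable or deregister G2H is still pending,
 * parks the request on ce->guc_state.fences so it cannot be submitted
 * until that G2H arrives.
 */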
3439 static int guc_request_alloc(struct i915_request *rq)
3440 {
3441 struct intel_context *ce = request_to_scheduling_context(rq);
3442 struct intel_guc *guc = ce_to_guc(ce);
3443 unsigned long flags;
3444 int ret;
3445
3446 GEM_BUG_ON(!intel_context_is_pinned(rq->context));
3447
3448 /*
3449 * Flush enough space to reduce the likelihood of waiting after
3450 * we start building the request - in which case we will just
3451 * have to repeat work.
3452 */
3453 rq->reserved_space += GUC_REQUEST_SIZE;
3454
3455 /*
3456 * Note that after this point, we have committed to using
3457 * this request as it is being used to both track the
3458 * state of engine initialisation and liveness of the
3459 * golden renderstate above. Think twice before you try
3460 * to cancel/unwind this request now.
3461 */
3462
3463 /* Unconditionally invalidate GPU caches and TLBs. */
3464 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
3465 if (ret)
3466 return ret;
3467
3468 rq->reserved_space -= GUC_REQUEST_SIZE;
3469
3470 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
3471 guc_context_init(ce);
3472
3473 /*
3474 * Call pin_guc_id here rather than in the pinning step as with
3475 * dma_resv, contexts can be repeatedly pinned / unpinned, thrashing the
3476 * guc_id and creating horrible race conditions. This is especially bad
3477 * when guc_ids are being stolen due to over subscription. By the time
3478 * this function is reached, it is guaranteed that the guc_id will be
3479 * persistent until the generated request is retired, thus sealing these
3480 * race conditions. It is still safe to fail here if guc_ids are
3481 * exhausted and return -EAGAIN to the user, indicating that they can try
3482 * again in the future.
3483 *
3484 * There is no need for a lock here as the timeline mutex ensures at
3485 * most one context can be executing this code path at once. The
3486 * guc_id_ref is incremented once for every request in flight and
3487 * decremented on each retire. When it is zero, a lock around the
3488 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
3489 */
3490 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3491 goto out;
3492
3493 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
3494 if (unlikely(ret < 0))
3495 return ret;
3496 if (context_needs_register(ce, !!ret)) {
3497 ret = try_context_registration(ce, true);
3498 if (unlikely(ret)) { /* unwind */
3499 if (ret == -EPIPE) {
3500 disable_submission(guc);
3501 goto out; /* GPU will be reset */
3502 }
3503 atomic_dec(&ce->guc_id.ref);
3504 unpin_guc_id(guc, ce);
3505 return ret;
3506 }
3507 }
3508
3509 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
3510
3511 out:
3512 /*
3513 * We block all requests on this context if a G2H is pending for a
3514 * schedule disable or context deregistration as the GuC will fail a
3515 * schedule enable or context registration if either G2H is pending
3516 * respectively. Once a G2H returns, the fence that is blocking these
3517 * requests is released (see guc_signal_context_fence).
3518 */
3519 spin_lock_irqsave(&ce->guc_state.lock, flags);
3520 if (context_wait_for_deregister_to_register(ce) ||
3521 context_pending_disable(ce)) {
3522 init_irq_work(&rq->submit_work, submit_work_cb);
3523 i915_sw_fence_await(&rq->submit);
3524
3525 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
3526 }
3527 incr_context_committed_requests(ce);
3528 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
3529
3530 return 0;
3531 }
3532
3533 static int guc_virtual_context_pre_pin(struct intel_context *ce,
3534 struct i915_gem_ww_ctx *ww,
3535 void **vaddr)
3536 {
3537 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3538
3539 return __guc_context_pre_pin(ce, engine, ww, vaddr);
3540 }
3541
3542 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3543 {
3544 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3545 int ret = __guc_context_pin(ce, engine, vaddr);
3546 intel_engine_mask_t tmp, mask = ce->engine->mask;
3547
3548 if (likely(!ret))
3549 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3550 intel_engine_pm_get(engine);
3551
3552 return ret;
3553 }
3554
3555 static void guc_virtual_context_unpin(struct intel_context *ce)
3556 {
3557 intel_engine_mask_t tmp, mask = ce->engine->mask;
3558 struct intel_engine_cs *engine;
3559 struct intel_guc *guc = ce_to_guc(ce);
3560
3561 GEM_BUG_ON(context_enabled(ce));
3562 GEM_BUG_ON(intel_context_is_barrier(ce));
3563
3564 unpin_guc_id(guc, ce);
3565 lrc_unpin(ce);
3566
3567 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3568 intel_engine_pm_put_async(engine);
3569 }
3570
3571 static void guc_virtual_context_enter(struct intel_context *ce)
3572 {
3573 intel_engine_mask_t tmp, mask = ce->engine->mask;
3574 struct intel_engine_cs *engine;
3575
3576 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3577 intel_engine_pm_get(engine);
3578
3579 intel_timeline_enter(ce->timeline);
3580 }
3581
3582 static void guc_virtual_context_exit(struct intel_context *ce)
3583 {
3584 intel_engine_mask_t tmp, mask = ce->engine->mask;
3585 struct intel_engine_cs *engine;
3586
3587 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)
3588 intel_engine_pm_put(engine);
3589
3590 intel_timeline_exit(ce->timeline);
3591 }
3592
3593 static int guc_virtual_context_alloc(struct intel_context *ce)
3594 {
3595 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3596
3597 return lrc_alloc(ce, engine);
3598 }
3599
3600 static const struct intel_context_ops virtual_guc_context_ops = {
3601 .alloc = guc_virtual_context_alloc,
3602
3603 .pre_pin = guc_virtual_context_pre_pin,
3604 .pin = guc_virtual_context_pin,
3605 .unpin = guc_virtual_context_unpin,
3606 .post_unpin = guc_context_post_unpin,
3607
3608 .revoke = guc_context_revoke,
3609
3610 .cancel_request = guc_context_cancel_request,
3611
3612 .enter = guc_virtual_context_enter,
3613 .exit = guc_virtual_context_exit,
3614
3615 .sched_disable = guc_context_sched_disable,
3616
3617 .destroy = guc_context_destroy,
3618
3619 .get_sibling = guc_virtual_get_sibling,
3620 };
3621
3622 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
3623 {
3624 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3625 struct intel_guc *guc = ce_to_guc(ce);
3626 int ret;
3627
3628 GEM_BUG_ON(!intel_context_is_parent(ce));
3629 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3630
3631 ret = pin_guc_id(guc, ce);
3632 if (unlikely(ret < 0))
3633 return ret;
3634
3635 return __guc_context_pin(ce, engine, vaddr);
3636 }
3637
3638 static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
3639 {
3640 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3641
3642 GEM_BUG_ON(!intel_context_is_child(ce));
3643 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3644
3645 __intel_context_pin(ce->parallel.parent);
3646 return __guc_context_pin(ce, engine, vaddr);
3647 }
3648
3649 static void guc_parent_context_unpin(struct intel_context *ce)
3650 {
3651 struct intel_guc *guc = ce_to_guc(ce);
3652
3653 GEM_BUG_ON(context_enabled(ce));
3654 GEM_BUG_ON(intel_context_is_barrier(ce));
3655 GEM_BUG_ON(!intel_context_is_parent(ce));
3656 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3657
3658 unpin_guc_id(guc, ce);
3659 lrc_unpin(ce);
3660 }
3661
3662 static void guc_child_context_unpin(struct intel_context *ce)
3663 {
3664 GEM_BUG_ON(context_enabled(ce));
3665 GEM_BUG_ON(intel_context_is_barrier(ce));
3666 GEM_BUG_ON(!intel_context_is_child(ce));
3667 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3668
3669 lrc_unpin(ce);
3670 }
3671
3672 static void guc_child_context_post_unpin(struct intel_context *ce)
3673 {
3674 GEM_BUG_ON(!intel_context_is_child(ce));
3675 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
3676 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
3677
3678 lrc_post_unpin(ce);
3679 intel_context_unpin(ce->parallel.parent);
3680 }
3681
3682 static void guc_child_context_destroy(struct kref *kref)
3683 {
3684 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3685
3686 __guc_context_destroy(ce);
3687 }
3688
3689 static const struct intel_context_ops virtual_parent_context_ops = {
3690 .alloc = guc_virtual_context_alloc,
3691
3692 .pre_pin = guc_context_pre_pin,
3693 .pin = guc_parent_context_pin,
3694 .unpin = guc_parent_context_unpin,
3695 .post_unpin = guc_context_post_unpin,
3696
3697 .revoke = guc_context_revoke,
3698
3699 .cancel_request = guc_context_cancel_request,
3700
3701 .enter = guc_virtual_context_enter,
3702 .exit = guc_virtual_context_exit,
3703
3704 .sched_disable = guc_context_sched_disable,
3705
3706 .destroy = guc_context_destroy,
3707
3708 .get_sibling = guc_virtual_get_sibling,
3709 };
3710
3711 static const struct intel_context_ops virtual_child_context_ops = {
3712 .alloc = guc_virtual_context_alloc,
3713
3714 .pre_pin = guc_context_pre_pin,
3715 .pin = guc_child_context_pin,
3716 .unpin = guc_child_context_unpin,
3717 .post_unpin = guc_child_context_post_unpin,
3718
3719 .cancel_request = guc_context_cancel_request,
3720
3721 .enter = guc_virtual_context_enter,
3722 .exit = guc_virtual_context_exit,
3723
3724 .destroy = guc_child_context_destroy,
3725
3726 .get_sibling = guc_virtual_get_sibling,
3727 };
3728
3729 /*
3730 * The below override of the breadcrumbs is enabled when the user configures a
3731 * context for parallel submission (multi-lrc, parent-child).
3732 *
3733 * The overridden breadcrumbs implement an algorithm which allows the GuC to
3734 * safely preempt all the hw contexts configured for parallel submission
3735 * between each BB. The contract between the i915 and the GuC is that if the parent
3736 * context can be preempted, all the children can be preempted, and the GuC will
3737 * always try to preempt the parent before the children. A handshake between the
3738 * parent / children breadcrumbs ensures the i915 holds up its end of the deal
3739 * creating a window to preempt between each set of BBs.
3740 */
3741 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3742 u64 offset, u32 len,
3743 const unsigned int flags);
3744 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
3745 u64 offset, u32 len,
3746 const unsigned int flags);
3747 static u32 *
3748 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
3749 u32 *cs);
3750 static u32 *
3751 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
3752 u32 *cs);
3753
3754 static struct intel_context *
3755 guc_create_parallel(struct intel_engine_cs **engines,
3756 unsigned int num_siblings,
3757 unsigned int width)
3758 {
3759 struct intel_engine_cs **siblings = NULL;
3760 struct intel_context *parent = NULL, *ce, *err;
3761 int i, j;
3762
3763 siblings = kmalloc_array(num_siblings,
3764 sizeof(*siblings),
3765 GFP_KERNEL);
3766 if (!siblings)
3767 return ERR_PTR(-ENOMEM);
3768
3769 for (i = 0; i < width; ++i) {
3770 for (j = 0; j < num_siblings; ++j)
3771 siblings[j] = engines[i * num_siblings + j];
3772
3773 ce = intel_engine_create_virtual(siblings, num_siblings,
3774 FORCE_VIRTUAL);
3775 if (IS_ERR(ce)) {
3776 err = ERR_CAST(ce);
3777 goto unwind;
3778 }
3779
3780 if (i == 0) {
3781 parent = ce;
3782 parent->ops = &virtual_parent_context_ops;
3783 } else {
3784 ce->ops = &virtual_child_context_ops;
3785 intel_context_bind_parent_child(parent, ce);
3786 }
3787 }
3788
3789 parent->parallel.fence_context = dma_fence_context_alloc(1);
3790
3791 parent->engine->emit_bb_start =
3792 emit_bb_start_parent_no_preempt_mid_batch;
3793 parent->engine->emit_fini_breadcrumb =
3794 emit_fini_breadcrumb_parent_no_preempt_mid_batch;
3795 parent->engine->emit_fini_breadcrumb_dw =
3796 12 + 4 * parent->parallel.number_children;
3797 for_each_child(parent, ce) {
3798 ce->engine->emit_bb_start =
3799 emit_bb_start_child_no_preempt_mid_batch;
3800 ce->engine->emit_fini_breadcrumb =
3801 emit_fini_breadcrumb_child_no_preempt_mid_batch;
3802 ce->engine->emit_fini_breadcrumb_dw = 16;
3803 }
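
/*
 * The emit_fini_breadcrumb_dw values above follow from the emission helpers
 * below: the parent waits on each child (4 dwords per child), re-enables
 * arbitration (2), releases the children (4), then writes its seqno and
 * raises the user interrupt (6); each child uses a fixed 16 dwords for the
 * mirror-image sequence.
 */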
3804
3805 kfree(siblings);
3806 return parent;
3807
3808 unwind:
3809 if (parent)
3810 intel_context_put(parent);
3811 kfree(siblings);
3812 return err;
3813 }
3814
3815 static bool
3816 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
3817 {
3818 struct intel_engine_cs *sibling;
3819 intel_engine_mask_t tmp, mask = b->engine_mask;
3820 bool result = false;
3821
3822 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3823 result |= intel_engine_irq_enable(sibling);
3824
3825 return result;
3826 }
3827
3828 static void
3829 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
3830 {
3831 struct intel_engine_cs *sibling;
3832 intel_engine_mask_t tmp, mask = b->engine_mask;
3833
3834 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)
3835 intel_engine_irq_disable(sibling);
3836 }
3837
3838 static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
3839 {
3840 int i;
3841
3842 /*
3843 * In GuC submission mode we do not know which physical engine a request
3844 * will be scheduled on, which creates a problem because the breadcrumb
3845 * interrupt is per physical engine. To work around this we attach
3846 * requests and direct all breadcrumb interrupts to the first instance
3847 * of an engine per class. In addition all breadcrumb interrupts are
3848 * enabled / disabled across an engine class in unison.
3849 */
3850 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) {
3851 struct intel_engine_cs *sibling =
3852 engine->gt->engine_class[engine->class][i];
3853
3854 if (sibling) {
3855 if (engine->breadcrumbs != sibling->breadcrumbs) {
3856 intel_breadcrumbs_put(engine->breadcrumbs);
3857 engine->breadcrumbs =
3858 intel_breadcrumbs_get(sibling->breadcrumbs);
3859 }
3860 break;
3861 }
3862 }
3863
3864 if (engine->breadcrumbs) {
3865 engine->breadcrumbs->engine_mask |= engine->mask;
3866 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
3867 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
3868 }
3869 }
3870
3871 static void guc_bump_inflight_request_prio(struct i915_request *rq,
3872 int prio)
3873 {
3874 struct intel_context *ce = request_to_scheduling_context(rq);
3875 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
3876
3877 /* Short circuit function */
3878 if (prio < I915_PRIORITY_NORMAL ||
3879 rq->guc_prio == GUC_PRIO_FINI ||
3880 (rq->guc_prio != GUC_PRIO_INIT &&
3881 !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
3882 return;
3883
3884 spin_lock(&ce->guc_state.lock);
3885 if (rq->guc_prio != GUC_PRIO_FINI) {
3886 if (rq->guc_prio != GUC_PRIO_INIT)
3887 sub_context_inflight_prio(ce, rq->guc_prio);
3888 rq->guc_prio = new_guc_prio;
3889 add_context_inflight_prio(ce, rq->guc_prio);
3890 update_context_prio(ce);
3891 }
3892 spin_unlock(&ce->guc_state.lock);
3893 }
3894
3895 static void guc_retire_inflight_request_prio(struct i915_request *rq)
3896 {
3897 struct intel_context *ce = request_to_scheduling_context(rq);
3898
3899 spin_lock(&ce->guc_state.lock);
3900 guc_prio_fini(rq, ce);
3901 spin_unlock(&ce->guc_state.lock);
3902 }
3903
3904 static void sanitize_hwsp(struct intel_engine_cs *engine)
3905 {
3906 struct intel_timeline *tl;
3907
3908 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
3909 intel_timeline_reset_seqno(tl);
3910 }
3911
3912 static void guc_sanitize(struct intel_engine_cs *engine)
3913 {
3914 /*
3915 * Poison residual state on resume, in case the suspend didn't!
3916 *
3917 * We have to assume that across suspend/resume (or other loss
3918 * of control) that the contents of our pinned buffers have been
3919 * lost, replaced by garbage. Since this doesn't always happen,
3920 * let's poison such state so that we more quickly spot when
3921 * we falsely assume it has been preserved.
3922 */
3923 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3924 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
3925
3926 /*
3927 * The kernel_context HWSP is stored in the status_page. As above,
3928 * that may be lost on resume/initialisation, and so we need to
3929 * reset the value in the HWSP.
3930 */
3931 sanitize_hwsp(engine);
3932
3933 /* And scrub the dirty cachelines for the HWSP */
3934 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
3935
3936 intel_engine_reset_pinned_contexts(engine);
3937 }
3938
3939 static void setup_hwsp(struct intel_engine_cs *engine)
3940 {
3941 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3942
3943 ENGINE_WRITE_FW(engine,
3944 RING_HWS_PGA,
3945 i915_ggtt_offset(engine->status_page.vma));
3946 }
3947
3948 static void start_engine(struct intel_engine_cs *engine)
3949 {
3950 ENGINE_WRITE_FW(engine,
3951 RING_MODE_GEN7,
3952 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
3953
3954 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
3955 ENGINE_POSTING_READ(engine, RING_MI_MODE);
3956 }
3957
3958 static int guc_resume(struct intel_engine_cs *engine)
3959 {
3960 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3961
3962 intel_mocs_init_engine(engine);
3963
3964 intel_breadcrumbs_reset(engine->breadcrumbs);
3965
3966 setup_hwsp(engine);
3967 start_engine(engine);
3968
3969 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
3970 xehp_enable_ccs_engines(engine);
3971
3972 return 0;
3973 }
3974
3975 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
3976 {
3977 return !sched_engine->tasklet.callback;
3978 }
3979
3980 static void guc_set_default_submission(struct intel_engine_cs *engine)
3981 {
3982 engine->submit_request = guc_submit_request;
3983 }
3984
3985 static inline void guc_kernel_context_pin(struct intel_guc *guc,
3986 struct intel_context *ce)
3987 {
3988 /*
3989 * Note: we purposefully do not check the returns below because
3990 * the registration can only fail if a reset is just starting.
3991 * This is called at the end of reset so presumably another reset
3992 * isn't happening and even if it did, this code would be run again.
3993 */
3994
3995 if (context_guc_id_invalid(ce))
3996 pin_guc_id(guc, ce);
3997
3998 try_context_registration(ce, true);
3999 }
4000
4001 static inline void guc_init_lrc_mapping(struct intel_guc *guc)
4002 {
4003 struct intel_gt *gt = guc_to_gt(guc);
4004 struct intel_engine_cs *engine;
4005 enum intel_engine_id id;
4006
4007 /* make sure all descriptors are clean... */
4008 xa_destroy(&guc->context_lookup);
4009
4010 /*
4011 * A reset might have occurred while we had a pending stalled request,
4012 * so make sure we clean that up.
4013 */
4014 guc->stalled_request = NULL;
4015 guc->submission_stall_reason = STALL_NONE;
4016
4017 /*
4018 * Some contexts might have been pinned before we enabled GuC
4019 * submission, so we need to add them to the GuC bookkeeping.
4020 * Also, after a reset of the GuC we want to make sure that the
4021 * information shared with GuC is properly reset. The kernel LRCs are
4022 * not attached to the gem_context, so they need to be added separately.
4023 */
4024 for_each_engine(engine, gt, id) {
4025 struct intel_context *ce;
4026
4027 list_for_each_entry(ce, &engine->pinned_contexts_list,
4028 pinned_contexts_link)
4029 guc_kernel_context_pin(guc, ce);
4030 }
4031 }
4032
4033 static void guc_release(struct intel_engine_cs *engine)
4034 {
4035 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
4036
4037 intel_engine_cleanup_common(engine);
4038 lrc_fini_wa_ctx(engine);
4039 }
4040
4041 static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
4042 {
4043 struct intel_engine_cs *e;
4044 intel_engine_mask_t tmp, mask = engine->mask;
4045
4046 for_each_engine_masked(e, engine->gt, mask, tmp)
4047 e->serial++;
4048 }
4049
4050 static void guc_default_vfuncs(struct intel_engine_cs *engine)
4051 {
4052 /* Default vfuncs which can be overridden by each engine. */
4053
4054 engine->resume = guc_resume;
4055
4056 engine->cops = &guc_context_ops;
4057 engine->request_alloc = guc_request_alloc;
4058 engine->add_active_request = add_to_context;
4059 engine->remove_active_request = remove_from_context;
4060
4061 engine->sched_engine->schedule = i915_schedule;
4062
4063 engine->reset.prepare = guc_engine_reset_prepare;
4064 engine->reset.rewind = guc_rewind_nop;
4065 engine->reset.cancel = guc_reset_nop;
4066 engine->reset.finish = guc_reset_nop;
4067
4068 engine->emit_flush = gen8_emit_flush_xcs;
4069 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4070 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
4071 if (GRAPHICS_VER(engine->i915) >= 12) {
4072 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
4073 engine->emit_flush = gen12_emit_flush_xcs;
4074 }
4075 engine->set_default_submission = guc_set_default_submission;
4076 engine->busyness = guc_engine_busyness;
4077
4078 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
4079 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
4080 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
4081
4082 /* Wa_14014475959:dg2 */
4083 if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS)
4084 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
4085
4086 /*
4087 * TODO: GuC supports timeslicing and semaphores as well, but they're
4088 * handled by the firmware so some minor tweaks are required before
4089 * enabling.
4090 *
4091 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4092 */
4093
4094 engine->emit_bb_start = gen8_emit_bb_start;
4095 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
4096 engine->emit_bb_start = gen125_emit_bb_start;
4097 }
4098
4099 static void rcs_submission_override(struct intel_engine_cs *engine)
4100 {
4101 switch (GRAPHICS_VER(engine->i915)) {
4102 case 12:
4103 engine->emit_flush = gen12_emit_flush_rcs;
4104 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4105 break;
4106 case 11:
4107 engine->emit_flush = gen11_emit_flush_rcs;
4108 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4109 break;
4110 default:
4111 engine->emit_flush = gen8_emit_flush_rcs;
4112 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4113 break;
4114 }
4115 }
4116
4117 static inline void guc_default_irqs(struct intel_engine_cs *engine)
4118 {
4119 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
4120 intel_engine_set_irq_handler(engine, cs_irq_handler);
4121 }
4122
4123 static void guc_sched_engine_destroy(struct kref *kref)
4124 {
4125 struct i915_sched_engine *sched_engine =
4126 container_of(kref, typeof(*sched_engine), ref);
4127 struct intel_guc *guc = sched_engine->private_data;
4128
4129 guc->sched_engine = NULL;
4130 tasklet_kill(&sched_engine->tasklet); /* flush the callback */
4131 kfree(sched_engine);
4132 }
4133
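/*
 * Wire an engine up for GuC submission. All engines share a single
 * GuC-owned i915_sched_engine (created on first use), so per-engine
 * tasklets are not used; the engine's vfuncs, IRQ handler and breadcrumbs
 * are then overridden with the GuC variants below.
 */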
4134 int intel_guc_submission_setup(struct intel_engine_cs *engine)
4135 {
4136 struct drm_i915_private *i915 = engine->i915;
4137 struct intel_guc *guc = &engine->gt->uc.guc;
4138
4139 /*
4140 * The setup relies on several assumptions (e.g. irqs always enabled)
4141 * that are only valid on gen11+
4142 */
4143 GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
4144
4145 if (!guc->sched_engine) {
4146 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
4147 if (!guc->sched_engine)
4148 return -ENOMEM;
4149
4150 guc->sched_engine->schedule = i915_schedule;
4151 guc->sched_engine->disabled = guc_sched_engine_disabled;
4152 guc->sched_engine->private_data = guc;
4153 guc->sched_engine->destroy = guc_sched_engine_destroy;
4154 guc->sched_engine->bump_inflight_request_prio =
4155 guc_bump_inflight_request_prio;
4156 guc->sched_engine->retire_inflight_request_prio =
4157 guc_retire_inflight_request_prio;
4158 tasklet_setup(&guc->sched_engine->tasklet,
4159 guc_submission_tasklet);
4160 }
4161 i915_sched_engine_put(engine->sched_engine);
4162 engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
4163
4164 guc_default_vfuncs(engine);
4165 guc_default_irqs(engine);
4166 guc_init_breadcrumbs(engine);
4167
4168 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)
4169 rcs_submission_override(engine);
4170
4171 lrc_init_wa_ctx(engine);
4172
4173 /* Finally, take ownership and responsibility for cleanup! */
4174 engine->sanitize = guc_sanitize;
4175 engine->release = guc_release;
4176
4177 return 0;
4178 }
4179
4180 void intel_guc_submission_enable(struct intel_guc *guc)
4181 {
4182 struct intel_gt *gt = guc_to_gt(guc);
4183
4184 /* Enable and route to GuC */
4185 if (GRAPHICS_VER(gt->i915) >= 12)
4186 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES,
4187 GUC_SEM_INTR_ROUTE_TO_GUC |
4188 GUC_SEM_INTR_ENABLE_ALL);
4189
4190 guc_init_lrc_mapping(guc);
4191 guc_init_engine_stats(guc);
4192 }
4193
4194 void intel_guc_submission_disable(struct intel_guc *guc)
4195 {
4196 struct intel_gt *gt = guc_to_gt(guc);
4197
4198 /* Note: By the time we're here, GuC may have already been reset */
4199
4200 /* Disable and route to host */
4201 if (GRAPHICS_VER(gt->i915) >= 12)
4202 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0);
4203 }
4204
4205 static bool __guc_submission_supported(struct intel_guc *guc)
4206 {
4207 /* GuC submission is unavailable for pre-Gen11 */
4208 return intel_guc_is_supported(guc) &&
4209 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
4210 }
4211
4212 static bool __guc_submission_selected(struct intel_guc *guc)
4213 {
4214 struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
4215
4216 if (!intel_guc_submission_is_supported(guc))
4217 return false;
4218
4219 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
4220 }
4221
4222 void intel_guc_submission_init_early(struct intel_guc *guc)
4223 {
4224 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
4225
4226 spin_lock_init(&guc->submission_state.lock);
4227 INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
4228 ida_init(&guc->submission_state.guc_ids);
4229 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
4230 INIT_WORK(&guc->submission_state.destroyed_worker,
4231 destroyed_worker_func);
4232 INIT_WORK(&guc->submission_state.reset_fail_worker,
4233 reset_fail_worker_func);
4234
4235 spin_lock_init(&guc->timestamp.lock);
4236 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
4237
4238 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
4239 guc->submission_supported = __guc_submission_supported(guc);
4240 guc->submission_selected = __guc_submission_selected(guc);
4241 }
4242
4243 static inline struct intel_context *
4244 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
4245 {
4246 struct intel_context *ce;
4247
4248 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
4249 drm_err(&guc_to_gt(guc)->i915->drm,
4250 "Invalid ctx_id %u\n", ctx_id);
4251 return NULL;
4252 }
4253
4254 ce = __get_context(guc, ctx_id);
4255 if (unlikely(!ce)) {
4256 drm_err(&guc_to_gt(guc)->i915->drm,
4257 "Context is NULL, ctx_id %u\n", ctx_id);
4258 return NULL;
4259 }
4260
4261 if (unlikely(intel_context_is_child(ce))) {
4262 drm_err(&guc_to_gt(guc)->i915->drm,
4263 "Context is child, ctx_id %u\n", ctx_id);
4264 return NULL;
4265 }
4266
4267 return ce;
4268 }
4269
4270 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
4271 const u32 *msg,
4272 u32 len)
4273 {
4274 struct intel_context *ce;
4275 u32 ctx_id;
4276
4277 if (unlikely(len < 1)) {
4278 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4279 return -EPROTO;
4280 }
4281 ctx_id = msg[0];
4282
4283 ce = g2h_context_lookup(guc, ctx_id);
4284 if (unlikely(!ce))
4285 return -EPROTO;
4286
4287 trace_intel_context_deregister_done(ce);
4288
4289 #ifdef CONFIG_DRM_I915_SELFTEST
4290 if (unlikely(ce->drop_deregister)) {
4291 ce->drop_deregister = false;
4292 return 0;
4293 }
4294 #endif
4295
4296 if (context_wait_for_deregister_to_register(ce)) {
4297 struct intel_runtime_pm *runtime_pm =
4298 &ce->engine->gt->i915->runtime_pm;
4299 intel_wakeref_t wakeref;
4300
4301 /*
4302 * Previous owner of this guc_id has been deregistered, now safe
4303 * to register this context.
4304 */
4305 with_intel_runtime_pm(runtime_pm, wakeref)
4306 register_context(ce, true);
4307 guc_signal_context_fence(ce);
4308 intel_context_put(ce);
4309 } else if (context_destroyed(ce)) {
4310 /* Context has been destroyed */
4311 intel_gt_pm_put_async(guc_to_gt(guc));
4312 release_guc_id(guc, ce);
4313 __guc_context_destroy(ce);
4314 }
4315
4316 decr_outstanding_submission_g2h(guc);
4317
4318 return 0;
4319 }
4320
4321 int intel_guc_sched_done_process_msg(struct intel_guc *guc,
4322 const u32 *msg,
4323 u32 len)
4324 {
4325 struct intel_context *ce;
4326 unsigned long flags;
4327 u32 ctx_id;
4328
4329 if (unlikely(len < 2)) {
4330 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
4331 return -EPROTO;
4332 }
4333 ctx_id = msg[0];
4334
4335 ce = g2h_context_lookup(guc, ctx_id);
4336 if (unlikely(!ce))
4337 return -EPROTO;
4338
4339 if (unlikely(context_destroyed(ce) ||
4340 (!context_pending_enable(ce) &&
4341 !context_pending_disable(ce)))) {
4342 drm_err(&guc_to_gt(guc)->i915->drm,
4343 "Bad context sched_state 0x%x, ctx_id %u\n",
4344 ce->guc_state.sched_state, ctx_id);
4345 return -EPROTO;
4346 }
4347
4348 trace_intel_context_sched_done(ce);
4349
4350 if (context_pending_enable(ce)) {
4351 #ifdef CONFIG_DRM_I915_SELFTEST
4352 if (unlikely(ce->drop_schedule_enable)) {
4353 ce->drop_schedule_enable = false;
4354 return 0;
4355 }
4356 #endif
4357
4358 spin_lock_irqsave(&ce->guc_state.lock, flags);
4359 clr_context_pending_enable(ce);
4360 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4361 } else if (context_pending_disable(ce)) {
4362 bool banned;
4363
4364 #ifdef CONFIG_DRM_I915_SELFTEST
4365 if (unlikely(ce->drop_schedule_disable)) {
4366 ce->drop_schedule_disable = false;
4367 return 0;
4368 }
4369 #endif
4370
4371 /*
4372 * Unpin must be done before __guc_signal_context_fence,
4373 * otherwise a race exists between the requests getting
4374 * submitted + retired before this unpin completes resulting in
4375 * the pin_count going to zero and the context still being
4376 * enabled.
4377 */
4378 intel_context_sched_disable_unpin(ce);
4379
4380 spin_lock_irqsave(&ce->guc_state.lock, flags);
4381 banned = context_banned(ce);
4382 clr_context_banned(ce);
4383 clr_context_pending_disable(ce);
4384 __guc_signal_context_fence(ce);
4385 guc_blocked_fence_complete(ce);
4386 spin_unlock_irqrestore(&ce->guc_state.lock, flags);
4387
4388 if (banned) {
4389 guc_cancel_context_requests(ce);
4390 intel_engine_signal_breadcrumbs(ce->engine);
4391 }
4392 }
4393
4394 decr_outstanding_submission_g2h(guc);
4395 intel_context_put(ce);
4396
4397 return 0;
4398 }
4399
4400 static void capture_error_state(struct intel_guc *guc,
4401 struct intel_context *ce)
4402 {
4403 struct intel_gt *gt = guc_to_gt(guc);
4404 struct drm_i915_private *i915 = gt->i915;
4405 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
4406 intel_wakeref_t wakeref;
4407
4408 intel_engine_set_hung_context(engine, ce);
4409 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
4410 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE);
4411 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
4412 }
4413
4414 static void guc_context_replay(struct intel_context *ce)
4415 {
4416 struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
4417
4418 __guc_reset_context(ce, ce->engine->mask);
4419 tasklet_hi_schedule(&sched_engine->tasklet);
4420 }
4421
4422 static void guc_handle_context_reset(struct intel_guc *guc,
4423 struct intel_context *ce)
4424 {
4425 trace_intel_context_reset(ce);
4426
4427 if (likely(intel_context_is_schedulable(ce))) {
4428 capture_error_state(guc, ce);
4429 guc_context_replay(ce);
4430 } else {
4431 drm_info(&guc_to_gt(guc)->i915->drm,
4432 "Ignoring context reset notification of exiting context 0x%04X on %s",
4433 ce->guc_id.id, ce->engine->name);
4434 }
4435 }
4436
4437 int intel_guc_context_reset_process_msg(struct intel_guc *guc,
4438 const u32 *msg, u32 len)
4439 {
4440 struct intel_context *ce;
4441 unsigned long flags;
4442 int ctx_id;
4443
4444 if (unlikely(len != 1)) {
4445 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4446 return -EPROTO;
4447 }
4448
4449 ctx_id = msg[0];
4450
4451 /*
4452 * The context lookup uses the xarray but lookups only require an RCU lock
4453 * not the full spinlock. So take the lock explicitly and keep it until the
4454 * context has been reference count locked to ensure it can't be destroyed
4455 * asynchronously until the reset is done.
4456 */
4457 xa_lock_irqsave(&guc->context_lookup, flags);
4458 ce = g2h_context_lookup(guc, ctx_id);
4459 if (ce)
4460 intel_context_get(ce);
4461 xa_unlock_irqrestore(&guc->context_lookup, flags);
4462
4463 if (unlikely(!ce))
4464 return -EPROTO;
4465
4466 guc_handle_context_reset(guc, ce);
4467 intel_context_put(ce);
4468
4469 return 0;
4470 }
4471
4472 int intel_guc_error_capture_process_msg(struct intel_guc *guc,
4473 const u32 *msg, u32 len)
4474 {
4475 u32 status;
4476
4477 if (unlikely(len != 1)) {
4478 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
4479 return -EPROTO;
4480 }
4481
4482 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
4483 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
4484 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
4485
4486 intel_guc_capture_process(guc);
4487
4488 return 0;
4489 }
4490
4491 struct intel_engine_cs *
4492 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
4493 {
4494 struct intel_gt *gt = guc_to_gt(guc);
4495 u8 engine_class = guc_class_to_engine_class(guc_class);
4496
4497 /* Class index is checked in class converter */
4498 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE);
4499
4500 return gt->engine_class[engine_class][instance];
4501 }
4502
4503 static void reset_fail_worker_func(struct work_struct *w)
4504 {
4505 struct intel_guc *guc = container_of(w, struct intel_guc,
4506 submission_state.reset_fail_worker);
4507 struct intel_gt *gt = guc_to_gt(guc);
4508 intel_engine_mask_t reset_fail_mask;
4509 unsigned long flags;
4510
4511 spin_lock_irqsave(&guc->submission_state.lock, flags);
4512 reset_fail_mask = guc->submission_state.reset_fail_mask;
4513 guc->submission_state.reset_fail_mask = 0;
4514 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4515
4516 if (likely(reset_fail_mask))
4517 intel_gt_handle_error(gt, reset_fail_mask,
4518 I915_ERROR_CAPTURE,
4519 "GuC failed to reset engine mask=0x%x\n",
4520 reset_fail_mask);
4521 }
4522
4523 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
4524 const u32 *msg, u32 len)
4525 {
4526 struct intel_engine_cs *engine;
4527 struct intel_gt *gt = guc_to_gt(guc);
4528 u8 guc_class, instance;
4529 u32 reason;
4530 unsigned long flags;
4531
4532 if (unlikely(len != 3)) {
4533 drm_err(>->i915->drm, "Invalid length %u", len);
4534 return -EPROTO;
4535 }
4536
4537 guc_class = msg[0];
4538 instance = msg[1];
4539 reason = msg[2];
4540
4541 engine = intel_guc_lookup_engine(guc, guc_class, instance);
4542 if (unlikely(!engine)) {
4543 drm_err(>->i915->drm,
4544 "Invalid engine %d:%d", guc_class, instance);
4545 return -EPROTO;
4546 }
4547
4548 /*
4549 * This is an unexpected failure of a hardware feature. So, log a real
4550 * error message, not just the informational one that comes with the reset.
4551 */
4552 drm_err(>->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
4553 guc_class, instance, engine->name, reason);
4554
4555 spin_lock_irqsave(&guc->submission_state.lock, flags);
4556 guc->submission_state.reset_fail_mask |= engine->mask;
4557 spin_unlock_irqrestore(&guc->submission_state.lock, flags);
4558
4559 /*
4560 * A GT reset flushes this worker queue (G2H handler) so we must use
4561 * another worker to trigger a GT reset.
4562 */
4563 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
4564
4565 return 0;
4566 }
4567
4568 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
4569 {
4570 struct intel_guc *guc = &engine->gt->uc.guc;
4571 struct intel_context *ce;
4572 struct i915_request *rq;
4573 unsigned long index;
4574 unsigned long flags;
4575
4576 /* Reset called during driver load? GuC not yet initialised! */
4577 if (unlikely(!guc_submission_initialized(guc)))
4578 return;
4579
4580 xa_lock_irqsave(&guc->context_lookup, flags);
4581 xa_for_each(&guc->context_lookup, index, ce) {
4582 if (!kref_get_unless_zero(&ce->ref))
4583 continue;
4584
4585 xa_unlock(&guc->context_lookup);
4586
4587 if (!intel_context_is_pinned(ce))
4588 goto next;
4589
4590 if (intel_engine_is_virtual(ce->engine)) {
4591 if (!(ce->engine->mask & engine->mask))
4592 goto next;
4593 } else {
4594 if (ce->engine != engine)
4595 goto next;
4596 }
4597
4598 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
4599 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
4600 continue;
4601
4602 intel_engine_set_hung_context(engine, ce);
4603
4604 /* Can only cope with one hang at a time... */
4605 intel_context_put(ce);
4606 xa_lock(&guc->context_lookup);
4607 goto done;
4608 }
4609 next:
4610 intel_context_put(ce);
4611 xa_lock(&guc->context_lookup);
4612 }
4613 done:
4614 xa_unlock_irqrestore(&guc->context_lookup, flags);
4615 }
4616
4617 void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
4618 struct i915_request *hung_rq,
4619 struct drm_printer *m)
4620 {
4621 struct intel_guc *guc = &engine->gt->uc.guc;
4622 struct intel_context *ce;
4623 unsigned long index;
4624 unsigned long flags;
4625
4626 /* Reset called during driver load? GuC not yet initialised! */
4627 if (unlikely(!guc_submission_initialized(guc)))
4628 return;
4629
4630 xa_lock_irqsave(&guc->context_lookup, flags);
4631 xa_for_each(&guc->context_lookup, index, ce) {
4632 if (!kref_get_unless_zero(&ce->ref))
4633 continue;
4634
4635 xa_unlock(&guc->context_lookup);
4636
4637 if (!intel_context_is_pinned(ce))
4638 goto next;
4639
4640 if (intel_engine_is_virtual(ce->engine)) {
4641 if (!(ce->engine->mask & engine->mask))
4642 goto next;
4643 } else {
4644 if (ce->engine != engine)
4645 goto next;
4646 }
4647
4648 spin_lock(&ce->guc_state.lock);
4649 intel_engine_dump_active_requests(&ce->guc_state.requests,
4650 hung_rq, m);
4651 spin_unlock(&ce->guc_state.lock);
4652
4653 next:
4654 intel_context_put(ce);
4655 xa_lock(&guc->context_lookup);
4656 }
4657 xa_unlock_irqrestore(&guc->context_lookup, flags);
4658 }
4659
4660 void intel_guc_submission_print_info(struct intel_guc *guc,
4661 struct drm_printer *p)
4662 {
4663 struct i915_sched_engine *sched_engine = guc->sched_engine;
4664 struct rb_node *rb;
4665 unsigned long flags;
4666
4667 if (!sched_engine)
4668 return;
4669
4670 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
4671 atomic_read(&guc->outstanding_submission_g2h));
4672 drm_printf(p, "GuC tasklet count: %u\n\n",
4673 atomic_read(&sched_engine->tasklet.count));
4674
4675 spin_lock_irqsave(&sched_engine->lock, flags);
4676 drm_printf(p, "Requests in GuC submit tasklet:\n");
4677 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
4678 struct i915_priolist *pl = to_priolist(rb);
4679 struct i915_request *rq;
4680
4681 priolist_for_each_request(rq, pl)
4682 drm_printf(p, "guc_id=%u, seqno=%llu\n",
4683 rq->context->guc_id.id,
4684 rq->fence.seqno);
4685 }
4686 spin_unlock_irqrestore(&sched_engine->lock, flags);
4687 drm_printf(p, "\n");
4688 }
4689
4690 static inline void guc_log_context_priority(struct drm_printer *p,
4691 struct intel_context *ce)
4692 {
4693 int i;
4694
4695 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
4696 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
4697 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
4698 i < GUC_CLIENT_PRIORITY_NUM; ++i) {
4699 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
4700 i, ce->guc_state.prio_count[i]);
4701 }
4702 drm_printf(p, "\n");
4703 }
4704
4705 static inline void guc_log_context(struct drm_printer *p,
4706 struct intel_context *ce)
4707 {
4708 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
4709 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
4710 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
4711 ce->ring->head,
4712 ce->lrc_reg_state[CTX_RING_HEAD]);
4713 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
4714 ce->ring->tail,
4715 ce->lrc_reg_state[CTX_RING_TAIL]);
4716 drm_printf(p, "\t\tContext Pin Count: %u\n",
4717 atomic_read(&ce->pin_count));
4718 drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
4719 atomic_read(&ce->guc_id.ref));
4720 drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
4721 ce->guc_state.sched_state);
4722 }
4723
4724 void intel_guc_submission_print_context_info(struct intel_guc *guc,
4725 struct drm_printer *p)
4726 {
4727 struct intel_context *ce;
4728 unsigned long index;
4729 unsigned long flags;
4730
4731 xa_lock_irqsave(&guc->context_lookup, flags);
4732 xa_for_each(&guc->context_lookup, index, ce) {
4733 GEM_BUG_ON(intel_context_is_child(ce));
4734
4735 guc_log_context(p, ce);
4736 guc_log_context_priority(p, ce);
4737
4738 if (intel_context_is_parent(ce)) {
4739 struct intel_context *child;
4740
4741 drm_printf(p, "\t\tNumber children: %u\n",
4742 ce->parallel.number_children);
4743
4744 if (ce->parallel.guc.wq_status) {
4745 drm_printf(p, "\t\tWQI Head: %u\n",
4746 READ_ONCE(*ce->parallel.guc.wq_head));
4747 drm_printf(p, "\t\tWQI Tail: %u\n",
4748 READ_ONCE(*ce->parallel.guc.wq_tail));
4749 drm_printf(p, "\t\tWQI Status: %u\n\n",
4750 READ_ONCE(*ce->parallel.guc.wq_status));
4751 }
4752
4753 if (ce->engine->emit_bb_start ==
4754 emit_bb_start_parent_no_preempt_mid_batch) {
4755 u8 i;
4756
4757 drm_printf(p, "\t\tChildren Go: %u\n\n",
4758 get_children_go_value(ce));
4759 for (i = 0; i < ce->parallel.number_children; ++i)
4760 drm_printf(p, "\t\tChildren Join: %u\n",
4761 get_children_join_value(ce, i));
4762 }
4763
4764 for_each_child(ce, child)
4765 guc_log_context(p, child);
4766 }
4767 }
4768 xa_unlock_irqrestore(&guc->context_lookup, flags);
4769 }
4770
4771 static inline u32 get_children_go_addr(struct intel_context *ce)
4772 {
4773 GEM_BUG_ON(!intel_context_is_parent(ce));
4774
4775 return i915_ggtt_offset(ce->state) +
4776 __get_parent_scratch_offset(ce) +
4777 offsetof(struct parent_scratch, go.semaphore);
4778 }
4779
4780 static inline u32 get_children_join_addr(struct intel_context *ce,
4781 u8 child_index)
4782 {
4783 GEM_BUG_ON(!intel_context_is_parent(ce));
4784
4785 return i915_ggtt_offset(ce->state) +
4786 __get_parent_scratch_offset(ce) +
4787 offsetof(struct parent_scratch, join[child_index].semaphore);
4788 }
4789
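/*
 * Semaphore values used for the parent/child handshake: each child writes
 * PARENT_GO_* into its join semaphore to signal the parent, and spins on the
 * shared go semaphore until the parent writes the matching CHILD_GO_* value.
 * The BB phase uses 1 and the fini-breadcrumb phase uses 0, presumably so
 * that a value left over from one phase cannot satisfy the wait of the other.
 */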
4790 #define PARENT_GO_BB 1
4791 #define PARENT_GO_FINI_BREADCRUMB 0
4792 #define CHILD_GO_BB 1
4793 #define CHILD_GO_FINI_BREADCRUMB 0
4794 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
4795 u64 offset, u32 len,
4796 const unsigned int flags)
4797 {
4798 struct intel_context *ce = rq->context;
4799 u32 *cs;
4800 u8 i;
4801
4802 GEM_BUG_ON(!intel_context_is_parent(ce));
4803
4804 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
4805 if (IS_ERR(cs))
4806 return PTR_ERR(cs);
4807
4808 /* Wait on children */
4809 for (i = 0; i < ce->parallel.number_children; ++i) {
4810 *cs++ = (MI_SEMAPHORE_WAIT |
4811 MI_SEMAPHORE_GLOBAL_GTT |
4812 MI_SEMAPHORE_POLL |
4813 MI_SEMAPHORE_SAD_EQ_SDD);
4814 *cs++ = PARENT_GO_BB;
4815 *cs++ = get_children_join_addr(ce, i);
4816 *cs++ = 0;
4817 }
4818
4819 /* Turn off preemption */
4820 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4821 *cs++ = MI_NOOP;
4822
4823 /* Tell children go */
4824 cs = gen8_emit_ggtt_write(cs,
4825 CHILD_GO_BB,
4826 get_children_go_addr(ce),
4827 0);
4828
4829 /* Jump to batch */
4830 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4831 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4832 *cs++ = lower_32_bits(offset);
4833 *cs++ = upper_32_bits(offset);
4834 *cs++ = MI_NOOP;
4835
4836 intel_ring_advance(rq, cs);
4837
4838 return 0;
4839 }
4840
4841 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
4842 u64 offset, u32 len,
4843 const unsigned int flags)
4844 {
4845 struct intel_context *ce = rq->context;
4846 struct intel_context *parent = intel_context_to_parent(ce);
4847 u32 *cs;
4848
4849 GEM_BUG_ON(!intel_context_is_child(ce));
4850
4851 cs = intel_ring_begin(rq, 12);
4852 if (IS_ERR(cs))
4853 return PTR_ERR(cs);
4854
4855 /* Signal parent */
4856 cs = gen8_emit_ggtt_write(cs,
4857 PARENT_GO_BB,
4858 get_children_join_addr(parent,
4859 ce->parallel.child_index),
4860 0);
4861
4862 /* Wait on parent for go */
4863 *cs++ = (MI_SEMAPHORE_WAIT |
4864 MI_SEMAPHORE_GLOBAL_GTT |
4865 MI_SEMAPHORE_POLL |
4866 MI_SEMAPHORE_SAD_EQ_SDD);
4867 *cs++ = CHILD_GO_BB;
4868 *cs++ = get_children_go_addr(parent);
4869 *cs++ = 0;
4870
4871 /* Turn off preemption */
4872 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4873
4874 /* Jump to batch */
4875 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4876 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4877 *cs++ = lower_32_bits(offset);
4878 *cs++ = upper_32_bits(offset);
4879
4880 intel_ring_advance(rq, cs);
4881
4882 return 0;
4883 }
4884
4885 static u32 *
4886 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4887 u32 *cs)
4888 {
4889 struct intel_context *ce = rq->context;
4890 u8 i;
4891
4892 GEM_BUG_ON(!intel_context_is_parent(ce));
4893
4894 /* Wait on children */
4895 for (i = 0; i < ce->parallel.number_children; ++i) {
4896 *cs++ = (MI_SEMAPHORE_WAIT |
4897 MI_SEMAPHORE_GLOBAL_GTT |
4898 MI_SEMAPHORE_POLL |
4899 MI_SEMAPHORE_SAD_EQ_SDD);
4900 *cs++ = PARENT_GO_FINI_BREADCRUMB;
4901 *cs++ = get_children_join_addr(ce, i);
4902 *cs++ = 0;
4903 }
4904
4905 /* Turn on preemption */
4906 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4907 *cs++ = MI_NOOP;
4908
4909 /* Tell children go */
4910 cs = gen8_emit_ggtt_write(cs,
4911 CHILD_GO_FINI_BREADCRUMB,
4912 get_children_go_addr(ce),
4913 0);
4914
4915 return cs;
4916 }
4917
4918 /*
4919 * If this is true, a submission of multi-lrc requests had an error and the
4920 * requests need to be skipped. The front end (execbuf IOCTL) should've called
4921 * i915_request_skip, which squashes the BB, but we still need to emit the fini
4922 * breadcrumb seqno write. At this point we don't know how many of the
4923 * requests in the multi-lrc submission were generated so we can't do the
4924 * handshake between the parent and children (e.g. if 4 requests should be
4925 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend).
4926 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
4927 * has occurred on any of the requests in submission / relationship.
4928 */
4929 static inline bool skip_handshake(struct i915_request *rq)
4930 {
4931 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
4932 }
4933
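/*
 * NON_SKIP_LEN covers the dwords that are always emitted, even when the
 * handshake is skipped: the 4-dword GGTT seqno write plus MI_USER_INTERRUPT
 * and the trailing MI_NOOP. The GEM_BUG_ONs below verify this accounting.
 */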
4934 #define NON_SKIP_LEN 6
4935 static u32 *
4936 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4937 u32 *cs)
4938 {
4939 struct intel_context *ce = rq->context;
4940 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
4941 __maybe_unused u32 *start_fini_breadcrumb_cs = cs;
4942
4943 GEM_BUG_ON(!intel_context_is_parent(ce));
4944
4945 if (unlikely(skip_handshake(rq))) {
4946 /*
4947 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
4948 * the NON_SKIP_LEN comes from the length of the emits below.
4949 */
4950 memset(cs, 0, sizeof(u32) *
4951 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
4952 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
4953 } else {
4954 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
4955 }
4956
4957 /* Emit fini breadcrumb */
4958 before_fini_breadcrumb_user_interrupt_cs = cs;
4959 cs = gen8_emit_ggtt_write(cs,
4960 rq->fence.seqno,
4961 i915_request_active_timeline(rq)->hwsp_offset,
4962 0);
4963
4964 /* User interrupt */
4965 *cs++ = MI_USER_INTERRUPT;
4966 *cs++ = MI_NOOP;
4967
4968 /* Ensure our math for skip + emit is correct */
4969 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
4970 cs);
4971 GEM_BUG_ON(start_fini_breadcrumb_cs +
4972 ce->engine->emit_fini_breadcrumb_dw != cs);
4973
4974 rq->tail = intel_ring_offset(rq, cs);
4975
4976 return cs;
4977 }
4978
4979 static u32 *
4980 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
4981 u32 *cs)
4982 {
4983 struct intel_context *ce = rq->context;
4984 struct intel_context *parent = intel_context_to_parent(ce);
4985
4986 GEM_BUG_ON(!intel_context_is_child(ce));
4987
4988 /* Turn on preemption */
4989 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4990 *cs++ = MI_NOOP;
4991
4992 /* Signal parent */
4993 cs = gen8_emit_ggtt_write(cs,
4994 PARENT_GO_FINI_BREADCRUMB,
4995 get_children_join_addr(parent,
4996 ce->parallel.child_index),
4997 0);
4998
4999 /* Wait parent on for go */
5000 *cs++ = (MI_SEMAPHORE_WAIT |
5001 MI_SEMAPHORE_GLOBAL_GTT |
5002 MI_SEMAPHORE_POLL |
5003 MI_SEMAPHORE_SAD_EQ_SDD);
5004 *cs++ = CHILD_GO_FINI_BREADCRUMB;
5005 *cs++ = get_children_go_addr(parent);
5006 *cs++ = 0;
5007
5008 return cs;
5009 }
5010
static u32 *
emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
						u32 *cs)
{
	struct intel_context *ce = rq->context;
	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;

	GEM_BUG_ON(!intel_context_is_child(ce));

	if (unlikely(skip_handshake(rq))) {
		/*
		 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch;
		 * the NON_SKIP_LEN comes from the length of the emits below.
		 */
		memset(cs, 0, sizeof(u32) *
		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
	} else {
		cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
	}

	/* Emit fini breadcrumb */
	before_fini_breadcrumb_user_interrupt_cs = cs;
	cs = gen8_emit_ggtt_write(cs,
				  rq->fence.seqno,
				  i915_request_active_timeline(rq)->hwsp_offset,
				  0);

	/* User interrupt */
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	/* Ensure our math for skip + emit is correct */
	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
		   cs);
	GEM_BUG_ON(start_fini_breadcrumb_cs +
		   ce->engine->emit_fini_breadcrumb_dw != cs);

	rq->tail = intel_ring_offset(rq, cs);

	return cs;
}

#undef NON_SKIP_LEN

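/*
 * Create a GuC virtual engine that load balances across the supplied
 * siblings. The virtual engine borrows the GuC's single sched_engine and
 * inherits its class-specific state (emit hooks, breadcrumbs, scheduling
 * properties) from the first sibling, so all siblings must be of the same
 * engine class.
 */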
static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags)
{
	struct guc_virtual_engine *ve;
	struct intel_guc *guc;
	unsigned int n;
	int err;

	ve = kzalloc(sizeof(*ve), GFP_KERNEL);
	if (!ve)
		return ERR_PTR(-ENOMEM);

	guc = &siblings[0]->gt->uc.guc;

	ve->base.i915 = siblings[0]->i915;
	ve->base.gt = siblings[0]->gt;
	ve->base.uncore = siblings[0]->uncore;
	ve->base.id = -1;

	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.saturated = ALL_ENGINES;

	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

	ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);

	ve->base.cops = &virtual_guc_context_ops;
	ve->base.request_alloc = guc_request_alloc;
	ve->base.bump_serial = virtual_guc_bump_serial;

	ve->base.submit_request = guc_submit_request;

	ve->base.flags = I915_ENGINE_IS_VIRTUAL;

	intel_context_init(&ve->context, &ve->base);

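	/*
	 * Populate the virtual engine from its siblings. Class-wide state is
	 * taken from the first sibling; later iterations only accumulate the
	 * engine masks and reject duplicates or mixed engine classes.
	 */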
	for (n = 0; n < count; n++) {
		struct intel_engine_cs *sibling = siblings[n];

		GEM_BUG_ON(!is_power_of_2(sibling->mask));
		if (sibling->mask & ve->base.mask) {
			DRM_DEBUG("duplicate %s entry in load balancer\n",
				  sibling->name);
			err = -EINVAL;
			goto err_put;
		}

		ve->base.mask |= sibling->mask;
		ve->base.logical_mask |= sibling->logical_mask;

		if (n != 0 && ve->base.class != sibling->class) {
			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
				  sibling->class, ve->base.class);
			err = -EINVAL;
			goto err_put;
		} else if (n == 0) {
			ve->base.class = sibling->class;
			ve->base.uabi_class = sibling->uabi_class;
			snprintf(ve->base.name, sizeof(ve->base.name),
				 "v%dx%d", ve->base.class, count);
			ve->base.context_size = sibling->context_size;

			ve->base.add_active_request =
				sibling->add_active_request;
			ve->base.remove_active_request =
				sibling->remove_active_request;
			ve->base.emit_bb_start = sibling->emit_bb_start;
			ve->base.emit_flush = sibling->emit_flush;
			ve->base.emit_init_breadcrumb =
				sibling->emit_init_breadcrumb;
			ve->base.emit_fini_breadcrumb =
				sibling->emit_fini_breadcrumb;
			ve->base.emit_fini_breadcrumb_dw =
				sibling->emit_fini_breadcrumb_dw;
			ve->base.breadcrumbs =
				intel_breadcrumbs_get(sibling->breadcrumbs);

			ve->base.flags |= sibling->flags;

			ve->base.props.timeslice_duration_ms =
				sibling->props.timeslice_duration_ms;
			ve->base.props.preempt_timeout_ms =
				sibling->props.preempt_timeout_ms;
		}
	}

	return &ve->context;

err_put:
	intel_context_put(&ve->context);
	return ERR_PTR(err);
}

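/*
 * Returns true if any physical engine backing this virtual engine still has
 * its heartbeat enabled (non-zero heartbeat_interval_ms).
 */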
bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp, mask = ve->mask;

	for_each_engine_masked(engine, ve->gt, mask, tmp)
		if (READ_ONCE(engine->props.heartbeat_interval_ms))
			return true;

	return false;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_guc.c"
#include "selftest_guc_multi_lrc.c"
#include "selftest_guc_hangcheck.c"
#endif