1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include <drm/drm_print.h>
26
27 #include "gem/i915_gem_context.h"
28
29 #include "i915_drv.h"
30
31 #include "intel_breadcrumbs.h"
32 #include "intel_context.h"
33 #include "intel_engine.h"
34 #include "intel_engine_pm.h"
35 #include "intel_engine_user.h"
36 #include "intel_gt.h"
37 #include "intel_gt_requests.h"
38 #include "intel_gt_pm.h"
39 #include "intel_lrc.h"
40 #include "intel_reset.h"
41 #include "intel_ring.h"
42
43 /* Haswell does have the CXT_SIZE register; however, it does not appear to be
44 * valid. Now, docs explain in dwords what is in the context object. The full
45 * size is 70720 bytes, however, the power context and execlist context will
46 * never be saved (power context is stored elsewhere, and execlists don't work
47 * on HSW) - so the final size, including the extra state required for the
48 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
49 */
50 #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
51
52 #define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
53 #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
54 #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
55 #define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE)
56 #define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE)
57
58 #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE)
59
60 #define MAX_MMIO_BASES 3
61 struct engine_info {
62 unsigned int hw_id;
63 u8 class;
64 u8 instance;
65 /* mmio bases table *must* be sorted in reverse gen order */
66 struct engine_mmio_base {
67 u32 gen : 8;
68 u32 base : 24;
69 } mmio_bases[MAX_MMIO_BASES];
70 };
71
72 static const struct engine_info intel_engines[] = {
73 [RCS0] = {
74 .hw_id = RCS0_HW,
75 .class = RENDER_CLASS,
76 .instance = 0,
77 .mmio_bases = {
78 { .gen = 1, .base = RENDER_RING_BASE }
79 },
80 },
81 [BCS0] = {
82 .hw_id = BCS0_HW,
83 .class = COPY_ENGINE_CLASS,
84 .instance = 0,
85 .mmio_bases = {
86 { .gen = 6, .base = BLT_RING_BASE }
87 },
88 },
89 [VCS0] = {
90 .hw_id = VCS0_HW,
91 .class = VIDEO_DECODE_CLASS,
92 .instance = 0,
93 .mmio_bases = {
94 { .gen = 11, .base = GEN11_BSD_RING_BASE },
95 { .gen = 6, .base = GEN6_BSD_RING_BASE },
96 { .gen = 4, .base = BSD_RING_BASE }
97 },
98 },
99 [VCS1] = {
100 .hw_id = VCS1_HW,
101 .class = VIDEO_DECODE_CLASS,
102 .instance = 1,
103 .mmio_bases = {
104 { .gen = 11, .base = GEN11_BSD2_RING_BASE },
105 { .gen = 8, .base = GEN8_BSD2_RING_BASE }
106 },
107 },
108 [VCS2] = {
109 .hw_id = VCS2_HW,
110 .class = VIDEO_DECODE_CLASS,
111 .instance = 2,
112 .mmio_bases = {
113 { .gen = 11, .base = GEN11_BSD3_RING_BASE }
114 },
115 },
116 [VCS3] = {
117 .hw_id = VCS3_HW,
118 .class = VIDEO_DECODE_CLASS,
119 .instance = 3,
120 .mmio_bases = {
121 { .gen = 11, .base = GEN11_BSD4_RING_BASE }
122 },
123 },
124 [VECS0] = {
125 .hw_id = VECS0_HW,
126 .class = VIDEO_ENHANCEMENT_CLASS,
127 .instance = 0,
128 .mmio_bases = {
129 { .gen = 11, .base = GEN11_VEBOX_RING_BASE },
130 { .gen = 7, .base = VEBOX_RING_BASE }
131 },
132 },
133 [VECS1] = {
134 .hw_id = VECS1_HW,
135 .class = VIDEO_ENHANCEMENT_CLASS,
136 .instance = 1,
137 .mmio_bases = {
138 { .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
139 },
140 },
141 };
142
143 /**
144 * intel_engine_context_size() - return the size of the context for an engine
145 * @gt: the gt
146 * @class: engine class
147 *
148 * Each engine class may require a different amount of space for a context
149 * image.
150 *
151 * Return: size (in bytes) of an engine class specific context image
152 *
153 * Note: this size includes the HWSP, which is part of the context image
154 * in LRC mode, but does not include the "shared data page" used with
155 * GuC submission. The caller should account for this if using the GuC.
156 */
157 u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
158 {
159 struct intel_uncore *uncore = gt->uncore;
160 u32 cxt_size;
161
162 BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
163
164 switch (class) {
165 case RENDER_CLASS:
166 switch (INTEL_GEN(gt->i915)) {
167 default:
168 MISSING_CASE(INTEL_GEN(gt->i915));
169 return DEFAULT_LR_CONTEXT_RENDER_SIZE;
170 case 12:
171 case 11:
172 return GEN11_LR_CONTEXT_RENDER_SIZE;
173 case 10:
174 return GEN10_LR_CONTEXT_RENDER_SIZE;
175 case 9:
176 return GEN9_LR_CONTEXT_RENDER_SIZE;
177 case 8:
178 return GEN8_LR_CONTEXT_RENDER_SIZE;
179 case 7:
180 if (IS_HASWELL(gt->i915))
181 return HSW_CXT_TOTAL_SIZE;
182
183 cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
184 return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
185 PAGE_SIZE);
186 case 6:
187 cxt_size = intel_uncore_read(uncore, CXT_SIZE);
188 return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
189 PAGE_SIZE);
190 case 5:
191 case 4:
192 /*
193 * There is a discrepancy here between the size reported
194 * by the register and the size of the context layout
195 * in the docs. Both are described as authoritative!
196 *
197 * The discrepancy is on the order of a few cachelines,
198 * but the total is under one page (4k), which is our
199 * minimum allocation anyway so it should all come
200 * out in the wash.
201 */
202 cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
203 drm_dbg(&gt->i915->drm,
204 "gen%d CXT_SIZE = %d bytes [0x%08x]\n",
205 INTEL_GEN(gt->i915), cxt_size * 64,
206 cxt_size - 1);
207 return round_up(cxt_size * 64, PAGE_SIZE);
208 case 3:
209 case 2:
210 /* For the special day when i810 gets merged. */
211 case 1:
212 return 0;
213 }
214 break;
215 default:
216 MISSING_CASE(class);
217 fallthrough;
218 case VIDEO_DECODE_CLASS:
219 case VIDEO_ENHANCEMENT_CLASS:
220 case COPY_ENGINE_CLASS:
221 if (INTEL_GEN(gt->i915) < 8)
222 return 0;
223 return GEN8_LR_CONTEXT_OTHER_SIZE;
224 }
225 }
226
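/*
 * Pick the MMIO base for an engine on the running platform. Because
 * mmio_bases[] is sorted in reverse gen order, the first entry whose gen is
 * not newer than the device is the one to use; e.g. on gen9 the VCS0 table
 * above resolves to GEN6_BSD_RING_BASE.
 */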
227 static u32 __engine_mmio_base(struct drm_i915_private *i915,
228 const struct engine_mmio_base *bases)
229 {
230 int i;
231
232 for (i = 0; i < MAX_MMIO_BASES; i++)
233 if (INTEL_GEN(i915) >= bases[i].gen)
234 break;
235
236 GEM_BUG_ON(i == MAX_MMIO_BASES);
237 GEM_BUG_ON(!bases[i].base);
238
239 return bases[i].base;
240 }
241
242 static void __sprint_engine_name(struct intel_engine_cs *engine)
243 {
244 /*
245 * Before we know what the uABI name for this engine will be,
246 * we still would like to keep track of this engine in the debug logs.
247 * We throw in a ' here as a reminder that this isn't its final name.
248 */
249 GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
250 intel_engine_class_repr(engine->class),
251 engine->instance) >= sizeof(engine->name));
252 }
253
254 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
255 {
256 /*
257 * Though they added more rings on g4x/ilk, they did not add
258 * per-engine HWSTAM until gen6.
259 */
260 if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS)
261 return;
262
263 if (INTEL_GEN(engine->i915) >= 3)
264 ENGINE_WRITE(engine, RING_HWSTAM, mask);
265 else
266 ENGINE_WRITE16(engine, RING_HWSTAM, mask);
267 }
268
269 static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
270 {
271 /* Mask off all writes into the unknown HWSP */
272 intel_engine_set_hwsp_writemask(engine, ~0u);
273 }
274
275 static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
276 {
277 const struct engine_info *info = &intel_engines[id];
278 struct drm_i915_private *i915 = gt->i915;
279 struct intel_engine_cs *engine;
280
281 BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
282 BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
283
284 if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
285 return -EINVAL;
286
287 if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
288 return -EINVAL;
289
290 if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
291 return -EINVAL;
292
293 if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
294 return -EINVAL;
295
296 engine = kzalloc(sizeof(*engine), GFP_KERNEL);
297 if (!engine)
298 return -ENOMEM;
299
300 BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
301
302 engine->id = id;
303 engine->legacy_idx = INVALID_ENGINE;
304 engine->mask = BIT(id);
305 engine->i915 = i915;
306 engine->gt = gt;
307 engine->uncore = gt->uncore;
308 engine->hw_id = engine->guc_id = info->hw_id;
309 engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
310
311 engine->class = info->class;
312 engine->instance = info->instance;
313 __sprint_engine_name(engine);
314
315 engine->props.heartbeat_interval_ms =
316 CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
317 engine->props.max_busywait_duration_ns =
318 CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
319 engine->props.preempt_timeout_ms =
320 CONFIG_DRM_I915_PREEMPT_TIMEOUT;
321 engine->props.stop_timeout_ms =
322 CONFIG_DRM_I915_STOP_TIMEOUT;
323 engine->props.timeslice_duration_ms =
324 CONFIG_DRM_I915_TIMESLICE_DURATION;
325
326 /* Override to uninterruptible for OpenCL workloads. */
327 if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
328 engine->props.preempt_timeout_ms = 0;
329
330 engine->defaults = engine->props; /* never to change again */
331
332 engine->context_size = intel_engine_context_size(gt, engine->class);
333 if (WARN_ON(engine->context_size > BIT(20)))
334 engine->context_size = 0;
335 if (engine->context_size)
336 DRIVER_CAPS(i915)->has_logical_contexts = true;
337
338 /* Nothing to do here, execute in order of dependencies */
339 engine->schedule = NULL;
340
341 ewma__engine_latency_init(&engine->latency);
342 seqlock_init(&engine->stats.lock);
343
344 ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
345
346 /* Scrub mmio state on takeover */
347 intel_engine_sanitize_mmio(engine);
348
349 gt->engine_class[info->class][info->instance] = engine;
350 gt->engine[id] = engine;
351
352 return 0;
353 }
354
355 static void __setup_engine_capabilities(struct intel_engine_cs *engine)
356 {
357 struct drm_i915_private *i915 = engine->i915;
358
359 if (engine->class == VIDEO_DECODE_CLASS) {
360 /*
361 * HEVC support is present on the first engine instance
362 * before Gen11 and on all instances afterwards.
363 */
364 if (INTEL_GEN(i915) >= 11 ||
365 (INTEL_GEN(i915) >= 9 && engine->instance == 0))
366 engine->uabi_capabilities |=
367 I915_VIDEO_CLASS_CAPABILITY_HEVC;
368
369 /*
370 * SFC block is present only on even logical engine
371 * instances.
372 */
373 if ((INTEL_GEN(i915) >= 11 &&
374 (engine->gt->info.vdbox_sfc_access &
375 BIT(engine->instance))) ||
376 (INTEL_GEN(i915) >= 9 && engine->instance == 0))
377 engine->uabi_capabilities |=
378 I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
379 } else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
380 if (INTEL_GEN(i915) >= 9)
381 engine->uabi_capabilities |=
382 I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
383 }
384 }
385
386 static void intel_setup_engine_capabilities(struct intel_gt *gt)
387 {
388 struct intel_engine_cs *engine;
389 enum intel_engine_id id;
390
391 for_each_engine(engine, gt, id)
392 __setup_engine_capabilities(engine);
393 }
394
395 /**
396 * intel_engines_release() - free the resources allocated for Command Streamers
397 * @gt: pointer to struct intel_gt
398 */
399 void intel_engines_release(struct intel_gt *gt)
400 {
401 struct intel_engine_cs *engine;
402 enum intel_engine_id id;
403
404 /*
405 * Before we release the resources held by engine, we must be certain
406 * that the HW is no longer accessing them -- having the GPU scribble
407 * to or read from a page being used for something else causes no end
408 * of fun.
409 *
410 * The GPU should be reset by this point, but assume the worst just
411 * in case we aborted before completely initialising the engines.
412 */
413 GEM_BUG_ON(intel_gt_pm_is_awake(gt));
414 if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
415 __intel_gt_reset(gt, ALL_ENGINES);
416
417 /* Decouple the backend; but keep the layout for late GPU resets */
418 for_each_engine(engine, gt, id) {
419 if (!engine->release)
420 continue;
421
422 intel_wakeref_wait_for_idle(&engine->wakeref);
423 GEM_BUG_ON(intel_engine_pm_is_awake(engine));
424
425 engine->release(engine);
426 engine->release = NULL;
427
428 memset(&engine->reset, 0, sizeof(engine->reset));
429 }
430 }
431
432 void intel_engine_free_request_pool(struct intel_engine_cs *engine)
433 {
434 if (!engine->request_pool)
435 return;
436
437 kmem_cache_free(i915_request_slab_cache(), engine->request_pool);
438 }
439
440 void intel_engines_free(struct intel_gt *gt)
441 {
442 struct intel_engine_cs *engine;
443 enum intel_engine_id id;
444
445 /* Free the requests! dma-resv keeps fences around for an eternity */
446 rcu_barrier();
447
448 for_each_engine(engine, gt, id) {
449 intel_engine_free_request_pool(engine);
450 kfree(engine);
451 gt->engine[id] = NULL;
452 }
453 }
454
455 /*
456 * Determine which engines are fused off in our particular hardware.
457 * Note that we have a catch-22 situation where we need to be able to access
458 * the blitter forcewake domain to read the engine fuses, but at the same time
459 * we need to know which engines are available on the system to know which
460 * forcewake domains are present. We solve this by initializing the forcewake
461 * domains based on the full engine mask in the platform capabilities before
462 * calling this function and pruning the domains for fused-off engines
463 * afterwards.
464 */
465 static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
466 {
467 struct drm_i915_private *i915 = gt->i915;
468 struct intel_gt_info *info = &gt->info;
469 struct intel_uncore *uncore = gt->uncore;
470 unsigned int logical_vdbox = 0;
471 unsigned int i;
472 u32 media_fuse;
473 u16 vdbox_mask;
474 u16 vebox_mask;
475
476 info->engine_mask = INTEL_INFO(i915)->platform_engine_mask;
477
478 if (INTEL_GEN(i915) < 11)
479 return info->engine_mask;
480
481 media_fuse = ~intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
482
483 vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
484 vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
485 GEN11_GT_VEBOX_DISABLE_SHIFT;
486
487 for (i = 0; i < I915_MAX_VCS; i++) {
488 if (!HAS_ENGINE(gt, _VCS(i))) {
489 vdbox_mask &= ~BIT(i);
490 continue;
491 }
492
493 if (!(BIT(i) & vdbox_mask)) {
494 info->engine_mask &= ~BIT(_VCS(i));
495 drm_dbg(&i915->drm, "vcs%u fused off\n", i);
496 continue;
497 }
498
499 /*
500 * In Gen11, only even numbered logical VDBOXes are
501 * hooked up to an SFC (Scaler & Format Converter) unit.
502 * In TGL each VDBOX has access to an SFC.
503 */
504 if (INTEL_GEN(i915) >= 12 || logical_vdbox++ % 2 == 0)
505 gt->info.vdbox_sfc_access |= BIT(i);
506 }
507 drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n",
508 vdbox_mask, VDBOX_MASK(gt));
509 GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));
510
511 for (i = 0; i < I915_MAX_VECS; i++) {
512 if (!HAS_ENGINE(gt, _VECS(i))) {
513 vebox_mask &= ~BIT(i);
514 continue;
515 }
516
517 if (!(BIT(i) & vebox_mask)) {
518 info->engine_mask &= ~BIT(_VECS(i));
519 drm_dbg(&i915->drm, "vecs%u fused off\n", i);
520 }
521 }
522 drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n",
523 vebox_mask, VEBOX_MASK(gt));
524 GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
525
526 return info->engine_mask;
527 }
528
529 /**
530 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
531 * @gt: pointer to struct intel_gt
532 *
533 * Return: non-zero if the initialization failed.
534 */
535 int intel_engines_init_mmio(struct intel_gt *gt)
536 {
537 struct drm_i915_private *i915 = gt->i915;
538 const unsigned int engine_mask = init_engine_mask(gt);
539 unsigned int mask = 0;
540 unsigned int i;
541 int err;
542
543 drm_WARN_ON(&i915->drm, engine_mask == 0);
544 drm_WARN_ON(&i915->drm, engine_mask &
545 GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
546
547 if (i915_inject_probe_failure(i915))
548 return -ENODEV;
549
550 for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
551 if (!HAS_ENGINE(gt, i))
552 continue;
553
554 err = intel_engine_setup(gt, i);
555 if (err)
556 goto cleanup;
557
558 mask |= BIT(i);
559 }
560
561 /*
562 * Catch failures to update the intel_engines table when new engines
563 * are added to the driver, by warning about and disabling the forgotten
564 * engines.
565 */
566 if (drm_WARN_ON(&i915->drm, mask != engine_mask))
567 gt->info.engine_mask = mask;
568
569 gt->info.num_engines = hweight32(mask);
570
571 intel_gt_check_and_clear_faults(gt);
572
573 intel_setup_engine_capabilities(gt);
574
575 intel_uncore_prune_engine_fw_domains(gt->uncore, gt);
576
577 return 0;
578
579 cleanup:
580 intel_engines_free(gt);
581 return err;
582 }
583
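/*
 * Reset the execlists submission bookkeeping: clear the inflight and pending
 * port arrays, point the active cursor at the (now empty) inflight ports and
 * start with an empty priority queue.
 */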
584 void intel_engine_init_execlists(struct intel_engine_cs *engine)
585 {
586 struct intel_engine_execlists * const execlists = &engine->execlists;
587
588 execlists->port_mask = 1;
589 GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
590 GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
591
592 memset(execlists->pending, 0, sizeof(execlists->pending));
593 execlists->active =
594 memset(execlists->inflight, 0, sizeof(execlists->inflight));
595
596 execlists->queue_priority_hint = INT_MIN;
597 execlists->queue = RB_ROOT_CACHED;
598 }
599
600 static void cleanup_status_page(struct intel_engine_cs *engine)
601 {
602 struct i915_vma *vma;
603
604 /* Prevent writes into HWSP after returning the page to the system */
605 intel_engine_set_hwsp_writemask(engine, ~0u);
606
607 vma = fetch_and_zero(&engine->status_page.vma);
608 if (!vma)
609 return;
610
611 if (!HWS_NEEDS_PHYSICAL(engine->i915))
612 i915_vma_unpin(vma);
613
614 i915_gem_object_unpin_map(vma->obj);
615 i915_gem_object_put(vma->obj);
616 }
617
618 static int pin_ggtt_status_page(struct intel_engine_cs *engine,
619 struct i915_vma *vma)
620 {
621 unsigned int flags;
622
623 if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
624 /*
625 * On g33, we cannot place HWS above 256MiB, so
626 * restrict its pinning to the low mappable arena.
627 * Though this restriction is not documented for
628 * gen4, gen5, or byt, they also behave similarly
629 * and hang if the HWS is placed at the top of the
630 * GTT. To generalise, it appears that all !llc
631 * platforms have issues with us placing the HWS
632 * above the mappable region (even though we never
633 * actually map it).
634 */
635 flags = PIN_MAPPABLE;
636 else
637 flags = PIN_HIGH;
638
639 return i915_ggtt_pin(vma, NULL, 0, flags);
640 }
641
642 static int init_status_page(struct intel_engine_cs *engine)
643 {
644 struct drm_i915_gem_object *obj;
645 struct i915_vma *vma;
646 void *vaddr;
647 int ret;
648
649 /*
650 * Though the HWS register does support 36bit addresses, historically
651 * we have had hangs and corruption reported due to wild writes if
652 * the HWS is placed above 4G. We only allow objects to be allocated
653 * in GFP_DMA32 for i965, and no earlier physical address users had
654 * access to more than 4G.
655 */
656 obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
657 if (IS_ERR(obj)) {
658 drm_err(&engine->i915->drm,
659 "Failed to allocate status page\n");
660 return PTR_ERR(obj);
661 }
662
663 i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
664
665 vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
666 if (IS_ERR(vma)) {
667 ret = PTR_ERR(vma);
668 goto err;
669 }
670
671 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
672 if (IS_ERR(vaddr)) {
673 ret = PTR_ERR(vaddr);
674 goto err;
675 }
676
677 engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
678 engine->status_page.vma = vma;
679
680 if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
681 ret = pin_ggtt_status_page(engine, vma);
682 if (ret)
683 goto err_unpin;
684 }
685
686 return 0;
687
688 err_unpin:
689 i915_gem_object_unpin_map(obj);
690 err:
691 i915_gem_object_put(obj);
692 return ret;
693 }
694
695 static int engine_setup_common(struct intel_engine_cs *engine)
696 {
697 int err;
698
699 init_llist_head(&engine->barrier_tasks);
700
701 err = init_status_page(engine);
702 if (err)
703 return err;
704
705 engine->breadcrumbs = intel_breadcrumbs_create(engine);
706 if (!engine->breadcrumbs) {
707 err = -ENOMEM;
708 goto err_status;
709 }
710
711 intel_engine_init_active(engine, ENGINE_PHYSICAL);
712 intel_engine_init_execlists(engine);
713 intel_engine_init_cmd_parser(engine);
714 intel_engine_init__pm(engine);
715 intel_engine_init_retire(engine);
716
717 /* Use the whole device by default */
718 engine->sseu =
719 intel_sseu_from_device_info(&engine->gt->info.sseu);
720
721 intel_engine_init_workarounds(engine);
722 intel_engine_init_whitelist(engine);
723 intel_engine_init_ctx_wa(engine);
724
725 return 0;
726
727 err_status:
728 cleanup_status_page(engine);
729 return err;
730 }
731
732 struct measure_breadcrumb {
733 struct i915_request rq;
734 struct intel_ring ring;
735 u32 cs[2048];
736 };
737
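/*
 * Measure how many dwords the engine's emit_fini_breadcrumb() writes by
 * running it against a dummy request and ring allocated on the heap. The
 * result is cached in engine->emit_fini_breadcrumb_dw so that request
 * emission can reserve enough ring space for the breadcrumb.
 */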
738 static int measure_breadcrumb_dw(struct intel_context *ce)
739 {
740 struct intel_engine_cs *engine = ce->engine;
741 struct measure_breadcrumb *frame;
742 int dw;
743
744 GEM_BUG_ON(!engine->gt->scratch);
745
746 frame = kzalloc(sizeof(*frame), GFP_KERNEL);
747 if (!frame)
748 return -ENOMEM;
749
750 frame->rq.engine = engine;
751 frame->rq.context = ce;
752 rcu_assign_pointer(frame->rq.timeline, ce->timeline);
753
754 frame->ring.vaddr = frame->cs;
755 frame->ring.size = sizeof(frame->cs);
756 frame->ring.wrap =
757 BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);
758 frame->ring.effective_size = frame->ring.size;
759 intel_ring_update_space(&frame->ring);
760 frame->rq.ring = &frame->ring;
761
762 mutex_lock(&ce->timeline->mutex);
763 spin_lock_irq(&engine->active.lock);
764
765 dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
766
767 spin_unlock_irq(&engine->active.lock);
768 mutex_unlock(&ce->timeline->mutex);
769
770 GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
771
772 kfree(frame);
773 return dw;
774 }
775
776 void
777 intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
778 {
779 INIT_LIST_HEAD(&engine->active.requests);
780 INIT_LIST_HEAD(&engine->active.hold);
781
782 spin_lock_init(&engine->active.lock);
783 lockdep_set_subclass(&engine->active.lock, subclass);
784
785 /*
786 * Due to an interesting quirk in lockdep's internal debug tracking,
787 * after setting a subclass we must ensure the lock is used. Otherwise,
788 * nr_unused_locks is incremented once too often.
789 */
790 #ifdef CONFIG_DEBUG_LOCK_ALLOC
791 local_irq_disable();
792 lock_map_acquire(&engine->active.lock.dep_map);
793 lock_map_release(&engine->active.lock.dep_map);
794 local_irq_enable();
795 #endif
796 }
797
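/*
 * Create a perma-pinned context for the driver's own use. The requested HWSP
 * offset is packed into the low bits of the (not yet allocated) timeline
 * pointer, which appears to be picked up by the submission backend when the
 * context is first pinned.
 */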
798 static struct intel_context *
799 create_pinned_context(struct intel_engine_cs *engine,
800 unsigned int hwsp,
801 struct lock_class_key *key,
802 const char *name)
803 {
804 struct intel_context *ce;
805 int err;
806
807 ce = intel_context_create(engine);
808 if (IS_ERR(ce))
809 return ce;
810
811 __set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
812 ce->timeline = page_pack_bits(NULL, hwsp);
813
814 err = intel_context_pin(ce); /* perma-pin so it is always available */
815 if (err) {
816 intel_context_put(ce);
817 return ERR_PTR(err);
818 }
819
820 /*
821 * Give our perma-pinned kernel timelines a separate lockdep class,
822 * so that we can use them from within the normal user timelines
823 * should we need to inject GPU operations during their request
824 * construction.
825 */
826 lockdep_set_class_and_name(&ce->timeline->mutex, key, name);
827
828 return ce;
829 }
830
831 static struct intel_context *
832 create_kernel_context(struct intel_engine_cs *engine)
833 {
834 static struct lock_class_key kernel;
835
836 return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR,
837 &kernel, "kernel_context");
838 }
839
840 /**
841 * engine_init_common - initialize engine state which might require hw access
842 * @engine: Engine to initialize.
843 *
844 * Initializes @engine structure members shared between legacy and execlists
845 * submission modes which do require hardware access.
846 *
847 * Typically done at later stages of submission-mode-specific engine setup.
848 *
849 * Returns zero on success or an error code on failure.
850 */
851 static int engine_init_common(struct intel_engine_cs *engine)
852 {
853 struct intel_context *ce;
854 int ret;
855
856 engine->set_default_submission(engine);
857
858 /*
859 * We may need to do things with the shrinker which
860 * require us to immediately switch back to the default
861 * context. This can cause a problem as pinning the
862 * default context also requires GTT space which may not
863 * be available. To avoid this we always pin the default
864 * context.
865 */
866 ce = create_kernel_context(engine);
867 if (IS_ERR(ce))
868 return PTR_ERR(ce);
869
870 ret = measure_breadcrumb_dw(ce);
871 if (ret < 0)
872 goto err_context;
873
874 engine->emit_fini_breadcrumb_dw = ret;
875 engine->kernel_context = ce;
876
877 return 0;
878
879 err_context:
880 intel_context_put(ce);
881 return ret;
882 }
883
884 int intel_engines_init(struct intel_gt *gt)
885 {
886 int (*setup)(struct intel_engine_cs *engine);
887 struct intel_engine_cs *engine;
888 enum intel_engine_id id;
889 int err;
890
891 if (HAS_EXECLISTS(gt->i915))
892 setup = intel_execlists_submission_setup;
893 else
894 setup = intel_ring_submission_setup;
895
896 for_each_engine(engine, gt, id) {
897 err = engine_setup_common(engine);
898 if (err)
899 return err;
900
901 err = setup(engine);
902 if (err)
903 return err;
904
905 err = engine_init_common(engine);
906 if (err)
907 return err;
908
909 intel_engine_add_user(engine);
910 }
911
912 return 0;
913 }
914
915 /**
916 * intel_engine_cleanup_common - cleans up the engine state created by
917 * the common initializers.
918 * @engine: Engine to cleanup.
919 *
920 * This cleans up everything created by the common helpers.
921 */
922 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
923 {
924 GEM_BUG_ON(!list_empty(&engine->active.requests));
925 tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
926
927 cleanup_status_page(engine);
928 intel_breadcrumbs_free(engine->breadcrumbs);
929
930 intel_engine_fini_retire(engine);
931 intel_engine_cleanup_cmd_parser(engine);
932
933 if (engine->default_state)
934 fput(engine->default_state);
935
936 if (engine->kernel_context) {
937 intel_context_unpin(engine->kernel_context);
938 intel_context_put(engine->kernel_context);
939 }
940 GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
941
942 intel_wa_list_free(&engine->ctx_wa_list);
943 intel_wa_list_free(&engine->wa_list);
944 intel_wa_list_free(&engine->whitelist);
945 }
946
947 /**
948 * intel_engine_resume - re-initializes the HW state of the engine
949 * @engine: Engine to resume.
950 *
951 * Returns zero on success or an error code on failure.
952 */
953 int intel_engine_resume(struct intel_engine_cs *engine)
954 {
955 intel_engine_apply_workarounds(engine);
956 intel_engine_apply_whitelist(engine);
957
958 return engine->resume(engine);
959 }
960
961 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
962 {
963 struct drm_i915_private *i915 = engine->i915;
964
965 u64 acthd;
966
967 if (INTEL_GEN(i915) >= 8)
968 acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
969 else if (INTEL_GEN(i915) >= 4)
970 acthd = ENGINE_READ(engine, RING_ACTHD);
971 else
972 acthd = ENGINE_READ(engine, ACTHD);
973
974 return acthd;
975 }
976
977 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
978 {
979 u64 bbaddr;
980
981 if (INTEL_GEN(engine->i915) >= 8)
982 bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
983 else
984 bbaddr = ENGINE_READ(engine, RING_BBADDR);
985
986 return bbaddr;
987 }
988
989 static unsigned long stop_timeout(const struct intel_engine_cs *engine)
990 {
991 if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
992 return 0;
993
994 /*
995 * If we are doing a normal GPU reset, we can take our time and allow
996 * the engine to quiesce. We've stopped submission to the engine, and
997 * if we wait long enough an innocent context should complete and
998 * leave the engine idle. So they should not be caught unaware by
999 * the forthcoming GPU reset (which usually follows the stop_cs)!
1000 */
1001 return READ_ONCE(engine->props.stop_timeout_ms);
1002 }
1003
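/*
 * Ask the command streamer to stop by setting STOP_RING in RING_MI_MODE and
 * then wait (for up to stop_timeout()) for MODE_IDLE to be asserted.
 * Returns -ETIMEDOUT if the engine fails to quiesce in time.
 */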
1004 int intel_engine_stop_cs(struct intel_engine_cs *engine)
1005 {
1006 struct intel_uncore *uncore = engine->uncore;
1007 const u32 base = engine->mmio_base;
1008 const i915_reg_t mode = RING_MI_MODE(base);
1009 int err;
1010
1011 if (INTEL_GEN(engine->i915) < 3)
1012 return -ENODEV;
1013
1014 ENGINE_TRACE(engine, "\n");
1015
1016 intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
1017
1018 err = 0;
1019 if (__intel_wait_for_register_fw(uncore,
1020 mode, MODE_IDLE, MODE_IDLE,
1021 1000, stop_timeout(engine),
1022 NULL)) {
1023 ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n");
1024 err = -ETIMEDOUT;
1025 }
1026
1027 /* A final mmio read to let GPU writes be hopefully flushed to memory */
1028 intel_uncore_posting_read_fw(uncore, mode);
1029
1030 return err;
1031 }
1032
1033 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
1034 {
1035 ENGINE_TRACE(engine, "\n");
1036
1037 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
1038 }
1039
1040 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
1041 {
1042 switch (type) {
1043 case I915_CACHE_NONE: return " uncached";
1044 case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
1045 case I915_CACHE_L3_LLC: return " L3+LLC";
1046 case I915_CACHE_WT: return " WT";
1047 default: return "";
1048 }
1049 }
1050
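/*
 * Read a per-slice/subslice register: steer GEN8_MCR_SELECTOR at the
 * requested slice/subslice with forcewake held, read the register, then
 * restore the previous steering before dropping the uncore lock.
 */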
1051 static u32
1052 read_subslice_reg(const struct intel_engine_cs *engine,
1053 int slice, int subslice, i915_reg_t reg)
1054 {
1055 struct drm_i915_private *i915 = engine->i915;
1056 struct intel_uncore *uncore = engine->uncore;
1057 u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
1058 enum forcewake_domains fw_domains;
1059
1060 if (INTEL_GEN(i915) >= 11) {
1061 mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
1062 mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
1063 } else {
1064 mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
1065 mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
1066 }
1067
1068 fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
1069 FW_REG_READ);
1070 fw_domains |= intel_uncore_forcewake_for_reg(uncore,
1071 GEN8_MCR_SELECTOR,
1072 FW_REG_READ | FW_REG_WRITE);
1073
1074 spin_lock_irq(&uncore->lock);
1075 intel_uncore_forcewake_get__locked(uncore, fw_domains);
1076
1077 old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
1078
1079 mcr &= ~mcr_mask;
1080 mcr |= mcr_ss;
1081 intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
1082
1083 val = intel_uncore_read_fw(uncore, reg);
1084
1085 mcr &= ~mcr_mask;
1086 mcr |= old_mcr & mcr_mask;
1087
1088 intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
1089
1090 intel_uncore_forcewake_put__locked(uncore, fw_domains);
1091 spin_unlock_irq(&uncore->lock);
1092
1093 return val;
1094 }
1095
1096 /* NB: please notice the memset */
1097 void intel_engine_get_instdone(const struct intel_engine_cs *engine,
1098 struct intel_instdone *instdone)
1099 {
1100 struct drm_i915_private *i915 = engine->i915;
1101 const struct sseu_dev_info *sseu = &engine->gt->info.sseu;
1102 struct intel_uncore *uncore = engine->uncore;
1103 u32 mmio_base = engine->mmio_base;
1104 int slice;
1105 int subslice;
1106
1107 memset(instdone, 0, sizeof(*instdone));
1108
1109 switch (INTEL_GEN(i915)) {
1110 default:
1111 instdone->instdone =
1112 intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1113
1114 if (engine->id != RCS0)
1115 break;
1116
1117 instdone->slice_common =
1118 intel_uncore_read(uncore, GEN7_SC_INSTDONE);
1119 if (INTEL_GEN(i915) >= 12) {
1120 instdone->slice_common_extra[0] =
1121 intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA);
1122 instdone->slice_common_extra[1] =
1123 intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
1124 }
1125 for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
1126 instdone->sampler[slice][subslice] =
1127 read_subslice_reg(engine, slice, subslice,
1128 GEN7_SAMPLER_INSTDONE);
1129 instdone->row[slice][subslice] =
1130 read_subslice_reg(engine, slice, subslice,
1131 GEN7_ROW_INSTDONE);
1132 }
1133 break;
1134 case 7:
1135 instdone->instdone =
1136 intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1137
1138 if (engine->id != RCS0)
1139 break;
1140
1141 instdone->slice_common =
1142 intel_uncore_read(uncore, GEN7_SC_INSTDONE);
1143 instdone->sampler[0][0] =
1144 intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
1145 instdone->row[0][0] =
1146 intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
1147
1148 break;
1149 case 6:
1150 case 5:
1151 case 4:
1152 instdone->instdone =
1153 intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1154 if (engine->id == RCS0)
1155 /* HACK: Using the wrong struct member */
1156 instdone->slice_common =
1157 intel_uncore_read(uncore, GEN4_INSTDONE1);
1158 break;
1159 case 3:
1160 case 2:
1161 instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
1162 break;
1163 }
1164 }
1165
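/*
 * A ring is considered idle when there is nothing left between RING_HEAD and
 * RING_TAIL and (where the bit exists, gen3+) the CS parser reports
 * MODE_IDLE. If we cannot take a pm wakeref, the engine is already parked
 * and therefore idle.
 */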
1166 static bool ring_is_idle(struct intel_engine_cs *engine)
1167 {
1168 bool idle = true;
1169
1170 if (I915_SELFTEST_ONLY(!engine->mmio_base))
1171 return true;
1172
1173 if (!intel_engine_pm_get_if_awake(engine))
1174 return true;
1175
1176 /* First check that no commands are left in the ring */
1177 if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
1178 (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
1179 idle = false;
1180
1181 /* No bit for gen2, so assume the CS parser is idle */
1182 if (INTEL_GEN(engine->i915) > 2 &&
1183 !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
1184 idle = false;
1185
1186 intel_engine_pm_put(engine);
1187
1188 return idle;
1189 }
1190
1191 void intel_engine_flush_submission(struct intel_engine_cs *engine)
1192 {
1193 struct tasklet_struct *t = &engine->execlists.tasklet;
1194
1195 if (!t->func)
1196 return;
1197
1198 /* Synchronise and wait for the tasklet on another CPU */
1199 tasklet_kill(t);
1200
1201 /* Having cancelled the tasklet, ensure that it is run */
1202 local_bh_disable();
1203 if (tasklet_trylock(t)) {
1204 /* Must wait for any GPU reset in progress. */
1205 if (__tasklet_is_enabled(t))
1206 t->func(t->data);
1207 tasklet_unlock(t);
1208 }
1209 local_bh_enable();
1210 }
1211
1212 /**
1213 * intel_engine_is_idle() - Report if the engine has finished processing all work
1214 * @engine: the intel_engine_cs
1215 *
1216 * Return true if there are no requests pending, nothing left to be submitted
1217 * to hardware, and the engine is idle.
1218 */
1219 bool intel_engine_is_idle(struct intel_engine_cs *engine)
1220 {
1221 /* More white lies, if wedged, hw state is inconsistent */
1222 if (intel_gt_is_wedged(engine->gt))
1223 return true;
1224
1225 if (!intel_engine_pm_is_awake(engine))
1226 return true;
1227
1228 /* Waiting to drain ELSP? */
1229 if (execlists_active(&engine->execlists)) {
1230 synchronize_hardirq(engine->i915->drm.pdev->irq);
1231
1232 intel_engine_flush_submission(engine);
1233
1234 if (execlists_active(&engine->execlists))
1235 return false;
1236 }
1237
1238 /* ELSP is empty, but there are ready requests? E.g. after reset */
1239 if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
1240 return false;
1241
1242 /* Ring stopped? */
1243 return ring_is_idle(engine);
1244 }
1245
1246 bool intel_engines_are_idle(struct intel_gt *gt)
1247 {
1248 struct intel_engine_cs *engine;
1249 enum intel_engine_id id;
1250
1251 /*
1252 * If the driver is wedged, HW state may be very inconsistent and
1253 * report that it is still busy, even though we have stopped using it.
1254 */
1255 if (intel_gt_is_wedged(gt))
1256 return true;
1257
1258 /* Already parked (and passed an idleness test); must still be idle */
1259 if (!READ_ONCE(gt->awake))
1260 return true;
1261
1262 for_each_engine(engine, gt, id) {
1263 if (!intel_engine_is_idle(engine))
1264 return false;
1265 }
1266
1267 return true;
1268 }
1269
1270 void intel_engines_reset_default_submission(struct intel_gt *gt)
1271 {
1272 struct intel_engine_cs *engine;
1273 enum intel_engine_id id;
1274
1275 for_each_engine(engine, gt, id)
1276 engine->set_default_submission(engine);
1277 }
1278
1279 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
1280 {
1281 switch (INTEL_GEN(engine->i915)) {
1282 case 2:
1283 return false; /* uses physical not virtual addresses */
1284 case 3:
1285 /* maybe only uses physical not virtual addresses */
1286 return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
1287 case 4:
1288 return !IS_I965G(engine->i915); /* who knows! */
1289 case 6:
1290 return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
1291 default:
1292 return true;
1293 }
1294 }
1295
1296 static int print_sched_attr(const struct i915_sched_attr *attr,
1297 char *buf, int x, int len)
1298 {
1299 if (attr->priority == I915_PRIORITY_INVALID)
1300 return x;
1301
1302 x += snprintf(buf + x, len - x,
1303 " prio=%d", attr->priority);
1304
1305 return x;
1306 }
1307
1308 static void print_request(struct drm_printer *m,
1309 struct i915_request *rq,
1310 const char *prefix)
1311 {
1312 const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
1313 char buf[80] = "";
1314 int x = 0;
1315
1316 x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));
1317
1318 drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
1319 prefix,
1320 rq->fence.context, rq->fence.seqno,
1321 i915_request_completed(rq) ? "!" :
1322 i915_request_started(rq) ? "*" :
1323 "",
1324 test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
1325 &rq->fence.flags) ? "+" :
1326 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1327 &rq->fence.flags) ? "-" :
1328 "",
1329 buf,
1330 jiffies_to_msecs(jiffies - rq->emitted_jiffies),
1331 name);
1332 }
1333
1334 static struct intel_timeline *get_timeline(struct i915_request *rq)
1335 {
1336 struct intel_timeline *tl;
1337
1338 /*
1339 * Even though we are holding the engine->active.lock here, there
1340 * is no control over the submission queue per-se and we are
1341 * inspecting the active state at a random point in time, with an
1342 * unknown queue. Play safe and make sure the timeline remains valid.
1343 * (Only being used for pretty printing, one extra kref shouldn't
1344 * cause a camel stampede!)
1345 */
1346 rcu_read_lock();
1347 tl = rcu_dereference(rq->timeline);
1348 if (!kref_get_unless_zero(&tl->kref))
1349 tl = NULL;
1350 rcu_read_unlock();
1351
1352 return tl;
1353 }
1354
1355 static int print_ring(char *buf, int sz, struct i915_request *rq)
1356 {
1357 int len = 0;
1358
1359 if (!i915_request_signaled(rq)) {
1360 struct intel_timeline *tl = get_timeline(rq);
1361
1362 len = scnprintf(buf, sz,
1363 "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
1364 i915_ggtt_offset(rq->ring->vma),
1365 tl ? tl->hwsp_offset : 0,
1366 hwsp_seqno(rq),
1367 DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
1368 1000 * 1000));
1369
1370 if (tl)
1371 intel_timeline_put(tl);
1372 }
1373
1374 return len;
1375 }
1376
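/*
 * Print a buffer as 32-byte hex rows, collapsing runs of identical rows into
 * a single '*' line in the style of hexdump(1).
 */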
1377 static void hexdump(struct drm_printer *m, const void *buf, size_t len)
1378 {
1379 const size_t rowsize = 8 * sizeof(u32);
1380 const void *prev = NULL;
1381 bool skip = false;
1382 size_t pos;
1383
1384 for (pos = 0; pos < len; pos += rowsize) {
1385 char line[128];
1386
1387 if (prev && !memcmp(prev, buf + pos, rowsize)) {
1388 if (!skip) {
1389 drm_printf(m, "*\n");
1390 skip = true;
1391 }
1392 continue;
1393 }
1394
1395 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
1396 rowsize, sizeof(u32),
1397 line, sizeof(line),
1398 false) >= sizeof(line));
1399 drm_printf(m, "[%04zx] %s\n", pos, line);
1400
1401 prev = buf + pos;
1402 skip = false;
1403 }
1404 }
1405
1406 static const char *repr_timer(const struct timer_list *t)
1407 {
1408 if (!READ_ONCE(t->expires))
1409 return "inactive";
1410
1411 if (timer_pending(t))
1412 return "active";
1413
1414 return "expired";
1415 }
1416
1417 static void intel_engine_print_registers(struct intel_engine_cs *engine,
1418 struct drm_printer *m)
1419 {
1420 struct drm_i915_private *dev_priv = engine->i915;
1421 struct intel_engine_execlists * const execlists = &engine->execlists;
1422 u64 addr;
1423
1424 if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
1425 drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
1426 if (HAS_EXECLISTS(dev_priv)) {
1427 drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
1428 ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
1429 drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
1430 ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
1431 }
1432 drm_printf(m, "\tRING_START: 0x%08x\n",
1433 ENGINE_READ(engine, RING_START));
1434 drm_printf(m, "\tRING_HEAD: 0x%08x\n",
1435 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
1436 drm_printf(m, "\tRING_TAIL: 0x%08x\n",
1437 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
1438 drm_printf(m, "\tRING_CTL: 0x%08x%s\n",
1439 ENGINE_READ(engine, RING_CTL),
1440 ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
1441 if (INTEL_GEN(engine->i915) > 2) {
1442 drm_printf(m, "\tRING_MODE: 0x%08x%s\n",
1443 ENGINE_READ(engine, RING_MI_MODE),
1444 ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
1445 }
1446
1447 if (INTEL_GEN(dev_priv) >= 6) {
1448 drm_printf(m, "\tRING_IMR: 0x%08x\n",
1449 ENGINE_READ(engine, RING_IMR));
1450 drm_printf(m, "\tRING_ESR: 0x%08x\n",
1451 ENGINE_READ(engine, RING_ESR));
1452 drm_printf(m, "\tRING_EMR: 0x%08x\n",
1453 ENGINE_READ(engine, RING_EMR));
1454 drm_printf(m, "\tRING_EIR: 0x%08x\n",
1455 ENGINE_READ(engine, RING_EIR));
1456 }
1457
1458 addr = intel_engine_get_active_head(engine);
1459 drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
1460 upper_32_bits(addr), lower_32_bits(addr));
1461 addr = intel_engine_get_last_batch_head(engine);
1462 drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
1463 upper_32_bits(addr), lower_32_bits(addr));
1464 if (INTEL_GEN(dev_priv) >= 8)
1465 addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
1466 else if (INTEL_GEN(dev_priv) >= 4)
1467 addr = ENGINE_READ(engine, RING_DMA_FADD);
1468 else
1469 addr = ENGINE_READ(engine, DMA_FADD_I8XX);
1470 drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
1471 upper_32_bits(addr), lower_32_bits(addr));
1472 if (INTEL_GEN(dev_priv) >= 4) {
1473 drm_printf(m, "\tIPEIR: 0x%08x\n",
1474 ENGINE_READ(engine, RING_IPEIR));
1475 drm_printf(m, "\tIPEHR: 0x%08x\n",
1476 ENGINE_READ(engine, RING_IPEHR));
1477 } else {
1478 drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
1479 drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
1480 }
1481
1482 if (HAS_EXECLISTS(dev_priv)) {
1483 struct i915_request * const *port, *rq;
1484 const u32 *hws =
1485 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
1486 const u8 num_entries = execlists->csb_size;
1487 unsigned int idx;
1488 u8 read, write;
1489
1490 drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
1491 yesno(test_bit(TASKLET_STATE_SCHED,
1492 &engine->execlists.tasklet.state)),
1493 enableddisabled(!atomic_read(&engine->execlists.tasklet.count)),
1494 repr_timer(&engine->execlists.preempt),
1495 repr_timer(&engine->execlists.timer));
1496
1497 read = execlists->csb_head;
1498 write = READ_ONCE(*execlists->csb_write);
1499
1500 drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
1501 ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
1502 ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
1503 read, write, num_entries);
1504
1505 if (read >= num_entries)
1506 read = 0;
1507 if (write >= num_entries)
1508 write = 0;
1509 if (read > write)
1510 write += num_entries;
1511 while (read < write) {
1512 idx = ++read % num_entries;
1513 drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
1514 idx, hws[idx * 2], hws[idx * 2 + 1]);
1515 }
1516
1517 execlists_active_lock_bh(execlists);
1518 rcu_read_lock();
1519 for (port = execlists->active; (rq = *port); port++) {
1520 char hdr[160];
1521 int len;
1522
1523 len = scnprintf(hdr, sizeof(hdr),
1524 "\t\tActive[%d]: ccid:%08x%s%s, ",
1525 (int)(port - execlists->active),
1526 rq->context->lrc.ccid,
1527 intel_context_is_closed(rq->context) ? "!" : "",
1528 intel_context_is_banned(rq->context) ? "*" : "");
1529 len += print_ring(hdr + len, sizeof(hdr) - len, rq);
1530 scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
1531 print_request(m, rq, hdr);
1532 }
1533 for (port = execlists->pending; (rq = *port); port++) {
1534 char hdr[160];
1535 int len;
1536
1537 len = scnprintf(hdr, sizeof(hdr),
1538 "\t\tPending[%d]: ccid:%08x%s%s, ",
1539 (int)(port - execlists->pending),
1540 rq->context->lrc.ccid,
1541 intel_context_is_closed(rq->context) ? "!" : "",
1542 intel_context_is_banned(rq->context) ? "*" : "");
1543 len += print_ring(hdr + len, sizeof(hdr) - len, rq);
1544 scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
1545 print_request(m, rq, hdr);
1546 }
1547 rcu_read_unlock();
1548 execlists_active_unlock_bh(execlists);
1549 } else if (INTEL_GEN(dev_priv) > 6) {
1550 drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
1551 ENGINE_READ(engine, RING_PP_DIR_BASE));
1552 drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
1553 ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
1554 drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
1555 ENGINE_READ(engine, RING_PP_DIR_DCLV));
1556 }
1557 }
1558
1559 static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
1560 {
1561 void *ring;
1562 int size;
1563
1564 drm_printf(m,
1565 "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
1566 rq->head, rq->postfix, rq->tail,
1567 rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
1568 rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
1569
1570 size = rq->tail - rq->head;
1571 if (rq->tail < rq->head)
1572 size += rq->ring->size;
1573
1574 ring = kmalloc(size, GFP_ATOMIC);
1575 if (ring) {
1576 const void *vaddr = rq->ring->vaddr;
1577 unsigned int head = rq->head;
1578 unsigned int len = 0;
1579
1580 if (rq->tail < head) {
1581 len = rq->ring->size - head;
1582 memcpy(ring, vaddr + head, len);
1583 head = 0;
1584 }
1585 memcpy(ring + len, vaddr + head, size - len);
1586
1587 hexdump(m, ring, size);
1588 kfree(ring);
1589 }
1590 }
1591
1592 static unsigned long list_count(struct list_head *list)
1593 {
1594 struct list_head *pos;
1595 unsigned long count = 0;
1596
1597 list_for_each(pos, list)
1598 count++;
1599
1600 return count;
1601 }
1602
1603 void intel_engine_dump(struct intel_engine_cs *engine,
1604 struct drm_printer *m,
1605 const char *header, ...)
1606 {
1607 struct i915_gpu_error * const error = &engine->i915->gpu_error;
1608 struct i915_request *rq;
1609 intel_wakeref_t wakeref;
1610 unsigned long flags;
1611 ktime_t dummy;
1612
1613 if (header) {
1614 va_list ap;
1615
1616 va_start(ap, header);
1617 drm_vprintf(m, header, &ap);
1618 va_end(ap);
1619 }
1620
1621 if (intel_gt_is_wedged(engine->gt))
1622 drm_printf(m, "*** WEDGED ***\n");
1623
1624 drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
1625 drm_printf(m, "\tBarriers?: %s\n",
1626 yesno(!llist_empty(&engine->barrier_tasks)));
1627 drm_printf(m, "\tLatency: %luus\n",
1628 ewma__engine_latency_read(&engine->latency));
1629 if (intel_engine_supports_stats(engine))
1630 drm_printf(m, "\tRuntime: %llums\n",
1631 ktime_to_ms(intel_engine_get_busy_time(engine,
1632 &dummy)));
1633 drm_printf(m, "\tForcewake: %x domains, %d active\n",
1634 engine->fw_domain, atomic_read(&engine->fw_active));
1635
1636 rcu_read_lock();
1637 rq = READ_ONCE(engine->heartbeat.systole);
1638 if (rq)
1639 drm_printf(m, "\tHeartbeat: %d ms ago\n",
1640 jiffies_to_msecs(jiffies - rq->emitted_jiffies));
1641 rcu_read_unlock();
1642 drm_printf(m, "\tReset count: %d (global %d)\n",
1643 i915_reset_engine_count(error, engine),
1644 i915_reset_count(error));
1645
1646 drm_printf(m, "\tRequests:\n");
1647
1648 spin_lock_irqsave(&engine->active.lock, flags);
1649 rq = intel_engine_find_active_request(engine);
1650 if (rq) {
1651 struct intel_timeline *tl = get_timeline(rq);
1652
1653 print_request(m, rq, "\t\tactive ");
1654
1655 drm_printf(m, "\t\tring->start: 0x%08x\n",
1656 i915_ggtt_offset(rq->ring->vma));
1657 drm_printf(m, "\t\tring->head: 0x%08x\n",
1658 rq->ring->head);
1659 drm_printf(m, "\t\tring->tail: 0x%08x\n",
1660 rq->ring->tail);
1661 drm_printf(m, "\t\tring->emit: 0x%08x\n",
1662 rq->ring->emit);
1663 drm_printf(m, "\t\tring->space: 0x%08x\n",
1664 rq->ring->space);
1665
1666 if (tl) {
1667 drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
1668 tl->hwsp_offset);
1669 intel_timeline_put(tl);
1670 }
1671
1672 print_request_ring(m, rq);
1673
1674 if (rq->context->lrc_reg_state) {
1675 drm_printf(m, "Logical Ring Context:\n");
1676 hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
1677 }
1678 }
1679 drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold));
1680 spin_unlock_irqrestore(&engine->active.lock, flags);
1681
1682 drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
1683 wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
1684 if (wakeref) {
1685 intel_engine_print_registers(engine, m);
1686 intel_runtime_pm_put(engine->uncore->rpm, wakeref);
1687 } else {
1688 drm_printf(m, "\tDevice is asleep; skipping register dump\n");
1689 }
1690
1691 intel_execlists_show_requests(engine, m, print_request, 8);
1692
1693 drm_printf(m, "HWSP:\n");
1694 hexdump(m, engine->status_page.addr, PAGE_SIZE);
1695
1696 drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
1697
1698 intel_engine_print_breadcrumbs(engine, m);
1699 }
1700
1701 static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
1702 ktime_t *now)
1703 {
1704 ktime_t total = engine->stats.total;
1705
1706 /*
1707 * If the engine is executing something at the moment
1708 * add it to the total.
1709 */
1710 *now = ktime_get();
1711 if (atomic_read(&engine->stats.active))
1712 total = ktime_add(total, ktime_sub(*now, engine->stats.start));
1713
1714 return total;
1715 }
1716
1717 /**
1718 * intel_engine_get_busy_time() - Return current accumulated engine busyness
1719 * @engine: engine to report on
1720 * @now: monotonic timestamp of sampling
1721 *
1722 * Returns accumulated time @engine was busy since engine stats were enabled.
1723 */
1724 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
1725 {
1726 unsigned int seq;
1727 ktime_t total;
1728
1729 do {
1730 seq = read_seqbegin(&engine->stats.lock);
1731 total = __intel_engine_get_busy_time(engine, now);
1732 } while (read_seqretry(&engine->stats.lock, seq));
1733
1734 return total;
1735 }
1736
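/*
 * Check whether the request's ring is the one currently programmed into
 * RING_START; used below to narrow down which of several started requests is
 * actually being executed by the hardware.
 */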
1737 static bool match_ring(struct i915_request *rq)
1738 {
1739 u32 ring = ENGINE_READ(rq->engine, RING_START);
1740
1741 return ring == i915_ggtt_offset(rq->ring->vma);
1742 }
1743
1744 struct i915_request *
1745 intel_engine_find_active_request(struct intel_engine_cs *engine)
1746 {
1747 struct i915_request *request, *active = NULL;
1748
1749 /*
1750 * We are called by error capture, by reset, and to dump engine
1751 * state at random points in time. In particular, note that none of these
1752 * is crucially ordered with an interrupt. After a hang, the GPU is dead
1753 * and we assume that no more writes can happen (we waited long enough
1754 * for all writes that were in transaction to be flushed) - adding an
1755 * extra delay for a recent interrupt is pointless. Hence, we do
1756 * not need an engine->irq_seqno_barrier() before the seqno reads.
1757 * At all other times, we must assume the GPU is still running, but
1758 * we only care about the snapshot of this moment.
1759 */
1760 lockdep_assert_held(&engine->active.lock);
1761
1762 rcu_read_lock();
1763 request = execlists_active(&engine->execlists);
1764 if (request) {
1765 struct intel_timeline *tl = request->context->timeline;
1766
1767 list_for_each_entry_from_reverse(request, &tl->requests, link) {
1768 if (i915_request_completed(request))
1769 break;
1770
1771 active = request;
1772 }
1773 }
1774 rcu_read_unlock();
1775 if (active)
1776 return active;
1777
1778 list_for_each_entry(request, &engine->active.requests, sched.link) {
1779 if (i915_request_completed(request))
1780 continue;
1781
1782 if (!i915_request_started(request))
1783 continue;
1784
1785 /* More than one preemptible request may match! */
1786 if (!match_ring(request))
1787 continue;
1788
1789 active = request;
1790 break;
1791 }
1792
1793 return active;
1794 }
1795
1796 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1797 #include "mock_engine.c"
1798 #include "selftest_engine.c"
1799 #include "selftest_engine_cs.c"
1800 #endif
1801