// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "intel_gpu_commands.h"
#include "intel_gt_pm.h"
#include "intel_rps.h"

#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"

#define COUNT 5

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	return *a - *b;
}

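/*
 * perf_begin() / perf_end() bracket each subtest: take a GT wakeref and
 * request a waitboost so the GPU is held at a fixed, maximum frequency
 * while we sample CS_TIMESTAMP deltas.
 */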
static void perf_begin(struct intel_gt *gt)
{
	intel_gt_pm_get(gt);

	/* Boost gpufreq to max [waitboost] and keep it fixed */
	atomic_inc(&gt->rps.num_waiters);
	queue_work(gt->i915->unordered_wq, &gt->rps.work);
	flush_work(&gt->rps.work);
}

static int perf_end(struct intel_gt *gt)
{
	atomic_dec(&gt->rps.num_waiters);
	intel_gt_pm_put(gt);

	return igt_flush_test(gt->i915);
}

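/*
 * Select the CS_TIMESTAMP register for this engine; Ironlake and G4X
 * expose the counter at the UDW offset, everyone else uses the regular
 * RING_TIMESTAMP offset.
 */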
static i915_reg_t timestamp_reg(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915))
		return RING_TIMESTAMP_UDW(engine->mmio_base);
	else
		return RING_TIMESTAMP(engine->mmio_base);
}

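/*
 * Emit an MI_STORE_REGISTER_MEM that copies the engine's CS_TIMESTAMP
 * into the chosen dword slot of the request's HWSP, so the timestamps
 * can be read back from the CPU once the request has completed.
 */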
static int write_timestamp(struct i915_request *rq, int slot)
{
	struct intel_timeline *tl =
		rcu_dereference_protected(rq->timeline,
					  !i915_request_signaled(rq));
	u32 cmd;
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	if (GRAPHICS_VER(rq->i915) >= 8)
		cmd++;
	*cs++ = cmd;
	*cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine));
	*cs++ = tl->hwsp_offset + slot * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}

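/*
 * Build a single-page batch containing only MI_BATCH_BUFFER_END, i.e. a
 * batch that returns to the ring as quickly as the CS allows.
 */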
static struct i915_vma *create_empty_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	cs[0] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

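/*
 * Reduce the COUNT samples to one value: sort them, discard the extremes
 * and return a weighted average around the median,
 * (a[1] + 2 * a[2] + a[3]) / 4, to suppress noisy outliers.
 */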
static u32 trifilter(u32 *a)
{
	u64 sum;

	sort(a, COUNT, sizeof(*a), cmp_u32, NULL);

	sum = mul_u32_u32(a[2], 2);
	sum += a[1];
	sum += a[3];

	return sum >> 2;
}

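/*
 * Measure the cost of MI_BB_START on each engine: record CS_TIMESTAMP
 * before and after dispatching an empty batch, repeat COUNT times and
 * report the filtered delta in CS timestamp cycles.
 */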
static int perf_mi_bb_start(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *batch;
		u32 cycles[COUNT];
		int i;

		if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
			continue;

		intel_engine_pm_get(engine);

		batch = create_empty_batch(ce);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(batch);
		if (err) {
			intel_engine_pm_put(engine);
			i915_vma_put(batch);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(batch), 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
		}
		i915_vma_put(batch);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: MI_BB_START cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}

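/*
 * Build a 64KiB batch of MI_NOOP (the buffer is zeroed, and a zero dword
 * decodes as MI_NOOP), terminated by MI_BATCH_BUFFER_END.
 */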
static struct i915_vma *create_nop_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	memset(cs, 0, SZ_64K);
	cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

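/*
 * Measure the cost of executing ~16K MI_NOOP: time an empty batch and the
 * 64KiB NOOP batch within the same request, then report the difference so
 * that the MI_BB_START overhead cancels out.
 */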
static int perf_mi_noop(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *base, *nop;
		u32 cycles[COUNT];
		int i;

		if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
			continue;

		intel_engine_pm_get(engine);

		base = create_empty_batch(ce);
		if (IS_ERR(base)) {
			err = PTR_ERR(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(base);
		if (err) {
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		nop = create_nop_batch(ce);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(nop);
		if (err) {
			i915_vma_put(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(base), 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(nop),
							i915_vma_size(nop),
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 4);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			cycles[i] =
				(rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
				(rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
		}
		i915_vma_put(nop);
		i915_vma_put(base);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: 16K MI_NOOP cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}

int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_mi_bb_start),
		SUBTEST(perf_mi_noop),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return intel_gt_live_subtests(tests, to_gt(i915));
}

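/*
 * Sanity check the static engine tables: each engine's mmio_bases[] must
 * be sorted by descending graphics version, and every listed version must
 * carry a non-zero mmio base.
 */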
static int intel_mmio_bases_check(void *arg)
{
	int i, j;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		const struct engine_info *info = &intel_engines[i];
		u8 prev = U8_MAX;

		for (j = 0; j < MAX_MMIO_BASES; j++) {
			u8 ver = info->mmio_bases[j].graphics_ver;
			u32 base = info->mmio_bases[j].base;

			if (ver >= prev) {
				pr_err("%s(%s, class:%d, instance:%d): mmio base for graphics ver %u is before the one for ver %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       prev, ver);
				return -EINVAL;
			}

			if (ver == 0)
				break;

			if (!base) {
				pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for graphics ver %u at entry %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       base, ver, j);
				return -EINVAL;
			}

			prev = ver;
		}

		pr_debug("%s: min graphics version supported for %s%d is %u\n",
			 __func__,
			 intel_engine_class_repr(info->class),
			 info->instance,
			 prev);
	}

	return 0;
}

int intel_engine_cs_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(intel_mmio_bases_check),
	};

	return i915_subtests(tests, NULL);
}