1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2020 Intel Corporation
4 */
5
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
8
9 #include "intel_engine_heartbeat.h"
10 #include "intel_engine_pm.h"
11 #include "intel_gpu_commands.h"
12 #include "intel_gt_clock_utils.h"
13 #include "intel_gt_pm.h"
14 #include "intel_rc6.h"
15 #include "selftest_engine_heartbeat.h"
16 #include "selftest_rps.h"
17 #include "selftests/igt_flush_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/librapl.h"
20
/* Try to isolate the impact of cstates from determining frequency response */
22 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
23
/*
 * No-op work handler. The live tests in this file temporarily install it as
 * rps->work.func and restore the saved handler before returning.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
	/* intentionally empty */
}
27
/*
 * sort() comparator for u64 elements, ascending order.
 *
 * Returns -1 if *A < *B, 1 if *A > *B and 0 when they are equal.
 */
static int cmp_u64(const void *A, const void *B)
{
	const u64 lhs = *(const u64 *)A;
	const u64 rhs = *(const u64 *)B;

	/* Each comparison yields 0 or 1; the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
39
/*
 * sort() comparator for u32 elements, ascending order.
 *
 * Returns -1 if *A < *B, 1 if *A > *B and 0 when they are equal.
 */
static int cmp_u32(const void *A, const void *B)
{
	const u32 lhs = *(const u32 *)A;
	const u32 rhs = *(const u32 *)B;

	/* Each comparison yields 0 or 1; the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
51
52 static struct i915_vma *
create_spin_counter(struct intel_engine_cs * engine,struct i915_address_space * vm,bool srm,u32 ** cancel,u32 ** counter)53 create_spin_counter(struct intel_engine_cs *engine,
54 struct i915_address_space *vm,
55 bool srm,
56 u32 **cancel,
57 u32 **counter)
58 {
59 enum {
60 COUNT,
61 INC,
62 __NGPR__,
63 };
64 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
65 struct drm_i915_gem_object *obj;
66 struct i915_vma *vma;
67 unsigned long end;
68 u32 *base, *cs;
69 int loop, i;
70 int err;
71
72 obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
73 if (IS_ERR(obj))
74 return ERR_CAST(obj);
75
76 end = obj->base.size / sizeof(u32) - 1;
77
78 vma = i915_vma_instance(obj, vm, NULL);
79 if (IS_ERR(vma)) {
80 err = PTR_ERR(vma);
81 goto err_put;
82 }
83
84 err = i915_vma_pin(vma, 0, 0, PIN_USER);
85 if (err)
86 goto err_unlock;
87
88 i915_vma_lock(vma);
89
90 base = i915_gem_object_pin_map(obj, I915_MAP_WC);
91 if (IS_ERR(base)) {
92 err = PTR_ERR(base);
93 goto err_unpin;
94 }
95 cs = base;
96
97 *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
98 for (i = 0; i < __NGPR__; i++) {
99 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
100 *cs++ = 0;
101 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
102 *cs++ = 0;
103 }
104
105 *cs++ = MI_LOAD_REGISTER_IMM(1);
106 *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
107 *cs++ = 1;
108
109 loop = cs - base;
110
111 /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
112 for (i = 0; i < 1024; i++) {
113 *cs++ = MI_MATH(4);
114 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
115 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
116 *cs++ = MI_MATH_ADD;
117 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
118
119 if (srm) {
120 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
121 *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
122 *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
123 *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
124 }
125 }
126
127 *cs++ = MI_BATCH_BUFFER_START_GEN8;
128 *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
129 *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
130 GEM_BUG_ON(cs - base > end);
131
132 i915_gem_object_flush_map(obj);
133
134 *cancel = base + loop;
135 *counter = srm ? memset32(base + end, 0, 1) : NULL;
136 return vma;
137
138 err_unpin:
139 i915_vma_unpin(vma);
140 err_unlock:
141 i915_vma_unlock(vma);
142 err_put:
143 i915_gem_object_put(obj);
144 return ERR_PTR(err);
145 }
146
/*
 * Poll read_cagf() until the frequency reaches @freq, stops changing, or
 * @timeout_ms elapses.
 *
 * Returns the last value read from read_cagf(), which is @freq only if the
 * target was actually reached.
 */
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	const int max_delay = timeout_ms * 20;
	unsigned long deadline;
	u8 samples[64];
	u8 slot = 0;
	int delay = 20;

	/*
	 * Seed the ring of recent samples with the target so that any other
	 * reading counts as "still changing" until the ring has been
	 * completely overwritten with that reading.
	 */
	memset(samples, freq, sizeof(samples));

	/* The PCU does not change instantly, but drifts towards the goal? */
	deadline = jiffies + msecs_to_jiffies(timeout_ms);
	for (;;) {
		const u8 act = read_cagf(rps);

		if (time_after(jiffies, deadline))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Every recorded sample equals the current one: it has settled */
		if (!memchr_inv(samples, act, sizeof(samples)))
			return act;

		samples[slot] = act;
		slot = (slot + 1) % ARRAY_SIZE(samples);

		/* Exponential backoff (in us), capped at timeout_ms * 20 */
		usleep_range(delay, 2 * delay);
		delay *= 2;
		if (delay > max_delay)
			delay = max_delay;
	}
}
183
/*
 * Request @freq through intel_rps_set() under rps->lock, then wait up to
 * 50ms for the hardware to follow.
 *
 * Returns the frequency reported by wait_for_freq(), which may differ from
 * @freq if the target was not reached.
 */
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	const int settle_ms = 50;

	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	intel_rps_set(rps, freq);
	/* The request must have been recorded as the last programmed value */
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, settle_ms);
}
194
show_pstate_limits(struct intel_rps * rps)195 static void show_pstate_limits(struct intel_rps *rps)
196 {
197 struct drm_i915_private *i915 = rps_to_i915(rps);
198
199 if (IS_BROXTON(i915)) {
200 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
201 i915_mmio_reg_offset(BXT_RP_STATE_CAP),
202 intel_uncore_read(rps_to_uncore(rps),
203 BXT_RP_STATE_CAP));
204 } else if (IS_GEN(i915, 9)) {
205 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
206 i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
207 intel_uncore_read(rps_to_uncore(rps),
208 GEN9_RP_STATE_LIMITS));
209 }
210 }
211
live_rps_clock_interval(void * arg)212 int live_rps_clock_interval(void *arg)
213 {
214 struct intel_gt *gt = arg;
215 struct intel_rps *rps = >->rps;
216 void (*saved_work)(struct work_struct *wrk);
217 struct intel_engine_cs *engine;
218 enum intel_engine_id id;
219 struct igt_spinner spin;
220 int err = 0;
221
222 if (!intel_rps_is_enabled(rps))
223 return 0;
224
225 if (igt_spinner_init(&spin, gt))
226 return -ENOMEM;
227
228 intel_gt_pm_wait_for_idle(gt);
229 saved_work = rps->work.func;
230 rps->work.func = dummy_rps_work;
231
232 intel_gt_pm_get(gt);
233 intel_rps_disable(>->rps);
234
235 intel_gt_check_clock_frequency(gt);
236
237 for_each_engine(engine, gt, id) {
238 struct i915_request *rq;
239 u32 cycles;
240 u64 dt;
241
242 if (!intel_engine_can_store_dword(engine))
243 continue;
244
245 st_engine_heartbeat_disable(engine);
246
247 rq = igt_spinner_create_request(&spin,
248 engine->kernel_context,
249 MI_NOOP);
250 if (IS_ERR(rq)) {
251 st_engine_heartbeat_enable(engine);
252 err = PTR_ERR(rq);
253 break;
254 }
255
256 i915_request_add(rq);
257
258 if (!igt_wait_for_spinner(&spin, rq)) {
259 pr_err("%s: RPS spinner did not start\n",
260 engine->name);
261 igt_spinner_end(&spin);
262 st_engine_heartbeat_enable(engine);
263 intel_gt_set_wedged(engine->gt);
264 err = -EIO;
265 break;
266 }
267
268 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
269
270 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
271
272 /* Set the evaluation interval to infinity! */
273 intel_uncore_write_fw(gt->uncore,
274 GEN6_RP_UP_EI, 0xffffffff);
275 intel_uncore_write_fw(gt->uncore,
276 GEN6_RP_UP_THRESHOLD, 0xffffffff);
277
278 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
279 GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
280
281 if (wait_for(intel_uncore_read_fw(gt->uncore,
282 GEN6_RP_CUR_UP_EI),
283 10)) {
284 /* Just skip the test; assume lack of HW support */
285 pr_notice("%s: rps evaluation interval not ticking\n",
286 engine->name);
287 err = -ENODEV;
288 } else {
289 ktime_t dt_[5];
290 u32 cycles_[5];
291 int i;
292
293 for (i = 0; i < 5; i++) {
294 preempt_disable();
295
296 dt_[i] = ktime_get();
297 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
298
299 udelay(1000);
300
301 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
302 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
303
304 preempt_enable();
305 }
306
307 /* Use the median of both cycle/dt; close enough */
308 sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
309 cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
310 sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
311 dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
312 }
313
314 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
315 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
316
317 igt_spinner_end(&spin);
318 st_engine_heartbeat_enable(engine);
319
320 if (err == 0) {
321 u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
322 u32 expected =
323 intel_gt_ns_to_pm_interval(gt, dt);
324
325 pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
326 engine->name, cycles, time, dt, expected,
327 gt->clock_frequency / 1000);
328
329 if (10 * time < 8 * dt ||
330 8 * time > 10 * dt) {
331 pr_err("%s: rps clock time does not match walltime!\n",
332 engine->name);
333 err = -EINVAL;
334 }
335
336 if (10 * expected < 8 * cycles ||
337 8 * expected > 10 * cycles) {
338 pr_err("%s: walltime does not match rps clock ticks!\n",
339 engine->name);
340 err = -EINVAL;
341 }
342 }
343
344 if (igt_flush_test(gt->i915))
345 err = -EIO;
346
347 break; /* once is enough */
348 }
349
350 intel_rps_enable(>->rps);
351 intel_gt_pm_put(gt);
352
353 igt_spinner_fini(&spin);
354
355 intel_gt_pm_wait_for_idle(gt);
356 rps->work.func = saved_work;
357
358 if (err == -ENODEV) /* skipped, don't report a fail */
359 err = 0;
360
361 return err;
362 }
363
live_rps_control(void * arg)364 int live_rps_control(void *arg)
365 {
366 struct intel_gt *gt = arg;
367 struct intel_rps *rps = >->rps;
368 void (*saved_work)(struct work_struct *wrk);
369 struct intel_engine_cs *engine;
370 enum intel_engine_id id;
371 struct igt_spinner spin;
372 int err = 0;
373
374 /*
375 * Check that the actual frequency matches our requested frequency,
376 * to verify our control mechanism. We have to be careful that the
377 * PCU may throttle the GPU in which case the actual frequency used
378 * will be lowered than requested.
379 */
380
381 if (!intel_rps_is_enabled(rps))
382 return 0;
383
384 if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
385 return 0;
386
387 if (igt_spinner_init(&spin, gt))
388 return -ENOMEM;
389
390 intel_gt_pm_wait_for_idle(gt);
391 saved_work = rps->work.func;
392 rps->work.func = dummy_rps_work;
393
394 intel_gt_pm_get(gt);
395 for_each_engine(engine, gt, id) {
396 struct i915_request *rq;
397 ktime_t min_dt, max_dt;
398 int f, limit;
399 int min, max;
400
401 if (!intel_engine_can_store_dword(engine))
402 continue;
403
404 st_engine_heartbeat_disable(engine);
405
406 rq = igt_spinner_create_request(&spin,
407 engine->kernel_context,
408 MI_NOOP);
409 if (IS_ERR(rq)) {
410 err = PTR_ERR(rq);
411 break;
412 }
413
414 i915_request_add(rq);
415
416 if (!igt_wait_for_spinner(&spin, rq)) {
417 pr_err("%s: RPS spinner did not start\n",
418 engine->name);
419 igt_spinner_end(&spin);
420 st_engine_heartbeat_enable(engine);
421 intel_gt_set_wedged(engine->gt);
422 err = -EIO;
423 break;
424 }
425
426 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
427 pr_err("%s: could not set minimum frequency [%x], only %x!\n",
428 engine->name, rps->min_freq, read_cagf(rps));
429 igt_spinner_end(&spin);
430 st_engine_heartbeat_enable(engine);
431 show_pstate_limits(rps);
432 err = -EINVAL;
433 break;
434 }
435
436 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
437 if (rps_set_check(rps, f) < f)
438 break;
439 }
440
441 limit = rps_set_check(rps, f);
442
443 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
444 pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
445 engine->name, rps->min_freq, read_cagf(rps));
446 igt_spinner_end(&spin);
447 st_engine_heartbeat_enable(engine);
448 show_pstate_limits(rps);
449 err = -EINVAL;
450 break;
451 }
452
453 max_dt = ktime_get();
454 max = rps_set_check(rps, limit);
455 max_dt = ktime_sub(ktime_get(), max_dt);
456
457 min_dt = ktime_get();
458 min = rps_set_check(rps, rps->min_freq);
459 min_dt = ktime_sub(ktime_get(), min_dt);
460
461 igt_spinner_end(&spin);
462 st_engine_heartbeat_enable(engine);
463
464 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
465 engine->name,
466 rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
467 rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
468 limit, intel_gpu_freq(rps, limit),
469 min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
470
471 if (limit == rps->min_freq) {
472 pr_err("%s: GPU throttled to minimum!\n",
473 engine->name);
474 show_pstate_limits(rps);
475 err = -ENODEV;
476 break;
477 }
478
479 if (igt_flush_test(gt->i915)) {
480 err = -EIO;
481 break;
482 }
483 }
484 intel_gt_pm_put(gt);
485
486 igt_spinner_fini(&spin);
487
488 intel_gt_pm_wait_for_idle(gt);
489 rps->work.func = saved_work;
490
491 return err;
492 }
493
show_pcu_config(struct intel_rps * rps)494 static void show_pcu_config(struct intel_rps *rps)
495 {
496 struct drm_i915_private *i915 = rps_to_i915(rps);
497 unsigned int max_gpu_freq, min_gpu_freq;
498 intel_wakeref_t wakeref;
499 int gpu_freq;
500
501 if (!HAS_LLC(i915))
502 return;
503
504 min_gpu_freq = rps->min_freq;
505 max_gpu_freq = rps->max_freq;
506 if (INTEL_GEN(i915) >= 9) {
507 /* Convert GT frequency to 50 HZ units */
508 min_gpu_freq /= GEN9_FREQ_SCALER;
509 max_gpu_freq /= GEN9_FREQ_SCALER;
510 }
511
512 wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
513
514 pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
515 for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
516 int ia_freq = gpu_freq;
517
518 sandybridge_pcode_read(i915,
519 GEN6_PCODE_READ_MIN_FREQ_TABLE,
520 &ia_freq, NULL);
521
522 pr_info("%5d %5d %5d\n",
523 gpu_freq * 50,
524 ((ia_freq >> 0) & 0xff) * 100,
525 ((ia_freq >> 8) & 0xff) * 100);
526 }
527
528 intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
529 }
530
__measure_frequency(u32 * cntr,int duration_ms)531 static u64 __measure_frequency(u32 *cntr, int duration_ms)
532 {
533 u64 dc, dt;
534
535 dt = ktime_get();
536 dc = READ_ONCE(*cntr);
537 usleep_range(1000 * duration_ms, 2000 * duration_ms);
538 dc = READ_ONCE(*cntr) - dc;
539 dt = ktime_get() - dt;
540
541 return div64_u64(1000 * 1000 * dc, dt);
542 }
543
measure_frequency_at(struct intel_rps * rps,u32 * cntr,int * freq)544 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
545 {
546 u64 x[5];
547 int i;
548
549 *freq = rps_set_check(rps, *freq);
550 for (i = 0; i < 5; i++)
551 x[i] = __measure_frequency(cntr, 2);
552 *freq = (*freq + read_cagf(rps)) / 2;
553
554 /* A simple triangle filter for better result stability */
555 sort(x, 5, sizeof(*x), cmp_u64, NULL);
556 return div_u64(x[1] + 2 * x[2] + x[3], 4);
557 }
558
__measure_cs_frequency(struct intel_engine_cs * engine,int duration_ms)559 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
560 int duration_ms)
561 {
562 u64 dc, dt;
563
564 dt = ktime_get();
565 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
566 usleep_range(1000 * duration_ms, 2000 * duration_ms);
567 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
568 dt = ktime_get() - dt;
569
570 return div64_u64(1000 * 1000 * dc, dt);
571 }
572
measure_cs_frequency_at(struct intel_rps * rps,struct intel_engine_cs * engine,int * freq)573 static u64 measure_cs_frequency_at(struct intel_rps *rps,
574 struct intel_engine_cs *engine,
575 int *freq)
576 {
577 u64 x[5];
578 int i;
579
580 *freq = rps_set_check(rps, *freq);
581 for (i = 0; i < 5; i++)
582 x[i] = __measure_cs_frequency(engine, 2);
583 *freq = (*freq + read_cagf(rps)) / 2;
584
585 /* A simple triangle filter for better result stability */
586 sort(x, 5, sizeof(*x), cmp_u64, NULL);
587 return div_u64(x[1] + 2 * x[2] + x[3], 4);
588 }
589
/*
 * Returns true when x / y lies strictly between f_n / f_d and f_d / f_n,
 * evaluated with integer multiplications only.
 */
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	/* Lower bound: x / y > f_n / f_d */
	if (f_d * x <= f_n * y)
		return false;

	/* Upper bound: x / y < f_d / f_n */
	return f_n * x < f_d * y;
}
594
live_rps_frequency_cs(void * arg)595 int live_rps_frequency_cs(void *arg)
596 {
597 void (*saved_work)(struct work_struct *wrk);
598 struct intel_gt *gt = arg;
599 struct intel_rps *rps = >->rps;
600 struct intel_engine_cs *engine;
601 struct pm_qos_request qos;
602 enum intel_engine_id id;
603 int err = 0;
604
605 /*
606 * The premise is that the GPU does change freqency at our behest.
607 * Let's check there is a correspondence between the requested
608 * frequency, the actual frequency, and the observed clock rate.
609 */
610
611 if (!intel_rps_is_enabled(rps))
612 return 0;
613
614 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
615 return 0;
616
617 if (CPU_LATENCY >= 0)
618 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
619
620 intel_gt_pm_wait_for_idle(gt);
621 saved_work = rps->work.func;
622 rps->work.func = dummy_rps_work;
623
624 for_each_engine(engine, gt, id) {
625 struct i915_request *rq;
626 struct i915_vma *vma;
627 u32 *cancel, *cntr;
628 struct {
629 u64 count;
630 int freq;
631 } min, max;
632
633 st_engine_heartbeat_disable(engine);
634
635 vma = create_spin_counter(engine,
636 engine->kernel_context->vm, false,
637 &cancel, &cntr);
638 if (IS_ERR(vma)) {
639 err = PTR_ERR(vma);
640 st_engine_heartbeat_enable(engine);
641 break;
642 }
643
644 rq = intel_engine_create_kernel_request(engine);
645 if (IS_ERR(rq)) {
646 err = PTR_ERR(rq);
647 goto err_vma;
648 }
649
650 err = i915_request_await_object(rq, vma->obj, false);
651 if (!err)
652 err = i915_vma_move_to_active(vma, rq, 0);
653 if (!err)
654 err = rq->engine->emit_bb_start(rq,
655 vma->node.start,
656 PAGE_SIZE, 0);
657 i915_request_add(rq);
658 if (err)
659 goto err_vma;
660
661 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
662 10)) {
663 pr_err("%s: timed loop did not start\n",
664 engine->name);
665 goto err_vma;
666 }
667
668 min.freq = rps->min_freq;
669 min.count = measure_cs_frequency_at(rps, engine, &min.freq);
670
671 max.freq = rps->max_freq;
672 max.count = measure_cs_frequency_at(rps, engine, &max.freq);
673
674 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
675 engine->name,
676 min.count, intel_gpu_freq(rps, min.freq),
677 max.count, intel_gpu_freq(rps, max.freq),
678 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
679 max.freq * min.count));
680
681 if (!scaled_within(max.freq * min.count,
682 min.freq * max.count,
683 2, 3)) {
684 int f;
685
686 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
687 engine->name,
688 max.freq * min.count,
689 min.freq * max.count);
690 show_pcu_config(rps);
691
692 for (f = min.freq + 1; f <= rps->max_freq; f++) {
693 int act = f;
694 u64 count;
695
696 count = measure_cs_frequency_at(rps, engine, &act);
697 if (act < f)
698 break;
699
700 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
701 engine->name,
702 act, intel_gpu_freq(rps, act), count,
703 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
704 act * min.count));
705
706 f = act; /* may skip ahead [pcu granularity] */
707 }
708
709 err = -EINTR; /* ignore error, continue on with test */
710 }
711
712 err_vma:
713 *cancel = MI_BATCH_BUFFER_END;
714 i915_gem_object_flush_map(vma->obj);
715 i915_gem_object_unpin_map(vma->obj);
716 i915_vma_unpin(vma);
717 i915_vma_unlock(vma);
718 i915_vma_put(vma);
719
720 st_engine_heartbeat_enable(engine);
721 if (igt_flush_test(gt->i915))
722 err = -EIO;
723 if (err)
724 break;
725 }
726
727 intel_gt_pm_wait_for_idle(gt);
728 rps->work.func = saved_work;
729
730 if (CPU_LATENCY >= 0)
731 cpu_latency_qos_remove_request(&qos);
732
733 return err;
734 }
735
live_rps_frequency_srm(void * arg)736 int live_rps_frequency_srm(void *arg)
737 {
738 void (*saved_work)(struct work_struct *wrk);
739 struct intel_gt *gt = arg;
740 struct intel_rps *rps = >->rps;
741 struct intel_engine_cs *engine;
742 struct pm_qos_request qos;
743 enum intel_engine_id id;
744 int err = 0;
745
746 /*
747 * The premise is that the GPU does change freqency at our behest.
748 * Let's check there is a correspondence between the requested
749 * frequency, the actual frequency, and the observed clock rate.
750 */
751
752 if (!intel_rps_is_enabled(rps))
753 return 0;
754
755 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
756 return 0;
757
758 if (CPU_LATENCY >= 0)
759 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
760
761 intel_gt_pm_wait_for_idle(gt);
762 saved_work = rps->work.func;
763 rps->work.func = dummy_rps_work;
764
765 for_each_engine(engine, gt, id) {
766 struct i915_request *rq;
767 struct i915_vma *vma;
768 u32 *cancel, *cntr;
769 struct {
770 u64 count;
771 int freq;
772 } min, max;
773
774 st_engine_heartbeat_disable(engine);
775
776 vma = create_spin_counter(engine,
777 engine->kernel_context->vm, true,
778 &cancel, &cntr);
779 if (IS_ERR(vma)) {
780 err = PTR_ERR(vma);
781 st_engine_heartbeat_enable(engine);
782 break;
783 }
784
785 rq = intel_engine_create_kernel_request(engine);
786 if (IS_ERR(rq)) {
787 err = PTR_ERR(rq);
788 goto err_vma;
789 }
790
791 err = i915_request_await_object(rq, vma->obj, false);
792 if (!err)
793 err = i915_vma_move_to_active(vma, rq, 0);
794 if (!err)
795 err = rq->engine->emit_bb_start(rq,
796 vma->node.start,
797 PAGE_SIZE, 0);
798 i915_request_add(rq);
799 if (err)
800 goto err_vma;
801
802 if (wait_for(READ_ONCE(*cntr), 10)) {
803 pr_err("%s: timed loop did not start\n",
804 engine->name);
805 goto err_vma;
806 }
807
808 min.freq = rps->min_freq;
809 min.count = measure_frequency_at(rps, cntr, &min.freq);
810
811 max.freq = rps->max_freq;
812 max.count = measure_frequency_at(rps, cntr, &max.freq);
813
814 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
815 engine->name,
816 min.count, intel_gpu_freq(rps, min.freq),
817 max.count, intel_gpu_freq(rps, max.freq),
818 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
819 max.freq * min.count));
820
821 if (!scaled_within(max.freq * min.count,
822 min.freq * max.count,
823 1, 2)) {
824 int f;
825
826 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
827 engine->name,
828 max.freq * min.count,
829 min.freq * max.count);
830 show_pcu_config(rps);
831
832 for (f = min.freq + 1; f <= rps->max_freq; f++) {
833 int act = f;
834 u64 count;
835
836 count = measure_frequency_at(rps, cntr, &act);
837 if (act < f)
838 break;
839
840 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
841 engine->name,
842 act, intel_gpu_freq(rps, act), count,
843 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
844 act * min.count));
845
846 f = act; /* may skip ahead [pcu granularity] */
847 }
848
849 err = -EINTR; /* ignore error, continue on with test */
850 }
851
852 err_vma:
853 *cancel = MI_BATCH_BUFFER_END;
854 i915_gem_object_flush_map(vma->obj);
855 i915_gem_object_unpin_map(vma->obj);
856 i915_vma_unpin(vma);
857 i915_vma_unlock(vma);
858 i915_vma_put(vma);
859
860 st_engine_heartbeat_enable(engine);
861 if (igt_flush_test(gt->i915))
862 err = -EIO;
863 if (err)
864 break;
865 }
866
867 intel_gt_pm_wait_for_idle(gt);
868 rps->work.func = saved_work;
869
870 if (CPU_LATENCY >= 0)
871 cpu_latency_qos_remove_request(&qos);
872
873 return err;
874 }
875
sleep_for_ei(struct intel_rps * rps,int timeout_us)876 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
877 {
878 /* Flush any previous EI */
879 usleep_range(timeout_us, 2 * timeout_us);
880
881 /* Reset the interrupt status */
882 rps_disable_interrupts(rps);
883 GEM_BUG_ON(rps->pm_iir);
884 rps_enable_interrupts(rps);
885
886 /* And then wait for the timeout, for real this time */
887 usleep_range(2 * timeout_us, 3 * timeout_us);
888 }
889
__rps_up_interrupt(struct intel_rps * rps,struct intel_engine_cs * engine,struct igt_spinner * spin)890 static int __rps_up_interrupt(struct intel_rps *rps,
891 struct intel_engine_cs *engine,
892 struct igt_spinner *spin)
893 {
894 struct intel_uncore *uncore = engine->uncore;
895 struct i915_request *rq;
896 u32 timeout;
897
898 if (!intel_engine_can_store_dword(engine))
899 return 0;
900
901 rps_set_check(rps, rps->min_freq);
902
903 rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
904 if (IS_ERR(rq))
905 return PTR_ERR(rq);
906
907 i915_request_get(rq);
908 i915_request_add(rq);
909
910 if (!igt_wait_for_spinner(spin, rq)) {
911 pr_err("%s: RPS spinner did not start\n",
912 engine->name);
913 i915_request_put(rq);
914 intel_gt_set_wedged(engine->gt);
915 return -EIO;
916 }
917
918 if (!intel_rps_is_active(rps)) {
919 pr_err("%s: RPS not enabled on starting spinner\n",
920 engine->name);
921 igt_spinner_end(spin);
922 i915_request_put(rq);
923 return -EINVAL;
924 }
925
926 if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
927 pr_err("%s: RPS did not register UP interrupt\n",
928 engine->name);
929 i915_request_put(rq);
930 return -EINVAL;
931 }
932
933 if (rps->last_freq != rps->min_freq) {
934 pr_err("%s: RPS did not program min frequency\n",
935 engine->name);
936 i915_request_put(rq);
937 return -EINVAL;
938 }
939
940 timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
941 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
942 timeout = DIV_ROUND_UP(timeout, 1000);
943
944 sleep_for_ei(rps, timeout);
945 GEM_BUG_ON(i915_request_completed(rq));
946
947 igt_spinner_end(spin);
948 i915_request_put(rq);
949
950 if (rps->cur_freq != rps->min_freq) {
951 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
952 engine->name, intel_rps_read_actual_frequency(rps));
953 return -EINVAL;
954 }
955
956 if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
957 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
958 engine->name, rps->pm_iir,
959 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
960 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
961 intel_uncore_read(uncore, GEN6_RP_UP_EI));
962 return -EINVAL;
963 }
964
965 return 0;
966 }
967
__rps_down_interrupt(struct intel_rps * rps,struct intel_engine_cs * engine)968 static int __rps_down_interrupt(struct intel_rps *rps,
969 struct intel_engine_cs *engine)
970 {
971 struct intel_uncore *uncore = engine->uncore;
972 u32 timeout;
973
974 rps_set_check(rps, rps->max_freq);
975
976 if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
977 pr_err("%s: RPS did not register DOWN interrupt\n",
978 engine->name);
979 return -EINVAL;
980 }
981
982 if (rps->last_freq != rps->max_freq) {
983 pr_err("%s: RPS did not program max frequency\n",
984 engine->name);
985 return -EINVAL;
986 }
987
988 timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
989 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
990 timeout = DIV_ROUND_UP(timeout, 1000);
991
992 sleep_for_ei(rps, timeout);
993
994 if (rps->cur_freq != rps->max_freq) {
995 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
996 engine->name,
997 intel_rps_read_actual_frequency(rps));
998 return -EINVAL;
999 }
1000
1001 if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1002 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1003 engine->name, rps->pm_iir,
1004 intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1005 intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1006 intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1007 intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1008 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1009 intel_uncore_read(uncore, GEN6_RP_UP_EI));
1010 return -EINVAL;
1011 }
1012
1013 return 0;
1014 }
1015
live_rps_interrupt(void * arg)1016 int live_rps_interrupt(void *arg)
1017 {
1018 struct intel_gt *gt = arg;
1019 struct intel_rps *rps = >->rps;
1020 void (*saved_work)(struct work_struct *wrk);
1021 struct intel_engine_cs *engine;
1022 enum intel_engine_id id;
1023 struct igt_spinner spin;
1024 u32 pm_events;
1025 int err = 0;
1026
1027 /*
1028 * First, let's check whether or not we are receiving interrupts.
1029 */
1030
1031 if (!intel_rps_has_interrupts(rps))
1032 return 0;
1033
1034 intel_gt_pm_get(gt);
1035 pm_events = rps->pm_events;
1036 intel_gt_pm_put(gt);
1037 if (!pm_events) {
1038 pr_err("No RPS PM events registered, but RPS is enabled?\n");
1039 return -ENODEV;
1040 }
1041
1042 if (igt_spinner_init(&spin, gt))
1043 return -ENOMEM;
1044
1045 intel_gt_pm_wait_for_idle(gt);
1046 saved_work = rps->work.func;
1047 rps->work.func = dummy_rps_work;
1048
1049 for_each_engine(engine, gt, id) {
1050 /* Keep the engine busy with a spinner; expect an UP! */
1051 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1052 intel_gt_pm_wait_for_idle(engine->gt);
1053 GEM_BUG_ON(intel_rps_is_active(rps));
1054
1055 st_engine_heartbeat_disable(engine);
1056
1057 err = __rps_up_interrupt(rps, engine, &spin);
1058
1059 st_engine_heartbeat_enable(engine);
1060 if (err)
1061 goto out;
1062
1063 intel_gt_pm_wait_for_idle(engine->gt);
1064 }
1065
1066 /* Keep the engine awake but idle and check for DOWN */
1067 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1068 st_engine_heartbeat_disable(engine);
1069 intel_rc6_disable(>->rc6);
1070
1071 err = __rps_down_interrupt(rps, engine);
1072
1073 intel_rc6_enable(>->rc6);
1074 st_engine_heartbeat_enable(engine);
1075 if (err)
1076 goto out;
1077 }
1078 }
1079
1080 out:
1081 if (igt_flush_test(gt->i915))
1082 err = -EIO;
1083
1084 igt_spinner_fini(&spin);
1085
1086 intel_gt_pm_wait_for_idle(gt);
1087 rps->work.func = saved_work;
1088
1089 return err;
1090 }
1091
__measure_power(int duration_ms)1092 static u64 __measure_power(int duration_ms)
1093 {
1094 u64 dE, dt;
1095
1096 dt = ktime_get();
1097 dE = librapl_energy_uJ();
1098 usleep_range(1000 * duration_ms, 2000 * duration_ms);
1099 dE = librapl_energy_uJ() - dE;
1100 dt = ktime_get() - dt;
1101
1102 return div64_u64(1000 * 1000 * dE, dt);
1103 }
1104
measure_power_at(struct intel_rps * rps,int * freq)1105 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1106 {
1107 u64 x[5];
1108 int i;
1109
1110 *freq = rps_set_check(rps, *freq);
1111 for (i = 0; i < 5; i++)
1112 x[i] = __measure_power(5);
1113 *freq = (*freq + read_cagf(rps)) / 2;
1114
1115 /* A simple triangle filter for better result stability */
1116 sort(x, 5, sizeof(*x), cmp_u64, NULL);
1117 return div_u64(x[1] + 2 * x[2] + x[3], 4);
1118 }
1119
live_rps_power(void * arg)1120 int live_rps_power(void *arg)
1121 {
1122 struct intel_gt *gt = arg;
1123 struct intel_rps *rps = >->rps;
1124 void (*saved_work)(struct work_struct *wrk);
1125 struct intel_engine_cs *engine;
1126 enum intel_engine_id id;
1127 struct igt_spinner spin;
1128 int err = 0;
1129
1130 /*
1131 * Our fundamental assumption is that running at lower frequency
1132 * actually saves power. Let's see if our RAPL measurement support
1133 * that theory.
1134 */
1135
1136 if (!intel_rps_is_enabled(rps))
1137 return 0;
1138
1139 if (!librapl_energy_uJ())
1140 return 0;
1141
1142 if (igt_spinner_init(&spin, gt))
1143 return -ENOMEM;
1144
1145 intel_gt_pm_wait_for_idle(gt);
1146 saved_work = rps->work.func;
1147 rps->work.func = dummy_rps_work;
1148
1149 for_each_engine(engine, gt, id) {
1150 struct i915_request *rq;
1151 struct {
1152 u64 power;
1153 int freq;
1154 } min, max;
1155
1156 if (!intel_engine_can_store_dword(engine))
1157 continue;
1158
1159 st_engine_heartbeat_disable(engine);
1160
1161 rq = igt_spinner_create_request(&spin,
1162 engine->kernel_context,
1163 MI_NOOP);
1164 if (IS_ERR(rq)) {
1165 st_engine_heartbeat_enable(engine);
1166 err = PTR_ERR(rq);
1167 break;
1168 }
1169
1170 i915_request_add(rq);
1171
1172 if (!igt_wait_for_spinner(&spin, rq)) {
1173 pr_err("%s: RPS spinner did not start\n",
1174 engine->name);
1175 igt_spinner_end(&spin);
1176 st_engine_heartbeat_enable(engine);
1177 intel_gt_set_wedged(engine->gt);
1178 err = -EIO;
1179 break;
1180 }
1181
1182 max.freq = rps->max_freq;
1183 max.power = measure_power_at(rps, &max.freq);
1184
1185 min.freq = rps->min_freq;
1186 min.power = measure_power_at(rps, &min.freq);
1187
1188 igt_spinner_end(&spin);
1189 st_engine_heartbeat_enable(engine);
1190
1191 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1192 engine->name,
1193 min.power, intel_gpu_freq(rps, min.freq),
1194 max.power, intel_gpu_freq(rps, max.freq));
1195
1196 if (10 * min.freq >= 9 * max.freq) {
1197 pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1198 min.freq, intel_gpu_freq(rps, min.freq),
1199 max.freq, intel_gpu_freq(rps, max.freq));
1200 continue;
1201 }
1202
1203 if (11 * min.power > 10 * max.power) {
1204 pr_err("%s: did not conserve power when setting lower frequency!\n",
1205 engine->name);
1206 err = -EINVAL;
1207 break;
1208 }
1209
1210 if (igt_flush_test(gt->i915)) {
1211 err = -EIO;
1212 break;
1213 }
1214 }
1215
1216 igt_spinner_fini(&spin);
1217
1218 intel_gt_pm_wait_for_idle(gt);
1219 rps->work.func = saved_work;
1220
1221 return err;
1222 }
1223
live_rps_dynamic(void * arg)1224 int live_rps_dynamic(void *arg)
1225 {
1226 struct intel_gt *gt = arg;
1227 struct intel_rps *rps = >->rps;
1228 struct intel_engine_cs *engine;
1229 enum intel_engine_id id;
1230 struct igt_spinner spin;
1231 int err = 0;
1232
1233 /*
1234 * We've looked at the bascs, and have established that we
1235 * can change the clock frequency and that the HW will generate
1236 * interrupts based on load. Now we check how we integrate those
1237 * moving parts into dynamic reclocking based on load.
1238 */
1239
1240 if (!intel_rps_is_enabled(rps))
1241 return 0;
1242
1243 if (igt_spinner_init(&spin, gt))
1244 return -ENOMEM;
1245
1246 if (intel_rps_has_interrupts(rps))
1247 pr_info("RPS has interrupt support\n");
1248 if (intel_rps_uses_timer(rps))
1249 pr_info("RPS has timer support\n");
1250
1251 for_each_engine(engine, gt, id) {
1252 struct i915_request *rq;
1253 struct {
1254 ktime_t dt;
1255 u8 freq;
1256 } min, max;
1257
1258 if (!intel_engine_can_store_dword(engine))
1259 continue;
1260
1261 intel_gt_pm_wait_for_idle(gt);
1262 GEM_BUG_ON(intel_rps_is_active(rps));
1263 rps->cur_freq = rps->min_freq;
1264
1265 intel_engine_pm_get(engine);
1266 intel_rc6_disable(>->rc6);
1267 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1268
1269 rq = igt_spinner_create_request(&spin,
1270 engine->kernel_context,
1271 MI_NOOP);
1272 if (IS_ERR(rq)) {
1273 err = PTR_ERR(rq);
1274 goto err;
1275 }
1276
1277 i915_request_add(rq);
1278
1279 max.dt = ktime_get();
1280 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1281 max.dt = ktime_sub(ktime_get(), max.dt);
1282
1283 igt_spinner_end(&spin);
1284
1285 min.dt = ktime_get();
1286 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1287 min.dt = ktime_sub(ktime_get(), min.dt);
1288
1289 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1290 engine->name,
1291 max.freq, intel_gpu_freq(rps, max.freq),
1292 ktime_to_ns(max.dt),
1293 min.freq, intel_gpu_freq(rps, min.freq),
1294 ktime_to_ns(min.dt));
1295 if (min.freq >= max.freq) {
1296 pr_err("%s: dynamic reclocking of spinner failed\n!",
1297 engine->name);
1298 err = -EINVAL;
1299 }
1300
1301 err:
1302 intel_rc6_enable(>->rc6);
1303 intel_engine_pm_put(engine);
1304
1305 if (igt_flush_test(gt->i915))
1306 err = -EIO;
1307 if (err)
1308 break;
1309 }
1310
1311 igt_spinner_fini(&spin);
1312
1313 return err;
1314 }
1315