/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/irq.h>
#include "i915_pmu.h"
#include "intel_ringbuffer.h"
#include "i915_drv.h"

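/*
 * i915 PMU: exposes GPU counters (engine busyness, wait and semaphore
 * time, actual/requested frequency, interrupts, RC6 residency) through
 * the core perf framework.
 *
 * A usage sketch (event names are generated at runtime from the engines
 * present, so the exact set differs per platform):
 *
 *   perf stat -a -e i915/rc6-residency/ -e i915/actual-frequency/ sleep 1
 *   perf stat -a -e i915/rcs0-busy/ sleep 1
 */
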
/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

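/*
 * Engine events encode their target in attr.config: the sample type in
 * the low I915_PMU_SAMPLE_BITS, the engine instance and class above it
 * (see the accessors below). Configs at or above __I915_PMU_OTHER(0) are
 * global, non-engine events.
 */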
static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

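/*
 * Map an event config to its bit in the pmu.enable bitmask: engine
 * sample types occupy the low bits, global events follow from
 * ENGINE_SAMPLE_BITS upwards.
 */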
static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, if software busyness tracking is available we do not need
	 * the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as a proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		i915->pmu.timer_last = ktime_get();
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

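/*
 * Take forcewake on first use during a sampling pass; the caller keeps
 * the returned flag so the reference is taken only once and released
 * when the pass is done.
 */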
static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	return true;
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}

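/*
 * Accumulate period_ns into the enabled per-engine counters: busyness is
 * inferred from the seqno not having caught up with the last submission,
 * wait and semaphore time from the RING_CTL flags.
 */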
static void
engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	if (!intel_runtime_pm_get_if_in_use(dev_priv))
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		val = !i915_seqno_passed(current_seqno, last_seqno);

		if (val)
			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
				   period_ns);

		if (val && (engine->pmu.enable &
		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		if (val & RING_WAIT)
			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
				   period_ns);

		if (val & RING_WAIT_SEMAPHORE)
			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
				   period_ns);
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv);
}

static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
	sample->cur += mul_u32_u32(val, mul);
}

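/*
 * Frequencies are accumulated as MHz * usec of elapsed time; the read
 * side divides by USEC_PER_SEC, so the counter advances at the sampled
 * frequency in MHz per second of wall time.
 */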
static void
frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake &&
		    intel_runtime_pm_get_if_in_use(dev_priv)) {
			val = intel_get_cagf(dev_priv,
					     I915_READ_NOTRACE(GEN6_RPSTAT1));
			intel_runtime_pm_put(dev_priv);
		}

		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
				intel_gpu_freq(dev_priv, val),
				period_ns / 1000);
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
				intel_gpu_freq(dev_priv,
					       dev_priv->gt_pm.rps.cur_freq),
				period_ns / 1000);
	}
}

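/*
 * Sampling timer callback: measure the actual time elapsed since the
 * previous invocation (timer callbacks can be delayed) and feed it to
 * the engine and frequency samplers before re-arming.
 */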
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	unsigned int period_ns;
	ktime_t now;

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
	i915->pmu.timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculation, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer call-
	 * back delay greatly dominates this so we keep it simple.
	 */
	engines_sample(i915, period_ns);
	frequency_sample(i915, period_ns);

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}

static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}

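/*
 * Check whether a sample type is supported on this engine/platform;
 * semaphore sampling needs gen6+ hardware.
 */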
static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}

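/* Sum the residency of all RC6 states supported by the platform. */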
static u64 __get_rc6(struct drm_i915_private *i915)
{
	u64 val;

	val = intel_rc6_residency_ns(i915,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

	return val;
}

static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
	unsigned long flags;
	u64 val;

	if (intel_runtime_pm_get_if_in_use(i915)) {
		val = __get_rc6(i915);
		intel_runtime_pm_put(i915);

		/*
		 * If we are coming back from being runtime suspended we must
		 * be careful not to report a larger value than returned
		 * previously.
		 */

		spin_lock_irqsave(&i915->pmu.lock, flags);

		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	} else {
		struct pci_dev *pdev = i915->drm.pdev;
		struct device *kdev = &pdev->dev;

		/*
		 * We are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);
		spin_lock(&kdev->power.lock);

		/*
		 * Since intel_runtime_pm_get_if_in_use failed above to get
		 * the runtime PM reference, we cannot assume we are in
		 * runtime suspend: we can either a) race with coming out of
		 * it before we took the power.lock, or b) be in some other
		 * state than suspended which can also bring us here.
		 *
		 * We need to double-check that we are indeed currently
		 * runtime suspended and if not we cannot do better than
		 * report the last known RC6 value.
		 */
		if (kdev->power.runtime_status == RPM_SUSPENDED) {
			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
				i915->pmu.suspended_jiffies_last =
						kdev->power.suspended_jiffies;

			val = kdev->power.suspended_jiffies -
			      i915->pmu.suspended_jiffies_last;
			val += jiffies - kdev->power.accounting_timestamp;

			val = jiffies_to_nsecs(val);
			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;

			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
		}

		spin_unlock(&kdev->power.lock);
		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	}

	return val;
#else
	return __get_rc6(i915);
#endif
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915);
			break;
		}
	}

	return val;
}

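/*
 * Lockless read: retry the cmpxchg until prev_count advances from the
 * value we sampled, then publish only the delta so concurrent readers
 * never double-count.
 */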
static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		engine->pmu.enable |= BIT(sample);

		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners, even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};

#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	i915->pmu.i915_attr = i915_attr;
	i915->pmu.pmu_attr = pmu_attr;

	return attr;

err:
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct drm_i915_private *i915)
{
	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(i915_pmu_events_attr_group.attrs);
	kfree(i915->pmu.i915_attr);
	kfree(i915->pmu.pmu_attr);

	i915_pmu_events_attr_group.attrs = NULL;
	i915->pmu.i915_attr = NULL;
	i915->pmu.pmu_attr = NULL;
}

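/*
 * The PMU counts device-wide (uncore) events, so all events are read on
 * a single designated CPU, which is tracked in i915_pmu_cpumask and
 * migrated on hotplug.
 */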
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.\n");
		return;
	}

	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
	if (!i915_pmu_events_attr_group.attrs) {
		ret = -ENOMEM;
		goto err;
	}

	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr = perf_invalid_context;
	i915->pmu.base.event_init = i915_pmu_event_init;
	i915->pmu.base.add = i915_pmu_event_add;
	i915->pmu.base.del = i915_pmu_event_del;
	i915->pmu.base.start = i915_pmu_event_start;
	i915->pmu.base.stop = i915_pmu_event_stop;
	i915->pmu.base.read = i915_pmu_event_read;
	i915->pmu.base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
}