1 /*
2  * Xtensa Performance Monitor Module driver
3  * See Tensilica Debug User's Guide for PMU registers documentation.
4  *
5  * Copyright (C) 2015 Cadence Design Systems Inc.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 
12 #include <linux/interrupt.h>
13 #include <linux/irqdomain.h>
14 #include <linux/module.h>
15 #include <linux/of.h>
16 #include <linux/perf_event.h>
17 #include <linux/platform_device.h>
18 
19 #include <asm/processor.h>
20 #include <asm/stacktrace.h>
21 
/* Global control/status for all perf counters */
#define XTENSA_PMU_PMG			0x1000
/* Perf counter values */
#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
/* Perf counter control registers */
#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
/* Perf counter status registers */
#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)

/* PMG bit set by ->pmu_enable() and cleared by ->pmu_disable() */
#define XTENSA_PMU_PMG_PMEN		0x1

/* Counter values are handled as 32-bit quantities; deltas wrap modulo 2^32 */
#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
/* Largest distance-to-overflow ever programmed into a counter */
#define XTENSA_PMU_COUNTER_MAX		0x7fffffff

/* Fields of the PMCTRL registers */
#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
#define XTENSA_PMU_PMCTRL_MASK		0xffff0000

/*
 * Build a PMCTRL value from an event @select and a sub-event @mask, with
 * every TRACELEVEL bit and INTEN set.
 *
 * Both operands are converted to u32 before shifting: a 16-bit mask such
 * as 0xffff shifted left by 16 does not fit in a signed int, and shifting
 * a signed value out of range is undefined behaviour in C.
 */
#define XTENSA_PMU_MASK(select, mask) \
	(((u32)(select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
	 ((u32)(mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
	 XTENSA_PMU_PMCTRL_INTEN)

/* Fields of the PMSTAT registers */
#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
52 
53 struct xtensa_pmu_events {
54 	/* Array of events currently on this core */
55 	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
56 	/* Bitmap of used hardware counters */
57 	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
58 };
59 static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
60 
61 static const u32 xtensa_hw_ctl[] = {
62 	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
63 	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
64 	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
65 	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
66 	/* Taken and non-taken branches + taken loop ends */
67 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
68 	/* Instruction-related + other global stall cycles */
69 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
70 	/* Data-related global stall cycles */
71 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
72 };
73 
74 #define C(_x) PERF_COUNT_HW_CACHE_##_x
75 
76 static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
77 	[C(L1D)] = {
78 		[C(OP_READ)] = {
79 			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
80 			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
81 		},
82 		[C(OP_WRITE)] = {
83 			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
84 			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
85 		},
86 	},
87 	[C(L1I)] = {
88 		[C(OP_READ)] = {
89 			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
90 			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
91 		},
92 	},
93 	[C(DTLB)] = {
94 		[C(OP_READ)] = {
95 			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
96 			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
97 		},
98 	},
99 	[C(ITLB)] = {
100 		[C(OP_READ)] = {
101 			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
102 			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
103 		},
104 	},
105 };
106 
/*
 * Translate a PERF_TYPE_HW_CACHE config into a PMCTRL value.
 *
 * @config packs the cache type in bits 0-7, the operation in bits 8-15
 * and the result in bits 16-23.
 *
 * Returns the PMCTRL value from xtensa_cache_ctl, or -EINVAL when any
 * field is out of range or the combination has no table entry.
 */
static int xtensa_pmu_cache_event(u64 config)
{
	const unsigned int type = (config >> 0) & 0xff;
	const unsigned int op = (config >> 8) & 0xff;
	const unsigned int result = (config >> 16) & 0xff;
	int ctl;

	if (type >= ARRAY_SIZE(xtensa_cache_ctl))
		return -EINVAL;
	if (op >= C(OP_MAX))
		return -EINVAL;
	if (result >= C(RESULT_MAX))
		return -EINVAL;

	/* A zero entry marks a combination this PMU cannot count */
	ctl = xtensa_cache_ctl[type][op][result];

	return ctl ? ctl : -EINVAL;
}
128 
xtensa_pmu_read_counter(int idx)129 static inline uint32_t xtensa_pmu_read_counter(int idx)
130 {
131 	return get_er(XTENSA_PMU_PM(idx));
132 }
133 
/* Load raw value @v into hardware counter @idx. */
static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
{
	set_er(v,
	       XTENSA_PMU_PM(idx));
}
138 
xtensa_perf_event_update(struct perf_event * event,struct hw_perf_event * hwc,int idx)139 static void xtensa_perf_event_update(struct perf_event *event,
140 				     struct hw_perf_event *hwc, int idx)
141 {
142 	uint64_t prev_raw_count, new_raw_count;
143 	int64_t delta;
144 
145 	do {
146 		prev_raw_count = local64_read(&hwc->prev_count);
147 		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
148 	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
149 				 new_raw_count) != prev_raw_count);
150 
151 	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
152 
153 	local64_add(delta, &event->count);
154 	local64_sub(delta, &hwc->period_left);
155 }
156 
xtensa_perf_event_set_period(struct perf_event * event,struct hw_perf_event * hwc,int idx)157 static bool xtensa_perf_event_set_period(struct perf_event *event,
158 					 struct hw_perf_event *hwc, int idx)
159 {
160 	bool rc = false;
161 	s64 left;
162 
163 	if (!is_sampling_event(event)) {
164 		left = XTENSA_PMU_COUNTER_MAX;
165 	} else {
166 		s64 period = hwc->sample_period;
167 
168 		left = local64_read(&hwc->period_left);
169 		if (left <= -period) {
170 			left = period;
171 			local64_set(&hwc->period_left, left);
172 			hwc->last_period = period;
173 			rc = true;
174 		} else if (left <= 0) {
175 			left += period;
176 			local64_set(&hwc->period_left, left);
177 			hwc->last_period = period;
178 			rc = true;
179 		}
180 		if (left > XTENSA_PMU_COUNTER_MAX)
181 			left = XTENSA_PMU_COUNTER_MAX;
182 	}
183 
184 	local64_set(&hwc->prev_count, -left);
185 	xtensa_pmu_write_counter(idx, -left);
186 	perf_event_update_userpage(event);
187 
188 	return rc;
189 }
190 
xtensa_pmu_enable(struct pmu * pmu)191 static void xtensa_pmu_enable(struct pmu *pmu)
192 {
193 	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
194 }
195 
xtensa_pmu_disable(struct pmu * pmu)196 static void xtensa_pmu_disable(struct pmu *pmu)
197 {
198 	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
199 }
200 
xtensa_pmu_event_init(struct perf_event * event)201 static int xtensa_pmu_event_init(struct perf_event *event)
202 {
203 	int ret;
204 
205 	switch (event->attr.type) {
206 	case PERF_TYPE_HARDWARE:
207 		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
208 		    xtensa_hw_ctl[event->attr.config] == 0)
209 			return -EINVAL;
210 		event->hw.config = xtensa_hw_ctl[event->attr.config];
211 		return 0;
212 
213 	case PERF_TYPE_HW_CACHE:
214 		ret = xtensa_pmu_cache_event(event->attr.config);
215 		if (ret < 0)
216 			return ret;
217 		event->hw.config = ret;
218 		return 0;
219 
220 	case PERF_TYPE_RAW:
221 		/* Not 'previous counter' select */
222 		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
223 		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
224 			return -EINVAL;
225 		event->hw.config = (event->attr.config &
226 				    (XTENSA_PMU_PMCTRL_KRNLCNT |
227 				     XTENSA_PMU_PMCTRL_TRACELEVEL |
228 				     XTENSA_PMU_PMCTRL_SELECT |
229 				     XTENSA_PMU_PMCTRL_MASK)) |
230 			XTENSA_PMU_PMCTRL_INTEN;
231 		return 0;
232 
233 	default:
234 		return -ENOENT;
235 	}
236 }
237 
238 /*
239  * Starts/Stops a counter present on the PMU. The PMI handler
240  * should stop the counter when perf_event_overflow() returns
241  * !0. ->start() will be used to continue.
242  */
xtensa_pmu_start(struct perf_event * event,int flags)243 static void xtensa_pmu_start(struct perf_event *event, int flags)
244 {
245 	struct hw_perf_event *hwc = &event->hw;
246 	int idx = hwc->idx;
247 
248 	if (WARN_ON_ONCE(idx == -1))
249 		return;
250 
251 	if (flags & PERF_EF_RELOAD) {
252 		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
253 		xtensa_perf_event_set_period(event, hwc, idx);
254 	}
255 
256 	hwc->state = 0;
257 
258 	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
259 }
260 
xtensa_pmu_stop(struct perf_event * event,int flags)261 static void xtensa_pmu_stop(struct perf_event *event, int flags)
262 {
263 	struct hw_perf_event *hwc = &event->hw;
264 	int idx = hwc->idx;
265 
266 	if (!(hwc->state & PERF_HES_STOPPED)) {
267 		set_er(0, XTENSA_PMU_PMCTRL(idx));
268 		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
269 		       XTENSA_PMU_PMSTAT(idx));
270 		hwc->state |= PERF_HES_STOPPED;
271 	}
272 
273 	if ((flags & PERF_EF_UPDATE) &&
274 	    !(event->hw.state & PERF_HES_UPTODATE)) {
275 		xtensa_perf_event_update(event, &event->hw, idx);
276 		event->hw.state |= PERF_HES_UPTODATE;
277 	}
278 }
279 
280 /*
281  * Adds/Removes a counter to/from the PMU, can be done inside
282  * a transaction, see the ->*_txn() methods.
283  */
xtensa_pmu_add(struct perf_event * event,int flags)284 static int xtensa_pmu_add(struct perf_event *event, int flags)
285 {
286 	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
287 	struct hw_perf_event *hwc = &event->hw;
288 	int idx = hwc->idx;
289 
290 	if (__test_and_set_bit(idx, ev->used_mask)) {
291 		idx = find_first_zero_bit(ev->used_mask,
292 					  XCHAL_NUM_PERF_COUNTERS);
293 		if (idx == XCHAL_NUM_PERF_COUNTERS)
294 			return -EAGAIN;
295 
296 		__set_bit(idx, ev->used_mask);
297 		hwc->idx = idx;
298 	}
299 	ev->event[idx] = event;
300 
301 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
302 
303 	if (flags & PERF_EF_START)
304 		xtensa_pmu_start(event, PERF_EF_RELOAD);
305 
306 	perf_event_update_userpage(event);
307 	return 0;
308 }
309 
xtensa_pmu_del(struct perf_event * event,int flags)310 static void xtensa_pmu_del(struct perf_event *event, int flags)
311 {
312 	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
313 
314 	xtensa_pmu_stop(event, PERF_EF_UPDATE);
315 	__clear_bit(event->hw.idx, ev->used_mask);
316 	perf_event_update_userpage(event);
317 }
318 
xtensa_pmu_read(struct perf_event * event)319 static void xtensa_pmu_read(struct perf_event *event)
320 {
321 	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
322 }
323 
callchain_trace(struct stackframe * frame,void * data)324 static int callchain_trace(struct stackframe *frame, void *data)
325 {
326 	struct perf_callchain_entry_ctx *entry = data;
327 
328 	perf_callchain_store(entry, frame->pc);
329 	return 0;
330 }
331 
perf_callchain_kernel(struct perf_callchain_entry_ctx * entry,struct pt_regs * regs)332 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
333 			   struct pt_regs *regs)
334 {
335 	xtensa_backtrace_kernel(regs, entry->max_stack,
336 				callchain_trace, NULL, entry);
337 }
338 
perf_callchain_user(struct perf_callchain_entry_ctx * entry,struct pt_regs * regs)339 void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
340 			 struct pt_regs *regs)
341 {
342 	xtensa_backtrace_user(regs, entry->max_stack,
343 			      callchain_trace, entry);
344 }
345 
perf_event_print_debug(void)346 void perf_event_print_debug(void)
347 {
348 	unsigned long flags;
349 	unsigned i;
350 
351 	local_irq_save(flags);
352 	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
353 		get_er(XTENSA_PMU_PMG));
354 	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
355 		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
356 			i, get_er(XTENSA_PMU_PM(i)),
357 			i, get_er(XTENSA_PMU_PMCTRL(i)),
358 			i, get_er(XTENSA_PMU_PMSTAT(i)));
359 	local_irq_restore(flags);
360 }
361 
xtensa_pmu_irq_handler(int irq,void * dev_id)362 irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
363 {
364 	irqreturn_t rc = IRQ_NONE;
365 	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
366 	unsigned i;
367 
368 	for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
369 	     i < XCHAL_NUM_PERF_COUNTERS;
370 	     i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
371 		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
372 		struct perf_event *event = ev->event[i];
373 		struct hw_perf_event *hwc = &event->hw;
374 		u64 last_period;
375 
376 		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
377 			continue;
378 
379 		set_er(v, XTENSA_PMU_PMSTAT(i));
380 		xtensa_perf_event_update(event, hwc, i);
381 		last_period = hwc->last_period;
382 		if (xtensa_perf_event_set_period(event, hwc, i)) {
383 			struct perf_sample_data data;
384 			struct pt_regs *regs = get_irq_regs();
385 
386 			perf_sample_data_init(&data, 0, last_period);
387 			if (perf_event_overflow(event, &data, regs))
388 				xtensa_pmu_stop(event, 0);
389 		}
390 
391 		rc = IRQ_HANDLED;
392 	}
393 	return rc;
394 }
395 
396 static struct pmu xtensa_pmu = {
397 	.pmu_enable = xtensa_pmu_enable,
398 	.pmu_disable = xtensa_pmu_disable,
399 	.event_init = xtensa_pmu_event_init,
400 	.add = xtensa_pmu_add,
401 	.del = xtensa_pmu_del,
402 	.start = xtensa_pmu_start,
403 	.stop = xtensa_pmu_stop,
404 	.read = xtensa_pmu_read,
405 };
406 
xtensa_pmu_setup(int cpu)407 static int xtensa_pmu_setup(int cpu)
408 {
409 	unsigned i;
410 
411 	set_er(0, XTENSA_PMU_PMG);
412 	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
413 		set_er(0, XTENSA_PMU_PMCTRL(i));
414 		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
415 	}
416 	return 0;
417 }
418 
xtensa_pmu_init(void)419 static int __init xtensa_pmu_init(void)
420 {
421 	int ret;
422 	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
423 
424 	ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING,
425 				"perf/xtensa:starting", xtensa_pmu_setup,
426 				NULL);
427 	if (ret) {
428 		pr_err("xtensa_pmu: failed to register CPU-hotplug.\n");
429 		return ret;
430 	}
431 #if XTENSA_FAKE_NMI
432 	enable_irq(irq);
433 #else
434 	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
435 			  "pmu", NULL);
436 	if (ret < 0)
437 		return ret;
438 #endif
439 
440 	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
441 	if (ret)
442 		free_irq(irq, NULL);
443 
444 	return ret;
445 }
446 early_initcall(xtensa_pmu_init);
447