1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * HiSilicon SoC Hardware event counters support
4  *
5  * Copyright (C) 2017 Hisilicon Limited
6  * Author: Anurup M <anurup.m@huawei.com>
7  *         Shaokun Zhang <zhangshaokun@hisilicon.com>
8  *
9  * This code is based on the uncore PMUs like arm-cci and arm-ccn.
10  */
11 #include <linux/bitmap.h>
12 #include <linux/bitops.h>
13 #include <linux/bug.h>
14 #include <linux/err.h>
15 #include <linux/errno.h>
16 #include <linux/interrupt.h>
17 
18 #include <asm/local64.h>
19 
20 #include "hisi_uncore_pmu.h"
21 
/*
 * Extract the event ID, i.e. the low 8 bits of hw.config_base, from a
 * perf_event pointer expression. The argument is parenthesized so that
 * compound expressions (e.g. "events + 1") expand as intended.
 */
#define HISI_GET_EVENTID(ev) ((ev)->hw.config_base & 0xff)
/* Mask covering the low @nr bits of a counter value */
#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1)
24 
25 /*
26  * PMU format attributes
27  */
hisi_format_sysfs_show(struct device * dev,struct device_attribute * attr,char * buf)28 ssize_t hisi_format_sysfs_show(struct device *dev,
29 			       struct device_attribute *attr, char *buf)
30 {
31 	struct dev_ext_attribute *eattr;
32 
33 	eattr = container_of(attr, struct dev_ext_attribute, attr);
34 
35 	return sprintf(buf, "%s\n", (char *)eattr->var);
36 }
37 
38 /*
39  * PMU event attributes
40  */
hisi_event_sysfs_show(struct device * dev,struct device_attribute * attr,char * page)41 ssize_t hisi_event_sysfs_show(struct device *dev,
42 			      struct device_attribute *attr, char *page)
43 {
44 	struct dev_ext_attribute *eattr;
45 
46 	eattr = container_of(attr, struct dev_ext_attribute, attr);
47 
48 	return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var);
49 }
50 
51 /*
52  * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show
53  */
hisi_cpumask_sysfs_show(struct device * dev,struct device_attribute * attr,char * buf)54 ssize_t hisi_cpumask_sysfs_show(struct device *dev,
55 				struct device_attribute *attr, char *buf)
56 {
57 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
58 
59 	return sprintf(buf, "%d\n", hisi_pmu->on_cpu);
60 }
61 
hisi_validate_event_group(struct perf_event * event)62 static bool hisi_validate_event_group(struct perf_event *event)
63 {
64 	struct perf_event *sibling, *leader = event->group_leader;
65 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
66 	/* Include count for the event */
67 	int counters = 1;
68 
69 	if (!is_software_event(leader)) {
70 		/*
71 		 * We must NOT create groups containing mixed PMUs, although
72 		 * software events are acceptable
73 		 */
74 		if (leader->pmu != event->pmu)
75 			return false;
76 
77 		/* Increment counter for the leader */
78 		if (leader != event)
79 			counters++;
80 	}
81 
82 	for_each_sibling_event(sibling, event->group_leader) {
83 		if (is_software_event(sibling))
84 			continue;
85 		if (sibling->pmu != event->pmu)
86 			return false;
87 		/* Increment counter for each sibling */
88 		counters++;
89 	}
90 
91 	/* The group can not count events more than the counters in the HW */
92 	return counters <= hisi_pmu->num_counters;
93 }
94 
hisi_uncore_pmu_counter_valid(struct hisi_pmu * hisi_pmu,int idx)95 int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx)
96 {
97 	return idx >= 0 && idx < hisi_pmu->num_counters;
98 }
99 
hisi_uncore_pmu_get_event_idx(struct perf_event * event)100 int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
101 {
102 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
103 	unsigned long *used_mask = hisi_pmu->pmu_events.used_mask;
104 	u32 num_counters = hisi_pmu->num_counters;
105 	int idx;
106 
107 	idx = find_first_zero_bit(used_mask, num_counters);
108 	if (idx == num_counters)
109 		return -EAGAIN;
110 
111 	set_bit(idx, used_mask);
112 
113 	return idx;
114 }
115 
hisi_uncore_pmu_clear_event_idx(struct hisi_pmu * hisi_pmu,int idx)116 static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
117 {
118 	if (!hisi_uncore_pmu_counter_valid(hisi_pmu, idx)) {
119 		dev_err(hisi_pmu->dev, "Unsupported event index:%d!\n", idx);
120 		return;
121 	}
122 
123 	clear_bit(idx, hisi_pmu->pmu_events.used_mask);
124 }
125 
hisi_uncore_pmu_event_init(struct perf_event * event)126 int hisi_uncore_pmu_event_init(struct perf_event *event)
127 {
128 	struct hw_perf_event *hwc = &event->hw;
129 	struct hisi_pmu *hisi_pmu;
130 
131 	if (event->attr.type != event->pmu->type)
132 		return -ENOENT;
133 
134 	/*
135 	 * We do not support sampling as the counters are all
136 	 * shared by all CPU cores in a CPU die(SCCL). Also we
137 	 * do not support attach to a task(per-process mode)
138 	 */
139 	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
140 		return -EOPNOTSUPP;
141 
142 	/*
143 	 *  The uncore counters not specific to any CPU, so cannot
144 	 *  support per-task
145 	 */
146 	if (event->cpu < 0)
147 		return -EINVAL;
148 
149 	/*
150 	 * Validate if the events in group does not exceed the
151 	 * available counters in hardware.
152 	 */
153 	if (!hisi_validate_event_group(event))
154 		return -EINVAL;
155 
156 	hisi_pmu = to_hisi_pmu(event->pmu);
157 	if (event->attr.config > hisi_pmu->check_event)
158 		return -EINVAL;
159 
160 	if (hisi_pmu->on_cpu == -1)
161 		return -EINVAL;
162 	/*
163 	 * We don't assign an index until we actually place the event onto
164 	 * hardware. Use -1 to signify that we haven't decided where to put it
165 	 * yet.
166 	 */
167 	hwc->idx		= -1;
168 	hwc->config_base	= event->attr.config;
169 
170 	/* Enforce to use the same CPU for all events in this PMU */
171 	event->cpu = hisi_pmu->on_cpu;
172 
173 	return 0;
174 }
175 
176 /*
177  * Set the counter to count the event that we're interested in,
178  * and enable interrupt and counter.
179  */
hisi_uncore_pmu_enable_event(struct perf_event * event)180 static void hisi_uncore_pmu_enable_event(struct perf_event *event)
181 {
182 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
183 	struct hw_perf_event *hwc = &event->hw;
184 
185 	hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
186 				    HISI_GET_EVENTID(event));
187 
188 	hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
189 	hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
190 }
191 
192 /*
193  * Disable counter and interrupt.
194  */
hisi_uncore_pmu_disable_event(struct perf_event * event)195 static void hisi_uncore_pmu_disable_event(struct perf_event *event)
196 {
197 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
198 	struct hw_perf_event *hwc = &event->hw;
199 
200 	hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
201 	hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);
202 }
203 
hisi_uncore_pmu_set_event_period(struct perf_event * event)204 void hisi_uncore_pmu_set_event_period(struct perf_event *event)
205 {
206 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
207 	struct hw_perf_event *hwc = &event->hw;
208 
209 	/*
210 	 * The HiSilicon PMU counters support 32 bits or 48 bits, depending on
211 	 * the PMU. We reduce it to 2^(counter_bits - 1) to account for the
212 	 * extreme interrupt latency. So we could hopefully handle the overflow
213 	 * interrupt before another 2^(counter_bits - 1) events occur and the
214 	 * counter overtakes its previous value.
215 	 */
216 	u64 val = BIT_ULL(hisi_pmu->counter_bits - 1);
217 
218 	local64_set(&hwc->prev_count, val);
219 	/* Write start value to the hardware event counter */
220 	hisi_pmu->ops->write_counter(hisi_pmu, hwc, val);
221 }
222 
hisi_uncore_pmu_event_update(struct perf_event * event)223 void hisi_uncore_pmu_event_update(struct perf_event *event)
224 {
225 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
226 	struct hw_perf_event *hwc = &event->hw;
227 	u64 delta, prev_raw_count, new_raw_count;
228 
229 	do {
230 		/* Read the count from the counter register */
231 		new_raw_count = hisi_pmu->ops->read_counter(hisi_pmu, hwc);
232 		prev_raw_count = local64_read(&hwc->prev_count);
233 	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
234 				 new_raw_count) != prev_raw_count);
235 	/*
236 	 * compute the delta
237 	 */
238 	delta = (new_raw_count - prev_raw_count) &
239 		HISI_MAX_PERIOD(hisi_pmu->counter_bits);
240 	local64_add(delta, &event->count);
241 }
242 
hisi_uncore_pmu_start(struct perf_event * event,int flags)243 void hisi_uncore_pmu_start(struct perf_event *event, int flags)
244 {
245 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
246 	struct hw_perf_event *hwc = &event->hw;
247 
248 	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
249 		return;
250 
251 	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
252 	hwc->state = 0;
253 	hisi_uncore_pmu_set_event_period(event);
254 
255 	if (flags & PERF_EF_RELOAD) {
256 		u64 prev_raw_count =  local64_read(&hwc->prev_count);
257 
258 		hisi_pmu->ops->write_counter(hisi_pmu, hwc, prev_raw_count);
259 	}
260 
261 	hisi_uncore_pmu_enable_event(event);
262 	perf_event_update_userpage(event);
263 }
264 
hisi_uncore_pmu_stop(struct perf_event * event,int flags)265 void hisi_uncore_pmu_stop(struct perf_event *event, int flags)
266 {
267 	struct hw_perf_event *hwc = &event->hw;
268 
269 	hisi_uncore_pmu_disable_event(event);
270 	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
271 	hwc->state |= PERF_HES_STOPPED;
272 
273 	if (hwc->state & PERF_HES_UPTODATE)
274 		return;
275 
276 	/* Read hardware counter and update the perf counter statistics */
277 	hisi_uncore_pmu_event_update(event);
278 	hwc->state |= PERF_HES_UPTODATE;
279 }
280 
hisi_uncore_pmu_add(struct perf_event * event,int flags)281 int hisi_uncore_pmu_add(struct perf_event *event, int flags)
282 {
283 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
284 	struct hw_perf_event *hwc = &event->hw;
285 	int idx;
286 
287 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
288 
289 	/* Get an available counter index for counting */
290 	idx = hisi_pmu->ops->get_event_idx(event);
291 	if (idx < 0)
292 		return idx;
293 
294 	event->hw.idx = idx;
295 	hisi_pmu->pmu_events.hw_events[idx] = event;
296 
297 	if (flags & PERF_EF_START)
298 		hisi_uncore_pmu_start(event, PERF_EF_RELOAD);
299 
300 	return 0;
301 }
302 
hisi_uncore_pmu_del(struct perf_event * event,int flags)303 void hisi_uncore_pmu_del(struct perf_event *event, int flags)
304 {
305 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
306 	struct hw_perf_event *hwc = &event->hw;
307 
308 	hisi_uncore_pmu_stop(event, PERF_EF_UPDATE);
309 	hisi_uncore_pmu_clear_event_idx(hisi_pmu, hwc->idx);
310 	perf_event_update_userpage(event);
311 	hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL;
312 }
313 
/*
 * Bring event->count up to date with the hardware counter; all the work
 * is done by hisi_uncore_pmu_event_update().
 */
void hisi_uncore_pmu_read(struct perf_event *event)
{
	hisi_uncore_pmu_event_update(event);
}
319 
hisi_uncore_pmu_enable(struct pmu * pmu)320 void hisi_uncore_pmu_enable(struct pmu *pmu)
321 {
322 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
323 	int enabled = bitmap_weight(hisi_pmu->pmu_events.used_mask,
324 				    hisi_pmu->num_counters);
325 
326 	if (!enabled)
327 		return;
328 
329 	hisi_pmu->ops->start_counters(hisi_pmu);
330 }
331 
hisi_uncore_pmu_disable(struct pmu * pmu)332 void hisi_uncore_pmu_disable(struct pmu *pmu)
333 {
334 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu);
335 
336 	hisi_pmu->ops->stop_counters(hisi_pmu);
337 }
338 
339 /*
340  * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1.
341  * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2]
342  * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID
343  * is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1].
344  */
hisi_read_sccl_and_ccl_id(int * sccl_id,int * ccl_id)345 static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id)
346 {
347 	u64 mpidr = read_cpuid_mpidr();
348 
349 	if (mpidr & MPIDR_MT_BITMASK) {
350 		int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2);
351 
352 		if (sccl_id)
353 			*sccl_id = aff2 >> 3;
354 		if (ccl_id)
355 			*ccl_id = aff2 & 0x7;
356 	} else {
357 		if (sccl_id)
358 			*sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
359 		if (ccl_id)
360 			*ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
361 	}
362 }
363 
364 /*
365  * Check whether the CPU is associated with this uncore PMU
366  */
hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu * hisi_pmu)367 static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
368 {
369 	int sccl_id, ccl_id;
370 
371 	if (hisi_pmu->ccl_id == -1) {
372 		/* If CCL_ID is -1, the PMU only shares the same SCCL */
373 		hisi_read_sccl_and_ccl_id(&sccl_id, NULL);
374 
375 		return sccl_id == hisi_pmu->sccl_id;
376 	}
377 
378 	hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id);
379 
380 	return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id;
381 }
382 
hisi_uncore_pmu_online_cpu(unsigned int cpu,struct hlist_node * node)383 int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
384 {
385 	struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
386 						     node);
387 
388 	if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu))
389 		return 0;
390 
391 	cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus);
392 
393 	/* If another CPU is already managing this PMU, simply return. */
394 	if (hisi_pmu->on_cpu != -1)
395 		return 0;
396 
397 	/* Use this CPU in cpumask for event counting */
398 	hisi_pmu->on_cpu = cpu;
399 
400 	/* Overflow interrupt also should use the same CPU */
401 	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
402 
403 	return 0;
404 }
405 
hisi_uncore_pmu_offline_cpu(unsigned int cpu,struct hlist_node * node)406 int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
407 {
408 	struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
409 						     node);
410 	cpumask_t pmu_online_cpus;
411 	unsigned int target;
412 
413 	if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus))
414 		return 0;
415 
416 	/* Nothing to do if this CPU doesn't own the PMU */
417 	if (hisi_pmu->on_cpu != cpu)
418 		return 0;
419 
420 	/* Give up ownership of the PMU */
421 	hisi_pmu->on_cpu = -1;
422 
423 	/* Choose a new CPU to migrate ownership of the PMU to */
424 	cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus,
425 		    cpu_online_mask);
426 	target = cpumask_any_but(&pmu_online_cpus, cpu);
427 	if (target >= nr_cpu_ids)
428 		return 0;
429 
430 	perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
431 	/* Use this CPU for event counting */
432 	hisi_pmu->on_cpu = target;
433 	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
434 
435 	return 0;
436 }
437