1 // SPDX-License-Identifier: GPL-2.0
2 #include "debug.h"
3 #include "evlist.h"
4 #include "evsel.h"
5 #include "parse-events.h"
6 #include <errno.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <api/fs/fs.h>
10 #include <subcmd/parse-options.h>
11 #include <perf/cpumap.h>
12 #include "cloexec.h"
13 #include "record.h"
14 #include "../perf-sys.h"
15 
/*
 * Callback handed to the probe helpers in this file: it receives the evsel
 * under test and modifies it (the callbacks defined here each set one field
 * of evsel->core.attr) before the event is opened a second time.
 */
typedef void (*setup_probe_fn_t)(struct evsel *evsel);
17 
/*
 * Probe whether the event described by @str can still be opened on @cpu
 * after @fn has modified its attribute.
 *
 * The event string is parsed into a temporary evlist and its first evsel is
 * opened once unmodified, to establish that the event itself is usable.
 * @fn is then applied and the open is attempted again.
 *
 * Return: 0 when both opens succeed, -ENOMEM when the temporary evlist
 * cannot be allocated, -EINVAL when the second open fails with EINVAL, and
 * -EAGAIN for any other failure (parse error, first open failing, or the
 * second open failing with a different errno).
 */
static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
{
	/* Static on purpose: once switched to 0 it stays 0 for later calls. */
	static pid_t pid = -1;
	unsigned long flags = perf_event_open_cloexec_flag();
	struct evlist *evlist = evlist__new();
	struct evsel *evsel;
	int err = -EAGAIN;
	int fd;

	if (!evlist)
		return -ENOMEM;

	if (parse_events(evlist, str, NULL) != 0)
		goto out_delete;

	evsel = evlist__first(evlist);

	/* Baseline open; on EACCES with pid -1, retry exactly once with pid 0. */
	fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
	if (fd < 0 && pid == -1 && errno == EACCES) {
		pid = 0;
		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
	}
	if (fd < 0)
		goto out_delete;
	close(fd);

	/* Apply the feature under test and see whether the open still works. */
	fn(evsel);

	fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
	if (fd >= 0) {
		close(fd);
		err = 0;
	} else if (errno == EINVAL) {
		err = -EINVAL;
	}

out_delete:
	evlist__delete(evlist);
	return err;
}
63 
/*
 * Run perf_do_probe_api() with @fn against a short list of candidate events
 * until one gives a definite answer.
 *
 * The probe is run on the first CPU of the map returned by
 * perf_cpu_map__new(NULL). A candidate that yields -EAGAIN is skipped in
 * favour of the next one; any other non-zero result ends the search.
 *
 * Return: true if a candidate probed successfully, false otherwise
 * (including when the CPU map cannot be created).
 */
static bool perf_probe_api(setup_probe_fn_t fn)
{
	const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
	struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
	int cpu, i;

	if (!cpus)
		return false;
	cpu = cpus->map[0];
	perf_cpu_map__put(cpus);

	for (i = 0; try[i]; i++) {
		int ret = perf_do_probe_api(fn, cpu, try[i]);

		if (ret == 0)
			return true;
		/* Only -EAGAIN means "try the next candidate event". */
		if (ret != -EAGAIN)
			break;
	}

	return false;
}
84 
perf_probe_sample_identifier(struct evsel * evsel)85 static void perf_probe_sample_identifier(struct evsel *evsel)
86 {
87 	evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
88 }
89 
perf_probe_comm_exec(struct evsel * evsel)90 static void perf_probe_comm_exec(struct evsel *evsel)
91 {
92 	evsel->core.attr.comm_exec = 1;
93 }
94 
perf_probe_context_switch(struct evsel * evsel)95 static void perf_probe_context_switch(struct evsel *evsel)
96 {
97 	evsel->core.attr.context_switch = 1;
98 }
99 
perf_can_sample_identifier(void)100 bool perf_can_sample_identifier(void)
101 {
102 	return perf_probe_api(perf_probe_sample_identifier);
103 }
104 
perf_can_comm_exec(void)105 static bool perf_can_comm_exec(void)
106 {
107 	return perf_probe_api(perf_probe_comm_exec);
108 }
109 
perf_can_record_switch_events(void)110 bool perf_can_record_switch_events(void)
111 {
112 	return perf_probe_api(perf_probe_context_switch);
113 }
114 
perf_can_record_cpu_wide(void)115 bool perf_can_record_cpu_wide(void)
116 {
117 	struct perf_event_attr attr = {
118 		.type = PERF_TYPE_SOFTWARE,
119 		.config = PERF_COUNT_SW_CPU_CLOCK,
120 		.exclude_kernel = 1,
121 	};
122 	struct perf_cpu_map *cpus;
123 	int cpu, fd;
124 
125 	cpus = perf_cpu_map__new(NULL);
126 	if (!cpus)
127 		return false;
128 	cpu = cpus->map[0];
129 	perf_cpu_map__put(cpus);
130 
131 	fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
132 	if (fd < 0)
133 		return false;
134 	close(fd);
135 
136 	return true;
137 }
138 
perf_evlist__config(struct evlist * evlist,struct record_opts * opts,struct callchain_param * callchain)139 void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
140 			 struct callchain_param *callchain)
141 {
142 	struct evsel *evsel;
143 	bool use_sample_identifier = false;
144 	bool use_comm_exec;
145 	bool sample_id = opts->sample_id;
146 
147 	/*
148 	 * Set the evsel leader links before we configure attributes,
149 	 * since some might depend on this info.
150 	 */
151 	if (opts->group)
152 		perf_evlist__set_leader(evlist);
153 
154 	if (evlist->core.cpus->map[0] < 0)
155 		opts->no_inherit = true;
156 
157 	use_comm_exec = perf_can_comm_exec();
158 
159 	evlist__for_each_entry(evlist, evsel) {
160 		perf_evsel__config(evsel, opts, callchain);
161 		if (evsel->tracking && use_comm_exec)
162 			evsel->core.attr.comm_exec = 1;
163 	}
164 
165 	if (opts->full_auxtrace) {
166 		/*
167 		 * Need to be able to synthesize and parse selected events with
168 		 * arbitrary sample types, which requires always being able to
169 		 * match the id.
170 		 */
171 		use_sample_identifier = perf_can_sample_identifier();
172 		sample_id = true;
173 	} else if (evlist->core.nr_entries > 1) {
174 		struct evsel *first = evlist__first(evlist);
175 
176 		evlist__for_each_entry(evlist, evsel) {
177 			if (evsel->core.attr.sample_type == first->core.attr.sample_type)
178 				continue;
179 			use_sample_identifier = perf_can_sample_identifier();
180 			break;
181 		}
182 		sample_id = true;
183 	}
184 
185 	if (sample_id) {
186 		evlist__for_each_entry(evlist, evsel)
187 			perf_evsel__set_sample_id(evsel, use_sample_identifier);
188 	}
189 
190 	perf_evlist__set_id_pos(evlist);
191 }
192 
/*
 * Read the kernel/perf_event_max_sample_rate sysctl into *@rate.
 *
 * Return: whatever sysctl__read_int() returns; callers in this file treat
 * any non-zero value as failure.
 */
static int get_max_rate(unsigned int *rate)
{
	/* sysctl__read_int() takes an int pointer, hence the cast. */
	int *value = (int *)rate;

	return sysctl__read_int("kernel/perf_event_max_sample_rate", value);
}
197 
record_opts__config_freq(struct record_opts * opts)198 static int record_opts__config_freq(struct record_opts *opts)
199 {
200 	bool user_freq = opts->user_freq != UINT_MAX;
201 	unsigned int max_rate;
202 
203 	if (opts->user_interval != ULLONG_MAX)
204 		opts->default_interval = opts->user_interval;
205 	if (user_freq)
206 		opts->freq = opts->user_freq;
207 
208 	/*
209 	 * User specified count overrides default frequency.
210 	 */
211 	if (opts->default_interval)
212 		opts->freq = 0;
213 	else if (opts->freq) {
214 		opts->default_interval = opts->freq;
215 	} else {
216 		pr_err("frequency and count are zero, aborting\n");
217 		return -1;
218 	}
219 
220 	if (get_max_rate(&max_rate))
221 		return 0;
222 
223 	/*
224 	 * User specified frequency is over current maximum.
225 	 */
226 	if (user_freq && (max_rate < opts->freq)) {
227 		if (opts->strict_freq) {
228 			pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
229 			       "       Please use -F freq option with a lower value or consider\n"
230 			       "       tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
231 			       max_rate);
232 			return -1;
233 		} else {
234 			pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
235 				   "         The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
236 				   "         The kernel will lower it when perf's interrupts take too long.\n"
237 				   "         Use --strict-freq to disable this throttling, refusing to record.\n",
238 				   max_rate, opts->freq, max_rate);
239 
240 			opts->freq = max_rate;
241 		}
242 	}
243 
244 	/*
245 	 * Default frequency is over current maximum.
246 	 */
247 	if (max_rate < opts->freq) {
248 		pr_warning("Lowering default frequency rate to %u.\n"
249 			   "Please consider tweaking "
250 			   "/proc/sys/kernel/perf_event_max_sample_rate.\n",
251 			   max_rate);
252 		opts->freq = max_rate;
253 	}
254 
255 	return 0;
256 }
257 
/*
 * Validate and finalize @opts. At present this only resolves the sampling
 * frequency/period settings.
 *
 * Return: the result of record_opts__config_freq().
 */
int record_opts__config(struct record_opts *opts)
{
	int err = record_opts__config_freq(opts);

	return err;
}
262 
perf_evlist__can_select_event(struct evlist * evlist,const char * str)263 bool perf_evlist__can_select_event(struct evlist *evlist, const char *str)
264 {
265 	struct evlist *temp_evlist;
266 	struct evsel *evsel;
267 	int err, fd, cpu;
268 	bool ret = false;
269 	pid_t pid = -1;
270 
271 	temp_evlist = evlist__new();
272 	if (!temp_evlist)
273 		return false;
274 
275 	err = parse_events(temp_evlist, str, NULL);
276 	if (err)
277 		goto out_delete;
278 
279 	evsel = evlist__last(temp_evlist);
280 
281 	if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
282 		struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
283 
284 		cpu =  cpus ? cpus->map[0] : 0;
285 		perf_cpu_map__put(cpus);
286 	} else {
287 		cpu = evlist->core.cpus->map[0];
288 	}
289 
290 	while (1) {
291 		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1,
292 					 perf_event_open_cloexec_flag());
293 		if (fd < 0) {
294 			if (pid == -1 && errno == EACCES) {
295 				pid = 0;
296 				continue;
297 			}
298 			goto out_delete;
299 		}
300 		break;
301 	}
302 	close(fd);
303 	ret = true;
304 
305 out_delete:
306 	evlist__delete(temp_evlist);
307 	return ret;
308 }
309 
/*
 * Option callback that parses a sampling frequency into
 * ((struct record_opts *)opt->value)->user_freq.
 *
 * @str is either "max" (case-insensitive), which selects the current value
 * of the perf_event_max_sample_rate sysctl, or a non-negative decimal
 * number.
 *
 * Return: 0 on success, -EINVAL when @str is NULL or not a valid frequency,
 * -1 when "max" was requested but the sysctl could not be read.
 */
int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
{
	unsigned int freq;
	struct record_opts *opts = opt->value;

	if (!str)
		return -EINVAL;

	if (strcasecmp(str, "max") == 0) {
		if (get_max_rate(&freq)) {
			pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
			return -1;
		}
		/* freq is unsigned, so print it with %u rather than %d. */
		pr_info("info: Using a maximum frequency rate of %'u Hz\n", freq);
	} else {
		char *end;
		long long val;

		/*
		 * Parse strictly instead of using atoi(): atoi() silently
		 * accepts garbage and trailing junk, has undefined behaviour
		 * on overflow, and maps "-1" to UINT_MAX, which
		 * record_opts__config_freq() treats as "no frequency given".
		 */
		errno = 0;
		val = strtoll(str, &end, 10);
		if (end == str || *end != '\0' || errno == ERANGE ||
		    val < 0 || val >= UINT_MAX) {
			pr_err("invalid frequency: '%s'\n", str);
			return -EINVAL;
		}
		freq = (unsigned int)val;
	}

	opts->user_freq = freq;
	return 0;
}
331