1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
3  */
/* Help text printed by -h/--help and on invalid option combinations */
static const char *__doc__ =
"XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n"
"Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n"
"Valid specification for CPUMAP BPF program:\n"
"  --mprog-name/-e pass (use built-in XDP_PASS program)\n"
"  --mprog-name/-e drop (use built-in XDP_DROP program)\n"
"  --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n"
"  Custom CPUMAP BPF program:\n"
"    --mprog-filename/-f <filename> --mprog-name/-e <program>\n"
"    Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n"
"    to configure DEVMAP in BPF object <filename>\n";
15 
16 #include <errno.h>
17 #include <signal.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <stdbool.h>
21 #include <string.h>
22 #include <unistd.h>
23 #include <locale.h>
24 #include <sys/resource.h>
25 #include <sys/sysinfo.h>
26 #include <getopt.h>
27 #include <net/if.h>
28 #include <time.h>
29 #include <linux/limits.h>
30 #include <arpa/inet.h>
31 #include <linux/if_link.h>
32 #include <bpf/bpf.h>
33 #include <bpf/libbpf.h>
34 #include "bpf_util.h"
35 #include "xdp_sample_user.h"
36 #include "xdp_redirect_cpu.skel.h"
37 
/* File descriptors of the skeleton's BPF maps; filled in by main()
 * after the skeleton is loaded.
 */
static int map_fd;	/* cpu_map: CPUMAP (cpu -> struct bpf_cpumap_val) */
static int avail_fd;	/* cpus_available: CPUs the BPF prog may pick from */
static int count_fd;	/* cpus_count: number of configured CPUs (key 0) */

/* Default set of counters the sample prints each interval; extended by
 * the -s and -r command line options.
 */
static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
		  SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT |
		  SAMPLE_EXCEPTION_CNT;

DEFINE_SAMPLE_INIT(xdp_redirect_cpu);
47 
/* Long option table for getopt_long(); each short-option letter matches
 * the optstring passed in main().
 */
static const struct option long_options[] = {
	{ "help", no_argument, NULL, 'h' },
	{ "dev", required_argument, NULL, 'd' },
	{ "skb-mode", no_argument, NULL, 'S' },
	{ "progname", required_argument, NULL, 'p' },
	{ "qsize", required_argument, NULL, 'q' },
	{ "cpu", required_argument, NULL, 'c' },
	{ "stress-mode", no_argument, NULL, 'x' },
	{ "force", no_argument, NULL, 'F' },
	{ "interval", required_argument, NULL, 'i' },
	{ "verbose", no_argument, NULL, 'v' },
	{ "stats", no_argument, NULL, 's' },
	{ "mprog-name", required_argument, NULL, 'e' },
	{ "mprog-filename", required_argument, NULL, 'f' },
	{ "redirect-device", required_argument, NULL, 'r' },
	{ "redirect-map", required_argument, NULL, 'm' },
	{}
};
66 
print_avail_progs(struct bpf_object * obj)67 static void print_avail_progs(struct bpf_object *obj)
68 {
69 	struct bpf_program *pos;
70 
71 	printf(" Programs to be used for -p/--progname:\n");
72 	bpf_object__for_each_program(pos, obj) {
73 		if (bpf_program__is_xdp(pos)) {
74 			if (!strncmp(bpf_program__name(pos), "xdp_prognum",
75 				     sizeof("xdp_prognum") - 1))
76 				printf(" %s\n", bpf_program__name(pos));
77 		}
78 	}
79 }
80 
/* Print the generic sample usage/help text, then the list of selectable
 * XDP programs found in @obj.
 */
static void usage(char *argv[], const struct option *long_options,
		  const char *doc, int mask, bool error, struct bpf_object *obj)
{
	sample_usage(argv, long_options, doc, mask, error);
	print_avail_progs(obj);
}
87 
/* Insert (or replace) the cpumap entry for @cpu and publish the CPU in
 * the cpus_available map at slot @avail_idx so BPF programs can pick it.
 *
 * @cpu:       CPU number to add to the CPUMAP
 * @value:     queue size and optional per-entry BPF program fd
 * @avail_idx: index in cpus_available to record @cpu under
 * @new:       true when adding a new entry (bumps cpus_count),
 *             false when replacing/updating an existing one
 *
 * Return: 0 on success, negative error code on failure.
 */
static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
			    __u32 avail_idx, bool new)
{
	__u32 curr_cpus_count = 0;
	__u32 key = 0;
	int ret;

	/* Add a CPU entry to cpumap, as this allocate a cpu entry in
	 * the kernel for the cpu.
	 */
	ret = bpf_map_update_elem(map_fd, &cpu, value, 0);
	if (ret < 0) {
		fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno));
		return ret;
	}

	/* Inform bpf_prog's that a new CPU is available to select
	 * from via some control maps.
	 */
	ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0);
	if (ret < 0) {
		fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno));
		return ret;
	}

	/* When not replacing/updating existing entry, bump the count */
	ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count);
	if (ret < 0) {
		fprintf(stderr, "Failed reading curr cpus_count: %s\n",
			strerror(errno));
		return ret;
	}
	if (new) {
		curr_cpus_count++;
		ret = bpf_map_update_elem(count_fd, &key,
					  &curr_cpus_count, 0);
		if (ret < 0) {
			fprintf(stderr, "Failed write curr cpus_count: %s\n",
				strerror(errno));
			return ret;
		}
	}

	/* qsize is __u32, so print with %u (was %d: signed/unsigned
	 * format mismatch, undefined behavior per the C standard).
	 */
	printf("%s CPU: %u as idx: %u qsize: %u cpumap_prog_fd: %d (cpus_count: %u)\n",
	       new ? "Add new" : "Replace", cpu, avail_idx,
	       value->qsize, value->bpf_prog.fd, curr_cpus_count);

	return 0;
}
137 
138 /* CPUs are zero-indexed. Thus, add a special sentinel default value
139  * in map cpus_available to mark CPU index'es not configured
140  */
mark_cpus_unavailable(void)141 static int mark_cpus_unavailable(void)
142 {
143 	int ret, i, n_cpus = libbpf_num_possible_cpus();
144 	__u32 invalid_cpu = n_cpus;
145 
146 	for (i = 0; i < n_cpus; i++) {
147 		ret = bpf_map_update_elem(avail_fd, &i,
148 					  &invalid_cpu, 0);
149 		if (ret < 0) {
150 			fprintf(stderr, "Failed marking CPU unavailable: %s\n",
151 				strerror(errno));
152 			return ret;
153 		}
154 	}
155 
156 	return 0;
157 }
158 
159 /* Stress cpumap management code by concurrently changing underlying cpumap */
stress_cpumap(void * ctx)160 static void stress_cpumap(void *ctx)
161 {
162 	struct bpf_cpumap_val *value = ctx;
163 
164 	/* Changing qsize will cause kernel to free and alloc a new
165 	 * bpf_cpu_map_entry, with an associated/complicated tear-down
166 	 * procedure.
167 	 */
168 	value->qsize = 1024;
169 	create_cpu_entry(1, value, 0, false);
170 	value->qsize = 8;
171 	create_cpu_entry(1, value, 0, false);
172 	value->qsize = 16000;
173 	create_cpu_entry(1, value, 0, false);
174 }
175 
set_cpumap_prog(struct xdp_redirect_cpu * skel,const char * redir_interface,const char * redir_map,const char * mprog_filename,const char * mprog_name)176 static int set_cpumap_prog(struct xdp_redirect_cpu *skel,
177 			   const char *redir_interface, const char *redir_map,
178 			   const char *mprog_filename, const char *mprog_name)
179 {
180 	if (mprog_filename) {
181 		struct bpf_program *prog;
182 		struct bpf_object *obj;
183 		int ret;
184 
185 		if (!mprog_name) {
186 			fprintf(stderr, "BPF program not specified for file %s\n",
187 				mprog_filename);
188 			goto end;
189 		}
190 		if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) {
191 			fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n",
192 				redir_interface ? "device" : "map", redir_interface ? "map" : "device");
193 			goto end;
194 		}
195 
196 		/* Custom BPF program */
197 		obj = bpf_object__open_file(mprog_filename, NULL);
198 		if (!obj) {
199 			ret = -errno;
200 			fprintf(stderr, "Failed to bpf_prog_load_xattr: %s\n",
201 				strerror(errno));
202 			return ret;
203 		}
204 
205 		ret = bpf_object__load(obj);
206 		if (ret < 0) {
207 			ret = -errno;
208 			fprintf(stderr, "Failed to bpf_object__load: %s\n",
209 				strerror(errno));
210 			return ret;
211 		}
212 
213 		if (redir_map) {
214 			int err, redir_map_fd, ifindex_out, key = 0;
215 
216 			redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
217 			if (redir_map_fd < 0) {
218 				fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n",
219 					strerror(errno));
220 				return redir_map_fd;
221 			}
222 
223 			ifindex_out = if_nametoindex(redir_interface);
224 			if (!ifindex_out)
225 				ifindex_out = strtoul(redir_interface, NULL, 0);
226 			if (!ifindex_out) {
227 				fprintf(stderr, "Bad interface name or index\n");
228 				return -EINVAL;
229 			}
230 
231 			err = bpf_map_update_elem(redir_map_fd, &key, &ifindex_out, 0);
232 			if (err < 0)
233 				return err;
234 		}
235 
236 		prog = bpf_object__find_program_by_name(obj, mprog_name);
237 		if (!prog) {
238 			ret = -errno;
239 			fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n",
240 				strerror(errno));
241 			return ret;
242 		}
243 
244 		return bpf_program__fd(prog);
245 	} else {
246 		if (mprog_name) {
247 			if (redir_interface || redir_map) {
248 				fprintf(stderr, "Need to specify --mprog-filename/-f\n");
249 				goto end;
250 			}
251 			if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) {
252 				/* Use built-in pass/drop programs */
253 				return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass)
254 					: bpf_program__fd(skel->progs.xdp_redirect_cpu_drop);
255 			} else {
256 				fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n",
257 					mprog_name);
258 				goto end;
259 			}
260 		} else {
261 			if (redir_map) {
262 				fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and"
263 					" --redirect-device with --redirect-map\n");
264 				goto end;
265 			}
266 			if (redir_interface) {
267 				/* Use built-in devmap redirect */
268 				struct bpf_devmap_val val = {};
269 				int ifindex_out, err;
270 				__u32 key = 0;
271 
272 				if (!redir_interface)
273 					return 0;
274 
275 				ifindex_out = if_nametoindex(redir_interface);
276 				if (!ifindex_out)
277 					ifindex_out = strtoul(redir_interface, NULL, 0);
278 				if (!ifindex_out) {
279 					fprintf(stderr, "Bad interface name or index\n");
280 					return -EINVAL;
281 				}
282 
283 				if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) {
284 					printf("Get interface %d mac failed\n", ifindex_out);
285 					return -EINVAL;
286 				}
287 
288 				val.ifindex = ifindex_out;
289 				val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog);
290 				err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0);
291 				if (err < 0)
292 					return -errno;
293 
294 				return bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap);
295 			}
296 		}
297 	}
298 
299 	/* Disabled */
300 	return 0;
301 end:
302 	fprintf(stderr, "Invalid options for CPUMAP BPF program\n");
303 	return -EINVAL;
304 }
305 
main(int argc,char ** argv)306 int main(int argc, char **argv)
307 {
308 	const char *redir_interface = NULL, *redir_map = NULL;
309 	const char *mprog_filename = NULL, *mprog_name = NULL;
310 	struct xdp_redirect_cpu *skel;
311 	struct bpf_map_info info = {};
312 	char ifname_buf[IF_NAMESIZE];
313 	struct bpf_cpumap_val value;
314 	__u32 infosz = sizeof(info);
315 	int ret = EXIT_FAIL_OPTION;
316 	unsigned long interval = 2;
317 	bool stress_mode = false;
318 	struct bpf_program *prog;
319 	const char *prog_name;
320 	bool generic = false;
321 	bool force = false;
322 	int added_cpus = 0;
323 	bool error = true;
324 	int longindex = 0;
325 	int add_cpu = -1;
326 	int ifindex = -1;
327 	int *cpu, i, opt;
328 	char *ifname;
329 	__u32 qsize;
330 	int n_cpus;
331 
332 	n_cpus = libbpf_num_possible_cpus();
333 
334 	/* Notice: Choosing the queue size is very important when CPU is
335 	 * configured with power-saving states.
336 	 *
337 	 * If deepest state take 133 usec to wakeup from (133/10^6). When link
338 	 * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can
339 	 * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) =
340 	 * 166250 bytes. With MTU size packets this is 110 packets, and with
341 	 * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets.
342 	 *
343 	 * Setting default cpumap queue to 2048 as worst-case (small packet)
344 	 * should be +64 packet due kthread wakeup call (due to xdp_do_flush)
345 	 * worst-case is 2043 packets.
346 	 *
347 	 * Sysadm can configured system to avoid deep-sleep via:
348 	 *   tuned-adm profile network-latency
349 	 */
350 	qsize = 2048;
351 
352 	skel = xdp_redirect_cpu__open();
353 	if (!skel) {
354 		fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n",
355 			strerror(errno));
356 		ret = EXIT_FAIL_BPF;
357 		goto end;
358 	}
359 
360 	ret = sample_init_pre_load(skel);
361 	if (ret < 0) {
362 		fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
363 		ret = EXIT_FAIL_BPF;
364 		goto end_destroy;
365 	}
366 
367 	if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) {
368 		fprintf(stderr, "Failed to set max entries for cpu_map map: %s",
369 			strerror(errno));
370 		ret = EXIT_FAIL_BPF;
371 		goto end_destroy;
372 	}
373 
374 	if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) {
375 		fprintf(stderr, "Failed to set max entries for cpus_available map: %s",
376 			strerror(errno));
377 		ret = EXIT_FAIL_BPF;
378 		goto end_destroy;
379 	}
380 
381 	cpu = calloc(n_cpus, sizeof(int));
382 	if (!cpu) {
383 		fprintf(stderr, "Failed to allocate cpu array\n");
384 		goto end_destroy;
385 	}
386 
387 	prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs;
388 	while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh",
389 				  long_options, &longindex)) != -1) {
390 		switch (opt) {
391 		case 'd':
392 			if (strlen(optarg) >= IF_NAMESIZE) {
393 				fprintf(stderr, "-d/--dev name too long\n");
394 				goto end_cpu;
395 			}
396 			ifname = (char *)&ifname_buf;
397 			safe_strncpy(ifname, optarg, sizeof(ifname));
398 			ifindex = if_nametoindex(ifname);
399 			if (!ifindex)
400 				ifindex = strtoul(optarg, NULL, 0);
401 			if (!ifindex) {
402 				fprintf(stderr, "Bad interface index or name (%d): %s\n",
403 					errno, strerror(errno));
404 				usage(argv, long_options, __doc__, mask, true, skel->obj);
405 				goto end_cpu;
406 			}
407 			break;
408 		case 's':
409 			mask |= SAMPLE_REDIRECT_MAP_CNT;
410 			break;
411 		case 'i':
412 			interval = strtoul(optarg, NULL, 0);
413 			break;
414 		case 'S':
415 			generic = true;
416 			break;
417 		case 'x':
418 			stress_mode = true;
419 			break;
420 		case 'p':
421 			/* Selecting eBPF prog to load */
422 			prog_name = optarg;
423 			prog = bpf_object__find_program_by_name(skel->obj,
424 								prog_name);
425 			if (!prog) {
426 				fprintf(stderr,
427 					"Failed to find program %s specified by"
428 					" option -p/--progname\n",
429 					prog_name);
430 				print_avail_progs(skel->obj);
431 				goto end_cpu;
432 			}
433 			break;
434 		case 'f':
435 			mprog_filename = optarg;
436 			break;
437 		case 'e':
438 			mprog_name = optarg;
439 			break;
440 		case 'r':
441 			redir_interface = optarg;
442 			mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI;
443 			break;
444 		case 'm':
445 			redir_map = optarg;
446 			break;
447 		case 'c':
448 			/* Add multiple CPUs */
449 			add_cpu = strtoul(optarg, NULL, 0);
450 			if (add_cpu >= n_cpus) {
451 				fprintf(stderr,
452 				"--cpu nr too large for cpumap err (%d):%s\n",
453 					errno, strerror(errno));
454 				usage(argv, long_options, __doc__, mask, true, skel->obj);
455 				goto end_cpu;
456 			}
457 			cpu[added_cpus++] = add_cpu;
458 			break;
459 		case 'q':
460 			qsize = strtoul(optarg, NULL, 0);
461 			break;
462 		case 'F':
463 			force = true;
464 			break;
465 		case 'v':
466 			sample_switch_mode();
467 			break;
468 		case 'h':
469 			error = false;
470 		default:
471 			usage(argv, long_options, __doc__, mask, error, skel->obj);
472 			goto end_cpu;
473 		}
474 	}
475 
476 	ret = EXIT_FAIL_OPTION;
477 	if (ifindex == -1) {
478 		fprintf(stderr, "Required option --dev missing\n");
479 		usage(argv, long_options, __doc__, mask, true, skel->obj);
480 		goto end_cpu;
481 	}
482 
483 	if (add_cpu == -1) {
484 		fprintf(stderr, "Required option --cpu missing\n"
485 				"Specify multiple --cpu option to add more\n");
486 		usage(argv, long_options, __doc__, mask, true, skel->obj);
487 		goto end_cpu;
488 	}
489 
490 	skel->rodata->from_match[0] = ifindex;
491 	if (redir_interface)
492 		skel->rodata->to_match[0] = if_nametoindex(redir_interface);
493 
494 	ret = xdp_redirect_cpu__load(skel);
495 	if (ret < 0) {
496 		fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n",
497 			strerror(errno));
498 		goto end_cpu;
499 	}
500 
501 	ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz);
502 	if (ret < 0) {
503 		fprintf(stderr, "Failed bpf_obj_get_info_by_fd for cpumap: %s\n",
504 			strerror(errno));
505 		goto end_cpu;
506 	}
507 
508 	skel->bss->cpumap_map_id = info.id;
509 
510 	map_fd = bpf_map__fd(skel->maps.cpu_map);
511 	avail_fd = bpf_map__fd(skel->maps.cpus_available);
512 	count_fd = bpf_map__fd(skel->maps.cpus_count);
513 
514 	ret = mark_cpus_unavailable();
515 	if (ret < 0) {
516 		fprintf(stderr, "Unable to mark CPUs as unavailable\n");
517 		goto end_cpu;
518 	}
519 
520 	ret = sample_init(skel, mask);
521 	if (ret < 0) {
522 		fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
523 		ret = EXIT_FAIL;
524 		goto end_cpu;
525 	}
526 
527 	value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map,
528 					    mprog_filename, mprog_name);
529 	if (value.bpf_prog.fd < 0) {
530 		fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n",
531 			strerror(-value.bpf_prog.fd));
532 		usage(argv, long_options, __doc__, mask, true, skel->obj);
533 		ret = EXIT_FAIL_BPF;
534 		goto end_cpu;
535 	}
536 	value.qsize = qsize;
537 
538 	for (i = 0; i < added_cpus; i++) {
539 		if (create_cpu_entry(cpu[i], &value, i, true) < 0) {
540 			fprintf(stderr, "Cannot proceed, exiting\n");
541 			usage(argv, long_options, __doc__, mask, true, skel->obj);
542 			goto end_cpu;
543 		}
544 	}
545 
546 	ret = EXIT_FAIL_XDP;
547 	if (sample_install_xdp(prog, ifindex, generic, force) < 0)
548 		goto end_cpu;
549 
550 	ret = sample_run(interval, stress_mode ? stress_cpumap : NULL, &value);
551 	if (ret < 0) {
552 		fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
553 		ret = EXIT_FAIL;
554 		goto end_cpu;
555 	}
556 	ret = EXIT_OK;
557 end_cpu:
558 	free(cpu);
559 end_destroy:
560 	xdp_redirect_cpu__destroy(skel);
561 end:
562 	sample_exit(ret);
563 }
564