1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018 Facebook */
3 
4 #include <stdlib.h>
5 #include <unistd.h>
6 #include <stdbool.h>
7 #include <string.h>
8 #include <errno.h>
9 #include <assert.h>
10 #include <fcntl.h>
11 #include <linux/bpf.h>
12 #include <linux/err.h>
13 #include <linux/types.h>
14 #include <linux/if_ether.h>
15 #include <sys/types.h>
16 #include <sys/epoll.h>
17 #include <sys/socket.h>
18 #include <netinet/in.h>
19 #include <bpf/bpf.h>
20 #include <bpf/libbpf.h>
21 #include "bpf_rlimit.h"
22 #include "bpf_util.h"
23 
24 #include "test_progs.h"
25 #include "test_select_reuseport_common.h"
26 
27 #define MAX_TEST_NAME 80
28 #define MIN_TCPHDR_LEN 20
29 #define UDPHDR_LEN 8
30 
31 #define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
32 #define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
33 #define REUSEPORT_ARRAY_SIZE 32
34 
35 static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
36 static __u32 expected_results[NR_RESULTS];
37 static int sk_fds[REUSEPORT_ARRAY_SIZE];
38 static int reuseport_array = -1, outer_map = -1;
39 static enum bpf_map_type inner_map_type;
40 static int select_by_skb_data_prog;
41 static int saved_tcp_syncookie = -1;
42 static struct bpf_object *obj;
43 static int saved_tcp_fo = -1;
44 static __u32 index_zero;
45 static int epfd;
46 
/*
 * Socket address that can hold either an IPv4 or an IPv6 address.
 * All members share storage; 'family' gives access to the address family
 * without knowing which of v4/v6 is in use.
 * srv_sa is the address the server sockets are bound to; it is filled in
 * by prepare_sk_fds().
 */
static union sa46 {
	struct sockaddr_in6 v6;
	struct sockaddr_in v4;
	sa_family_t family;
} srv_sa;
52 
/*
 * RET_IF() - for functions returning void.
 * When @condition holds (as reported by CHECK_FAIL()), print
 * "<tag> <formatted message>" and return from the calling function.
 */
#define RET_IF(condition, tag, format...)				\
	do {								\
		if (CHECK_FAIL(condition)) {				\
			printf(tag " " format);				\
			return;						\
		}							\
	} while (0)

/*
 * RET_ERR() - same as RET_IF(), but for functions returning an integer:
 * the calling function returns -1.
 */
#define RET_ERR(condition, tag, format...)				\
	do {								\
		if (CHECK_FAIL(condition)) {				\
			printf(tag " " format);				\
			return -1;					\
		}							\
	} while (0)
66 
create_maps(enum bpf_map_type inner_type)67 static int create_maps(enum bpf_map_type inner_type)
68 {
69 	struct bpf_create_map_attr attr = {};
70 
71 	inner_map_type = inner_type;
72 
73 	/* Creating reuseport_array */
74 	attr.name = "reuseport_array";
75 	attr.map_type = inner_type;
76 	attr.key_size = sizeof(__u32);
77 	attr.value_size = sizeof(__u32);
78 	attr.max_entries = REUSEPORT_ARRAY_SIZE;
79 
80 	reuseport_array = bpf_create_map_xattr(&attr);
81 	RET_ERR(reuseport_array == -1, "creating reuseport_array",
82 		"reuseport_array:%d errno:%d\n", reuseport_array, errno);
83 
84 	/* Creating outer_map */
85 	attr.name = "outer_map";
86 	attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
87 	attr.key_size = sizeof(__u32);
88 	attr.value_size = sizeof(__u32);
89 	attr.max_entries = 1;
90 	attr.inner_map_fd = reuseport_array;
91 	outer_map = bpf_create_map_xattr(&attr);
92 	RET_ERR(outer_map == -1, "creating outer_map",
93 		"outer_map:%d errno:%d\n", outer_map, errno);
94 
95 	return 0;
96 }
97 
prepare_bpf_obj(void)98 static int prepare_bpf_obj(void)
99 {
100 	struct bpf_program *prog;
101 	struct bpf_map *map;
102 	int err;
103 
104 	obj = bpf_object__open("test_select_reuseport_kern.o");
105 	RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
106 		"obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
107 
108 	map = bpf_object__find_map_by_name(obj, "outer_map");
109 	RET_ERR(!map, "find outer_map", "!map\n");
110 	err = bpf_map__reuse_fd(map, outer_map);
111 	RET_ERR(err, "reuse outer_map", "err:%d\n", err);
112 
113 	err = bpf_object__load(obj);
114 	RET_ERR(err, "load bpf_object", "err:%d\n", err);
115 
116 	prog = bpf_program__next(NULL, obj);
117 	RET_ERR(!prog, "get first bpf_program", "!prog\n");
118 	select_by_skb_data_prog = bpf_program__fd(prog);
119 	RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
120 		"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
121 
122 	map = bpf_object__find_map_by_name(obj, "result_map");
123 	RET_ERR(!map, "find result_map", "!map\n");
124 	result_map = bpf_map__fd(map);
125 	RET_ERR(result_map == -1, "get result_map fd",
126 		"result_map:%d\n", result_map);
127 
128 	map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
129 	RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
130 	tmp_index_ovr_map = bpf_map__fd(map);
131 	RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
132 		"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
133 
134 	map = bpf_object__find_map_by_name(obj, "linum_map");
135 	RET_ERR(!map, "find linum_map", "!map\n");
136 	linum_map = bpf_map__fd(map);
137 	RET_ERR(linum_map == -1, "get linum_map fd",
138 		"linum_map:%d\n", linum_map);
139 
140 	map = bpf_object__find_map_by_name(obj, "data_check_map");
141 	RET_ERR(!map, "find data_check_map", "!map\n");
142 	data_check_map = bpf_map__fd(map);
143 	RET_ERR(data_check_map == -1, "get data_check_map fd",
144 		"data_check_map:%d\n", data_check_map);
145 
146 	return 0;
147 }
148 
sa46_init_loopback(union sa46 * sa,sa_family_t family)149 static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
150 {
151 	memset(sa, 0, sizeof(*sa));
152 	sa->family = family;
153 	if (sa->family == AF_INET6)
154 		sa->v6.sin6_addr = in6addr_loopback;
155 	else
156 		sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
157 }
158 
sa46_init_inany(union sa46 * sa,sa_family_t family)159 static void sa46_init_inany(union sa46 *sa, sa_family_t family)
160 {
161 	memset(sa, 0, sizeof(*sa));
162 	sa->family = family;
163 	if (sa->family == AF_INET6)
164 		sa->v6.sin6_addr = in6addr_any;
165 	else
166 		sa->v4.sin_addr.s_addr = INADDR_ANY;
167 }
168 
/*
 * Read the integer stored in the file @sysctl.
 *
 * Returns the parsed value, or -1 (after reporting the failure) when the
 * file cannot be opened or read.  Callers treat any negative return as
 * an error.
 */
static int read_int_sysctl(const char *sysctl)
{
	int fd, ret, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDONLY);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	/* Leave room for the terminator atoi() relies on. */
	ret = read(fd, buf, sizeof(buf) - 1);
	/* Close before the check so the fd is not leaked on failure. */
	saved_errno = errno;
	close(fd);
	RET_ERR(ret <= 0, "read(sysctl)",
		"sysctl:%s ret:%d errno:%d\n", sysctl, ret, saved_errno);

	buf[ret] = '\0';
	return atoi(buf);
}
185 
/*
 * Write the decimal representation of @v to the file @sysctl.
 *
 * Returns 0 on success, -1 (after reporting the failure) when the file
 * cannot be opened or the value is not written in full.
 */
static int write_int_sysctl(const char *sysctl, int v)
{
	int fd, ret, size, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDWR);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	size = snprintf(buf, sizeof(buf), "%d", v);
	ret = write(fd, buf, size);
	/* Close before the check so the fd is not leaked on failure. */
	saved_errno = errno;
	close(fd);
	RET_ERR(ret != size, "write(sysctl)",
		"sysctl:%s ret:%d size:%d errno:%d\n",
		sysctl, ret, size, saved_errno);

	return 0;
}
204 
restore_sysctls(void)205 static void restore_sysctls(void)
206 {
207 	if (saved_tcp_fo != -1)
208 		write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
209 	if (saved_tcp_syncookie != -1)
210 		write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
211 }
212 
enable_fastopen(void)213 static int enable_fastopen(void)
214 {
215 	int fo;
216 
217 	fo = read_int_sysctl(TCP_FO_SYSCTL);
218 	if (fo < 0)
219 		return -1;
220 
221 	return write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
222 }
223 
enable_syncookie(void)224 static int enable_syncookie(void)
225 {
226 	return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
227 }
228 
disable_syncookie(void)229 static int disable_syncookie(void)
230 {
231 	return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
232 }
233 
get_linum(void)234 static long get_linum(void)
235 {
236 	__u32 linum;
237 	int err;
238 
239 	err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
240 	RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
241 		err, errno);
242 
243 	return linum;
244 }
245 
check_data(int type,sa_family_t family,const struct cmd * cmd,int cli_fd)246 static void check_data(int type, sa_family_t family, const struct cmd *cmd,
247 		       int cli_fd)
248 {
249 	struct data_check expected = {}, result;
250 	union sa46 cli_sa;
251 	socklen_t addrlen;
252 	int err;
253 
254 	addrlen = sizeof(cli_sa);
255 	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
256 			  &addrlen);
257 	RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
258 	       err, errno);
259 
260 	err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
261 	RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
262 	       err, errno);
263 
264 	if (type == SOCK_STREAM) {
265 		expected.len = MIN_TCPHDR_LEN;
266 		expected.ip_protocol = IPPROTO_TCP;
267 	} else {
268 		expected.len = UDPHDR_LEN;
269 		expected.ip_protocol = IPPROTO_UDP;
270 	}
271 
272 	if (family == AF_INET6) {
273 		expected.eth_protocol = htons(ETH_P_IPV6);
274 		expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
275 			!srv_sa.v6.sin6_addr.s6_addr32[2] &&
276 			!srv_sa.v6.sin6_addr.s6_addr32[1] &&
277 			!srv_sa.v6.sin6_addr.s6_addr32[0];
278 
279 		memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
280 		       sizeof(cli_sa.v6.sin6_addr));
281 		memcpy(&expected.skb_addrs[4], &in6addr_loopback,
282 		       sizeof(in6addr_loopback));
283 		expected.skb_ports[0] = cli_sa.v6.sin6_port;
284 		expected.skb_ports[1] = srv_sa.v6.sin6_port;
285 	} else {
286 		expected.eth_protocol = htons(ETH_P_IP);
287 		expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
288 
289 		expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
290 		expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
291 		expected.skb_ports[0] = cli_sa.v4.sin_port;
292 		expected.skb_ports[1] = srv_sa.v4.sin_port;
293 	}
294 
295 	if (memcmp(&result, &expected, offsetof(struct data_check,
296 						equal_check_end))) {
297 		printf("unexpected data_check\n");
298 		printf("  result: (0x%x, %u, %u)\n",
299 		       result.eth_protocol, result.ip_protocol,
300 		       result.bind_inany);
301 		printf("expected: (0x%x, %u, %u)\n",
302 		       expected.eth_protocol, expected.ip_protocol,
303 		       expected.bind_inany);
304 		RET_IF(1, "data_check result != expected",
305 		       "bpf_prog_linum:%ld\n", get_linum());
306 	}
307 
308 	RET_IF(!result.hash, "data_check result.hash empty",
309 	       "result.hash:%u", result.hash);
310 
311 	expected.len += cmd ? sizeof(*cmd) : 0;
312 	if (type == SOCK_STREAM)
313 		RET_IF(expected.len > result.len, "expected.len > result.len",
314 		       "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
315 		       expected.len, result.len, get_linum());
316 	else
317 		RET_IF(expected.len != result.len, "expected.len != result.len",
318 		       "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
319 		       expected.len, result.len, get_linum());
320 }
321 
result_to_str(enum result res)322 static const char *result_to_str(enum result res)
323 {
324 	switch (res) {
325 	case DROP_ERR_INNER_MAP:
326 		return "DROP_ERR_INNER_MAP";
327 	case DROP_ERR_SKB_DATA:
328 		return "DROP_ERR_SKB_DATA";
329 	case DROP_ERR_SK_SELECT_REUSEPORT:
330 		return "DROP_ERR_SK_SELECT_REUSEPORT";
331 	case DROP_MISC:
332 		return "DROP_MISC";
333 	case PASS:
334 		return "PASS";
335 	case PASS_ERR_SK_SELECT_REUSEPORT:
336 		return "PASS_ERR_SK_SELECT_REUSEPORT";
337 	default:
338 		return "UNKNOWN";
339 	}
340 }
341 
check_results(void)342 static void check_results(void)
343 {
344 	__u32 results[NR_RESULTS];
345 	__u32 i, broken = 0;
346 	int err;
347 
348 	for (i = 0; i < NR_RESULTS; i++) {
349 		err = bpf_map_lookup_elem(result_map, &i, &results[i]);
350 		RET_IF(err == -1, "lookup_elem(result_map)",
351 		       "i:%u err:%d errno:%d\n", i, err, errno);
352 	}
353 
354 	for (i = 0; i < NR_RESULTS; i++) {
355 		if (results[i] != expected_results[i]) {
356 			broken = i;
357 			break;
358 		}
359 	}
360 
361 	if (i == NR_RESULTS)
362 		return;
363 
364 	printf("unexpected result\n");
365 	printf(" result: [");
366 	printf("%u", results[0]);
367 	for (i = 1; i < NR_RESULTS; i++)
368 		printf(", %u", results[i]);
369 	printf("]\n");
370 
371 	printf("expected: [");
372 	printf("%u", expected_results[0]);
373 	for (i = 1; i < NR_RESULTS; i++)
374 		printf(", %u", expected_results[i]);
375 	printf("]\n");
376 
377 	printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken),
378 	       get_linum());
379 
380 	CHECK_FAIL(true);
381 }
382 
send_data(int type,sa_family_t family,void * data,size_t len,enum result expected)383 static int send_data(int type, sa_family_t family, void *data, size_t len,
384 		     enum result expected)
385 {
386 	union sa46 cli_sa;
387 	int fd, err;
388 
389 	fd = socket(family, type, 0);
390 	RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
391 
392 	sa46_init_loopback(&cli_sa, family);
393 	err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
394 	RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
395 
396 	err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
397 		     sizeof(srv_sa));
398 	RET_ERR(err != len && expected >= PASS,
399 		"sendto()", "family:%u err:%d errno:%d expected:%d\n",
400 		family, err, errno, expected);
401 
402 	return fd;
403 }
404 
do_test(int type,sa_family_t family,struct cmd * cmd,enum result expected)405 static void do_test(int type, sa_family_t family, struct cmd *cmd,
406 		    enum result expected)
407 {
408 	int nev, srv_fd, cli_fd;
409 	struct epoll_event ev;
410 	struct cmd rcv_cmd;
411 	ssize_t nread;
412 
413 	cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
414 			   expected);
415 	if (cli_fd < 0)
416 		return;
417 	nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
418 	RET_IF((nev <= 0 && expected >= PASS) ||
419 	       (nev > 0 && expected < PASS),
420 	       "nev <> expected",
421 	       "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
422 	       nev, expected, type, family,
423 	       cmd ? cmd->reuseport_index : -1,
424 	       cmd ? cmd->pass_on_failure : -1);
425 	check_results();
426 	check_data(type, family, cmd, cli_fd);
427 
428 	if (expected < PASS)
429 		return;
430 
431 	RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
432 	       cmd->reuseport_index != ev.data.u32,
433 	       "check cmd->reuseport_index",
434 	       "cmd:(%u, %u) ev.data.u32:%u\n",
435 	       cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
436 
437 	srv_fd = sk_fds[ev.data.u32];
438 	if (type == SOCK_STREAM) {
439 		int new_fd = accept(srv_fd, NULL, 0);
440 
441 		RET_IF(new_fd == -1, "accept(srv_fd)",
442 		       "ev.data.u32:%u new_fd:%d errno:%d\n",
443 		       ev.data.u32, new_fd, errno);
444 
445 		nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
446 		RET_IF(nread != sizeof(rcv_cmd),
447 		       "recv(new_fd)",
448 		       "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
449 		       ev.data.u32, nread, sizeof(rcv_cmd), errno);
450 
451 		close(new_fd);
452 	} else {
453 		nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
454 		RET_IF(nread != sizeof(rcv_cmd),
455 		       "recv(sk_fds)",
456 		       "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
457 		       ev.data.u32, nread, sizeof(rcv_cmd), errno);
458 	}
459 
460 	close(cli_fd);
461 }
462 
/*
 * Send a request for index 0 and expect one DROP_ERR_INNER_MAP result.
 * test_config() runs this subtest with no_inner_map set.
 */
static void test_err_inner_map(int type, sa_family_t family)
{
	struct cmd req = {
		.reuseport_index = 0,
		.pass_on_failure = 0,
	};

	expected_results[DROP_ERR_INNER_MAP] += 1;

	do_test(type, family, &req, DROP_ERR_INNER_MAP);
}
473 
/*
 * Send no payload at all and expect one DROP_ERR_SKB_DATA result.
 */
static void test_err_skb_data(int type, sa_family_t family)
{
	struct cmd *no_payload = NULL;

	expected_results[DROP_ERR_SKB_DATA] += 1;

	do_test(type, family, no_payload, DROP_ERR_SKB_DATA);
}
479 
/*
 * Ask for index REUSEPORT_ARRAY_SIZE, one past the last socket, with
 * pass_on_failure cleared, and expect one DROP_ERR_SK_SELECT_REUSEPORT
 * result.
 */
static void test_err_sk_select_port(int type, sa_family_t family)
{
	struct cmd req = {
		.reuseport_index = REUSEPORT_ARRAY_SIZE,
		.pass_on_failure = 0,
	};

	expected_results[DROP_ERR_SK_SELECT_REUSEPORT] += 1;

	do_test(type, family, &req, DROP_ERR_SK_SELECT_REUSEPORT);
}
490 
/*
 * Select every index in turn, 0 through REUSEPORT_ARRAY_SIZE - 1, and
 * expect a PASS result for each one.
 */
static void test_pass(int type, sa_family_t family)
{
	struct cmd req;
	int idx = 0;

	req.pass_on_failure = 0;

	while (idx < REUSEPORT_ARRAY_SIZE) {
		expected_results[PASS] += 1;
		req.reuseport_index = idx;
		do_test(type, family, &req, PASS);
		idx++;
	}
}
503 
/*
 * Exercise the syncookie path, where the TCP-SYN and the TCP-ACK are
 * handled by two different sockets:
 *   TCP-SYN: select sk_fds[tmp_index = 1]; tmp_index comes from
 *            tmp_index_ovr_map
 *   TCP-ACK: select sk_fds[reuseport_index = 0]; reuseport_index comes
 *            from cmd.reuseport_index
 *
 * Two PASS results are expected: one for the SYN and one for the ACK
 * that acknowledges the syncookie.  Afterwards tmp_index_ovr_map must
 * hold -1 again.
 *
 * Syncookies are only enabled around do_test() and are always disabled
 * again, so a failing check cannot leak the setting into later subtests.
 */
static void test_syncookie(int type, sa_family_t family)
{
	int err, tmp_index = 1;
	struct cmd cmd = {
		.reuseport_index = 0,
		.pass_on_failure = 0,
	};

	/*
	 * +1 for TCP-SYN and
	 * +1 for the TCP-ACK (ack the syncookie)
	 */
	expected_results[PASS] += 2;

	/* Done before touching the sysctl so a failure needs no undo. */
	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
				  &tmp_index, BPF_ANY);
	RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
	       "err:%d errno:%d\n", err, errno);

	/* write_int_sysctl() has already reported the failure. */
	if (enable_syncookie())
		return;

	do_test(type, family, &cmd, PASS);
	disable_syncookie();

	err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
				  &tmp_index);
	RET_IF(err == -1 || tmp_index != -1,
	       "lookup_elem(tmp_index_ovr_map)",
	       "err:%d errno:%d tmp_index:%d\n",
	       err, errno, tmp_index);
}
538 
/*
 * Ask for index REUSEPORT_ARRAY_SIZE, one past the last socket, with
 * pass_on_failure set, and expect one PASS_ERR_SK_SELECT_REUSEPORT
 * result.
 */
static void test_pass_on_err(int type, sa_family_t family)
{
	struct cmd req = {
		.reuseport_index = REUSEPORT_ARRAY_SIZE,
		.pass_on_failure = 1,
	};

	expected_results[PASS_ERR_SK_SELECT_REUSEPORT]++;

	do_test(type, family, &req, PASS_ERR_SK_SELECT_REUSEPORT);
}
549 
/*
 * Check SO_DETACH_REUSEPORT_BPF.
 *
 * Detaching through sk_fds[0] must succeed; a second detach through
 * sk_fds[1] must fail with ENOENT.  Data sent afterwards must still reach
 * a server socket, while the sum of all result_map counters must stay
 * unchanged.
 *
 * The subtest is skipped when SO_DETACH_REUSEPORT_BPF is not defined at
 * build time.  The client socket is closed on every path once it has
 * been created.
 */
static void test_detach_bpf(int type, sa_family_t family)
{
#ifdef SO_DETACH_REUSEPORT_BPF
	__u32 nr_run_before = 0, nr_run_after = 0, tmp, i;
	struct epoll_event ev;
	int cli_fd, err, nev;
	struct cmd cmd = {};
	int optvalue = 0;

	err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	/* Nothing is attached any more, so this one has to fail. */
	err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == 0 || errno != ENOENT,
	       "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		RET_IF(err == -1, "lookup_elem(result_map)",
		       "i:%u err:%d errno:%d\n", i, err, errno);
		nr_run_before += tmp;
	}

	cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
	if (cli_fd < 0)
		return;

	/*
	 * From here on failures are reported by hand (same output as
	 * RET_IF()) so that cli_fd is closed before returning.
	 */
	nev = epoll_wait(epfd, &ev, 1, 5);
	if (CHECK_FAIL(nev <= 0)) {
		printf("nev <= 0 "
		       "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
		       nev, type, family);
		goto close_cli;
	}

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		if (CHECK_FAIL(err == -1)) {
			printf("lookup_elem(result_map) "
			       "i:%u err:%d errno:%d\n", i, err, errno);
			goto close_cli;
		}
		nr_run_after += tmp;
	}

	if (CHECK_FAIL(nr_run_before != nr_run_after))
		printf("nr_run_before != nr_run_after "
		       "nr_run_before:%u nr_run_after:%u\n",
		       nr_run_before, nr_run_after);

close_cli:
	close(cli_fd);
#else
	test__skip();
#endif
}
602 
prepare_sk_fds(int type,sa_family_t family,bool inany)603 static void prepare_sk_fds(int type, sa_family_t family, bool inany)
604 {
605 	const int first = REUSEPORT_ARRAY_SIZE - 1;
606 	int i, err, optval = 1;
607 	struct epoll_event ev;
608 	socklen_t addrlen;
609 
610 	if (inany)
611 		sa46_init_inany(&srv_sa, family);
612 	else
613 		sa46_init_loopback(&srv_sa, family);
614 	addrlen = sizeof(srv_sa);
615 
616 	/*
617 	 * The sk_fds[] is filled from the back such that the order
618 	 * is exactly opposite to the (struct sock_reuseport *)reuse->socks[].
619 	 */
620 	for (i = first; i >= 0; i--) {
621 		sk_fds[i] = socket(family, type, 0);
622 		RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
623 		       i, sk_fds[i], errno);
624 		err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
625 				 &optval, sizeof(optval));
626 		RET_IF(err == -1, "setsockopt(SO_REUSEPORT)",
627 		       "sk_fds[%d] err:%d errno:%d\n",
628 		       i, err, errno);
629 
630 		if (i == first) {
631 			err = setsockopt(sk_fds[i], SOL_SOCKET,
632 					 SO_ATTACH_REUSEPORT_EBPF,
633 					 &select_by_skb_data_prog,
634 					 sizeof(select_by_skb_data_prog));
635 			RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
636 			       "err:%d errno:%d\n", err, errno);
637 		}
638 
639 		err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
640 		RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
641 		       i, err, errno);
642 
643 		if (type == SOCK_STREAM) {
644 			err = listen(sk_fds[i], 10);
645 			RET_IF(err == -1, "listen()",
646 			       "sk_fds[%d] err:%d errno:%d\n",
647 			       i, err, errno);
648 		}
649 
650 		err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
651 					  BPF_NOEXIST);
652 		RET_IF(err == -1, "update_elem(reuseport_array)",
653 		       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
654 
655 		if (i == first) {
656 			socklen_t addrlen = sizeof(srv_sa);
657 
658 			err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
659 					  &addrlen);
660 			RET_IF(err == -1, "getsockname()",
661 			       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
662 		}
663 	}
664 
665 	epfd = epoll_create(1);
666 	RET_IF(epfd == -1, "epoll_create(1)",
667 	       "epfd:%d errno:%d\n", epfd, errno);
668 
669 	ev.events = EPOLLIN;
670 	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
671 		ev.data.u32 = i;
672 		err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
673 		RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
674 	}
675 }
676 
/*
 * Per-subtest setup: create the server sockets, reset
 * tmp_index_ovr_map[0] to -1 and, unless @no_inner_map is set, install
 * reuseport_array as entry 0 of outer_map.
 */
static void setup_per_test(int type, sa_family_t family, bool inany,
			   bool no_inner_map)
{
	int no_override = -1;
	int rc;

	prepare_sk_fds(type, family, inany);

	rc = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &no_override,
				 BPF_ANY);
	RET_IF(rc == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
	       "err:%d errno:%d\n", rc, errno);

	/* Only install reuseport_array when the subtest wants an inner map. */
	if (!no_inner_map) {
		rc = bpf_map_update_elem(outer_map, &index_zero,
					 &reuseport_array, BPF_ANY);
		RET_IF(rc == -1, "update_elem(outer_map, 0, reuseport_array)",
		       "err:%d errno:%d\n", rc, errno);
	}
}
697 
cleanup_per_test(bool no_inner_map)698 static void cleanup_per_test(bool no_inner_map)
699 {
700 	int i, err, zero = 0;
701 
702 	memset(expected_results, 0, sizeof(expected_results));
703 
704 	for (i = 0; i < NR_RESULTS; i++) {
705 		err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
706 		RET_IF(err, "reset elem in result_map",
707 		       "i:%u err:%d errno:%d\n", i, err, errno);
708 	}
709 
710 	err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
711 	RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n",
712 	       err, errno);
713 
714 	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
715 		close(sk_fds[i]);
716 	close(epfd);
717 
718 	/* Delete reuseport_array from outer_map? */
719 	if (no_inner_map)
720 		return;
721 
722 	err = bpf_map_delete_elem(outer_map, &index_zero);
723 	RET_IF(err == -1, "delete_elem(outer_map)",
724 	       "err:%d errno:%d\n", err, errno);
725 }
726 
cleanup(void)727 static void cleanup(void)
728 {
729 	if (outer_map != -1) {
730 		close(outer_map);
731 		outer_map = -1;
732 	}
733 
734 	if (reuseport_array != -1) {
735 		close(reuseport_array);
736 		reuseport_array = -1;
737 	}
738 
739 	if (obj) {
740 		bpf_object__close(obj);
741 		obj = NULL;
742 	}
743 
744 	memset(expected_results, 0, sizeof(expected_results));
745 }
746 
maptype_str(enum bpf_map_type type)747 static const char *maptype_str(enum bpf_map_type type)
748 {
749 	switch (type) {
750 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
751 		return "reuseport_sockarray";
752 	case BPF_MAP_TYPE_SOCKMAP:
753 		return "sockmap";
754 	case BPF_MAP_TYPE_SOCKHASH:
755 		return "sockhash";
756 	default:
757 		return "unknown";
758 	}
759 }
760 
/*
 * Name of an address family for use in subtest names; "unknown" for
 * anything other than AF_INET and AF_INET6.
 */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";

	return "unknown";
}
772 
/*
 * Protocol name for a socket type, for use in subtest names; "unknown"
 * for anything other than SOCK_STREAM and SOCK_DGRAM.
 */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_STREAM)
		return "TCP";
	if (sotype == SOCK_DGRAM)
		return "UDP";

	return "unknown";
}
784 
785 #define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ }
786 
test_config(int sotype,sa_family_t family,bool inany)787 static void test_config(int sotype, sa_family_t family, bool inany)
788 {
789 	const struct test {
790 		void (*fn)(int sotype, sa_family_t family);
791 		const char *name;
792 		bool no_inner_map;
793 		int need_sotype;
794 	} tests[] = {
795 		TEST_INIT(test_err_inner_map,
796 			  .no_inner_map = true),
797 		TEST_INIT(test_err_skb_data),
798 		TEST_INIT(test_err_sk_select_port),
799 		TEST_INIT(test_pass),
800 		TEST_INIT(test_syncookie,
801 			  .need_sotype = SOCK_STREAM),
802 		TEST_INIT(test_pass_on_err),
803 		TEST_INIT(test_detach_bpf),
804 	};
805 	char s[MAX_TEST_NAME];
806 	const struct test *t;
807 
808 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
809 		if (t->need_sotype && t->need_sotype != sotype)
810 			continue; /* test not compatible with socket type */
811 
812 		snprintf(s, sizeof(s), "%s %s/%s %s %s",
813 			 maptype_str(inner_map_type),
814 			 family_str(family), sotype_str(sotype),
815 			 inany ? "INANY" : "LOOPBACK", t->name);
816 
817 		if (!test__start_subtest(s))
818 			continue;
819 
820 		setup_per_test(sotype, family, inany, t->no_inner_map);
821 		t->fn(sotype, family);
822 		cleanup_per_test(t->no_inner_map);
823 	}
824 }
825 
/* Readable spelling of "bind to the wildcard address" in the table below. */
#define BIND_INANY true

/*
 * Run test_config() for each socket type / family / bind address
 * combination in the table.  Entries without BIND_INANY bind to loopback.
 */
static void test_all(void)
{
	const struct config {
		int sotype;
		sa_family_t family;
		bool inany;
	} configs[] = {
		{ SOCK_STREAM, AF_INET },
		{ SOCK_STREAM, AF_INET, BIND_INANY },
		{ SOCK_STREAM, AF_INET6 },
		{ SOCK_STREAM, AF_INET6, BIND_INANY },
		{ SOCK_DGRAM, AF_INET },
		{ SOCK_DGRAM, AF_INET6 },
	};
	size_t idx;

	for (idx = 0; idx < ARRAY_SIZE(configs); idx++) {
		const struct config *cfg = &configs[idx];

		test_config(cfg->sotype, cfg->family, cfg->inany);
	}
}
847 
/*
 * Run the whole test matrix with @mt as the type of the inner map.
 * The tests only run when both the maps and the BPF object were set up
 * successfully; cleanup() runs in every case.
 */
void test_map_type(enum bpf_map_type mt)
{
	if (!create_maps(mt) && !prepare_bpf_obj())
		test_all();

	cleanup();
}
859 
test_select_reuseport(void)860 void test_select_reuseport(void)
861 {
862 	saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
863 	if (saved_tcp_fo < 0)
864 		goto out;
865 	saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
866 	if (saved_tcp_syncookie < 0)
867 		goto out;
868 
869 	if (enable_fastopen())
870 		goto out;
871 	if (disable_syncookie())
872 		goto out;
873 
874 	test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
875 	test_map_type(BPF_MAP_TYPE_SOCKMAP);
876 	test_map_type(BPF_MAP_TYPE_SOCKHASH);
877 out:
878 	restore_sysctls();
879 }
880