1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2020 Intel Corporation. */
3 
4 /*
5  * Some functions in this program are taken from
6  * Linux kernel samples/bpf/xdpsock* and modified
7  * for use.
8  *
9  * See test_xsk.sh for detailed information on test topology
10  * and prerequisite network setup.
11  *
 * This test program contains two threads, each of which owns a single socket
 * with a unique UMEM. It validates in-order packet delivery and packet content
 * by sending packets from one socket to the other.
15  *
16  * Tests Information:
17  * ------------------
18  * These selftests test AF_XDP SKB and Native/DRV modes using veth
19  * Virtual Ethernet interfaces.
20  *
21  * For each mode, the following tests are run:
22  *    a. nopoll - soft-irq processing
23  *    b. poll - using poll() syscall
24  *    c. Socket Teardown
25  *       Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
26  *       both sockets, then repeat multiple times. Only nopoll mode is used
27  *    d. Bi-directional sockets
28  *       Configure sockets as bi-directional tx/rx sockets, sets up fill and
29  *       completion rings on each socket, tx/rx in both directions. Only nopoll
30  *       mode is used
31  *    e. Statistics
32  *       Trigger some error conditions and ensure that the appropriate statistics
33  *       are incremented. Within this test, the following statistics are tested:
34  *       i.   rx dropped
35  *            Increase the UMEM frame headroom to a value which results in
36  *            insufficient space in the rx buffer for both the packet and the headroom.
37  *       ii.  tx invalid
38  *            Set the 'len' field of tx descriptors to an invalid value (umem frame
39  *            size + 1).
40  *       iii. rx ring full
41  *            Reduce the size of the RX ring to a fraction of the fill ring size.
42  *       iv.  fill queue empty
43  *            Do not populate the fill queue and then try to receive pkts.
 *    f. bpf_link resource persistence
 *       Configure sockets at indexes 0 and 1, run traffic on queue id 0,
 *       then remove the xsk sockets from queue 0 on both veth interfaces and
 *       finally run traffic on queue id 1
48  *
49  * Total tests: 12
50  *
51  * Flow:
52  * -----
53  * - Single process spawns two threads: Tx and Rx
54  * - Each of these two threads attach to a veth interface within their assigned
55  *   namespaces
56  * - Each thread Creates one AF_XDP socket connected to a unique umem for each
57  *   veth interface
58  * - Tx thread Transmits 10k packets from veth<xxxx> to veth<yyyy>
59  * - Rx thread verifies if all 10k packets were received and delivered in-order,
60  *   and have the right content
61  *
62  * Enable/disable packet dump mode:
63  * --------------------------
64  * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
65  * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
66  */
67 
68 #define _GNU_SOURCE
69 #include <fcntl.h>
70 #include <errno.h>
71 #include <getopt.h>
72 #include <asm/barrier.h>
73 #include <linux/if_link.h>
74 #include <linux/if_ether.h>
75 #include <linux/ip.h>
76 #include <linux/udp.h>
77 #include <arpa/inet.h>
78 #include <net/if.h>
79 #include <locale.h>
80 #include <poll.h>
81 #include <pthread.h>
82 #include <signal.h>
83 #include <stdbool.h>
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include <stddef.h>
88 #include <sys/mman.h>
89 #include <sys/resource.h>
90 #include <sys/types.h>
91 #include <sys/queue.h>
92 #include <time.h>
93 #include <unistd.h>
94 #include <stdatomic.h>
95 #include <bpf/xsk.h>
96 #include "xdpxceiver.h"
97 #include "../kselftest.h"
98 
/*
 * Fixed L2-L4 addressing of the two test endpoints. main() hands these to
 * init_iface(): MAC1/IP1/UDP_PORT1 are the destination of the tx interface
 * and the source of the rx interface; MAC2/IP2/UDP_PORT2 are the reverse.
 */
static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
static const char *IP1 = "192.168.100.162";
static const char *IP2 = "192.168.100.161";
static const u16 UDP_PORT1 = 2020;
static const u16 UDP_PORT2 = 2121;
105 
/*
 * Record a failed test result that names the call site (file, function,
 * line) together with the numeric error and its strerror() text, then call
 * ksft_exit_xfail().
 */
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
	const char *reason = strerror(error);

	ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
			      reason);
	ksft_exit_xfail();
}
112 
/* Wrapper around __exit_with_error() that supplies the caller's file, function and line. */
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
114 
/*
 * Emit the PASS line for the current test. The text is derived from the
 * globals configured_mode (non-zero prints "DRV", zero prints "SKB") and
 * test_type. The macro parameter is never used in the expansion; callers
 * invoke it as print_ksft_result().
 */
#define print_ksft_result(void)\
	(ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\
			       test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
			       test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
			       test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
			       test_type == TEST_TYPE_STATS ? "Stats" : "",\
			       test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
122 
/*
 * Fill a buffer with a 32-bit pattern stored in network byte order.
 *
 * @dest: start of the buffer
 * @val:  pattern in host byte order; converted with htonl() before storing
 * @size: buffer length in bytes; only whole 32-bit words are written, so a
 *        trailing remainder of (size % 4) bytes is left untouched
 */
static void memset32_htonl(void *dest, u32 val, u32 size)
{
	u32 *words = (u32 *)dest;
	u32 be_val = htonl(val);
	int off = 0;

	while (off < (size & (~0x3))) {
		words[off >> 2] = be_val;
		off += 4;
	}
}
133 
134 /*
135  * Fold a partial checksum
136  * This function code has been taken from
137  * Linux kernel include/asm-generic/checksum.h
138  */
csum_fold(__u32 csum)139 static __u16 csum_fold(__u32 csum)
140 {
141 	u32 sum = (__force u32)csum;
142 
143 	sum = (sum & 0xffff) + (sum >> 16);
144 	sum = (sum & 0xffff) + (sum >> 16);
145 	return (__force __u16)~sum;
146 }
147 
148 /*
149  * This function code has been taken from
150  * Linux kernel lib/checksum.c
151  */
from64to32(u64 x)152 static u32 from64to32(u64 x)
153 {
154 	/* add up 32-bit and 32-bit for 32+c bit */
155 	x = (x & 0xffffffff) + (x >> 32);
156 	/* add up carry.. */
157 	x = (x & 0xffffffff) + (x >> 32);
158 	return (u32)x;
159 }
160 
161 /*
162  * This function code has been taken from
163  * Linux kernel lib/checksum.c
164  */
csum_tcpudp_nofold(__be32 saddr,__be32 daddr,__u32 len,__u8 proto,__u32 sum)165 static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
166 {
167 	unsigned long long s = (__force u32)sum;
168 
169 	s += (__force u32)saddr;
170 	s += (__force u32)daddr;
171 #ifdef __BIG_ENDIAN__
172 	s += proto + len;
173 #else
174 	s += (proto + len) << 8;
175 #endif
176 	return (__force __u32)from64to32(s);
177 }
178 
/*
 * Compute the final 16-bit checksum: add the pseudo-header to @sum with
 * csum_tcpudp_nofold() and fold the result with csum_fold().
 *
 * This function has been taken from
 * Linux kernel include/asm-generic/checksum.h
 */
static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
{
	__u32 unfolded = csum_tcpudp_nofold(saddr, daddr, len, proto, sum);

	return csum_fold(unfolded);
}
187 
/*
 * Checksum a UDP datagram.
 *
 * @saddr, @daddr: addresses forwarded to the pseudo-header sum
 * @len:           number of bytes to sum, starting at @udp_pkt
 * @proto:         L4 protocol number for the pseudo-header
 * @udp_pkt:       start of the UDP header, read as 16-bit words
 *
 * Returns the folded checksum produced by csum_tcpudp_magic().
 */
static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
{
	u32 acc = 0;
	u32 off = 0;

	/* accumulate the udp header and data, one 16-bit word per step */
	while (off < len) {
		acc += udp_pkt[off >> 1];
		off += 2;
	}

	return csum_tcpudp_magic(saddr, daddr, len, proto, acc);
}
199 
gen_eth_hdr(struct ifobject * ifobject,struct ethhdr * eth_hdr)200 static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr)
201 {
202 	memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN);
203 	memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN);
204 	eth_hdr->h_proto = htons(ETH_P_IP);
205 }
206 
gen_ip_hdr(struct ifobject * ifobject,struct iphdr * ip_hdr)207 static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
208 {
209 	ip_hdr->version = IP_PKT_VER;
210 	ip_hdr->ihl = 0x5;
211 	ip_hdr->tos = IP_PKT_TOS;
212 	ip_hdr->tot_len = htons(IP_PKT_SIZE);
213 	ip_hdr->id = 0;
214 	ip_hdr->frag_off = 0;
215 	ip_hdr->ttl = IPDEFTTL;
216 	ip_hdr->protocol = IPPROTO_UDP;
217 	ip_hdr->saddr = ifobject->src_ip;
218 	ip_hdr->daddr = ifobject->dst_ip;
219 	ip_hdr->check = 0;
220 }
221 
/*
 * Fill in the UDP ports and length, and write the packet payload.
 *
 * @payload:  32-bit value repeated across the data area in network order
 * @pkt:      start of the frame; the data area begins PKT_HDR_SIZE bytes in
 * @ifobject: supplies the source and destination ports
 * @udp_hdr:  UDP header inside @pkt
 */
static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
			struct udphdr *udp_hdr)
{
	void *data_area = pkt + PKT_HDR_SIZE;

	udp_hdr->len = htons(UDP_PKT_SIZE);
	udp_hdr->dest = htons(ifobject->dst_port);
	udp_hdr->source = htons(ifobject->src_port);

	memset32_htonl(data_area, payload, UDP_PKT_DATA_SIZE);
}
230 
gen_udp_csum(struct udphdr * udp_hdr,struct iphdr * ip_hdr)231 static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
232 {
233 	udp_hdr->check = 0;
234 	udp_hdr->check =
235 	    udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
236 }
237 
/*
 * Allocate an xsk_umem_info, create a UMEM over @buffer with
 * xsk_umem__create() and store the result in data->umem_arr[idx].
 *
 * @data:   interface object that receives the new umem
 * @buffer: memory backing the UMEM
 * @size:   size of @buffer in bytes
 * @idx:    slot in data->umem_arr to fill
 *
 * The frame headroom comes from the global frame_headroom; all other UMEM
 * parameters use the libbpf defaults. Any failure ends in exit_with_error().
 */
static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, int idx)
{
	struct xsk_umem_config cfg = {
		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
		.frame_headroom = frame_headroom,
		.flags = XSK_UMEM__DEFAULT_FLAGS
	};
	struct xsk_umem_info *info = calloc(1, sizeof(*info));
	int err;

	if (!info)
		exit_with_error(errno);

	err = xsk_umem__create(&info->umem, buffer, size, &info->fq, &info->cq, &cfg);
	if (err)
		exit_with_error(-err);

	info->buffer = buffer;
	data->umem_arr[idx] = info;
}
263 
xsk_populate_fill_ring(struct xsk_umem_info * umem)264 static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
265 {
266 	int ret, i;
267 	u32 idx = 0;
268 
269 	ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
270 	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
271 		exit_with_error(-ret);
272 	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
273 		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
274 	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
275 }
276 
xsk_configure_socket(struct ifobject * ifobject,int idx)277 static int xsk_configure_socket(struct ifobject *ifobject, int idx)
278 {
279 	struct xsk_socket_config cfg;
280 	struct xsk_socket_info *xsk;
281 	struct xsk_ring_cons *rxr;
282 	struct xsk_ring_prod *txr;
283 	int ret;
284 
285 	xsk = calloc(1, sizeof(struct xsk_socket_info));
286 	if (!xsk)
287 		exit_with_error(errno);
288 
289 	xsk->umem = ifobject->umem;
290 	cfg.rx_size = rxqsize;
291 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
292 	cfg.libbpf_flags = 0;
293 	cfg.xdp_flags = xdp_flags;
294 	cfg.bind_flags = xdp_bind_flags;
295 
296 	if (test_type != TEST_TYPE_BIDI) {
297 		rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL;
298 		txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL;
299 	} else {
300 		rxr = &xsk->rx;
301 		txr = &xsk->tx;
302 	}
303 
304 	ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx,
305 				 ifobject->umem->umem, rxr, txr, &cfg);
306 	if (ret)
307 		return 1;
308 
309 	ifobject->xsk_arr[idx] = xsk;
310 
311 	return 0;
312 }
313 
/*
 * Long options accepted by getopt_long() in parse_command_line().
 *
 * "dump-pkts" takes no argument, matching the short option -D and the
 * usage text.
 * NOTE(review): "queue" maps to 'q', which parse_command_line() does not
 * handle, so --queue currently ends up in the usage/exit path.
 */
static struct option long_options[] = {
	{"interface", required_argument, NULL, 'i'},
	{"queue", optional_argument, NULL, 'q'},
	{"dump-pkts", no_argument, NULL, 'D'},
	{"verbose", no_argument, NULL, 'v'},
	{NULL, 0, NULL, 0}
};
321 
/* Print the command-line help through ksft_print_msg(); @prog is the program name. */
static void usage(const char *prog)
{
	ksft_print_msg("  Usage: %s [OPTIONS]\n"
		       "  Options:\n"
		       "  -i, --interface      Use interface\n"
		       "  -q, --queue=n        Use queue n (default 0)\n"
		       "  -D, --dump-pkts      Dump packets L2 - L5\n"
		       "  -v, --verbose        Verbose output\n",
		       prog);
}
334 
/*
 * Move the calling thread into the network namespace named @nsname.
 *
 * The namespace file is looked up under /var/run/netns/. At most as many
 * characters of @nsname are appended as fit in the local path buffer.
 *
 * Returns the open namespace file descriptor, or -1 without doing anything
 * when @nsname is NULL or empty. open() and setns() failures end in
 * exit_with_error().
 */
static int switch_namespace(const char *nsname)
{
	char path[26] = "/var/run/netns/";
	size_t room;
	int fd;

	if (!nsname || nsname[0] == '\0')
		return -1;

	room = sizeof(path) - strlen(path) - 1;
	strncat(path, nsname, room);

	fd = open(path, O_RDONLY);
	if (fd == -1)
		exit_with_error(errno);

	if (setns(fd, 0) == -1)
		exit_with_error(errno);

	print_verbose("NS switched: %s\n", nsname);

	return fd;
}
356 
validate_interfaces(void)357 static int validate_interfaces(void)
358 {
359 	bool ret = true;
360 
361 	for (int i = 0; i < MAX_INTERFACES; i++) {
362 		if (!strcmp(ifdict[i]->ifname, "")) {
363 			ret = false;
364 			ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
365 		}
366 	}
367 	return ret;
368 }
369 
parse_command_line(int argc,char ** argv)370 static void parse_command_line(int argc, char **argv)
371 {
372 	int option_index, interface_index = 0, c;
373 
374 	opterr = 0;
375 
376 	for (;;) {
377 		c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
378 
379 		if (c == -1)
380 			break;
381 
382 		switch (c) {
383 		case 'i':
384 			if (interface_index == MAX_INTERFACES)
385 				break;
386 			char *sptr, *token;
387 
388 			sptr = strndupa(optarg, strlen(optarg));
389 			memcpy(ifdict[interface_index]->ifname,
390 			       strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
391 			token = strsep(&sptr, ",");
392 			if (token)
393 				memcpy(ifdict[interface_index]->nsname, token,
394 				       MAX_INTERFACES_NAMESPACE_CHARS);
395 			interface_index++;
396 			break;
397 		case 'D':
398 			opt_pkt_dump = true;
399 			break;
400 		case 'v':
401 			opt_verbose = true;
402 			break;
403 		default:
404 			usage(basename(argv[0]));
405 			ksft_exit_xfail();
406 		}
407 	}
408 
409 	if (!validate_interfaces()) {
410 		usage(basename(argv[0]));
411 		ksft_exit_xfail();
412 	}
413 }
414 
/*
 * Return packet number @pkt_nb of @pkt_stream, or NULL when @pkt_nb is past
 * the end of the stream.
 */
static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
{
	return pkt_nb < pkt_stream->nb_pkts ? &pkt_stream->pkts[pkt_nb] : NULL;
}
422 
/*
 * Allocate a stream of @nb_pkts packet descriptions.
 *
 * Packet i gets length @pkt_len, payload value i and the frame address
 * (i % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE.
 *
 * Returns the new stream; allocation failure ends in exit_with_error().
 */
static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
{
	struct pkt_stream *stream = malloc(sizeof(*stream));
	u32 n;

	if (!stream)
		exit_with_error(ENOMEM);

	stream->pkts = calloc(nb_pkts, sizeof(*stream->pkts));
	if (!stream->pkts)
		exit_with_error(ENOMEM);

	stream->nb_pkts = nb_pkts;
	for (n = 0; n < nb_pkts; n++) {
		struct pkt *cur = &stream->pkts[n];

		cur->addr = (n % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE;
		cur->len = pkt_len;
		cur->payload = n;
	}

	return stream;
}
445 
/*
 * Write packet number @pkt_nb of the interface's stream into its UMEM frame.
 * The payload value is @pkt_nb. The Ethernet, IP and UDP headers are built
 * from @ifobject.
 *
 * Returns the stream entry for the packet, or NULL when @pkt_nb is past the
 * end of the stream (nothing is written in that case).
 */
static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
{
	struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
	struct ethhdr *eth;
	struct udphdr *udp;
	struct iphdr *ip;
	void *frame;

	if (!pkt)
		return NULL;

	frame = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
	eth = (struct ethhdr *)frame;
	ip = (struct iphdr *)(frame + sizeof(struct ethhdr));
	udp = (struct udphdr *)(frame + sizeof(struct ethhdr) + sizeof(struct iphdr));

	gen_eth_hdr(ifobject, eth);
	gen_ip_hdr(ifobject, ip);
	gen_udp_hdr(pkt_nb, frame, ifobject, udp);
	/* the checksum reads the addresses, UDP header and payload written above */
	gen_udp_csum(udp, ip);

	return pkt;
}
469 
/*
 * Print the L2-L4 header fields and the first payload word of a packet to
 * stdout.
 *
 * @pkt: start of the Ethernet frame
 * @len: frame length; currently not used
 *
 * The payload word is converted with ntohl() before printing. It is written
 * in network byte order by the sender and validated with ntohl() by
 * is_pkt_valid(), so the dump shows the same sequence number the validation
 * compares.
 */
static void pkt_dump(void *pkt, u32 len)
{
	char s[INET_ADDRSTRLEN];
	struct ethhdr *ethhdr;
	struct udphdr *udphdr;
	struct iphdr *iphdr;
	int payload, i;

	(void)len;

	ethhdr = pkt;
	iphdr = pkt + sizeof(*ethhdr);
	udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr);

	/*extract L2 frame */
	fprintf(stdout, "DEBUG>> L2: dst mac: ");
	for (i = 0; i < ETH_ALEN; i++)
		fprintf(stdout, "%02X", ethhdr->h_dest[i]);

	fprintf(stdout, "\nDEBUG>> L2: src mac: ");
	for (i = 0; i < ETH_ALEN; i++)
		fprintf(stdout, "%02X", ethhdr->h_source[i]);

	/*extract L3 frame */
	fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
	fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
		inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
	fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
		inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
	/*extract L4 frame */
	fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
	fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
	/*extract L5 frame */
	payload = ntohl(*((uint32_t *)(pkt + PKT_HDR_SIZE)));

	fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
	fprintf(stdout, "---------------------------------------\n");
}
506 
is_pkt_valid(struct pkt * pkt,void * buffer,const struct xdp_desc * desc)507 static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *desc)
508 {
509 	void *data = xsk_umem__get_data(buffer, desc->addr);
510 	struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
511 
512 	if (!pkt) {
513 		ksft_test_result_fail("ERROR: [%s] too many packets received\n", __func__);
514 		return false;
515 	}
516 
517 	if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
518 		u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
519 
520 		if (opt_pkt_dump && test_type != TEST_TYPE_STATS)
521 			pkt_dump(data, PKT_SIZE);
522 
523 		if (pkt->len != desc->len) {
524 			ksft_test_result_fail
525 				("ERROR: [%s] expected length [%d], got length [%d]\n",
526 					__func__, pkt->len, desc->len);
527 			return false;
528 		}
529 
530 		if (pkt->payload != seqnum) {
531 			ksft_test_result_fail
532 				("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n",
533 					__func__, pkt->payload, seqnum);
534 			return false;
535 		}
536 	} else {
537 		ksft_print_msg("Invalid frame received: ");
538 		ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
539 			       iphdr->tos);
540 		return false;
541 	}
542 
543 	return true;
544 }
545 
kick_tx(struct xsk_socket_info * xsk)546 static void kick_tx(struct xsk_socket_info *xsk)
547 {
548 	int ret;
549 
550 	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
551 	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN)
552 		return;
553 	exit_with_error(errno);
554 }
555 
complete_pkts(struct xsk_socket_info * xsk,int batch_size)556 static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
557 {
558 	unsigned int rcvd;
559 	u32 idx;
560 
561 	if (!xsk->outstanding_tx)
562 		return;
563 
564 	if (xsk_ring_prod__needs_wakeup(&xsk->tx))
565 		kick_tx(xsk);
566 
567 	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
568 	if (rcvd) {
569 		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
570 		xsk->outstanding_tx -= rcvd;
571 	}
572 }
573 
receive_pkts(struct pkt_stream * pkt_stream,struct xsk_socket_info * xsk,struct pollfd * fds)574 static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk,
575 			 struct pollfd *fds)
576 {
577 	u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkt_count = 0;
578 	struct pkt *pkt;
579 	int ret;
580 
581 	pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
582 	while (pkt) {
583 		rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
584 		if (!rcvd) {
585 			if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
586 				ret = poll(fds, 1, POLL_TMOUT);
587 				if (ret < 0)
588 					exit_with_error(-ret);
589 			}
590 			continue;
591 		}
592 
593 		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
594 		while (ret != rcvd) {
595 			if (ret < 0)
596 				exit_with_error(-ret);
597 			if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
598 				ret = poll(fds, 1, POLL_TMOUT);
599 				if (ret < 0)
600 					exit_with_error(-ret);
601 			}
602 			ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
603 		}
604 
605 		for (i = 0; i < rcvd; i++) {
606 			const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
607 			u64 addr = desc->addr, orig;
608 
609 			orig = xsk_umem__extract_addr(addr);
610 			addr = xsk_umem__add_offset_to_addr(addr);
611 			if (!is_pkt_valid(pkt, xsk->umem->buffer, desc))
612 				return;
613 
614 			*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
615 			pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
616 		}
617 
618 		xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
619 		xsk_ring_cons__release(&xsk->rx, rcvd);
620 	}
621 }
622 
/*
 * Queue the next batch of packets, starting at stream index @pkt_nb, on the
 * tx ring of the interface's socket.
 *
 * At most BATCH_SIZE packets are queued, and never more than remain in the
 * stream. Only as many tx slots are reserved as will be submitted, so the
 * number reserved and the number submitted always agree.
 *
 * Returns the number of packets queued.
 */
static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
{
	struct xsk_socket_info *xsk = ifobject->xsk;
	u32 nb_pkts = ifobject->pkt_stream->nb_pkts;
	u32 remaining = nb_pkts > pkt_nb ? nb_pkts - pkt_nb : 0;
	u32 batch = remaining < BATCH_SIZE ? remaining : BATCH_SIZE;
	u32 i, idx;

	while (xsk_ring_prod__reserve(&xsk->tx, batch, &idx) < batch)
		complete_pkts(xsk, BATCH_SIZE);

	for (i = 0; i < batch; i++) {
		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
		struct pkt *pkt = pkt_generate(ifobject, pkt_nb);

		/* not expected: batch is already clamped to the stream length */
		if (!pkt)
			break;

		tx_desc->addr = pkt->addr;
		tx_desc->len = pkt->len;
		pkt_nb++;
	}

	xsk_ring_prod__submit(&xsk->tx, i);
	/* in the tx-invalid stats test the packets are not counted as outstanding */
	if (stat_test_type != STAT_TEST_TX_INVALID)
		xsk->outstanding_tx += i;
	else if (xsk_ring_prod__needs_wakeup(&xsk->tx))
		kick_tx(xsk);
	complete_pkts(xsk, i);

	return i;
}
652 
wait_for_tx_completion(struct xsk_socket_info * xsk)653 static void wait_for_tx_completion(struct xsk_socket_info *xsk)
654 {
655 	while (xsk->outstanding_tx)
656 		complete_pkts(xsk, BATCH_SIZE);
657 }
658 
send_pkts(struct ifobject * ifobject)659 static void send_pkts(struct ifobject *ifobject)
660 {
661 	struct pollfd fds[MAX_SOCKS] = { };
662 	u32 pkt_cnt = 0;
663 
664 	fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
665 	fds[0].events = POLLOUT;
666 
667 	while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
668 		u32 sent;
669 
670 		if (test_type == TEST_TYPE_POLL) {
671 			int ret;
672 
673 			ret = poll(fds, 1, POLL_TMOUT);
674 			if (ret <= 0)
675 				continue;
676 
677 			if (!(fds[0].revents & POLLOUT))
678 				continue;
679 		}
680 
681 		sent = __send_pkts(ifobject, pkt_cnt);
682 		pkt_cnt += sent;
683 		usleep(10);
684 	}
685 
686 	wait_for_tx_completion(ifobject->xsk);
687 }
688 
rx_stats_are_valid(struct ifobject * ifobject)689 static bool rx_stats_are_valid(struct ifobject *ifobject)
690 {
691 	u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts;
692 	struct xsk_socket *xsk = ifobject->xsk->xsk;
693 	int fd = xsk_socket__fd(xsk);
694 	struct xdp_statistics stats;
695 	socklen_t optlen;
696 	int err;
697 
698 	optlen = sizeof(stats);
699 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
700 	if (err) {
701 		ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
702 				      __func__, -err, strerror(-err));
703 		return true;
704 	}
705 
706 	if (optlen == sizeof(struct xdp_statistics)) {
707 		switch (stat_test_type) {
708 		case STAT_TEST_RX_DROPPED:
709 			xsk_stat = stats.rx_dropped;
710 			break;
711 		case STAT_TEST_TX_INVALID:
712 			return true;
713 		case STAT_TEST_RX_FULL:
714 			xsk_stat = stats.rx_ring_full;
715 			expected_stat -= RX_FULL_RXQSIZE;
716 			break;
717 		case STAT_TEST_RX_FILL_EMPTY:
718 			xsk_stat = stats.rx_fill_ring_empty_descs;
719 			break;
720 		default:
721 			break;
722 		}
723 
724 		if (xsk_stat == expected_stat)
725 			return true;
726 	}
727 
728 	return false;
729 }
730 
tx_stats_validate(struct ifobject * ifobject)731 static void tx_stats_validate(struct ifobject *ifobject)
732 {
733 	struct xsk_socket *xsk = ifobject->xsk->xsk;
734 	int fd = xsk_socket__fd(xsk);
735 	struct xdp_statistics stats;
736 	socklen_t optlen;
737 	int err;
738 
739 	optlen = sizeof(stats);
740 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
741 	if (err) {
742 		ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
743 				      __func__, -err, strerror(-err));
744 		return;
745 	}
746 
747 	if (stats.tx_invalid_descs == ifobject->pkt_stream->nb_pkts)
748 		return;
749 
750 	ksft_test_result_fail("ERROR: [%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
751 			      __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
752 }
753 
thread_common_ops(struct ifobject * ifobject,void * bufs)754 static void thread_common_ops(struct ifobject *ifobject, void *bufs)
755 {
756 	u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
757 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
758 	size_t mmap_sz = umem_sz;
759 	int ctr = 0;
760 	int ret;
761 
762 	ifobject->ns_fd = switch_namespace(ifobject->nsname);
763 
764 	if (test_type == TEST_TYPE_BPF_RES)
765 		mmap_sz *= 2;
766 
767 	bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
768 	if (bufs == MAP_FAILED)
769 		exit_with_error(errno);
770 
771 	while (ctr++ < SOCK_RECONF_CTR) {
772 		xsk_configure_umem(ifobject, bufs, umem_sz, 0);
773 		ifobject->umem = ifobject->umem_arr[0];
774 		ret = xsk_configure_socket(ifobject, 0);
775 		if (!ret)
776 			break;
777 
778 		/* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */
779 		usleep(USLEEP_MAX);
780 		if (ctr >= SOCK_RECONF_CTR)
781 			exit_with_error(-ret);
782 	}
783 
784 	ifobject->umem = ifobject->umem_arr[0];
785 	ifobject->xsk = ifobject->xsk_arr[0];
786 
787 	if (test_type == TEST_TYPE_BPF_RES) {
788 		xsk_configure_umem(ifobject, (u8 *)bufs + umem_sz, umem_sz, 1);
789 		ifobject->umem = ifobject->umem_arr[1];
790 		ret = xsk_configure_socket(ifobject, 1);
791 	}
792 
793 	ifobject->umem = ifobject->umem_arr[0];
794 	ifobject->xsk = ifobject->xsk_arr[0];
795 	print_verbose("Interface [%s] vector [%s]\n",
796 		      ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx");
797 }
798 
testapp_is_test_two_stepped(void)799 static bool testapp_is_test_two_stepped(void)
800 {
801 	return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step;
802 }
803 
testapp_cleanup_xsk_res(struct ifobject * ifobj)804 static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
805 {
806 	if (testapp_is_test_two_stepped()) {
807 		xsk_socket__delete(ifobj->xsk->xsk);
808 		(void)xsk_umem__delete(ifobj->umem->umem);
809 	}
810 }
811 
worker_testapp_validate_tx(void * arg)812 static void *worker_testapp_validate_tx(void *arg)
813 {
814 	struct ifobject *ifobject = (struct ifobject *)arg;
815 	void *bufs = NULL;
816 
817 	if (!second_step)
818 		thread_common_ops(ifobject, bufs);
819 
820 	print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
821 		      ifobject->ifname);
822 	send_pkts(ifobject);
823 
824 	if (stat_test_type == STAT_TEST_TX_INVALID)
825 		tx_stats_validate(ifobject);
826 
827 	testapp_cleanup_xsk_res(ifobject);
828 	pthread_exit(NULL);
829 }
830 
worker_testapp_validate_rx(void * arg)831 static void *worker_testapp_validate_rx(void *arg)
832 {
833 	struct ifobject *ifobject = (struct ifobject *)arg;
834 	struct pollfd fds[MAX_SOCKS] = { };
835 	void *bufs = NULL;
836 
837 	if (!second_step)
838 		thread_common_ops(ifobject, bufs);
839 
840 	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
841 		xsk_populate_fill_ring(ifobject->umem);
842 
843 	fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
844 	fds[0].events = POLLIN;
845 
846 	pthread_barrier_wait(&barr);
847 
848 	if (test_type == TEST_TYPE_STATS)
849 		while (!rx_stats_are_valid(ifobject))
850 			continue;
851 	else
852 		receive_pkts(ifobject->pkt_stream, ifobject->xsk, fds);
853 
854 	if (test_type == TEST_TYPE_TEARDOWN)
855 		print_verbose("Destroying socket\n");
856 
857 	testapp_cleanup_xsk_res(ifobject);
858 	pthread_exit(NULL);
859 }
860 
testapp_validate(void)861 static void testapp_validate(void)
862 {
863 	bool bidi = test_type == TEST_TYPE_BIDI;
864 	bool bpf = test_type == TEST_TYPE_BPF_RES;
865 	struct pkt_stream *pkt_stream;
866 
867 	if (pthread_barrier_init(&barr, NULL, 2))
868 		exit_with_error(errno);
869 
870 	if (stat_test_type == STAT_TEST_TX_INVALID)
871 		pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
872 	else
873 		pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE);
874 	ifdict_tx->pkt_stream = pkt_stream;
875 	ifdict_rx->pkt_stream = pkt_stream;
876 
877 	/*Spawn RX thread */
878 	pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
879 
880 	pthread_barrier_wait(&barr);
881 	if (pthread_barrier_destroy(&barr))
882 		exit_with_error(errno);
883 
884 	/*Spawn TX thread */
885 	pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx);
886 
887 	pthread_join(t1, NULL);
888 	pthread_join(t0, NULL);
889 
890 	if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
891 		print_ksft_result();
892 }
893 
testapp_teardown(void)894 static void testapp_teardown(void)
895 {
896 	int i;
897 
898 	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
899 		print_verbose("Creating socket\n");
900 		testapp_validate();
901 	}
902 
903 	print_ksft_result();
904 }
905 
swap_vectors(struct ifobject * ifobj1,struct ifobject * ifobj2)906 static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
907 {
908 	void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr;
909 	enum fvector tmp_vector = ifobj1->fv.vector;
910 
911 	ifobj1->func_ptr = ifobj2->func_ptr;
912 	ifobj1->fv.vector = ifobj2->fv.vector;
913 
914 	ifobj2->func_ptr = tmp_func_ptr;
915 	ifobj2->fv.vector = tmp_vector;
916 
917 	ifdict_tx = ifobj1;
918 	ifdict_rx = ifobj2;
919 }
920 
testapp_bidi(void)921 static void testapp_bidi(void)
922 {
923 	for (int i = 0; i < MAX_BIDI_ITER; i++) {
924 		print_verbose("Creating socket\n");
925 		testapp_validate();
926 		if (!second_step) {
927 			print_verbose("Switching Tx/Rx vectors\n");
928 			swap_vectors(ifdict[1], ifdict[0]);
929 		}
930 		second_step = true;
931 	}
932 
933 	swap_vectors(ifdict[0], ifdict[1]);
934 
935 	print_ksft_result();
936 }
937 
swap_xsk_res(void)938 static void swap_xsk_res(void)
939 {
940 	xsk_socket__delete(ifdict_tx->xsk->xsk);
941 	xsk_umem__delete(ifdict_tx->umem->umem);
942 	xsk_socket__delete(ifdict_rx->xsk->xsk);
943 	xsk_umem__delete(ifdict_rx->umem->umem);
944 	ifdict_tx->umem = ifdict_tx->umem_arr[1];
945 	ifdict_tx->xsk = ifdict_tx->xsk_arr[1];
946 	ifdict_rx->umem = ifdict_rx->umem_arr[1];
947 	ifdict_rx->xsk = ifdict_rx->xsk_arr[1];
948 }
949 
testapp_bpf_res(void)950 static void testapp_bpf_res(void)
951 {
952 	int i;
953 
954 	for (i = 0; i < MAX_BPF_ITER; i++) {
955 		print_verbose("Creating socket\n");
956 		testapp_validate();
957 		if (!second_step)
958 			swap_xsk_res();
959 		second_step = true;
960 	}
961 
962 	print_ksft_result();
963 }
964 
testapp_stats(void)965 static void testapp_stats(void)
966 {
967 	for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) {
968 		stat_test_type = i;
969 
970 		/* reset defaults */
971 		rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
972 		frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
973 
974 		switch (stat_test_type) {
975 		case STAT_TEST_RX_DROPPED:
976 			frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
977 						XDP_PACKET_HEADROOM - 1;
978 			break;
979 		case STAT_TEST_RX_FULL:
980 			rxqsize = RX_FULL_RXQSIZE;
981 			break;
982 		case STAT_TEST_TX_INVALID:
983 			continue;
984 		default:
985 			break;
986 		}
987 		testapp_validate();
988 	}
989 
990 	print_ksft_result();
991 }
992 
init_iface(struct ifobject * ifobj,const char * dst_mac,const char * src_mac,const char * dst_ip,const char * src_ip,const u16 dst_port,const u16 src_port,enum fvector vector)993 static void init_iface(struct ifobject *ifobj, const char *dst_mac,
994 		       const char *src_mac, const char *dst_ip,
995 		       const char *src_ip, const u16 dst_port,
996 		       const u16 src_port, enum fvector vector)
997 {
998 	struct in_addr ip;
999 
1000 	memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
1001 	memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
1002 
1003 	inet_aton(dst_ip, &ip);
1004 	ifobj->dst_ip = ip.s_addr;
1005 
1006 	inet_aton(src_ip, &ip);
1007 	ifobj->src_ip = ip.s_addr;
1008 
1009 	ifobj->dst_port = dst_port;
1010 	ifobj->src_port = src_port;
1011 
1012 	if (vector == tx) {
1013 		ifobj->fv.vector = tx;
1014 		ifobj->func_ptr = worker_testapp_validate_tx;
1015 		ifdict_tx = ifobj;
1016 	} else {
1017 		ifobj->fv.vector = rx;
1018 		ifobj->func_ptr = worker_testapp_validate_rx;
1019 		ifdict_rx = ifobj;
1020 	}
1021 }
1022 
run_pkt_test(int mode,int type)1023 static void run_pkt_test(int mode, int type)
1024 {
1025 	test_type = type;
1026 
1027 	/* reset defaults after potential previous test */
1028 	xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
1029 	second_step = 0;
1030 	stat_test_type = -1;
1031 	rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
1032 	frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
1033 
1034 	configured_mode = mode;
1035 
1036 	switch (mode) {
1037 	case (TEST_MODE_SKB):
1038 		xdp_flags |= XDP_FLAGS_SKB_MODE;
1039 		break;
1040 	case (TEST_MODE_DRV):
1041 		xdp_flags |= XDP_FLAGS_DRV_MODE;
1042 		break;
1043 	default:
1044 		break;
1045 	}
1046 
1047 	switch (test_type) {
1048 	case TEST_TYPE_STATS:
1049 		testapp_stats();
1050 		break;
1051 	case TEST_TYPE_TEARDOWN:
1052 		testapp_teardown();
1053 		break;
1054 	case TEST_TYPE_BIDI:
1055 		testapp_bidi();
1056 		break;
1057 	case TEST_TYPE_BPF_RES:
1058 		testapp_bpf_res();
1059 		break;
1060 	default:
1061 		testapp_validate();
1062 		break;
1063 	}
1064 }
1065 
ifobject_create(void)1066 static struct ifobject *ifobject_create(void)
1067 {
1068 	struct ifobject *ifobj;
1069 
1070 	ifobj = calloc(1, sizeof(struct ifobject));
1071 	if (!ifobj)
1072 		return NULL;
1073 
1074 	ifobj->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
1075 	if (!ifobj->xsk_arr)
1076 		goto out_xsk_arr;
1077 
1078 	ifobj->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
1079 	if (!ifobj->umem_arr)
1080 		goto out_umem_arr;
1081 
1082 	return ifobj;
1083 
1084 out_umem_arr:
1085 	free(ifobj->xsk_arr);
1086 out_xsk_arr:
1087 	free(ifobj);
1088 	return NULL;
1089 }
1090 
ifobject_delete(struct ifobject * ifobj)1091 static void ifobject_delete(struct ifobject *ifobj)
1092 {
1093 	free(ifobj->umem_arr);
1094 	free(ifobj->xsk_arr);
1095 	free(ifobj);
1096 }
1097 
main(int argc,char ** argv)1098 int main(int argc, char **argv)
1099 {
1100 	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
1101 	int i, j;
1102 
1103 	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
1104 		exit_with_error(errno);
1105 
1106 	for (i = 0; i < MAX_INTERFACES; i++) {
1107 		ifdict[i] = ifobject_create();
1108 		if (!ifdict[i])
1109 			exit_with_error(ENOMEM);
1110 	}
1111 
1112 	setlocale(LC_ALL, "");
1113 
1114 	parse_command_line(argc, argv);
1115 
1116 	init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
1117 	init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
1118 
1119 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
1120 
1121 	for (i = 0; i < TEST_MODE_MAX; i++)
1122 		for (j = 0; j < TEST_TYPE_MAX; j++) {
1123 			run_pkt_test(i, j);
1124 			usleep(USLEEP_MAX);
1125 		}
1126 
1127 	for (i = 0; i < MAX_INTERFACES; i++)
1128 		ifobject_delete(ifdict[i]);
1129 
1130 	ksft_exit_pass();
1131 	return 0;
1132 }
1133