1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 
5 #include <assert.h>
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <limits.h>
9 #include <string.h>
10 #include <stdarg.h>
11 #include <stdbool.h>
12 #include <stdint.h>
13 #include <inttypes.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <strings.h>
17 #include <unistd.h>
18 #include <time.h>
19 
20 #include <sys/ioctl.h>
21 #include <sys/socket.h>
22 #include <sys/types.h>
23 #include <sys/wait.h>
24 
25 #include <netdb.h>
26 #include <netinet/in.h>
27 
28 #include <linux/tcp.h>
29 #include <linux/sockios.h>
30 
31 #ifndef IPPROTO_MPTCP
32 #define IPPROTO_MPTCP 262
33 #endif
34 #ifndef SOL_MPTCP
35 #define SOL_MPTCP 284
36 #endif
37 
38 static int pf = AF_INET;
39 static int proto_tx = IPPROTO_MPTCP;
40 static int proto_rx = IPPROTO_MPTCP;
41 
die_perror(const char * msg)42 static void die_perror(const char *msg)
43 {
44 	perror(msg);
45 	exit(1);
46 }
47 
die_usage(int r)48 static void die_usage(int r)
49 {
50 	fprintf(stderr, "Usage: mptcp_inq [-6] [ -t tcp|mptcp ] [ -r tcp|mptcp]\n");
51 	exit(r);
52 }
53 
xerror(const char * fmt,...)54 static void xerror(const char *fmt, ...)
55 {
56 	va_list ap;
57 
58 	va_start(ap, fmt);
59 	vfprintf(stderr, fmt, ap);
60 	va_end(ap);
61 	fputc('\n', stderr);
62 	exit(1);
63 }
64 
getxinfo_strerr(int err)65 static const char *getxinfo_strerr(int err)
66 {
67 	if (err == EAI_SYSTEM)
68 		return strerror(errno);
69 
70 	return gai_strerror(err);
71 }
72 
xgetaddrinfo(const char * node,const char * service,const struct addrinfo * hints,struct addrinfo ** res)73 static void xgetaddrinfo(const char *node, const char *service,
74 			 const struct addrinfo *hints,
75 			 struct addrinfo **res)
76 {
77 	int err = getaddrinfo(node, service, hints, res);
78 
79 	if (err) {
80 		const char *errstr = getxinfo_strerr(err);
81 
82 		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
83 			node ? node : "", service ? service : "", errstr);
84 		exit(1);
85 	}
86 }
87 
sock_listen_mptcp(const char * const listenaddr,const char * const port)88 static int sock_listen_mptcp(const char * const listenaddr,
89 			     const char * const port)
90 {
91 	int sock = -1;
92 	struct addrinfo hints = {
93 		.ai_protocol = IPPROTO_TCP,
94 		.ai_socktype = SOCK_STREAM,
95 		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
96 	};
97 
98 	hints.ai_family = pf;
99 
100 	struct addrinfo *a, *addr;
101 	int one = 1;
102 
103 	xgetaddrinfo(listenaddr, port, &hints, &addr);
104 	hints.ai_family = pf;
105 
106 	for (a = addr; a; a = a->ai_next) {
107 		sock = socket(a->ai_family, a->ai_socktype, proto_rx);
108 		if (sock < 0)
109 			continue;
110 
111 		if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
112 				     sizeof(one)))
113 			perror("setsockopt");
114 
115 		if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
116 			break; /* success */
117 
118 		perror("bind");
119 		close(sock);
120 		sock = -1;
121 	}
122 
123 	freeaddrinfo(addr);
124 
125 	if (sock < 0)
126 		xerror("could not create listen socket");
127 
128 	if (listen(sock, 20))
129 		die_perror("listen");
130 
131 	return sock;
132 }
133 
sock_connect_mptcp(const char * const remoteaddr,const char * const port,int proto)134 static int sock_connect_mptcp(const char * const remoteaddr,
135 			      const char * const port, int proto)
136 {
137 	struct addrinfo hints = {
138 		.ai_protocol = IPPROTO_TCP,
139 		.ai_socktype = SOCK_STREAM,
140 	};
141 	struct addrinfo *a, *addr;
142 	int sock = -1;
143 
144 	hints.ai_family = pf;
145 
146 	xgetaddrinfo(remoteaddr, port, &hints, &addr);
147 	for (a = addr; a; a = a->ai_next) {
148 		sock = socket(a->ai_family, a->ai_socktype, proto);
149 		if (sock < 0)
150 			continue;
151 
152 		if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
153 			break; /* success */
154 
155 		die_perror("connect");
156 	}
157 
158 	if (sock < 0)
159 		xerror("could not create connect socket");
160 
161 	freeaddrinfo(addr);
162 	return sock;
163 }
164 
protostr_to_num(const char * s)165 static int protostr_to_num(const char *s)
166 {
167 	if (strcasecmp(s, "tcp") == 0)
168 		return IPPROTO_TCP;
169 	if (strcasecmp(s, "mptcp") == 0)
170 		return IPPROTO_MPTCP;
171 
172 	die_usage(1);
173 	return 0;
174 }
175 
parse_opts(int argc,char ** argv)176 static void parse_opts(int argc, char **argv)
177 {
178 	int c;
179 
180 	while ((c = getopt(argc, argv, "h6t:r:")) != -1) {
181 		switch (c) {
182 		case 'h':
183 			die_usage(0);
184 			break;
185 		case '6':
186 			pf = AF_INET6;
187 			break;
188 		case 't':
189 			proto_tx = protostr_to_num(optarg);
190 			break;
191 		case 'r':
192 			proto_rx = protostr_to_num(optarg);
193 			break;
194 		default:
195 			die_usage(1);
196 			break;
197 		}
198 	}
199 }
200 
201 /* wait up to timeout milliseconds */
wait_for_ack(int fd,int timeout,size_t total)202 static void wait_for_ack(int fd, int timeout, size_t total)
203 {
204 	int i;
205 
206 	for (i = 0; i < timeout; i++) {
207 		int nsd, ret, queued = -1;
208 		struct timespec req;
209 
210 		ret = ioctl(fd, TIOCOUTQ, &queued);
211 		if (ret < 0)
212 			die_perror("TIOCOUTQ");
213 
214 		ret = ioctl(fd, SIOCOUTQNSD, &nsd);
215 		if (ret < 0)
216 			die_perror("SIOCOUTQNSD");
217 
218 		if ((size_t)queued > total)
219 			xerror("TIOCOUTQ %u, but only %zu expected\n", queued, total);
220 		assert(nsd <= queued);
221 
222 		if (queued == 0)
223 			return;
224 
225 		/* wait for peer to ack rx of all data */
226 		req.tv_sec = 0;
227 		req.tv_nsec = 1 * 1000 * 1000ul; /* 1ms */
228 		nanosleep(&req, NULL);
229 	}
230 
231 	xerror("still tx data queued after %u ms\n", timeout);
232 }
233 
connect_one_server(int fd,int unixfd)234 static void connect_one_server(int fd, int unixfd)
235 {
236 	size_t len, i, total, sent;
237 	char buf[4096], buf2[4096];
238 	ssize_t ret;
239 
240 	len = rand() % (sizeof(buf) - 1);
241 
242 	if (len < 128)
243 		len = 128;
244 
245 	for (i = 0; i < len ; i++) {
246 		buf[i] = rand() % 26;
247 		buf[i] += 'A';
248 	}
249 
250 	buf[i] = '\n';
251 
252 	/* un-block server */
253 	ret = read(unixfd, buf2, 4);
254 	assert(ret == 4);
255 
256 	assert(strncmp(buf2, "xmit", 4) == 0);
257 
258 	ret = write(unixfd, &len, sizeof(len));
259 	assert(ret == (ssize_t)sizeof(len));
260 
261 	ret = write(fd, buf, len);
262 	if (ret < 0)
263 		die_perror("write");
264 
265 	if (ret != (ssize_t)len)
266 		xerror("short write");
267 
268 	ret = read(unixfd, buf2, 4);
269 	assert(strncmp(buf2, "huge", 4) == 0);
270 
271 	total = rand() % (16 * 1024 * 1024);
272 	total += (1 * 1024 * 1024);
273 	sent = total;
274 
275 	ret = write(unixfd, &total, sizeof(total));
276 	assert(ret == (ssize_t)sizeof(total));
277 
278 	wait_for_ack(fd, 5000, len);
279 
280 	while (total > 0) {
281 		if (total > sizeof(buf))
282 			len = sizeof(buf);
283 		else
284 			len = total;
285 
286 		ret = write(fd, buf, len);
287 		if (ret < 0)
288 			die_perror("write");
289 		total -= ret;
290 
291 		/* we don't have to care about buf content, only
292 		 * number of total bytes sent
293 		 */
294 	}
295 
296 	ret = read(unixfd, buf2, 4);
297 	assert(ret == 4);
298 	assert(strncmp(buf2, "shut", 4) == 0);
299 
300 	wait_for_ack(fd, 5000, sent);
301 
302 	ret = write(fd, buf, 1);
303 	assert(ret == 1);
304 	close(fd);
305 	ret = write(unixfd, "closed", 6);
306 	assert(ret == 6);
307 
308 	close(unixfd);
309 }
310 
get_tcp_inq(struct msghdr * msgh,unsigned int * inqv)311 static void get_tcp_inq(struct msghdr *msgh, unsigned int *inqv)
312 {
313 	struct cmsghdr *cmsg;
314 
315 	for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
316 		if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) {
317 			memcpy(inqv, CMSG_DATA(cmsg), sizeof(*inqv));
318 			return;
319 		}
320 	}
321 
322 	xerror("could not find TCP_CM_INQ cmsg type");
323 }
324 
process_one_client(int fd,int unixfd)325 static void process_one_client(int fd, int unixfd)
326 {
327 	unsigned int tcp_inq;
328 	size_t expect_len;
329 	char msg_buf[4096];
330 	char buf[4096];
331 	char tmp[16];
332 	struct iovec iov = {
333 		.iov_base = buf,
334 		.iov_len = 1,
335 	};
336 	struct msghdr msg = {
337 		.msg_iov = &iov,
338 		.msg_iovlen = 1,
339 		.msg_control = msg_buf,
340 		.msg_controllen = sizeof(msg_buf),
341 	};
342 	ssize_t ret, tot;
343 
344 	ret = write(unixfd, "xmit", 4);
345 	assert(ret == 4);
346 
347 	ret = read(unixfd, &expect_len, sizeof(expect_len));
348 	assert(ret == (ssize_t)sizeof(expect_len));
349 
350 	if (expect_len > sizeof(buf))
351 		xerror("expect len %zu exceeds buffer size", expect_len);
352 
353 	for (;;) {
354 		struct timespec req;
355 		unsigned int queued;
356 
357 		ret = ioctl(fd, FIONREAD, &queued);
358 		if (ret < 0)
359 			die_perror("FIONREAD");
360 		if (queued > expect_len)
361 			xerror("FIONREAD returned %u, but only %zu expected\n",
362 			       queued, expect_len);
363 		if (queued == expect_len)
364 			break;
365 
366 		req.tv_sec = 0;
367 		req.tv_nsec = 1000 * 1000ul;
368 		nanosleep(&req, NULL);
369 	}
370 
371 	/* read one byte, expect cmsg to return expected - 1 */
372 	ret = recvmsg(fd, &msg, 0);
373 	if (ret < 0)
374 		die_perror("recvmsg");
375 
376 	if (msg.msg_controllen == 0)
377 		xerror("msg_controllen is 0");
378 
379 	get_tcp_inq(&msg, &tcp_inq);
380 
381 	assert((size_t)tcp_inq == (expect_len - 1));
382 
383 	iov.iov_len = sizeof(buf);
384 	ret = recvmsg(fd, &msg, 0);
385 	if (ret < 0)
386 		die_perror("recvmsg");
387 
388 	/* should have gotten exact remainder of all pending data */
389 	assert(ret == (ssize_t)tcp_inq);
390 
391 	/* should be 0, all drained */
392 	get_tcp_inq(&msg, &tcp_inq);
393 	assert(tcp_inq == 0);
394 
395 	/* request a large swath of data. */
396 	ret = write(unixfd, "huge", 4);
397 	assert(ret == 4);
398 
399 	ret = read(unixfd, &expect_len, sizeof(expect_len));
400 	assert(ret == (ssize_t)sizeof(expect_len));
401 
402 	/* peer should send us a few mb of data */
403 	if (expect_len <= sizeof(buf))
404 		xerror("expect len %zu too small\n", expect_len);
405 
406 	tot = 0;
407 	do {
408 		iov.iov_len = sizeof(buf);
409 		ret = recvmsg(fd, &msg, 0);
410 		if (ret < 0)
411 			die_perror("recvmsg");
412 
413 		tot += ret;
414 
415 		get_tcp_inq(&msg, &tcp_inq);
416 
417 		if (tcp_inq > expect_len - tot)
418 			xerror("inq %d, remaining %d total_len %d\n",
419 			       tcp_inq, expect_len - tot, (int)expect_len);
420 
421 		assert(tcp_inq <= expect_len - tot);
422 	} while ((size_t)tot < expect_len);
423 
424 	ret = write(unixfd, "shut", 4);
425 	assert(ret == 4);
426 
427 	/* wait for hangup. Should have received one more byte of data. */
428 	ret = read(unixfd, tmp, sizeof(tmp));
429 	assert(ret == 6);
430 	assert(strncmp(tmp, "closed", 6) == 0);
431 
432 	sleep(1);
433 
434 	iov.iov_len = 1;
435 	ret = recvmsg(fd, &msg, 0);
436 	if (ret < 0)
437 		die_perror("recvmsg");
438 	assert(ret == 1);
439 
440 	get_tcp_inq(&msg, &tcp_inq);
441 
442 	/* tcp_inq should be 1 due to received fin. */
443 	assert(tcp_inq == 1);
444 
445 	iov.iov_len = 1;
446 	ret = recvmsg(fd, &msg, 0);
447 	if (ret < 0)
448 		die_perror("recvmsg");
449 
450 	/* expect EOF */
451 	assert(ret == 0);
452 	get_tcp_inq(&msg, &tcp_inq);
453 	assert(tcp_inq == 1);
454 
455 	close(fd);
456 }
457 
xaccept(int s)458 static int xaccept(int s)
459 {
460 	int fd = accept(s, NULL, 0);
461 
462 	if (fd < 0)
463 		die_perror("accept");
464 
465 	return fd;
466 }
467 
server(int unixfd)468 static int server(int unixfd)
469 {
470 	int fd = -1, r, on = 1;
471 
472 	switch (pf) {
473 	case AF_INET:
474 		fd = sock_listen_mptcp("127.0.0.1", "15432");
475 		break;
476 	case AF_INET6:
477 		fd = sock_listen_mptcp("::1", "15432");
478 		break;
479 	default:
480 		xerror("Unknown pf %d\n", pf);
481 		break;
482 	}
483 
484 	r = write(unixfd, "conn", 4);
485 	assert(r == 4);
486 
487 	alarm(15);
488 	r = xaccept(fd);
489 
490 	if (-1 == setsockopt(r, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)))
491 		die_perror("setsockopt");
492 
493 	process_one_client(r, unixfd);
494 
495 	return 0;
496 }
497 
client(int unixfd)498 static int client(int unixfd)
499 {
500 	int fd = -1;
501 
502 	alarm(15);
503 
504 	switch (pf) {
505 	case AF_INET:
506 		fd = sock_connect_mptcp("127.0.0.1", "15432", proto_tx);
507 		break;
508 	case AF_INET6:
509 		fd = sock_connect_mptcp("::1", "15432", proto_tx);
510 		break;
511 	default:
512 		xerror("Unknown pf %d\n", pf);
513 	}
514 
515 	connect_one_server(fd, unixfd);
516 
517 	return 0;
518 }
519 
init_rng(void)520 static void init_rng(void)
521 {
522 	int fd = open("/dev/urandom", O_RDONLY);
523 	unsigned int foo;
524 
525 	if (fd > 0) {
526 		int ret = read(fd, &foo, sizeof(foo));
527 
528 		if (ret < 0)
529 			srand(fd + foo);
530 		close(fd);
531 	}
532 
533 	srand(foo);
534 }
535 
xfork(void)536 static pid_t xfork(void)
537 {
538 	pid_t p = fork();
539 
540 	if (p < 0)
541 		die_perror("fork");
542 	else if (p == 0)
543 		init_rng();
544 
545 	return p;
546 }
547 
rcheck(int wstatus,const char * what)548 static int rcheck(int wstatus, const char *what)
549 {
550 	if (WIFEXITED(wstatus)) {
551 		if (WEXITSTATUS(wstatus) == 0)
552 			return 0;
553 		fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus));
554 		return WEXITSTATUS(wstatus);
555 	} else if (WIFSIGNALED(wstatus)) {
556 		xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus));
557 	} else if (WIFSTOPPED(wstatus)) {
558 		xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus));
559 	}
560 
561 	return 111;
562 }
563 
main(int argc,char * argv[])564 int main(int argc, char *argv[])
565 {
566 	int e1, e2, wstatus;
567 	pid_t s, c, ret;
568 	int unixfds[2];
569 
570 	parse_opts(argc, argv);
571 
572 	e1 = socketpair(AF_UNIX, SOCK_DGRAM, 0, unixfds);
573 	if (e1 < 0)
574 		die_perror("pipe");
575 
576 	s = xfork();
577 	if (s == 0)
578 		return server(unixfds[1]);
579 
580 	close(unixfds[1]);
581 
582 	/* wait until server bound a socket */
583 	e1 = read(unixfds[0], &e1, 4);
584 	assert(e1 == 4);
585 
586 	c = xfork();
587 	if (c == 0)
588 		return client(unixfds[0]);
589 
590 	close(unixfds[0]);
591 
592 	ret = waitpid(s, &wstatus, 0);
593 	if (ret == -1)
594 		die_perror("waitpid");
595 	e1 = rcheck(wstatus, "server");
596 	ret = waitpid(c, &wstatus, 0);
597 	if (ret == -1)
598 		die_perror("waitpid");
599 	e2 = rcheck(wstatus, "client");
600 
601 	return e1 ? e1 : e2;
602 }
603