/*
 * Copyright (c) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/logging/log.h>
LOG_MODULE_REGISTER(net_tcp, CONFIG_NET_TCP_LOG_LEVEL);

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <zephyr/kernel.h>
#include <zephyr/random/random.h>

#if defined(CONFIG_NET_TCP_ISN_RFC6528)
#include <psa/crypto.h>
#endif
#include <zephyr/net/net_pkt.h>
#include <zephyr/net/net_context.h>
#include <zephyr/net/udp.h>
#include "ipv4.h"
#include "ipv6.h"
#include "connection.h"
#include "net_stats.h"
#include "net_private.h"
#include "tcp_internal.h"
#include "pmtu.h"

#define ACK_TIMEOUT_MS tcp_max_timeout_ms
#define ACK_TIMEOUT K_MSEC(ACK_TIMEOUT_MS)
#define LAST_ACK_TIMEOUT_MS tcp_max_timeout_ms
#define LAST_ACK_TIMEOUT K_MSEC(LAST_ACK_TIMEOUT_MS)
#define FIN_TIMEOUT K_MSEC(tcp_max_timeout_ms)
#define ACK_DELAY K_MSEC(100)
#define ZWP_MAX_DELAY_MS 120000
#define DUPLICATE_ACK_RETRANSMIT_TRHESHOLD 3

static int tcp_rto = CONFIG_NET_TCP_INIT_RETRANSMISSION_TIMEOUT;
static int tcp_retries = CONFIG_NET_TCP_RETRY_COUNT;
static int tcp_max_timeout_ms;
static int tcp_rx_window =
#if (CONFIG_NET_TCP_MAX_RECV_WINDOW_SIZE != 0)
	CONFIG_NET_TCP_MAX_RECV_WINDOW_SIZE;
#else
#if defined(CONFIG_NET_BUF_FIXED_DATA_SIZE)
	(CONFIG_NET_BUF_RX_COUNT * CONFIG_NET_BUF_DATA_SIZE) / 3;
#else
	CONFIG_NET_PKT_BUF_RX_DATA_POOL_SIZE / 3;
#endif /* CONFIG_NET_BUF_FIXED_DATA_SIZE */
#endif
static int tcp_tx_window =
#if (CONFIG_NET_TCP_MAX_SEND_WINDOW_SIZE != 0)
	CONFIG_NET_TCP_MAX_SEND_WINDOW_SIZE;
#else
#if defined(CONFIG_NET_BUF_FIXED_DATA_SIZE)
	(CONFIG_NET_BUF_TX_COUNT * CONFIG_NET_BUF_DATA_SIZE) / 3;
#else
	CONFIG_NET_PKT_BUF_TX_DATA_POOL_SIZE / 3;
#endif /* CONFIG_NET_BUF_FIXED_DATA_SIZE */
#endif
#ifdef CONFIG_NET_TCP_RANDOMIZED_RTO
#define TCP_RTO_MS (conn->rto)
#else
#define TCP_RTO_MS (tcp_rto)
#endif

/* Number of MSS-sized segments the congestion window is initialized to,
 * and the initial slow start threshold, also in MSS units.
 */
#define TCP_CONGESTION_INITIAL_WIN 1
#define TCP_CONGESTION_INITIAL_SSTHRESH 3
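
/* Illustrative sizing (not from the source): with a typical Ethernet MSS
 * of 1460 bytes, tcp_ca_init() starts a connection with
 * cwnd = 1 * 1460 = 1460 bytes and ssthresh = 3 * 1460 = 4380 bytes, so
 * roughly one full segment may be in flight before the first ACK returns.
 */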

static sys_slist_t tcp_conns = SYS_SLIST_STATIC_INIT(&tcp_conns);

static K_MUTEX_DEFINE(tcp_lock);

K_MEM_SLAB_DEFINE_STATIC(tcp_conns_slab, sizeof(struct tcp),
				CONFIG_NET_MAX_CONTEXTS, 4);

static struct k_work_q tcp_work_q;
static K_KERNEL_STACK_DEFINE(work_q_stack, CONFIG_NET_TCP_WORKQ_STACK_SIZE);

static enum net_verdict tcp_in(struct tcp *conn, struct net_pkt *pkt);
static bool is_destination_local(struct net_pkt *pkt);
static void tcp_out(struct tcp *conn, uint8_t flags);
static const char *tcp_state_to_str(enum tcp_state state, bool prefix);

int (*tcp_send_cb)(struct net_pkt *pkt) = NULL;
size_t (*tcp_recv_cb)(struct tcp *conn, struct net_pkt *pkt) = NULL;

static uint32_t tcp_get_seq(struct net_buf *buf)
{
	return *(uint32_t *)net_buf_user_data(buf);
}

static void tcp_set_seq(struct net_buf *buf, uint32_t seq)
{
	*(uint32_t *)net_buf_user_data(buf) = seq;
}

static int tcp_pkt_linearize(struct net_pkt *pkt, size_t pos, size_t len)
{
	struct net_buf *buf, *first = pkt->cursor.buf, *second = first->frags;
	int ret = 0;
	size_t len1, len2;

	if (net_pkt_get_len(pkt) < (pos + len)) {
		NET_ERR("Insufficient packet len=%zd (pos+len=%zu)",
			net_pkt_get_len(pkt), pos + len);
		ret = -EINVAL;
		goto out;
	}

	buf = net_pkt_get_frag(pkt, len, TCP_PKT_ALLOC_TIMEOUT);

	if (!buf || net_buf_max_len(buf) < len) {
		if (buf) {
			net_buf_unref(buf);
		}
		ret = -ENOBUFS;
		goto out;
	}

	net_buf_linearize(buf->data, net_buf_max_len(buf), pkt->frags, pos, len);
	net_buf_add(buf, len);

	len1 = first->len - (pkt->cursor.pos - pkt->cursor.buf->data);
	len2 = len - len1;

	first->len -= len1;

	while (len2) {
		size_t pull_len = MIN(second->len, len2);
		struct net_buf *next;

		len2 -= pull_len;
		net_buf_pull(second, pull_len);
		next = second->frags;
		if (second->len == 0) {
			net_buf_unref(second);
		}
		second = next;
	}

	buf->frags = second;
	first->frags = buf;
 out:
	return ret;
}
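
/* Illustrative case (assumption, not from the source): if the 20-byte TCP
 * header starts a few bytes before the end of one fragment and continues
 * in the next, th_get() cannot return a direct pointer into the buffer.
 * tcp_pkt_linearize() copies the header span into one freshly allocated
 * fragment and re-stitches the fragment chain so the header becomes
 * contiguous.
 */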

static struct tcphdr *th_get(struct net_pkt *pkt)
{
	size_t ip_len = net_pkt_ip_hdr_len(pkt) + net_pkt_ip_opts_len(pkt);
	struct tcphdr *th = NULL;
 again:
	net_pkt_cursor_init(pkt);
	net_pkt_set_overwrite(pkt, true);

	if (net_pkt_skip(pkt, ip_len) != 0) {
		goto out;
	}

	if (!net_pkt_is_contiguous(pkt, sizeof(*th))) {
		if (tcp_pkt_linearize(pkt, ip_len, sizeof(*th)) < 0) {
			goto out;
		}

		goto again;
	}

	th = net_pkt_cursor_get_pos(pkt);
 out:
	return th;
}

static size_t tcp_endpoint_len(sa_family_t af)
{
	return (af == AF_INET) ? sizeof(struct sockaddr_in) :
		sizeof(struct sockaddr_in6);
}

static int tcp_endpoint_set(union tcp_endpoint *ep, struct net_pkt *pkt,
			    enum pkt_addr src)
{
	int ret = 0;

	switch (net_pkt_family(pkt)) {
	case AF_INET:
		if (IS_ENABLED(CONFIG_NET_IPV4)) {
			struct net_ipv4_hdr *ip = NET_IPV4_HDR(pkt);
			struct tcphdr *th;

			th = th_get(pkt);
			if (!th) {
				return -ENOBUFS;
			}

			memset(ep, 0, sizeof(*ep));

			ep->sin.sin_port = src == TCP_EP_SRC ? th_sport(th) :
							       th_dport(th);
			net_ipv4_addr_copy_raw((uint8_t *)&ep->sin.sin_addr,
					       src == TCP_EP_SRC ?
							ip->src : ip->dst);
			ep->sa.sa_family = AF_INET;
		} else {
			ret = -EINVAL;
		}

		break;

	case AF_INET6:
		if (IS_ENABLED(CONFIG_NET_IPV6)) {
			struct net_ipv6_hdr *ip = NET_IPV6_HDR(pkt);
			struct tcphdr *th;

			th = th_get(pkt);
			if (!th) {
				return -ENOBUFS;
			}

			memset(ep, 0, sizeof(*ep));

			ep->sin6.sin6_port = src == TCP_EP_SRC ? th_sport(th) :
								 th_dport(th);
			net_ipv6_addr_copy_raw((uint8_t *)&ep->sin6.sin6_addr,
					       src == TCP_EP_SRC ?
							ip->src : ip->dst);
			ep->sa.sa_family = AF_INET6;
		} else {
			ret = -EINVAL;
		}

		break;

	default:
		NET_ERR("Unknown address family: %hu", net_pkt_family(pkt));
		ret = -EINVAL;
	}

	return ret;
}

int net_tcp_endpoint_copy(struct net_context *ctx,
			  struct sockaddr *local,
			  struct sockaddr *peer,
			  socklen_t *addrlen)
{
	const struct tcp *conn = ctx->tcp;
	socklen_t newlen = ctx->local.family == AF_INET ?
		sizeof(struct sockaddr_in) :
		sizeof(struct sockaddr_in6);

	if (local != NULL) {
		/* If we are connected, get the address we are actually
		 * using; otherwise get the address we are bound to, as the
		 * two may differ if we are bound to the any-address.
		 */
		if (conn->state < TCP_ESTABLISHED) {
			if (IS_ENABLED(CONFIG_NET_IPV4) && ctx->local.family == AF_INET) {
				memcpy(&net_sin(local)->sin_addr,
				       net_sin_ptr(&ctx->local)->sin_addr,
				       sizeof(struct in_addr));
				net_sin(local)->sin_port = net_sin_ptr(&ctx->local)->sin_port;
				net_sin(local)->sin_family = AF_INET;
			} else if (IS_ENABLED(CONFIG_NET_IPV6) && ctx->local.family == AF_INET6) {
				memcpy(&net_sin6(local)->sin6_addr,
				       net_sin6_ptr(&ctx->local)->sin6_addr,
				       sizeof(struct in6_addr));
				net_sin6(local)->sin6_port = net_sin6_ptr(&ctx->local)->sin6_port;
				net_sin6(local)->sin6_family = AF_INET6;
				net_sin6(local)->sin6_scope_id =
					net_sin6_ptr(&ctx->local)->sin6_scope_id;
			} else {
				return -EINVAL;
			}
		} else {
			memcpy(local, &conn->src.sa, newlen);
		}
	}

	if (peer != NULL) {
		memcpy(peer, &conn->dst.sa, newlen);
	}

	return 0;
}

static const char *tcp_flags(uint8_t flags)
{
#define BUF_SIZE 25 /* 6 * 4 + 1 */
	static char buf[BUF_SIZE];
	int len = 0;

	buf[0] = '\0';

	if (flags) {
		if (flags & SYN) {
			len += snprintk(buf + len, BUF_SIZE - len, "SYN,");
		}
		if (flags & FIN) {
			len += snprintk(buf + len, BUF_SIZE - len, "FIN,");
		}
		if (flags & ACK) {
			len += snprintk(buf + len, BUF_SIZE - len, "ACK,");
		}
		if (flags & PSH) {
			len += snprintk(buf + len, BUF_SIZE - len, "PSH,");
		}
		if (flags & RST) {
			len += snprintk(buf + len, BUF_SIZE - len, "RST,");
		}
		if (flags & URG) {
			len += snprintk(buf + len, BUF_SIZE - len, "URG,");
		}

		if (len > 0) {
			buf[len - 1] = '\0'; /* delete the last comma */
		}
	}
#undef BUF_SIZE
	return buf;
}
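
/* Usage example (illustrative): tcp_flags(SYN | ACK) yields "SYN,ACK".
 * Note that the returned buffer is static, so the result is only valid
 * until the next call and the function is not reentrant.
 */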

static size_t tcp_data_len(struct net_pkt *pkt)
{
	struct tcphdr *th = th_get(pkt);
	size_t tcp_options_len = (th_off(th) - 5) * 4;
	int len = net_pkt_get_len(pkt) - net_pkt_ip_hdr_len(pkt) -
		net_pkt_ip_opts_len(pkt) - sizeof(*th) - tcp_options_len;

	return len > 0 ? (size_t)len : 0;
}

static const char *tcp_th(struct net_pkt *pkt)
{
#define BUF_SIZE 80
	static char buf[BUF_SIZE];
	int len = 0;
	struct tcphdr *th = th_get(pkt);

	buf[0] = '\0';

	if (th_off(th) < 5) {
		len += snprintk(buf + len, BUF_SIZE - len,
				"bogus th_off: %hu", (uint16_t)th_off(th));
		goto end;
	}

	len += snprintk(buf + len, BUF_SIZE - len,
			"%s Seq=%u", tcp_flags(th_flags(th)), th_seq(th));

	if (th_flags(th) & ACK) {
		len += snprintk(buf + len, BUF_SIZE - len,
				" Ack=%u", th_ack(th));
	}

	len += snprintk(buf + len, BUF_SIZE - len,
			" Len=%ld", (long)tcp_data_len(pkt));
end:
#undef BUF_SIZE
	return buf;
}

#define is_6lo_technology(pkt)						\
	(IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6 && \
	 (IS_ENABLED(CONFIG_NET_L2_IEEE802154) &&			\
	  net_pkt_lladdr_dst(pkt)->type == NET_LINK_IEEE802154))

static void tcp_send(struct net_pkt *pkt)
{
	tcp_pkt_ref(pkt);

	if (tcp_send_cb) {
		if (tcp_send_cb(pkt) < 0) {
			NET_ERR("net_send_data()");
			tcp_pkt_unref(pkt);
		}
		goto out;
	}

	/* We must have special handling for some network technologies that
	 * tweak the IP protocol headers during packet sending. This happens
	 * with Bluetooth and IEEE 802.15.4, which use IPv6 header compression
	 * (6lo) and alter the sent network packet. So in order to avoid any
	 * corruption of the original data buffer, we must copy the sent data.
	 * For Bluetooth, its fragmentation code will even mangle the data
	 * part of the message, so we need to copy that as well.
	 */
	if (is_6lo_technology(pkt)) {
		struct net_pkt *new_pkt;

		new_pkt = tcp_pkt_clone(pkt);
		if (!new_pkt) {
			/* The caller of this func assumes that the net_pkt
			 * is consumed by this function. We call unref here
			 * so that the unref at the end of the func will
			 * free the net_pkt.
			 */
			tcp_pkt_unref(pkt);
			NET_WARN("net_pkt alloc failure");
			goto out;
		}

		if (net_send_data(new_pkt) < 0) {
			tcp_pkt_unref(new_pkt);
		}

		/* We simulate sending of the original pkt and unref it like
		 * the device driver would do.
		 */
		tcp_pkt_unref(pkt);
	} else {
		if (net_send_data(pkt) < 0) {
			NET_ERR("net_send_data()");
			tcp_pkt_unref(pkt);
		}
	}
out:
	tcp_pkt_unref(pkt);
}

static void tcp_derive_rto(struct tcp *conn)
{
#ifdef CONFIG_NET_TCP_RANDOMIZED_RTO
	/* Compute a randomized rto between 1 and 1.5 times tcp_rto */
	uint32_t gain;
	uint8_t gain8;
	uint32_t rto;

	/* Getting randomness is computationally expensive, so only use 8 bits */
	sys_rand_get(&gain8, sizeof(uint8_t));

	gain = (uint32_t)gain8;
	gain += 1 << 9;

	rto = (uint32_t)tcp_rto;
	rto = (gain * rto) >> 9;
	conn->rto = (uint16_t)rto;
#else
	ARG_UNUSED(conn);
#endif
}
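
/* Worked example (illustrative, assuming tcp_rto = 200 ms): gain8 is a
 * random byte in [0, 255], so gain lies in [512, 767]. Shifting right by
 * 9 divides by 512, giving rto = (512 * 200) / 512 = 200 ms at the low
 * end and (767 * 200) / 512 = 299 ms at the high end, i.e. a factor of
 * 1.0 to just under 1.5, as the comment above describes.
 */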

#ifdef CONFIG_NET_TCP_CONGESTION_AVOIDANCE

/* Implementation according to RFC 6582 */

static void tcp_new_reno_log(struct tcp *conn, char *step)
{
	NET_DBG("conn: %p, ca %s, cwnd=%d, ssthresh=%d, fast_pend=%i",
		conn, step, conn->ca.cwnd, conn->ca.ssthresh,
		conn->ca.pending_fast_retransmit_bytes);
}

static void tcp_new_reno_init(struct tcp *conn)
{
	conn->ca.cwnd = conn_mss(conn) * TCP_CONGESTION_INITIAL_WIN;
	conn->ca.ssthresh = conn_mss(conn) * TCP_CONGESTION_INITIAL_SSTHRESH;
	conn->ca.pending_fast_retransmit_bytes = 0;
	tcp_new_reno_log(conn, "init");
}

static void tcp_new_reno_fast_retransmit(struct tcp *conn)
{
	if (conn->ca.pending_fast_retransmit_bytes == 0) {
		conn->ca.ssthresh = MAX(conn_mss(conn) * 2, conn->unacked_len / 2);
		/* Account for the lost segments */
		conn->ca.cwnd = conn_mss(conn) * 3 + conn->ca.ssthresh;
		conn->ca.pending_fast_retransmit_bytes = conn->unacked_len;
		tcp_new_reno_log(conn, "fast_retransmit");
	}
}

static void tcp_new_reno_timeout(struct tcp *conn)
{
	conn->ca.ssthresh = MAX(conn_mss(conn) * 2, conn->unacked_len / 2);
	conn->ca.cwnd = conn_mss(conn);
	tcp_new_reno_log(conn, "timeout");
}

/* For every duplicate ack increment the cwnd by mss */
static void tcp_new_reno_dup_ack(struct tcp *conn)
{
	int32_t new_win = conn->ca.cwnd;

	new_win += conn_mss(conn);
	conn->ca.cwnd = MIN(new_win, UINT16_MAX);
	tcp_new_reno_log(conn, "dup_ack");
}

static void tcp_new_reno_pkts_acked(struct tcp *conn, uint32_t acked_len)
{
	int32_t new_win = conn->ca.cwnd;
	int32_t win_inc = MIN(acked_len, conn_mss(conn));

	if (conn->ca.pending_fast_retransmit_bytes == 0) {
		if (conn->ca.cwnd < conn->ca.ssthresh) {
			new_win += win_inc;
		} else {
			/* Implement a div_ceil to avoid rounding to 0 */
			new_win += ((win_inc * win_inc) + conn->ca.cwnd - 1) / conn->ca.cwnd;
		}
		conn->ca.cwnd = MIN(new_win, UINT16_MAX);
	} else {
		/* Check if it is still in fast recovery mode */
		if (conn->ca.pending_fast_retransmit_bytes <= acked_len) {
			conn->ca.pending_fast_retransmit_bytes = 0;
			conn->ca.cwnd = conn->ca.ssthresh;
		} else {
			conn->ca.pending_fast_retransmit_bytes -= acked_len;
			conn->ca.cwnd -= acked_len;
		}
	}
	tcp_new_reno_log(conn, "pkts_acked");
}
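
/* Illustrative cwnd evolution (not from the source), assuming
 * MSS = 1000 bytes: cwnd starts at 1000 and ssthresh at 3000. While
 * cwnd < ssthresh, every full-MSS ACK grows cwnd by 1000 bytes (slow
 * start: 1000 -> 2000 -> 3000). Above ssthresh the increment becomes
 * roughly MSS * MSS / cwnd per ACK (congestion avoidance:
 * 3000 -> 3334 -> 3634 -> ...), i.e. about one MSS per round trip.
 */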

static void tcp_ca_init(struct tcp *conn)
{
	tcp_new_reno_init(conn);
}

static void tcp_ca_fast_retransmit(struct tcp *conn)
{
	tcp_new_reno_fast_retransmit(conn);
}

static void tcp_ca_timeout(struct tcp *conn)
{
	tcp_new_reno_timeout(conn);
}

static void tcp_ca_dup_ack(struct tcp *conn)
{
	tcp_new_reno_dup_ack(conn);
}

static void tcp_ca_pkts_acked(struct tcp *conn, uint32_t acked_len)
{
	tcp_new_reno_pkts_acked(conn, acked_len);
}
#else

static void tcp_ca_init(struct tcp *conn) { }

static void tcp_ca_fast_retransmit(struct tcp *conn) { }

static void tcp_ca_timeout(struct tcp *conn) { }

static void tcp_ca_dup_ack(struct tcp *conn) { }

static void tcp_ca_pkts_acked(struct tcp *conn, uint32_t acked_len) { }

#endif

#if defined(CONFIG_NET_TCP_KEEPALIVE)

static void tcp_send_keepalive_probe(struct k_work *work);

static void keep_alive_timer_init(struct tcp *conn)
{
	conn->keep_alive = false;
	conn->keep_idle = CONFIG_NET_TCP_KEEPIDLE_DEFAULT;
	conn->keep_intvl = CONFIG_NET_TCP_KEEPINTVL_DEFAULT;
	conn->keep_cnt = CONFIG_NET_TCP_KEEPCNT_DEFAULT;
	NET_DBG("keepalive timer init idle = %d, interval = %d, cnt = %d",
		conn->keep_idle, conn->keep_intvl, conn->keep_cnt);
	k_work_init_delayable(&conn->keepalive_timer, tcp_send_keepalive_probe);
}
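
/* Illustrative timeline (assuming Linux-like Kconfig defaults of
 * keep_idle = 7200 s, keep_intvl = 75 s and keep_cnt = 9): after two
 * hours of idle time the first probe is sent; if it goes unanswered,
 * further probes follow every 75 s, and after nine unanswered probes
 * the connection is dropped.
 */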

static void keep_alive_param_copy(struct tcp *to, struct tcp *from)
{
	to->keep_alive = from->keep_alive;
	to->keep_idle = from->keep_idle;
	to->keep_intvl = from->keep_intvl;
	to->keep_cnt = from->keep_cnt;
}

static void keep_alive_timer_restart(struct tcp *conn)
{
	if (!conn->keep_alive || conn->state != TCP_ESTABLISHED) {
		return;
	}

	conn->keep_cur = 0;
	k_work_reschedule_for_queue(&tcp_work_q, &conn->keepalive_timer,
				    K_SECONDS(conn->keep_idle));
}

static void keep_alive_timer_stop(struct tcp *conn)
{
	k_work_cancel_delayable(&conn->keepalive_timer);
}

static int set_tcp_keep_alive(struct tcp *conn, const void *value, size_t len)
{
	int keep_alive;

	if (conn == NULL || value == NULL || len != sizeof(int)) {
		return -EINVAL;
	}

	keep_alive = *(int *)value;
	if ((keep_alive < 0) || (keep_alive > 1)) {
		return -EINVAL;
	}

	conn->keep_alive = (bool)keep_alive;

	if (keep_alive) {
		keep_alive_timer_restart(conn);
	} else {
		keep_alive_timer_stop(conn);
	}

	return 0;
}

static int set_tcp_keep_idle(struct tcp *conn, const void *value, size_t len)
{
	int keep_idle;

	if (conn == NULL || value == NULL || len != sizeof(int)) {
		return -EINVAL;
	}

	keep_idle = *(int *)value;
	if (keep_idle < 1) {
		return -EINVAL;
	}

	conn->keep_idle = keep_idle;

	keep_alive_timer_restart(conn);

	return 0;
}

static int set_tcp_keep_intvl(struct tcp *conn, const void *value, size_t len)
{
	int keep_intvl;

	if (conn == NULL || value == NULL || len != sizeof(int)) {
		return -EINVAL;
	}

	keep_intvl = *(int *)value;
	if (keep_intvl < 1) {
		return -EINVAL;
	}

	conn->keep_intvl = keep_intvl;

	keep_alive_timer_restart(conn);

	return 0;
}

static int set_tcp_keep_cnt(struct tcp *conn, const void *value, size_t len)
{
	int keep_cnt;

	if (conn == NULL || value == NULL || len != sizeof(int)) {
		return -EINVAL;
	}

	keep_cnt = *(int *)value;
	if (keep_cnt < 1) {
		return -EINVAL;
	}

	conn->keep_cnt = keep_cnt;

	keep_alive_timer_restart(conn);

	return 0;
}

static int get_tcp_keep_alive(struct tcp *conn, void *value, size_t *len)
{
	if (conn == NULL || value == NULL || len == NULL ||
	    *len != sizeof(int)) {
		return -EINVAL;
	}

	*((int *)value) = (int)conn->keep_alive;

	return 0;
}

static int get_tcp_keep_idle(struct tcp *conn, void *value, size_t *len)
{
	if (conn == NULL || value == NULL || len == NULL ||
	    *len != sizeof(int)) {
		return -EINVAL;
	}

	*((int *)value) = (int)conn->keep_idle;

	return 0;
}

static int get_tcp_keep_intvl(struct tcp *conn, void *value, size_t *len)
{
	if (conn == NULL || value == NULL || len == NULL ||
	    *len != sizeof(int)) {
		return -EINVAL;
	}

	*((int *)value) = (int)conn->keep_intvl;

	return 0;
}

static int get_tcp_keep_cnt(struct tcp *conn, void *value, size_t *len)
{
	if (conn == NULL || value == NULL || len == NULL ||
	    *len != sizeof(int)) {
		return -EINVAL;
	}

	*((int *)value) = (int)conn->keep_cnt;

	return 0;
}

#else /* CONFIG_NET_TCP_KEEPALIVE */

#define keep_alive_timer_init(...)
#define keep_alive_param_copy(...)
#define keep_alive_timer_restart(...)
#define keep_alive_timer_stop(...)
#define set_tcp_keep_alive(...) (-ENOPROTOOPT)
#define set_tcp_keep_idle(...) (-ENOPROTOOPT)
#define set_tcp_keep_intvl(...) (-ENOPROTOOPT)
#define set_tcp_keep_cnt(...) (-ENOPROTOOPT)
#define get_tcp_keep_alive(...) (-ENOPROTOOPT)
#define get_tcp_keep_idle(...) (-ENOPROTOOPT)
#define get_tcp_keep_intvl(...) (-ENOPROTOOPT)
#define get_tcp_keep_cnt(...) (-ENOPROTOOPT)

#endif /* CONFIG_NET_TCP_KEEPALIVE */

static void tcp_send_queue_flush(struct tcp *conn)
{
	struct net_pkt *pkt;

	k_work_cancel_delayable(&conn->send_timer);

	while ((pkt = tcp_slist(conn, &conn->send_queue, get,
				struct net_pkt, next))) {
		tcp_pkt_unref(pkt);
	}
}

static void tcp_conn_release(struct k_work *work)
{
	struct tcp *conn = CONTAINER_OF(work, struct tcp, conn_release);
	struct net_pkt *pkt;

#if defined(CONFIG_NET_TEST)
	if (conn->test_closed_cb != NULL) {
		conn->test_closed_cb(conn, conn->test_user_data);
	}
#endif

	/* The application is no longer there; unref any remaining packets
	 * on the fifo (although there shouldn't be any at this point).
	 */
	while ((pkt = k_fifo_get(&conn->recv_data, K_NO_WAIT)) != NULL) {
		tcp_pkt_unref(pkt);
	}

	k_mutex_lock(&conn->lock, K_FOREVER);

	if (conn->context->conn_handler) {
		net_conn_unregister(conn->context->conn_handler);
		conn->context->conn_handler = NULL;
	}

	/* As the TCP socket could be closed without connect being called,
	 * check if the address reference was taken before releasing the
	 * address.
	 */
	if (conn->iface != NULL && conn->addr_ref_done) {
		net_if_addr_unref(conn->iface, conn->src.sa.sa_family,
				  conn->src.sa.sa_family == AF_INET ?
				  (const void *)&conn->src.sin.sin_addr :
				  (const void *)&conn->src.sin6.sin6_addr,
				  NULL);
	}

	conn->context->tcp = NULL;
	conn->state = TCP_UNUSED;

	tcp_send_queue_flush(conn);

	(void)k_work_cancel_delayable(&conn->send_data_timer);
	tcp_pkt_unref(conn->send_data);

	if (CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT) {
		tcp_pkt_unref(conn->queue_recv_data);
	}

	(void)k_work_cancel_delayable(&conn->timewait_timer);
	(void)k_work_cancel_delayable(&conn->fin_timer);
	(void)k_work_cancel_delayable(&conn->persist_timer);
	(void)k_work_cancel_delayable(&conn->ack_timer);
	(void)k_work_cancel_delayable(&conn->send_timer);
	(void)k_work_cancel_delayable(&conn->recv_queue_timer);
	keep_alive_timer_stop(conn);

	k_mutex_unlock(&conn->lock);

	net_context_unref(conn->context);
	conn->context = NULL;

	k_mutex_lock(&tcp_lock, K_FOREVER);
	sys_slist_find_and_remove(&tcp_conns, &conn->next);
	k_mutex_unlock(&tcp_lock);

	k_mem_slab_free(&tcp_conns_slab, (void *)conn);
}

#if defined(CONFIG_NET_TEST)
void tcp_install_close_cb(struct net_context *ctx,
			  net_tcp_closed_cb_t cb,
			  void *user_data)
{
	NET_ASSERT(ctx->tcp != NULL);

	((struct tcp *)ctx->tcp)->test_closed_cb = cb;
	((struct tcp *)ctx->tcp)->test_user_data = user_data;
}
#endif

static int tcp_conn_unref(struct tcp *conn)
{
	int ref_count = atomic_get(&conn->ref_count);

	NET_DBG("conn: %p, ref_count=%d", conn, ref_count);

	ref_count = atomic_dec(&conn->ref_count) - 1;
	if (ref_count != 0) {
		tp_out(net_context_get_family(conn->context), conn->iface,
		       "TP_TRACE", "event", "CONN_DELETE");
		return ref_count;
	}

	/* Release the TCP context from the TCP workqueue. This ensures that
	 * all pending TCP work items are properly cancelled when the context
	 * is released.
	 */
	k_work_submit_to_queue(&tcp_work_q, &conn->conn_release);

	return ref_count;
}

#if CONFIG_NET_TCP_LOG_LEVEL >= LOG_LEVEL_DBG
#define tcp_conn_close(conn, status)				\
	tcp_conn_close_debug(conn, status, __func__, __LINE__)

static int tcp_conn_close_debug(struct tcp *conn, int status,
				const char *caller, int line)
#else
static int tcp_conn_close(struct tcp *conn, int status)
#endif
{
#if CONFIG_NET_TCP_LOG_LEVEL >= LOG_LEVEL_DBG
	NET_DBG("conn: %p closed by TCP stack (%s():%d)", conn, caller, line);
#endif
	k_mutex_lock(&conn->lock, K_FOREVER);
	conn_state(conn, TCP_CLOSED);
	keep_alive_timer_stop(conn);
	k_mutex_unlock(&conn->lock);

	if (conn->in_connect) {
		if (conn->connect_cb) {
			conn->connect_cb(conn->context, status, conn->context->user_data);

			/* Make sure the connect_cb is only called once. */
			conn->connect_cb = NULL;
		}

		conn->in_connect = false;
		k_sem_reset(&conn->connect_sem);
	} else if (conn->context->recv_cb) {
		conn->context->recv_cb(conn->context, NULL, NULL, NULL,
				       status, conn->recv_user_data);
	}

	k_sem_give(&conn->tx_sem);

	return tcp_conn_unref(conn);
}

static bool tcp_send_process_no_lock(struct tcp *conn)
{
	bool unref = false;
	struct net_pkt *pkt;
	bool local = false;

	pkt = tcp_slist(conn, &conn->send_queue, peek_head,
			struct net_pkt, next);
	if (!pkt) {
		goto out;
	}

	NET_DBG("%s %s", tcp_th(pkt), conn->in_retransmission ?
		"in_retransmission" : "");

	if (conn->in_retransmission) {
		if (conn->send_retries > 0) {
			struct net_pkt *clone = tcp_pkt_clone(pkt);

			if (clone) {
				tcp_send(clone);
				conn->send_retries--;
			} else {
				NET_WARN("net_pkt alloc failure");
			}
		} else {
			unref = true;
			goto out;
		}
	} else {
		uint8_t fl = th_get(pkt)->th_flags;
		bool forget = ACK == fl || PSH == fl || (ACK | PSH) == fl ||
			RST & fl;

		pkt = forget ? tcp_slist(conn, &conn->send_queue, get,
					 struct net_pkt, next) :
			tcp_pkt_clone(pkt);
		if (!pkt) {
			NET_WARN("net_pkt alloc failure");
			goto out;
		}

		if (is_destination_local(pkt)) {
			local = true;
		}

		tcp_send(pkt);

		if (forget == false &&
		    !k_work_delayable_remaining_get(&conn->send_timer)) {
			conn->send_retries = tcp_retries;
			conn->in_retransmission = true;
		}
	}

	if (conn->in_retransmission) {
		k_work_reschedule_for_queue(&tcp_work_q, &conn->send_timer,
					    K_MSEC(TCP_RTO_MS));
	} else if (local && !sys_slist_is_empty(&conn->send_queue)) {
		k_work_reschedule_for_queue(&tcp_work_q, &conn->send_timer,
					    K_NO_WAIT);
	}

out:
	return unref;
}

static void tcp_send_process(struct k_work *work)
{
	struct k_work_delayable *dwork = k_work_delayable_from_work(work);
	struct tcp *conn = CONTAINER_OF(dwork, struct tcp, send_timer);
	bool unref;

	k_mutex_lock(&conn->lock, K_FOREVER);

	unref = tcp_send_process_no_lock(conn);

	k_mutex_unlock(&conn->lock);

	if (unref) {
		tcp_conn_close(conn, -ETIMEDOUT);
	}
}

static void tcp_send_timer_cancel(struct tcp *conn)
{
	if (conn->in_retransmission == false) {
		return;
	}

	k_work_cancel_delayable(&conn->send_timer);

	{
		struct net_pkt *pkt = tcp_slist(conn, &conn->send_queue, get,
						struct net_pkt, next);
		if (pkt) {
			NET_DBG("%s", tcp_th(pkt));
			tcp_pkt_unref(pkt);
		}
	}

	if (sys_slist_is_empty(&conn->send_queue)) {
		conn->in_retransmission = false;
	} else {
		conn->send_retries = tcp_retries;
		k_work_reschedule_for_queue(&tcp_work_q, &conn->send_timer,
					    K_MSEC(TCP_RTO_MS));
	}
}

#if defined(CONFIG_NET_TCP_IPV6_ND_REACHABILITY_HINT)

static void tcp_nbr_reachability_hint(struct tcp *conn)
{
	int64_t now;
	struct net_if *iface;

	if (net_context_get_family(conn->context) != AF_INET6) {
		return;
	}

	now = k_uptime_get();
	iface = net_context_get_iface(conn->context);

	/* Ensure that Neighbor Reachability hints are rate-limited (using a
	 * threshold of half the reachable time).
	 */
	if ((now - conn->last_nd_hint_time) > (net_if_ipv6_get_reachable_time(iface) / 2)) {
		net_ipv6_nbr_reachability_hint(iface, &conn->dst.sin6.sin6_addr);
		conn->last_nd_hint_time = now;
	}
}

#else /* CONFIG_NET_TCP_IPV6_ND_REACHABILITY_HINT */

#define tcp_nbr_reachability_hint(...)

#endif /* CONFIG_NET_TCP_IPV6_ND_REACHABILITY_HINT */

static const char *tcp_state_to_str(enum tcp_state state, bool prefix)
{
	const char *s = NULL;
#define _(_x) case _x: do { s = #_x; goto out; } while (0)
	switch (state) {
	_(TCP_UNUSED);
	_(TCP_LISTEN);
	_(TCP_SYN_SENT);
	_(TCP_SYN_RECEIVED);
	_(TCP_ESTABLISHED);
	_(TCP_FIN_WAIT_1);
	_(TCP_FIN_WAIT_2);
	_(TCP_CLOSE_WAIT);
	_(TCP_CLOSING);
	_(TCP_LAST_ACK);
	_(TCP_TIME_WAIT);
	_(TCP_CLOSED);
	}
#undef _
	NET_ASSERT(s, "Invalid TCP state: %u", state);
out:
	return prefix ? s : (s + 4);
}

static const char *tcp_conn_state(struct tcp *conn, struct net_pkt *pkt)
{
#define BUF_SIZE 160
	static char buf[BUF_SIZE];

	snprintk(buf, BUF_SIZE, "%s [%s Seq=%u Ack=%u]", pkt ? tcp_th(pkt) : "",
			tcp_state_to_str(conn->state, false),
			conn->seq, conn->ack);
#undef BUF_SIZE
	return buf;
}

static uint8_t *tcp_options_get(struct net_pkt *pkt, int tcp_options_len,
				uint8_t *buf, size_t buf_len)
{
	struct net_pkt_cursor backup;
	int ret;

	net_pkt_cursor_backup(pkt, &backup);
	net_pkt_cursor_init(pkt);
	net_pkt_skip(pkt, net_pkt_ip_hdr_len(pkt) + net_pkt_ip_opts_len(pkt) +
		     sizeof(struct tcphdr));
	ret = net_pkt_read(pkt, buf, MIN(tcp_options_len, buf_len));
	if (ret < 0) {
		buf = NULL;
	}

	net_pkt_cursor_restore(pkt, &backup);

	return buf;
}

static bool tcp_options_check(struct tcp_options *recv_options,
			      struct net_pkt *pkt, ssize_t len)
{
	uint8_t options_buf[40]; /* TCP header max options size is 40 */
	bool result = (len > 0) && ((len % 4) == 0);
	uint8_t *options = tcp_options_get(pkt, len, options_buf,
					   sizeof(options_buf));
	uint8_t opt, opt_len;

	NET_DBG("len=%zd", len);

	recv_options->mss_found = false;
	recv_options->wnd_found = false;

	for ( ; options && len >= 1; options += opt_len, len -= opt_len) {
		opt = options[0];

		if (opt == NET_TCP_END_OPT) {
			break;
		} else if (opt == NET_TCP_NOP_OPT) {
			opt_len = 1;
			continue;
		} else {
			if (len < 2) { /* Only END and NOP can have length 1 */
				NET_ERR("Illegal option %d with length %zd",
					opt, len);
				result = false;
				break;
			}
			opt_len = options[1];
		}

		NET_DBG("opt: %hu, opt_len: %hu",
			(uint16_t)opt, (uint16_t)opt_len);

		if (opt_len < 2 || opt_len > len) {
			result = false;
			break;
		}

		switch (opt) {
		case NET_TCP_MSS_OPT:
			if (opt_len != 4) {
				result = false;
				goto end;
			}

			recv_options->mss =
				ntohs(UNALIGNED_GET((uint16_t *)(options + 2)));
			recv_options->mss_found = true;
			NET_DBG("MSS=%hu", recv_options->mss);
			break;
		case NET_TCP_WINDOW_SCALE_OPT:
			if (opt_len != 3) {
				result = false;
				goto end;
			}

			/* Store the shift count (the option's third byte),
			 * not the option kind.
			 */
			recv_options->window = options[2];
			recv_options->wnd_found = true;
			break;
		default:
			continue;
		}
	}
end:
	if (false == result) {
		NET_WARN("Invalid TCP options");
	}

	return result;
}
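
/* Parsing example (illustrative): the option bytes
 * { 0x02, 0x04, 0x05, 0xb4, 0x01, 0x03, 0x03, 0x07 } decode as an MSS
 * option (kind 2, length 4, value 0x05b4 = 1460), a NOP pad (kind 1)
 * and a window scale option (kind 3, length 3, shift count 7), setting
 * both mss_found and wnd_found.
 */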

static bool tcp_short_window(struct tcp *conn)
{
	int32_t threshold = MIN(conn_mss(conn), conn->recv_win_max / 2);

	if (conn->recv_win > threshold) {
		return false;
	}

	return true;
}

static bool tcp_need_window_update(struct tcp *conn)
{
	int32_t threshold = MAX(conn_mss(conn), conn->recv_win_max / 2);

	/* In case the window is full again, and we didn't send a window
	 * update since the window size dropped below the threshold, do it
	 * now.
	 */
	return (conn->recv_win == conn->recv_win_max &&
		conn->recv_win_sent <= threshold);
}

/**
 * @brief Update TCP receive window
 *
 * @param conn TCP network connection
 * @param delta Receive window delta
 *
 * @return 0 on success; the new window size is clamped to the range
 *         [0, recv_win_max]
 */
static int tcp_update_recv_wnd(struct tcp *conn, int32_t delta)
{
	int32_t new_win;
	bool short_win_before;
	bool short_win_after;

	new_win = conn->recv_win + delta;
	if (new_win < 0) {
		new_win = 0;
	} else if (new_win > conn->recv_win_max) {
		new_win = conn->recv_win_max;
	}

	short_win_before = tcp_short_window(conn);

	conn->recv_win = new_win;

	short_win_after = tcp_short_window(conn);

	if (((short_win_before && !short_win_after) ||
	     tcp_need_window_update(conn)) &&
	    conn->state == TCP_ESTABLISHED) {
		k_work_cancel_delayable(&conn->ack_timer);
		tcp_out(conn, ACK);
	}

	return 0;
}
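
/* Illustrative flow (not from the source): with recv_win_max = 4096 and
 * MSS = 1460, tcp_short_window() reports a short window once recv_win
 * drops to min(1460, 4096 / 2) = 1460 bytes or less. The window shrinks
 * as data is queued for the application (negative delta from
 * tcp_data_get()) and grows as the socket layer consumes it (positive
 * delta); when it recovers past the threshold, an immediate ACK
 * advertises the reopened window to the peer.
 */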

static size_t tcp_check_pending_data(struct tcp *conn, struct net_pkt *pkt,
				     size_t len)
{
	size_t pending_len = 0;

	if (CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT &&
	    !net_pkt_is_empty(conn->queue_recv_data)) {
		/* Some potential cases:
		 * Note: MI = MAX_INT
		 * Packet | Queued| End off   | Gap size | Required handling
		 * Seq|Len|Seq|Len|           |          |
		 *  3 | 3 | 6 | 4 | 3+3-6=  0 | 6-3-3=0  | Append
		 *  3 | 4 | 6 | 4 | 3+4-6 = 1 | 6-3-4=-1 | Append, pull from queue
		 *  3 | 7 | 6 | 4 | 3+7-6 = 4 | 6-3-7=-4 | Drop queued data
		 *  3 | 8 | 6 | 4 | 3+8-6 = 5 | 6-3-8=-5 | Drop queued data
		 *  6 | 5 | 6 | 4 | 6+5-6 = 5 | 6-6-5=-5 | Drop queued data
		 *  6 | 4 | 6 | 4 | 6+4-6 = 4 | 6-6-4=-4 | Drop queued data / packet
		 * 10 | 2 | 6 | 4 | 10+2-6= 6 | 6-10-2=-6| Should not happen, dropping queue
		 *  7 | 4 | 6 | 4 | 7+4-6 = 5 | 6-7-4=-5 | Should not happen, dropping queue
		 * 11 | 2 | 6 | 4 | 11+2-6= 7 | 6-11-2=-7| Should not happen, dropping queue
		 *  2 | 3 | 6 | 4 | 2+3-6= MI | 6-2-3=1  | Keep queued data
		 */
		struct tcphdr *th = th_get(pkt);
		uint32_t expected_seq = th_seq(th) + len;
		uint32_t pending_seq;
		int32_t gap_size;
		uint32_t end_offset;

		pending_seq = tcp_get_seq(conn->queue_recv_data->buffer);
		end_offset = expected_seq - pending_seq;
		gap_size = (int32_t)(pending_seq - th_seq(th) - ((uint32_t)len));
		pending_len = net_pkt_get_len(conn->queue_recv_data);
		if (end_offset < pending_len) {
			if (end_offset) {
				net_pkt_remove_tail(pkt, end_offset);
				pending_len -= end_offset;
			}

			NET_DBG("Found pending data seq %u len %zd",
				expected_seq, pending_len);

			net_buf_frag_add(pkt->buffer,
					 conn->queue_recv_data->buffer);
			conn->queue_recv_data->buffer = NULL;

			k_work_cancel_delayable(&conn->recv_queue_timer);
		} else {
			/* Check if the queued data is just a section of the
			 * incoming data
			 */
			if (gap_size <= 0) {
				net_buf_unref(conn->queue_recv_data->buffer);
				conn->queue_recv_data->buffer = NULL;

				k_work_cancel_delayable(&conn->recv_queue_timer);
			}

			pending_len = 0;
		}
	}

	return pending_len;
}

static enum net_verdict tcp_data_get(struct tcp *conn, struct net_pkt *pkt, size_t *len)
{
	enum net_verdict ret = NET_DROP;

	if (tcp_recv_cb) {
		tcp_recv_cb(conn, pkt);
		goto out;
	}

	if (conn->context->recv_cb) {
		/* If there is any out-of-order pending data, then pass it
		 * to the application here.
		 */
		*len += tcp_check_pending_data(conn, pkt, *len);

		net_pkt_cursor_init(pkt);
		net_pkt_set_overwrite(pkt, true);

		net_pkt_skip(pkt, net_pkt_get_len(pkt) - *len);

		tcp_update_recv_wnd(conn, -*len);
		if (*len > conn->recv_win_sent) {
			conn->recv_win_sent = 0;
		} else {
			conn->recv_win_sent -= *len;
		}

		/* Do not pass the data to the application while the TCP conn
		 * is locked, as a deadlock could occur if the app tried to
		 * send data while the conn is still held. Instead, place the
		 * received data in a fifo, which is flushed in tcp_in()
		 * after the conn is unlocked.
		 */
		k_fifo_put(&conn->recv_data, pkt);

		ret = NET_OK;
	}
 out:
	return ret;
}

static int tcp_finalize_pkt(struct net_pkt *pkt)
{
	net_pkt_cursor_init(pkt);

	if (IS_ENABLED(CONFIG_NET_IPV4) && net_pkt_family(pkt) == AF_INET) {
		return net_ipv4_finalize(pkt, IPPROTO_TCP);
	}

	if (IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6) {
		return net_ipv6_finalize(pkt, IPPROTO_TCP);
	}

	return -EINVAL;
}

static int tcp_header_add(struct tcp *conn, struct net_pkt *pkt, uint8_t flags,
			  uint32_t seq)
{
	NET_PKT_DATA_ACCESS_DEFINE(tcp_access, struct tcphdr);
	struct tcphdr *th;

	th = (struct tcphdr *)net_pkt_get_data(pkt, &tcp_access);
	if (!th) {
		return -ENOBUFS;
	}

	memset(th, 0, sizeof(struct tcphdr));

	UNALIGNED_PUT(conn->src.sin.sin_port, &th->th_sport);
	UNALIGNED_PUT(conn->dst.sin.sin_port, &th->th_dport);
	th->th_off = 5;

	if (conn->send_options.mss_found) {
		th->th_off++;
	}

	UNALIGNED_PUT(flags, &th->th_flags);
	UNALIGNED_PUT(htons(conn->recv_win), &th->th_win);
	UNALIGNED_PUT(htonl(seq), &th->th_seq);

	if (ACK & flags) {
		UNALIGNED_PUT(htonl(conn->ack), &th->th_ack);
	}

	return net_pkt_set_data(pkt, &tcp_access);
}

static int ip_header_add(struct tcp *conn, struct net_pkt *pkt)
{
	if (IS_ENABLED(CONFIG_NET_IPV4) && net_pkt_family(pkt) == AF_INET) {
		return net_context_create_ipv4_new(conn->context, pkt,
						&conn->src.sin.sin_addr,
						&conn->dst.sin.sin_addr);
	}

	if (IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6) {
		return net_context_create_ipv6_new(conn->context, pkt,
						&conn->src.sin6.sin6_addr,
						&conn->dst.sin6.sin6_addr);
	}

	return -EINVAL;
}

static int set_tcp_nodelay(struct tcp *conn, const void *value, size_t len)
{
	int no_delay_int;

	if (len != sizeof(int)) {
		return -EINVAL;
	}

	no_delay_int = *(int *)value;

	if ((no_delay_int < 0) || (no_delay_int > 1)) {
		return -EINVAL;
	}

	conn->tcp_nodelay = (bool)no_delay_int;

	return 0;
}

static int get_tcp_nodelay(struct tcp *conn, void *value, size_t *len)
{
	int no_delay_int = (int)conn->tcp_nodelay;

	*((int *)value) = no_delay_int;

	if (len) {
		*len = sizeof(int);
	}
	return 0;
}

static int net_tcp_set_mss_opt(struct tcp *conn, struct net_pkt *pkt)
{
	NET_PKT_DATA_ACCESS_DEFINE(mss_opt_access, struct tcp_mss_option);
	struct tcp_mss_option *mss;
	uint32_t recv_mss;

	mss = net_pkt_get_data(pkt, &mss_opt_access);
	if (!mss) {
		return -ENOBUFS;
	}

	recv_mss = net_tcp_get_supported_mss(conn);
	recv_mss |= (NET_TCP_MSS_OPT << 24) | (NET_TCP_MSS_SIZE << 16);

	UNALIGNED_PUT(htonl(recv_mss), (uint32_t *)mss);

	return net_pkt_set_data(pkt, &mss_opt_access);
}
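
/* Encoding example (illustrative): if net_tcp_get_supported_mss() returns
 * 1280, recv_mss becomes (2 << 24) | (4 << 16) | 1280 = 0x02040500, which
 * in network byte order is the canonical MSS option { 0x02, 0x04, 0x05,
 * 0x00 }: kind 2, length 4, value 1280.
 */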

static bool is_destination_local(struct net_pkt *pkt)
{
	if (IS_ENABLED(CONFIG_NET_IPV4) && net_pkt_family(pkt) == AF_INET) {
		if (net_ipv4_is_addr_loopback(
				(struct in_addr *)NET_IPV4_HDR(pkt)->dst) ||
		    net_ipv4_is_my_addr(
				(struct in_addr *)NET_IPV4_HDR(pkt)->dst)) {
			return true;
		}
	}

	if (IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6) {
		if (net_ipv6_is_addr_loopback(
				(struct in6_addr *)NET_IPV6_HDR(pkt)->dst) ||
		    net_ipv6_is_my_addr(
				(struct in6_addr *)NET_IPV6_HDR(pkt)->dst)) {
			return true;
		}
	}

	return false;
}

void net_tcp_reply_rst(struct net_pkt *pkt)
{
	NET_PKT_DATA_ACCESS_DEFINE(tcp_access_rst, struct tcphdr);
	struct tcphdr *th_pkt = th_get(pkt);
	struct tcphdr *th_rst;
	struct net_pkt *rst;
	int ret;

	if (th_pkt == NULL || (th_flags(th_pkt) & RST)) {
		/* Don't reply to a RST segment. */
		return;
	}

	rst = tcp_pkt_alloc_no_conn(pkt->iface, pkt->family,
				    sizeof(struct tcphdr));
	if (rst == NULL) {
		return;
	}

	/* IP header */
	if (IS_ENABLED(CONFIG_NET_IPV4) && net_pkt_family(pkt) == AF_INET) {
		ret = net_ipv4_create(rst,
				      (struct in_addr *)NET_IPV4_HDR(pkt)->dst,
				      (struct in_addr *)NET_IPV4_HDR(pkt)->src);
	} else if (IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6) {
		ret = net_ipv6_create(rst,
				      (struct in6_addr *)NET_IPV6_HDR(pkt)->dst,
				      (struct in6_addr *)NET_IPV6_HDR(pkt)->src);
	} else {
		ret = -EINVAL;
	}

	if (ret < 0) {
		goto err;
	}

	/* TCP header */
	th_rst = (struct tcphdr *)net_pkt_get_data(rst, &tcp_access_rst);
	if (th_rst == NULL) {
		goto err;
	}

	memset(th_rst, 0, sizeof(struct tcphdr));

	UNALIGNED_PUT(th_pkt->th_dport, &th_rst->th_sport);
	UNALIGNED_PUT(th_pkt->th_sport, &th_rst->th_dport);
	th_rst->th_off = 5;

	if (th_flags(th_pkt) & ACK) {
		UNALIGNED_PUT(RST, &th_rst->th_flags);
		UNALIGNED_PUT(th_pkt->th_ack, &th_rst->th_seq);
	} else {
		uint32_t ack = ntohl(th_pkt->th_seq) + tcp_data_len(pkt);

		UNALIGNED_PUT(RST | ACK, &th_rst->th_flags);
		UNALIGNED_PUT(htonl(ack), &th_rst->th_ack);
	}

	ret = net_pkt_set_data(rst, &tcp_access_rst);
	if (ret < 0) {
		goto err;
	}

	ret = tcp_finalize_pkt(rst);
	if (ret < 0) {
		goto err;
	}

	NET_DBG("%s", tcp_th(rst));

	tcp_send(rst);

	return;

err:
	tcp_pkt_unref(rst);
}
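
/* The above mirrors the RFC 793 reset rules: if the offending segment
 * carried an ACK, the reset borrows that ACK number as its sequence
 * number and sets only RST; otherwise the reset keeps sequence number
 * zero (from the zeroed header) and acknowledges the segment's sequence
 * number plus its data length with RST | ACK.
 */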

static int tcp_out_ext(struct tcp *conn, uint8_t flags, struct net_pkt *data,
		       uint32_t seq)
{
	size_t alloc_len = sizeof(struct tcphdr);
	struct net_pkt *pkt;
	int ret = 0;

	if (conn->send_options.mss_found) {
		alloc_len += sizeof(uint32_t);
	}

	pkt = tcp_pkt_alloc(conn, alloc_len);
	if (!pkt) {
		ret = -ENOBUFS;
		goto out;
	}

	if (data) {
		/* Append the data buffer to the pkt */
		net_pkt_append_buffer(pkt, data->buffer);
		data->buffer = NULL;
	}

	ret = ip_header_add(conn, pkt);
	if (ret < 0) {
		tcp_pkt_unref(pkt);
		goto out;
	}

	ret = tcp_header_add(conn, pkt, flags, seq);
	if (ret < 0) {
		tcp_pkt_unref(pkt);
		goto out;
	}

	if (conn->send_options.mss_found) {
		ret = net_tcp_set_mss_opt(conn, pkt);
		if (ret < 0) {
			tcp_pkt_unref(pkt);
			goto out;
		}
	}

	ret = tcp_finalize_pkt(pkt);
	if (ret < 0) {
		tcp_pkt_unref(pkt);
		goto out;
	}

	if (tcp_send_cb) {
		ret = tcp_send_cb(pkt);
		goto out;
	}

	sys_slist_append(&conn->send_queue, &pkt->next);

	if (flags & ACK) {
		conn->recv_win_sent = conn->recv_win;
	}

	if (is_destination_local(pkt)) {
		/* If the destination is local, we have to let the current
		 * thread finish any state-machine changes before sending
		 * the packet, or it might lead to state inconsistencies.
		 */
		k_work_schedule_for_queue(&tcp_work_q,
					  &conn->send_timer, K_NO_WAIT);
	} else if (tcp_send_process_no_lock(conn)) {
		tcp_conn_close(conn, -ETIMEDOUT);
	}
out:
	return ret;
}

static void tcp_out(struct tcp *conn, uint8_t flags)
{
	(void)tcp_out_ext(conn, flags, NULL /* no data */, conn->seq);
}

static int tcp_pkt_pull(struct net_pkt *pkt, size_t len)
{
	int total = net_pkt_get_len(pkt);
	int ret = 0;

	if (len > total) {
		ret = -EINVAL;
		goto out;
	}

	net_pkt_cursor_init(pkt);
	net_pkt_set_overwrite(pkt, true);
	net_pkt_pull(pkt, len);
	net_pkt_trim_buffer(pkt);
 out:
	return ret;
}

static int tcp_pkt_peek(struct net_pkt *to, struct net_pkt *from, size_t pos,
			size_t len)
{
	net_pkt_cursor_init(to);
	net_pkt_cursor_init(from);

	if (pos) {
		net_pkt_set_overwrite(from, true);
		net_pkt_skip(from, pos);
	}

	return net_pkt_copy(to, from, len);
}

static int tcp_pkt_append(struct net_pkt *pkt, const uint8_t *data, size_t len)
{
	size_t alloc_len = len;
	struct net_buf *buf = NULL;
	int ret = 0;

	if (pkt->buffer) {
		buf = net_buf_frag_last(pkt->buffer);

		if (len > net_buf_tailroom(buf)) {
			alloc_len -= net_buf_tailroom(buf);
		} else {
			alloc_len = 0;
		}
	}

	if (alloc_len > 0) {
		ret = net_pkt_alloc_buffer_raw(pkt, alloc_len,
					       TCP_PKT_ALLOC_TIMEOUT);
		if (ret < 0) {
			return -ENOBUFS;
		}
	}

	if (buf == NULL) {
		buf = pkt->buffer;
	}

	while (buf != NULL && len > 0) {
		size_t write_len = MIN(len, net_buf_tailroom(buf));

		net_buf_add_mem(buf, data, write_len);

		data += write_len;
		len -= write_len;
		buf = buf->frags;
	}

	NET_ASSERT(len == 0, "Not all bytes written");

	return ret;
}

static bool tcp_window_full(struct tcp *conn)
{
	bool window_full = (conn->send_data_total >= conn->send_win);

#ifdef CONFIG_NET_TCP_CONGESTION_AVOIDANCE
	window_full = window_full || (conn->send_data_total >= conn->ca.cwnd);
#endif

	if (window_full) {
		NET_DBG("conn: %p TX window_full", conn);
	}

	return window_full;
}

static int tcp_unsent_len(struct tcp *conn)
{
	int unsent_len;

	if (conn->unacked_len > conn->send_data_total) {
		NET_ERR("total=%zu, unacked_len=%d",
			conn->send_data_total, conn->unacked_len);
		unsent_len = -ERANGE;
		goto out;
	}

	unsent_len = conn->send_data_total - conn->unacked_len;
	if (conn->unacked_len >= conn->send_win) {
		unsent_len = 0;
	} else {
		unsent_len = MIN(unsent_len, conn->send_win - conn->unacked_len);

#ifdef CONFIG_NET_TCP_CONGESTION_AVOIDANCE
		if (conn->unacked_len >= conn->ca.cwnd) {
			unsent_len = 0;
		} else {
			unsent_len = MIN(unsent_len, conn->ca.cwnd - conn->unacked_len);
		}
#endif
	}
 out:
	return unsent_len;
}
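
/* Worked example (illustrative): with send_data_total = 5000,
 * unacked_len = 1000 and send_win = 2000, there are 4000 queued but
 * unsent bytes, yet only MIN(4000, 2000 - 1000) = 1000 bytes may be sent
 * before more ACKs arrive. With congestion avoidance enabled, the result
 * is additionally capped by cwnd - unacked_len.
 */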

static int tcp_send_data(struct tcp *conn)
{
	int ret = 0;
	int len;
	struct net_pkt *pkt;

	len = MIN(tcp_unsent_len(conn), conn_mss(conn));
	if (len < 0) {
		ret = len;
		goto out;
	}
	if (len == 0) {
		NET_DBG("conn: %p no data to send", conn);
		ret = -ENODATA;
		goto out;
	}

	pkt = tcp_pkt_alloc(conn, len);
	if (!pkt) {
		NET_ERR("conn: %p packet allocation failed, len=%d", conn, len);
		ret = -ENOBUFS;
		goto out;
	}

	ret = tcp_pkt_peek(pkt, conn->send_data, conn->unacked_len, len);
	if (ret < 0) {
		tcp_pkt_unref(pkt);
		ret = -ENOBUFS;
		goto out;
	}

	ret = tcp_out_ext(conn, PSH | ACK, pkt, conn->seq + conn->unacked_len);
	if (ret == 0) {
		conn->unacked_len += len;

		if (conn->data_mode == TCP_DATA_MODE_RESEND) {
			net_stats_update_tcp_resent(conn->iface, len);
			net_stats_update_tcp_seg_rexmit(conn->iface);
		} else {
			net_stats_update_tcp_sent(conn->iface, len);
			net_stats_update_tcp_seg_sent(conn->iface);
		}
	}

	/* The data we want to send has been moved to the send queue, so we
	 * can unref the head net_pkt. If there was an error, the packet
	 * needs to be removed anyway.
	 */
	tcp_pkt_unref(pkt);

	conn_send_data_dump(conn);

 out:
	return ret;
}

/* Send all queued but unsent data from send_data, packet by packet, until
 * the receiver's window is full (see the Nagle sketch after this function).
 */
static int tcp_send_queued_data(struct tcp *conn)
{
	int ret = 0;
	bool subscribe = false;

	if (conn->data_mode == TCP_DATA_MODE_RESEND) {
		goto out;
	}

	while (tcp_unsent_len(conn) > 0) {
		/* Implement Nagle's algorithm */
		if ((conn->tcp_nodelay == false) && (conn->unacked_len > 0)) {
			/* If there is already pending data */
			if (tcp_unsent_len(conn) < conn_mss(conn)) {
				/* The number of bytes to be transmitted is
				 * less than an MSS; skip transmission for now
				 * and wait until more data is available or
				 * all pending data has been acknowledged.
				 */
				break;
			}
		}

		ret = tcp_send_data(conn);
		if (ret < 0) {
			break;
		}
	}

	if (conn->send_data_total) {
		subscribe = true;
	}

	if (k_work_delayable_remaining_get(&conn->send_data_timer)) {
		subscribe = false;
	}

	if (subscribe) {
		conn->send_data_retries = 0;
		k_work_reschedule_for_queue(&tcp_work_q, &conn->send_data_timer,
					    K_MSEC(TCP_RTO_MS));
	}
 out:
	return ret;
}
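
/* Nagle example (illustrative): with tcp_nodelay == false, MSS = 1460
 * and 1000 unacknowledged bytes in flight, a new 100-byte write is held
 * back because 100 < MSS; it goes out once either a full segment's worth
 * of data accumulates or the outstanding bytes are acknowledged. Setting
 * TCP_NODELAY skips this batching at the cost of more small segments on
 * the wire.
 */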

static void tcp_cleanup_recv_queue(struct k_work *work)
{
	struct k_work_delayable *dwork = k_work_delayable_from_work(work);
	struct tcp *conn = CONTAINER_OF(dwork, struct tcp, recv_queue_timer);

	k_mutex_lock(&conn->lock, K_FOREVER);

	NET_DBG("Cleanup recv queue conn %p len %zd seq %u", conn,
		net_pkt_get_len(conn->queue_recv_data),
		tcp_get_seq(conn->queue_recv_data->buffer));

	net_buf_unref(conn->queue_recv_data->buffer);
	conn->queue_recv_data->buffer = NULL;

	k_mutex_unlock(&conn->lock);
}
1859 
tcp_resend_data(struct k_work * work)1860 static void tcp_resend_data(struct k_work *work)
1861 {
1862 	struct k_work_delayable *dwork = k_work_delayable_from_work(work);
1863 	struct tcp *conn = CONTAINER_OF(dwork, struct tcp, send_data_timer);
1864 	bool conn_unref = false;
1865 	int ret;
1866 	int exp_tcp_rto;
1867 
1868 	k_mutex_lock(&conn->lock, K_FOREVER);
1869 
1870 	NET_DBG("send_data_retries=%hu", conn->send_data_retries);
1871 
1872 	if (conn->send_data_retries >= tcp_retries) {
1873 		NET_DBG("conn: %p close, data retransmissions exceeded", conn);
1874 		conn_unref = true;
1875 		goto out;
1876 	}
1877 
1878 	if (IS_ENABLED(CONFIG_NET_TCP_CONGESTION_AVOIDANCE) &&
1879 	    (conn->send_data_retries == 0)) {
1880 		tcp_ca_timeout(conn);
1881 		if (tcp_window_full(conn)) {
1882 			(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
1883 		}
1884 	}
1885 
1886 	conn->data_mode = TCP_DATA_MODE_RESEND;
1887 	conn->unacked_len = 0;
1888 
1889 	ret = tcp_send_data(conn);
1890 	conn->send_data_retries++;
1891 	if (ret == 0) {
1892 		if (conn->in_close && conn->send_data_total == 0) {
1893 			NET_DBG("TCP connection in %s close, "
1894 				"not disposing yet (waiting %dms)",
1895 				"active", tcp_max_timeout_ms);
1896 			k_work_reschedule_for_queue(&tcp_work_q,
1897 						    &conn->fin_timer,
1898 						    FIN_TIMEOUT);
1899 
1900 			conn_state(conn, TCP_FIN_WAIT_1);
1901 
1902 			ret = tcp_out_ext(conn, FIN | ACK, NULL,
1903 					  conn->seq + conn->unacked_len);
1904 			if (ret == 0) {
1905 				conn_seq(conn, + 1);
1906 			}
1907 
1908 			keep_alive_timer_stop(conn);
1909 
1910 			goto out;
1911 		}
1912 	} else if (ret == -ENODATA) {
1913 		conn->data_mode = TCP_DATA_MODE_SEND;
1914 
1915 		goto out;
1916 	} else if (ret == -ENOBUFS) {
1917 		NET_ERR("TCP failed to allocate buffer in retransmission");
1918 	}
1919 
1920 	exp_tcp_rto = TCP_RTO_MS;
1921 	/* The last retransmit does not need the extended backoff delay */
1922 	if (conn->send_data_retries < tcp_retries) {
1923 		/* On every retransmit, the retransmission timeout increases by a factor of 1.5 */
1924 		for (int i = 0; i < conn->send_data_retries; i++) {
1925 			exp_tcp_rto += exp_tcp_rto >> 1;
1926 		}
1927 	}
1928 
1929 	k_work_reschedule_for_queue(&tcp_work_q, &conn->send_data_timer,
1930 				    K_MSEC(exp_tcp_rto));
1931 
1932  out:
1933 	k_mutex_unlock(&conn->lock);
1934 
1935 	if (conn_unref) {
1936 		tcp_conn_close(conn, -ETIMEDOUT);
1937 	}
1938 }
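
/* Worked example of the retransmission backoff above (illustrative,
 * assuming TCP_RTO_MS == 200 ms and tcp_retries == 4). The timer is
 * first armed with the plain RTO; after every retransmission the next
 * wait grows by a factor of 1.5 (exp_tcp_rto += exp_tcp_rto >> 1):
 *
 *   wait 200 ms -> retransmit #1 -> wait 300 ms -> retransmit #2
 *   -> wait 450 ms -> retransmit #3 -> wait 675 ms -> retransmit #4
 *   -> wait 200 ms -> retry count exceeded, connection closed.
 *
 * The final wait skips the scaling loop, as it only serves to trigger
 * the close.
 */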
1939 
1940 static void tcp_timewait_timeout(struct k_work *work)
1941 {
1942 	struct k_work_delayable *dwork = k_work_delayable_from_work(work);
1943 	struct tcp *conn = CONTAINER_OF(dwork, struct tcp, timewait_timer);
1944 
1945 	/* no need to acquire the conn->lock as there is nothing scheduled here */
1946 	NET_DBG("conn: %p %s", conn, tcp_conn_state(conn, NULL));
1947 
1948 	(void)tcp_conn_close(conn, -ETIMEDOUT);
1949 }
1950 
1951 static void tcp_establish_timeout(struct tcp *conn)
1952 {
1953 	NET_DBG("Did not receive %s in %dms", "ACK", ACK_TIMEOUT_MS);
1954 	NET_DBG("conn: %p %s", conn, tcp_conn_state(conn, NULL));
1955 
1956 	(void)tcp_conn_close(conn, -ETIMEDOUT);
1957 }
1958 
1959 static void tcp_fin_timeout(struct k_work *work)
1960 {
1961 	struct k_work_delayable *dwork = k_work_delayable_from_work(work);
1962 	struct tcp *conn = CONTAINER_OF(dwork, struct tcp, fin_timer);
1963 
1964 	/* no need to acquire the conn->lock as there is nothing scheduled here */
1965 	if (conn->state == TCP_SYN_RECEIVED) {
1966 		tcp_establish_timeout(conn);
1967 		return;
1968 	}
1969 
1970 	NET_DBG("Did not receive %s in %dms", "FIN", tcp_max_timeout_ms);
1971 	NET_DBG("conn: %p %s", conn, tcp_conn_state(conn, NULL));
1972 
1973 	(void)tcp_conn_close(conn, -ETIMEDOUT);
1974 }
1975 
1976 static void tcp_last_ack_timeout(struct k_work *work)
1977 {
1978 	struct k_work_delayable *dwork = k_work_delayable_from_work(work);
1979 	struct tcp *conn = CONTAINER_OF(dwork, struct tcp, fin_timer);
1980 
1981 	NET_DBG("Did not receive %s in %dms", "last ACK", LAST_ACK_TIMEOUT_MS);
1982 	NET_DBG("conn: %p %s", conn, tcp_conn_state(conn, NULL));
1983 
1984 	(void)tcp_conn_close(conn, -ETIMEDOUT);
1985 }
1986 
1987 static void tcp_setup_last_ack_timer(struct tcp *conn)
1988 {
1989 	/* In case the last ACK is lost, install a timer that will
1990 	 * close the connection. Reuse the fin_timer for this, as the
1991 	 * FIN handling cannot be done in this passive close state.
1992 	 * Instead of the default tcp_fin_timeout() function, use a
1993 	 * separate handler to catch this last-ACK case.
1994 	 */
1995 	k_work_init_delayable(&conn->fin_timer, tcp_last_ack_timeout);
1996 
1997 	NET_DBG("TCP connection in %s close, "
1998 		"not disposing yet (waiting %dms)",
1999 		"passive", LAST_ACK_TIMEOUT_MS);
2000 	k_work_reschedule_for_queue(&tcp_work_q,
2001 				    &conn->fin_timer,
2002 				    LAST_ACK_TIMEOUT);
2003 }
2004 
2005 static void tcp_cancel_last_ack_timer(struct tcp *conn)
2006 {
2007 	k_work_cancel_delayable(&conn->fin_timer);
2008 }
2009 
2010 #if defined(CONFIG_NET_TCP_KEEPALIVE)
2011 static void tcp_send_keepalive_probe(struct k_work *work)
2012 {
2013 	struct k_work_delayable *dwork = k_work_delayable_from_work(work);
2014 	struct tcp *conn = CONTAINER_OF(dwork, struct tcp, keepalive_timer);
2015 
2016 	if (conn->state != TCP_ESTABLISHED) {
2017 		NET_DBG("conn: %p TCP connection not established", conn);
2018 		return;
2019 	}
2020 
2021 	if (!conn->keep_alive) {
2022 		NET_DBG("conn: %p keepalive is not enabled", conn);
2023 		return;
2024 	}
2025 
2026 	conn->keep_cur++;
2027 	if (conn->keep_cur > conn->keep_cnt) {
2028 		NET_DBG("conn: %p keepalive probe failed multiple times",
2029 			conn);
2030 		tcp_conn_close(conn, -ETIMEDOUT);
2031 		return;
2032 	}
2033 
2034 	NET_DBG("conn: %p keepalive probe", conn);
2035 	k_work_reschedule_for_queue(&tcp_work_q, &conn->keepalive_timer,
2036 				    K_SECONDS(conn->keep_intvl));
2037 
2038 
2039 	(void)tcp_out_ext(conn, ACK, NULL, conn->seq - 1);
2040 }
2041 #endif /* CONFIG_NET_TCP_KEEPALIVE */
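
/* Minimal usage sketch for the keepalive machinery above (assumption:
 * the keepalive socket options are available in the configuration;
 * error handling omitted). The probe itself carries seq - 1, i.e. an
 * already-acknowledged sequence number, so the peer replies with a
 * plain ACK without the probe consuming any window space.
 *
 *   int one = 1, idle = 60, intvl = 10, cnt = 5;
 *
 *   zsock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
 *   zsock_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));
 *   zsock_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl));
 *   zsock_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt));
 */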
2042 
2043 static void tcp_send_zwp(struct k_work *work)
2044 {
2045 	struct k_work_delayable *dwork = k_work_delayable_from_work(work);
2046 	struct tcp *conn = CONTAINER_OF(dwork, struct tcp, persist_timer);
2047 
2048 	k_mutex_lock(&conn->lock, K_FOREVER);
2049 
2050 	(void)tcp_out_ext(conn, ACK, NULL, conn->seq - 1);
2051 
2052 	tcp_derive_rto(conn);
2053 
2054 	if (conn->send_win == 0) {
2055 		uint64_t timeout = TCP_RTO_MS;
2056 
2057 		/* Make sure the bitwise shift does not result in undefined behaviour */
2058 		if (conn->zwp_retries < 63) {
2059 			conn->zwp_retries++;
2060 		}
2061 
2062 		timeout <<= conn->zwp_retries;
2063 		if (timeout == 0 || timeout > ZWP_MAX_DELAY_MS) {
2064 			timeout = ZWP_MAX_DELAY_MS;
2065 		}
2066 
2067 		(void)k_work_reschedule_for_queue(
2068 			&tcp_work_q, &conn->persist_timer, K_MSEC(timeout));
2069 	}
2070 
2071 	k_mutex_unlock(&conn->lock);
2072 }
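
/* Worked example of the zero-window-probe backoff above (illustrative,
 * assuming TCP_RTO_MS == 200 ms). The probe interval doubles on every
 * retry and is capped at ZWP_MAX_DELAY_MS (120 s):
 *
 *   retry 1:  200 << 1  =    400 ms
 *   retry 2:  200 << 2  =    800 ms
 *   ...
 *   retry 10: 200 << 10 = 204800 ms -> capped to 120000 ms
 */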
2073 
2074 static void tcp_send_ack(struct k_work *work)
2075 {
2076 	struct k_work_delayable *dwork = k_work_delayable_from_work(work);
2077 	struct tcp *conn = CONTAINER_OF(dwork, struct tcp, ack_timer);
2078 
2079 	k_mutex_lock(&conn->lock, K_FOREVER);
2080 
2081 	tcp_out(conn, ACK);
2082 
2083 	k_mutex_unlock(&conn->lock);
2084 }
2085 
2086 static void tcp_conn_ref(struct tcp *conn)
2087 {
2088 	int ref_count = atomic_inc(&conn->ref_count) + 1;
2089 
2090 	NET_DBG("conn: %p, ref_count: %d", conn, ref_count);
2091 }
2092 
2093 static struct tcp *tcp_conn_alloc(void)
2094 {
2095 	struct tcp *conn = NULL;
2096 	int ret;
2097 
2098 	ret = k_mem_slab_alloc(&tcp_conns_slab, (void **)&conn, K_NO_WAIT);
2099 	if (ret) {
2100 		NET_ERR("Cannot allocate slab");
2101 		goto out;
2102 	}
2103 
2104 	memset(conn, 0, sizeof(*conn));
2105 
2106 	if (CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT) {
2107 		conn->queue_recv_data = tcp_rx_pkt_alloc(conn, 0);
2108 		if (conn->queue_recv_data == NULL) {
2109 			NET_ERR("Cannot allocate %s queue for conn %p", "recv",
2110 				conn);
2111 			goto fail;
2112 		}
2113 	}
2114 
2115 	conn->send_data = tcp_pkt_alloc(conn, 0);
2116 	if (conn->send_data == NULL) {
2117 		NET_ERR("Cannot allocate %s queue for conn %p", "send", conn);
2118 		goto fail;
2119 	}
2120 
2121 	k_mutex_init(&conn->lock);
2122 	k_fifo_init(&conn->recv_data);
2123 	k_sem_init(&conn->connect_sem, 0, K_SEM_MAX_LIMIT);
2124 	k_sem_init(&conn->tx_sem, 1, 1);
2125 
2126 	conn->in_connect = false;
2127 	conn->state = TCP_LISTEN;
2128 	conn->recv_win_max = tcp_rx_window;
2129 	conn->recv_win = conn->recv_win_max;
2130 	conn->recv_win_sent = conn->recv_win_max;
2131 	conn->send_win_max = MAX(tcp_tx_window, NET_IPV6_MTU);
2132 	conn->send_win = conn->send_win_max;
2133 	conn->tcp_nodelay = false;
2134 	conn->addr_ref_done = false;
2135 #ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
2136 	conn->dup_ack_cnt = 0;
2137 #endif
2138 #ifdef CONFIG_NET_TCP_CONGESTION_AVOIDANCE
2139 	/* Initially set the congestion window to its max size, since the MSS
2140 	 * only becomes available once the connection is established.
2141 	 */
2142 	conn->ca.cwnd = UINT16_MAX;
2143 #endif
2144 
2145 	/* The ISN value will be set when we get the connection attempt or
2146 	 * when trying to create a connection.
2147 	 */
2148 	conn->seq = 0U;
2149 
2150 	sys_slist_init(&conn->send_queue);
2151 
2152 	k_work_init_delayable(&conn->send_timer, tcp_send_process);
2153 	k_work_init_delayable(&conn->timewait_timer, tcp_timewait_timeout);
2154 	k_work_init_delayable(&conn->fin_timer, tcp_fin_timeout);
2155 	k_work_init_delayable(&conn->send_data_timer, tcp_resend_data);
2156 	k_work_init_delayable(&conn->recv_queue_timer, tcp_cleanup_recv_queue);
2157 	k_work_init_delayable(&conn->persist_timer, tcp_send_zwp);
2158 	k_work_init_delayable(&conn->ack_timer, tcp_send_ack);
2159 	k_work_init(&conn->conn_release, tcp_conn_release);
2160 	keep_alive_timer_init(conn);
2161 
2162 	tcp_conn_ref(conn);
2163 
2164 	k_mutex_lock(&tcp_lock, K_FOREVER);
2165 	sys_slist_append(&tcp_conns, &conn->next);
2166 	k_mutex_unlock(&tcp_lock);
2167 out:
2168 	NET_DBG("conn: %p", conn);
2169 
2170 	return conn;
2171 
2172 fail:
2173 	if (CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT && conn->queue_recv_data) {
2174 		tcp_pkt_unref(conn->queue_recv_data);
2175 		conn->queue_recv_data = NULL;
2176 	}
2177 
2178 	k_mem_slab_free(&tcp_conns_slab, (void *)conn);
2179 	return NULL;
2180 }
2181 
2182 int net_tcp_get(struct net_context *context)
2183 {
2184 	int ret = 0;
2185 	struct tcp *conn;
2186 
2187 	conn = tcp_conn_alloc();
2188 	if (conn == NULL) {
2189 		ret = -ENOMEM;
2190 		return ret;
2191 	}
2192 
2193 	/* Mutually link the net_context and tcp connection */
2194 	conn->context = context;
2195 	context->tcp = conn;
2196 
2197 	return ret;
2198 }
2199 
2200 static bool tcp_endpoint_cmp(union tcp_endpoint *ep, struct net_pkt *pkt,
2201 			     enum pkt_addr which)
2202 {
2203 	union tcp_endpoint ep_tmp;
2204 
2205 	if (tcp_endpoint_set(&ep_tmp, pkt, which) < 0) {
2206 		return false;
2207 	}
2208 
2209 	return !memcmp(ep, &ep_tmp, tcp_endpoint_len(ep->sa.sa_family));
2210 }
2211 
2212 static bool tcp_conn_cmp(struct tcp *conn, struct net_pkt *pkt)
2213 {
2214 	return tcp_endpoint_cmp(&conn->src, pkt, TCP_EP_DST) &&
2215 		tcp_endpoint_cmp(&conn->dst, pkt, TCP_EP_SRC);
2216 }
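
/* Example of the matching above: the packet's destination endpoint must
 * equal the connection's source and the packet's source must equal the
 * connection's destination. A connection with src 192.0.2.1:4242 and
 * dst 198.51.100.2:80 therefore matches a segment sent from
 * 198.51.100.2:80 to 192.0.2.1:4242, and nothing else.
 */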
2217 
2218 static struct tcp *tcp_conn_search(struct net_pkt *pkt)
2219 {
2220 	bool found = false;
2221 	struct tcp *conn;
2222 	struct tcp *tmp;
2223 
2224 	k_mutex_lock(&tcp_lock, K_FOREVER);
2225 
2226 	SYS_SLIST_FOR_EACH_CONTAINER_SAFE(&tcp_conns, conn, tmp, next) {
2227 		found = tcp_conn_cmp(conn, pkt);
2228 		if (found) {
2229 			break;
2230 		}
2231 	}
2232 
2233 	k_mutex_unlock(&tcp_lock);
2234 
2235 	return found ? conn : NULL;
2236 }
2237 
2238 static struct tcp *tcp_conn_new(struct net_pkt *pkt);
2239 
2240 static enum net_verdict tcp_recv(struct net_conn *net_conn,
2241 				 struct net_pkt *pkt,
2242 				 union net_ip_header *ip,
2243 				 union net_proto_header *proto,
2244 				 void *user_data)
2245 {
2246 	struct tcp *conn;
2247 	struct tcphdr *th;
2248 	enum net_verdict verdict = NET_DROP;
2249 
2250 	ARG_UNUSED(net_conn);
2251 	ARG_UNUSED(proto);
2252 
2253 	conn = tcp_conn_search(pkt);
2254 	if (conn) {
2255 		goto in;
2256 	}
2257 
2258 	th = th_get(pkt);
2259 
2260 	if (th_flags(th) & SYN && !(th_flags(th) & ACK)) {
2261 		struct tcp *conn_old = ((struct net_context *)user_data)->tcp;
2262 
2263 		conn = tcp_conn_new(pkt);
2264 		if (!conn) {
2265 			NET_ERR("Cannot allocate a new TCP connection");
2266 			goto in;
2267 		}
2268 
2269 		conn->accepted_conn = conn_old;
2270 	}
2271 in:
2272 	if (conn) {
2273 		verdict = tcp_in(conn, pkt);
2274 	} else {
2275 		net_tcp_reply_rst(pkt);
2276 	}
2277 
2278 	return verdict;
2279 }
2280 
2281 #if defined(CONFIG_NET_TCP_ISN_RFC6528)
2282 
2283 static uint32_t seq_scale(uint32_t seq)
2284 {
2285 	return seq + (k_ticks_to_ns_floor32(k_uptime_ticks()) >> 6);
2286 }
2287 
2288 static uint8_t unique_key[16]; /* 128-bit secret key; RFC 6528 suggests MD5, SHA-256 is used here */
2289 
2290 static uint32_t tcpv6_init_isn(struct in6_addr *saddr,
2291 			       struct in6_addr *daddr,
2292 			       uint16_t sport,
2293 			       uint16_t dport)
2294 {
2295 	struct {
2296 		uint8_t key[sizeof(unique_key)];
2297 		struct in6_addr saddr;
2298 		struct in6_addr daddr;
2299 		uint16_t sport;
2300 		uint16_t dport;
2301 	} buf = {
2302 		.saddr = *saddr,
2303 		.daddr = *daddr,
2304 		.sport = sport,
2305 		.dport = dport
2306 	};
2307 
2308 	uint8_t hash[16];
2309 	size_t hash_len;
2310 	static bool once;
2311 
2312 	if (!once) {
2313 		sys_csrand_get(unique_key, sizeof(unique_key));
2314 		once = true;
2315 	}
2316 
2317 	memcpy(buf.key, unique_key, sizeof(buf.key));
2318 
2319 	psa_hash_compute(PSA_ALG_SHA_256, (const unsigned char *)&buf, sizeof(buf),
2320 			 hash, sizeof(hash), &hash_len);
2321 
2322 	return seq_scale(UNALIGNED_GET((uint32_t *)&hash[0]));
2323 }
2324 
2325 static uint32_t tcpv4_init_isn(struct in_addr *saddr,
2326 			       struct in_addr *daddr,
2327 			       uint16_t sport,
2328 			       uint16_t dport)
2329 {
2330 	struct {
2331 		uint8_t key[sizeof(unique_key)];
2332 		struct in_addr saddr;
2333 		struct in_addr daddr;
2334 		uint16_t sport;
2335 		uint16_t dport;
2336 	} buf = {
2337 		.saddr = *saddr,
2338 		.daddr = *daddr,
2339 		.sport = sport,
2340 		.dport = dport
2341 	};
2342 
2343 	uint8_t hash[16];
2344 	size_t hash_len;
2345 	static bool once;
2346 
2347 	if (!once) {
2348 		sys_csrand_get(unique_key, sizeof(unique_key));
2349 		once = true;
2350 	}
2351 
2352 	memcpy(buf.key, unique_key, sizeof(buf.key));
2353 
2354 
2355 	psa_hash_compute(PSA_ALG_SHA_256, (const unsigned char *)&buf, sizeof(buf),
2356 			 hash, sizeof(hash), &hash_len);
2357 
2358 	return seq_scale(UNALIGNED_GET((uint32_t *)&hash[0]));
2359 }
2360 
2361 #else
2362 
2363 #define tcpv6_init_isn(...) (0UL)
2364 #define tcpv4_init_isn(...) (0UL)
2365 
2366 #endif /* CONFIG_NET_TCP_ISN_RFC6528 */
2367 
2368 static uint32_t tcp_init_isn(struct sockaddr *saddr, struct sockaddr *daddr)
2369 {
2370 	if (IS_ENABLED(CONFIG_NET_TCP_ISN_RFC6528)) {
2371 		if (IS_ENABLED(CONFIG_NET_IPV6) &&
2372 		    saddr->sa_family == AF_INET6) {
2373 			return tcpv6_init_isn(&net_sin6(saddr)->sin6_addr,
2374 					      &net_sin6(daddr)->sin6_addr,
2375 					      net_sin6(saddr)->sin6_port,
2376 					      net_sin6(daddr)->sin6_port);
2377 		} else if (IS_ENABLED(CONFIG_NET_IPV4) &&
2378 			   saddr->sa_family == AF_INET) {
2379 			return tcpv4_init_isn(&net_sin(saddr)->sin_addr,
2380 					      &net_sin(daddr)->sin_addr,
2381 					      net_sin(saddr)->sin_port,
2382 					      net_sin(daddr)->sin_port);
2383 		}
2384 	}
2385 
2386 	return sys_rand32_get();
2387 }
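
/* The RFC 6528 scheme implemented above, in short:
 *
 *   ISN = M + F(localip, localport, remoteip, remoteport, secretkey)
 *
 * where M is a clock value (derived from the system uptime in
 * seq_scale()) and F is a keyed hash (SHA-256 here; the RFC suggests
 * MD5). Without CONFIG_NET_TCP_ISN_RFC6528, a plain random number is
 * used instead.
 */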
2388 
2389 /* Create a new TCP connection; as part of it, create and register
2390  * a net_context.
2391  */
2392 static struct tcp *tcp_conn_new(struct net_pkt *pkt)
2393 {
2394 	struct tcp *conn = NULL;
2395 	struct net_context *context = NULL;
2396 	sa_family_t af = net_pkt_family(pkt);
2397 	struct sockaddr local_addr = { 0 };
2398 	int ret;
2399 
2400 	ret = net_context_get(af, SOCK_STREAM, IPPROTO_TCP, &context);
2401 	if (ret < 0) {
2402 		NET_ERR("net_context_get(): %d", ret);
2403 		goto err;
2404 	}
2405 
2406 	conn = context->tcp;
2407 	conn->iface = pkt->iface;
2408 	tcp_derive_rto(conn);
2409 
2410 	net_context_set_family(conn->context, net_pkt_family(pkt));
2411 
2412 	if (tcp_endpoint_set(&conn->dst, pkt, TCP_EP_SRC) < 0) {
2413 		net_context_put(context);
2414 		conn = NULL;
2415 		goto err;
2416 	}
2417 
2418 	if (tcp_endpoint_set(&conn->src, pkt, TCP_EP_DST) < 0) {
2419 		net_context_put(context);
2420 		conn = NULL;
2421 		goto err;
2422 	}
2423 
2424 	NET_DBG("conn: src: %s, dst: %s",
2425 		net_sprint_addr(conn->src.sa.sa_family,
2426 				(const void *)&conn->src.sin.sin_addr),
2427 		net_sprint_addr(conn->dst.sa.sa_family,
2428 				(const void *)&conn->dst.sin.sin_addr));
2429 
2430 	memcpy(&context->remote, &conn->dst, sizeof(context->remote));
2431 	context->flags |= NET_CONTEXT_REMOTE_ADDR_SET;
2432 
2433 	net_sin_ptr(&context->local)->sin_family = af;
2434 
2435 	local_addr.sa_family = net_context_get_family(context);
2436 
2437 	if (IS_ENABLED(CONFIG_NET_IPV6) &&
2438 	    net_context_get_family(context) == AF_INET6) {
2439 		net_ipaddr_copy(&net_sin6(&local_addr)->sin6_addr,
2440 				&conn->src.sin6.sin6_addr);
2441 	} else if (IS_ENABLED(CONFIG_NET_IPV4) &&
2442 		   net_context_get_family(context) == AF_INET) {
2443 		net_ipaddr_copy(&net_sin(&local_addr)->sin_addr,
2444 				&conn->src.sin.sin_addr);
2445 	}
2446 
2447 	ret = net_context_bind(context, &local_addr, sizeof(local_addr));
2448 	if (ret < 0) {
2449 		NET_DBG("Cannot bind accepted context, connection reset");
2450 		net_context_put(context);
2451 		conn = NULL;
2452 		goto err;
2453 	}
2454 
2455 	/* The newly created context object for the new TCP client connection needs
2456 	 * all four parameters of the tuple (local address, local port, remote
2457 	 * address, remote port) to be properly identified. Remote address and port
2458 	 * are already copied above from conn->dst. The call to net_context_bind
2459 	 * with the prepared local_addr further copies the local address. However,
2460 	 * this call won't copy the local port, as the bind would then fail due to
2461 	 * an address/port reuse without the REUSEPORT option enabled for both
2462 	 * connections. Therefore, we copy the port after the bind call.
2463 	 * It is safe to bind to this address/port combination, as the new TCP
2464 	 * client connection is separated from the local listening connection
2465 	 * by the specified remote address and remote port.
2466 	 */
2467 	if (IS_ENABLED(CONFIG_NET_IPV6) &&
2468 	    net_context_get_family(context) == AF_INET6) {
2469 		net_sin6_ptr(&context->local)->sin6_port = conn->src.sin6.sin6_port;
2470 	} else if (IS_ENABLED(CONFIG_NET_IPV4) &&
2471 		   net_context_get_family(context) == AF_INET) {
2472 		net_sin_ptr(&context->local)->sin_port = conn->src.sin.sin_port;
2473 	}
2474 
2475 	if (!(IS_ENABLED(CONFIG_NET_TEST_PROTOCOL) ||
2476 	      IS_ENABLED(CONFIG_NET_TEST))) {
2477 		conn->seq = tcp_init_isn(&local_addr, &context->remote);
2478 	}
2479 
2480 	NET_DBG("context: local: %s, remote: %s",
2481 		net_sprint_addr(local_addr.sa_family,
2482 				(const void *)&net_sin(&local_addr)->sin_addr),
2483 		net_sprint_addr(context->remote.sa_family,
2484 				(const void *)&net_sin(&context->remote)->sin_addr));
2485 
2486 	ret = net_conn_register(IPPROTO_TCP, af,
2487 				&context->remote, &local_addr,
2488 				ntohs(conn->dst.sin.sin_port),/* remote port */
2489 				ntohs(conn->src.sin.sin_port),/* local port */
2490 				context, tcp_recv, context,
2491 				&context->conn_handler);
2492 	if (ret < 0) {
2493 		NET_ERR("net_conn_register(): %d", ret);
2494 		net_context_put(context);
2495 		conn = NULL;
2496 		goto err;
2497 	}
2498 
2499 	net_if_addr_ref(conn->iface, conn->dst.sa.sa_family,
2500 			conn->src.sa.sa_family == AF_INET ?
2501 			(const void *)&conn->src.sin.sin_addr :
2502 			(const void *)&conn->src.sin6.sin6_addr);
2503 	conn->addr_ref_done = true;
2504 
2505 err:
2506 	if (!conn) {
2507 		net_stats_update_tcp_seg_conndrop(net_pkt_iface(pkt));
2508 	}
2509 
2510 	return conn;
2511 }
2512 
2513 static bool tcp_validate_seq(struct tcp *conn, struct tcphdr *hdr)
2514 {
2515 	return (net_tcp_seq_cmp(th_seq(hdr), conn->ack) >= 0) &&
2516 		(net_tcp_seq_cmp(th_seq(hdr), conn->ack + conn->recv_win) < 0);
2517 }
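
/* Example of the acceptance window above: with conn->ack == 1000 and
 * conn->recv_win == 500, segments with seq in [1000, 1500) are accepted,
 * while seq 999 (old data) or seq 1500 (beyond the window) are rejected.
 * net_tcp_seq_cmp() keeps the comparison correct across sequence number
 * wrap-around.
 */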
2518 
2519 static int32_t tcp_compute_new_length(struct tcp *conn, struct tcphdr *hdr, size_t len,
2520 				      bool fin_received)
2521 {
2522 	int32_t new_len = 0;
2523 
2524 	if (len > 0) {
2525 		/* Cases:
2526 		 * - Data already received earlier: new_len <= 0
2527 		 * - Partially new data: new_len > 0
2528 		 * - Out of order data: new_len > 0,
2529 		 *   should be checked by sequence number
2530 		 */
2531 		new_len = (int32_t)(len) - net_tcp_seq_cmp(conn->ack, th_seq(hdr));
2532 		if (fin_received) {
2533 			/* Add one additional byte, as the FIN flag consumes one sequence number */
2534 			new_len++;
2535 		}
2536 	}
2537 	return new_len;
2538 }
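
/* Worked example for the computation above: with conn->ack == 105,
 * th_seq(hdr) == 100 and len == 10, the first 5 bytes were already
 * acknowledged, so new_len = 10 - (105 - 100) = 5 new bytes. A fully
 * duplicated segment (e.g. seq 90, len 10) yields new_len <= 0.
 */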
2539 
2540 static enum tcp_state tcp_enter_time_wait(struct tcp *conn)
2541 {
2542 	tcp_send_timer_cancel(conn);
2543 	/* Entering TIME-WAIT, so cancel the FIN timer and start the TIME-WAIT timer */
2544 	k_work_cancel_delayable(&conn->fin_timer);
2545 	k_work_reschedule_for_queue(
2546 		&tcp_work_q, &conn->timewait_timer,
2547 		K_MSEC(CONFIG_NET_TCP_TIME_WAIT_DELAY));
2548 	return TCP_TIME_WAIT;
2549 }
2550 
2551 static bool check_seq_list(struct net_buf *buf)
2552 {
2553 	struct net_buf *last = NULL;
2554 	struct net_buf *tmp = buf;
2555 	uint32_t seq;
2556 	uint32_t next_seq = 0;
2557 	bool result = true;
2558 
2559 	while (tmp) {
2560 		seq = tcp_get_seq(tmp);
2561 
2562 		NET_DBG("buf %p seq %u len %d", tmp, seq, tmp->len);
2563 
2564 		if (last != NULL) {
2565 			if (next_seq != seq) {
2566 				result = false;
2567 			}
2568 		}
2569 
2570 		next_seq = seq + tmp->len;
2571 		last = tmp;
2572 		tmp = tmp->frags;
2573 	}
2574 	return result;
2575 }
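
/* Example of the check above: fragments with (seq 100, len 50),
 * (seq 150, len 30) and (seq 180, len 20) form a contiguous run and
 * pass; if the middle fragment started at seq 151 instead, the list
 * would be flagged as non-sequential.
 */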
2576 
2577 static void tcp_queue_recv_data(struct tcp *conn, struct net_pkt *pkt,
2578 				size_t len, uint32_t seq)
2579 {
2580 	uint32_t seq_start = seq;
2581 	bool inserted = false;
2582 	struct net_buf *tmp;
2583 
2584 	NET_DBG("conn: %p len %zd seq %u ack %u", conn, len, seq, conn->ack);
2585 
2586 	tmp = pkt->buffer;
2587 
2588 	tcp_set_seq(tmp, seq);
2589 	seq += tmp->len;
2590 	tmp = tmp->frags;
2591 
2592 	while (tmp) {
2593 		tcp_set_seq(tmp, seq);
2594 		seq += tmp->len;
2595 		tmp = tmp->frags;
2596 	}
2597 
2598 	if (IS_ENABLED(CONFIG_NET_TCP_LOG_LEVEL_DBG)) {
2599 		NET_DBG("Queuing data: conn %p", conn);
2600 	}
2601 
2602 	if (!net_pkt_is_empty(conn->queue_recv_data)) {
2603 		/* Place the data at the correct position in the list. If the
2604 		 * data would not be sequential, then drop this packet.
2605 		 *
2606 		 * Only work with subtractions between sequence numbers in uint32_t format
2607 		 * to properly handle cases around the wrapping point.
2608 		 */
2609 
2610 		/* Some potential cases:
2611 		 * Note: MI = MAX_INT
2612 		 * Packet | Queued| End off1  | Start off| End off2    | Required handling
2613 		 * Seq|Len|Seq|Len|           |          |             |
2614 		 *  3 | 3 | 6 | 4 | 3+3-6=  0 | NA       | NA          | Prepend
2615 		 *  3 | 4 | 6 | 4 | 3+4-6 = 1 | NA       | NA          | Prepend, pull from buffer
2616 		 *  3 | 7 | 6 | 4 | 3+7-6 = 4 | 6-3=3    | 6+4-3=7     | Drop queued data
2617 		 *  3 | 8 | 6 | 4 | 3+8-6 = 5 | 6-3=3    | 6+4-3=7     | Drop queued data
2618 		 *  6 | 5 | 6 | 4 | 6+5-6 = 5 | 6-6=0    | 6+4-6=4     | Drop queued data
2619 		 *  6 | 4 | 6 | 4 | 6+4-6 = 4 | 6-6=0    | 6+4-6=4     | Drop queued data / packet
2620 		 *  7 | 2 | 6 | 4 | 7+2-6 = 3 | 6-7=MI   | 6+4-7=3     | Drop packet
2621 		 * 10 | 2 | 6 | 4 | 10+2-6= 6 | 6-10=MI-3| 6+4-10=0    | Append
2622 		 *  7 | 4 | 6 | 4 | 7+4-6 = 5 | 6-7 =MI  | 6+4-7 =3    | Append, pull from packet
2623 		 * 11 | 2 | 6 | 4 | 11+2-6= 7 | 6-11=MI-6| 6+4-11=MI-1 | Drop incoming packet
2624 		 *  2 | 3 | 6 | 4 | 2+3-6= MI | 6-2=4    | 6+4-2=8     | Drop incoming packet
2625 		 */
2626 
2627 		uint32_t pending_seq;
2628 		uint32_t start_offset;
2629 		uint32_t end_offset;
2630 		size_t pending_len;
2631 
2632 		pending_seq = tcp_get_seq(conn->queue_recv_data->buffer);
2633 		end_offset = seq - pending_seq;
2634 		pending_len = net_pkt_get_len(conn->queue_recv_data);
2635 		if (end_offset < pending_len) {
2636 			if (end_offset < len) {
2637 				if (end_offset) {
2638 					net_pkt_remove_tail(pkt, end_offset);
2639 				}
2640 
2641 				/* Put new data before the pending data */
2642 				net_buf_frag_add(pkt->buffer,
2643 						 conn->queue_recv_data->buffer);
2644 				NET_DBG("Adding before the queued data, end_offset %i, pending_len %zu",
2645 					end_offset, pending_len);
2646 				conn->queue_recv_data->buffer = pkt->buffer;
2647 				inserted = true;
2648 			}
2649 		} else {
2650 			struct net_buf *last;
2651 
2652 			last = net_buf_frag_last(conn->queue_recv_data->buffer);
2653 			pending_seq = tcp_get_seq(last);
2654 
2655 			start_offset = pending_seq - seq_start;
2656 			/* Compute the offset w.r.t. the start point of the new packet */
2657 			end_offset = (pending_seq + last->len) - seq_start;
2658 
2659 			/* Check if the queued data starts within the new packet */
2660 			if ((start_offset < len) && (end_offset <= len)) {
2661 				/* The queued data is irrelevant since the new packet
2662 				 * fully overlaps it; take the new packet as the contents
2663 				 */
2664 				net_buf_unref(conn->queue_recv_data->buffer);
2665 				conn->queue_recv_data->buffer = pkt->buffer;
2666 				inserted = true;
2667 			} else {
2668 				if (end_offset < len) {
2669 					if (end_offset) {
2670 						net_pkt_remove_tail(conn->queue_recv_data,
2671 								    end_offset);
2672 					}
2673 
2674 					/* Put new data after pending data */
2675 					NET_DBG("Adding at end of queue, start %i, end %i, len %zu",
2676 						start_offset, end_offset, len);
2677 					net_buf_frag_add(conn->queue_recv_data->buffer,
2678 							 pkt->buffer);
2679 					inserted = true;
2680 				}
2681 			}
2682 		}
2683 
2684 		if (inserted) {
2685 			NET_DBG("All pending data: conn %p", conn);
2686 			if (check_seq_list(conn->queue_recv_data->buffer) == false) {
2687 				NET_ERR("Incorrect order in out of order sequence for conn %p",
2688 					conn);
2689 				/* error in sequence list, drop it */
2690 				net_buf_unref(conn->queue_recv_data->buffer);
2691 				conn->queue_recv_data->buffer = NULL;
2692 			}
2693 		} else {
2694 			NET_DBG("Cannot add new data to queue");
2695 		}
2696 	} else {
2697 		net_pkt_append_buffer(conn->queue_recv_data, pkt->buffer);
2698 		inserted = true;
2699 	}
2700 
2701 	if (inserted) {
2702 		/* We need to keep the received data but free the pkt */
2703 		pkt->buffer = NULL;
2704 
2705 		if (!k_work_delayable_is_pending(&conn->recv_queue_timer)) {
2706 			k_work_reschedule_for_queue(
2707 				&tcp_work_q, &conn->recv_queue_timer,
2708 				K_MSEC(CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT));
2709 		}
2710 	}
2711 }
2712 
2713 static enum net_verdict tcp_data_received(struct tcp *conn, struct net_pkt *pkt,
2714 					  size_t *len, bool psh)
2715 {
2716 	enum net_verdict ret;
2717 
2718 	if (*len == 0) {
2719 		return NET_DROP;
2720 	}
2721 
2722 	ret = tcp_data_get(conn, pkt, len);
2723 
2724 	net_stats_update_tcp_seg_recv(conn->iface);
2725 	conn_ack(conn, *len);
2726 
2727 	/* Delay ACK response in case of small window or missing PSH,
2728 	 * as described in RFC 813.
2729 	 */
2730 	if (tcp_short_window(conn) || !psh) {
2731 		k_work_schedule_for_queue(&tcp_work_q, &conn->ack_timer,
2732 					  ACK_DELAY);
2733 	} else {
2734 		k_work_cancel_delayable(&conn->ack_timer);
2735 		tcp_out(conn, ACK);
2736 	}
2737 
2738 	return ret;
2739 }
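
/* Illustrative timing of the delayed-ACK logic above: a segment without
 * PSH (more data is expected) only arms ack_timer, so at most one ACK
 * goes out per ACK_DELAY (100 ms) while data keeps arriving; a segment
 * with PSH and a healthy receive window cancels the timer and is ACKed
 * immediately.
 */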
2740 
2741 static void tcp_out_of_order_data(struct tcp *conn, struct net_pkt *pkt,
2742 				  size_t data_len, uint32_t seq)
2743 {
2744 	size_t headers_len;
2745 
2746 	if (data_len == 0) {
2747 		return;
2748 	}
2749 
2750 	headers_len = net_pkt_get_len(pkt) - data_len;
2751 
2752 	/* Get rid of protocol headers from the data */
2753 	if (tcp_pkt_pull(pkt, headers_len) < 0) {
2754 		return;
2755 	}
2756 
2757 	/* We received out-of-order data. Try to queue it.
2758 	 */
2759 	tcp_queue_recv_data(conn, pkt, data_len, seq);
2760 }
2761 
2762 static void tcp_check_sock_options(struct tcp *conn)
2763 {
2764 	int sndbuf_opt = 0;
2765 	int rcvbuf_opt = 0;
2766 
2767 	if (IS_ENABLED(CONFIG_NET_CONTEXT_SNDBUF)) {
2768 		(void)net_context_get_option(conn->context, NET_OPT_SNDBUF,
2769 					     &sndbuf_opt, NULL);
2770 	}
2771 
2772 	if (IS_ENABLED(CONFIG_NET_CONTEXT_RCVBUF)) {
2773 		(void)net_context_get_option(conn->context, NET_OPT_RCVBUF,
2774 					     &rcvbuf_opt, NULL);
2775 	}
2776 
2777 	if (sndbuf_opt > 0 && sndbuf_opt != conn->send_win_max) {
2778 		k_mutex_lock(&conn->lock, K_FOREVER);
2779 
2780 		conn->send_win_max = sndbuf_opt;
2781 		if (conn->send_win > conn->send_win_max) {
2782 			conn->send_win = conn->send_win_max;
2783 		}
2784 
2785 		k_mutex_unlock(&conn->lock);
2786 	}
2787 
2788 	if (rcvbuf_opt > 0 && rcvbuf_opt != conn->recv_win_max) {
2789 		int diff;
2790 
2791 		k_mutex_lock(&conn->lock, K_FOREVER);
2792 
2793 		diff = rcvbuf_opt - conn->recv_win_max;
2794 		conn->recv_win_max = rcvbuf_opt;
2795 		tcp_update_recv_wnd(conn, diff);
2796 
2797 		k_mutex_unlock(&conn->lock);
2798 	}
2799 }
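
/* Minimal usage sketch for the options polled above (assumption: the
 * SO_SNDBUF/SO_RCVBUF options are enabled in the configuration; error
 * handling omitted). Lowering SO_RCVBUF shrinks the advertised receive
 * window, while SO_SNDBUF caps the send window:
 *
 *   int bufsz = 2 * 1460;
 *
 *   zsock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &bufsz, sizeof(bufsz));
 *   zsock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &bufsz, sizeof(bufsz));
 */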
2800 
2801 /* TCP state machine, everything happens here */
2802 static enum net_verdict tcp_in(struct tcp *conn, struct net_pkt *pkt)
2803 {
2804 	struct tcphdr *th = pkt ? th_get(pkt) : NULL;
2805 	uint8_t next = 0, fl = 0;
2806 	bool do_close = false;
2807 	bool connection_ok = false;
2808 	size_t tcp_options_len = th ? (th_off(th) - 5) * 4 : 0;
2809 	struct net_conn *conn_handler = NULL;
2810 	struct net_pkt *recv_pkt;
2811 	void *recv_user_data;
2812 	struct k_fifo *recv_data_fifo;
2813 	size_t len;
2814 	int ret;
2815 	int close_status = 0;
2816 	enum net_verdict verdict = NET_DROP;
2817 
2818 	if (th) {
2819 		/* Currently we ignore ECN and CWR flags */
2820 		fl = th_flags(th) & ~(ECN | CWR);
2821 	}
2822 
2823 	if (conn->state != TCP_SYN_SENT) {
2824 		tcp_check_sock_options(conn);
2825 	}
2826 
2827 	k_mutex_lock(&conn->lock, K_FOREVER);
2828 
2829 	/* Connection context was already freed. */
2830 	if (conn->state == TCP_UNUSED) {
2831 		k_mutex_unlock(&conn->lock);
2832 		return NET_DROP;
2833 	}
2834 
2835 	NET_DBG("%s", tcp_conn_state(conn, pkt));
2836 
2837 	if (th && th_off(th) < 5) {
2838 		tcp_out(conn, RST);
2839 		do_close = true;
2840 		close_status = -ECONNRESET;
2841 		goto out;
2842 	}
2843 
2844 	if (FL(&fl, &, RST)) {
2845 		/* We only accept RST packet that has valid seq field. */
2846 		if (!tcp_validate_seq(conn, th)) {
2847 			net_stats_update_tcp_seg_rsterr(net_pkt_iface(pkt));
2848 			k_mutex_unlock(&conn->lock);
2849 			return NET_DROP;
2850 		}
2851 
2852 		/* Valid RST received. */
2853 		verdict = NET_OK;
2854 		net_stats_update_tcp_seg_rst(net_pkt_iface(pkt));
2855 		do_close = true;
2856 		close_status = -ECONNRESET;
2857 		conn->rst_received = true;
2858 
2859 		/* If we receive RST and ACK for the sent SYN, it means
2860 		 * that there is no socket listening on the port we are
2861 		 * trying to connect to. Set the errno properly in this case.
2862 		 */
2863 		if (conn->in_connect) {
2864 			fl = th_flags(th);
2865 			if (FL(&fl, ==, RST | ACK)) {
2866 				close_status = -ECONNREFUSED;
2867 			}
2868 		}
2869 
2870 		goto out;
2871 	}
2872 
2873 	if (tcp_options_len && !tcp_options_check(&conn->recv_options, pkt,
2874 						  tcp_options_len)) {
2875 		NET_DBG("DROP: Invalid TCP option list");
2876 		tcp_out(conn, RST);
2877 		do_close = true;
2878 		close_status = -ECONNRESET;
2879 		goto out;
2880 	}
2881 
2882 	if (th && (conn->state != TCP_LISTEN) && (conn->state != TCP_SYN_SENT) &&
2883 	    tcp_validate_seq(conn, th) && FL(&fl, &, SYN)) {
2884 		/* According to RFC 793, ch 3.9 Event Processing, receiving a SYN
2885 		 * once the connection has been established is an error
2886 		 * condition; a reset should be sent and the connection closed.
2887 		 */
2888 		NET_DBG("conn: %p, SYN received in %s state, dropping connection",
2889 			conn, tcp_state_to_str(conn->state, false));
2890 		net_stats_update_tcp_seg_drop(conn->iface);
2891 		tcp_out(conn, RST);
2892 		do_close = true;
2893 		close_status = -ECONNRESET;
2894 		goto out;
2895 	}
2896 
2897 	if (th) {
2898 		conn->send_win = ntohs(th_win(th));
2899 		if (conn->send_win > conn->send_win_max) {
2900 			NET_DBG("Lowering send window from %u to %u",
2901 				conn->send_win, conn->send_win_max);
2902 
2903 			conn->send_win = conn->send_win_max;
2904 		}
2905 
2906 		if (conn->send_win == 0) {
2907 			if (!k_work_delayable_is_pending(&conn->persist_timer)) {
2908 				conn->zwp_retries = 0;
2909 				(void)k_work_reschedule_for_queue(
2910 					&tcp_work_q, &conn->persist_timer,
2911 					K_MSEC(TCP_RTO_MS));
2912 			}
2913 		} else {
2914 			(void)k_work_cancel_delayable(&conn->persist_timer);
2915 		}
2916 
2917 		if (tcp_window_full(conn)) {
2918 			(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
2919 		} else {
2920 			k_sem_give(&conn->tx_sem);
2921 		}
2922 	}
2923 
2924 next_state:
2925 	len = pkt ? tcp_data_len(pkt) : 0;
2926 
2927 	switch (conn->state) {
2928 	case TCP_LISTEN:
2929 		if (FL(&fl, ==, SYN)) {
2930 			/* Make sure our MSS is also sent in the SYN | ACK */
2931 			conn->send_options.mss_found = true;
2932 			conn_ack(conn, th_seq(th) + 1); /* capture peer's ISN */
2933 			tcp_out(conn, SYN | ACK);
2934 			conn->send_options.mss_found = false;
2935 			conn_seq(conn, + 1);
2936 			next = TCP_SYN_RECEIVED;
2937 
2938 			/* Close the connection if we do not receive ACK on time.
2939 			 */
2940 			k_work_reschedule_for_queue(&tcp_work_q,
2941 						    &conn->establish_timer,
2942 						    ACK_TIMEOUT);
2943 			verdict = NET_OK;
2944 		} else {
2945 			conn->send_options.mss_found = true;
2946 			ret = tcp_out_ext(conn, SYN, NULL /* no data */, conn->seq);
2947 			if (ret < 0) {
2948 				do_close = true;
2949 				close_status = ret;
2950 			} else {
2951 				conn->send_options.mss_found = false;
2952 				conn_seq(conn, + 1);
2953 				next = TCP_SYN_SENT;
2954 				tcp_conn_ref(conn);
2955 			}
2956 		}
2957 		break;
2958 	case TCP_SYN_RECEIVED:
2959 		if (FL(&fl, &, ACK, th_ack(th) == conn->seq &&
2960 				th_seq(th) == conn->ack)) {
2961 			net_tcp_accept_cb_t accept_cb = NULL;
2962 			struct net_context *context = NULL;
2963 
2964 			if (conn->accepted_conn != NULL) {
2965 				accept_cb = conn->accepted_conn->accept_cb;
2966 				context = conn->accepted_conn->context;
2967 				keep_alive_param_copy(conn, conn->accepted_conn);
2968 			}
2969 
2970 			k_work_cancel_delayable(&conn->establish_timer);
2971 			tcp_send_timer_cancel(conn);
2972 			tcp_conn_ref(conn);
2973 			net_context_set_state(conn->context,
2974 					      NET_CONTEXT_CONNECTED);
2975 
2976 			/* Make sure the accept_cb is only called once. */
2977 			conn->accepted_conn = NULL;
2978 
2979 			if (accept_cb == NULL) {
2980 				/* In case of no accept_cb registered,
2981 				 * application will not take ownership of the
2982 				 * connection. To prevent connection leak, unref
2983 				 * the TCP context and put the connection into
2984 				 * active close (TCP_FIN_WAIT_1).
2985 				 */
2986 				net_tcp_put(conn->context);
2987 				break;
2988 			}
2989 
2990 			keep_alive_timer_restart(conn);
2991 
2992 			net_ipaddr_copy(&conn->context->remote, &conn->dst.sa);
2993 
2994 			/* Check if v4-mapping-to-v6 needs to be done for
2995 			 * the accepted socket.
2996 			 */
2997 			if (IS_ENABLED(CONFIG_NET_IPV4_MAPPING_TO_IPV6) &&
2998 			    net_context_get_family(conn->context) == AF_INET &&
2999 			    net_context_get_family(context) == AF_INET6 &&
3000 			    !net_context_is_v6only_set(context)) {
3001 				struct in6_addr mapped;
3002 
3003 				net_ipv6_addr_create_v4_mapped(
3004 					&net_sin(&conn->context->remote)->sin_addr,
3005 					&mapped);
3006 				net_ipaddr_copy(&net_sin6(&conn->context->remote)->sin6_addr,
3007 						&mapped);
3008 
3009 				net_sin6(&conn->context->remote)->sin6_family = AF_INET6;
3010 
3011 				NET_DBG("Setting v4 mapped address %s",
3012 					net_sprint_ipv6_addr(&mapped));
3013 
3014 				/* Note that we cannot set the local address to IPv6 one
3015 				 * as that is used to match the connection, and not just
3016 				 * for printing. The remote address is only used for
3017 				 * passing it to accept() and printing it by "net conn"
3018 				 * command.
3019 				 */
3020 			}
3021 
3022 			accept_cb(conn->context, &conn->context->remote,
3023 				  net_context_get_family(context) == AF_INET6 ?
3024 				  sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in),
3025 				  0, context);
3026 
3027 			next = TCP_ESTABLISHED;
3028 
3029 			tcp_ca_init(conn);
3030 
3031 			if (len) {
3032 				verdict = tcp_data_get(conn, pkt, &len);
3033 				if (verdict == NET_OK) {
3034 					/* net_pkt owned by the recv fifo now */
3035 					pkt = NULL;
3036 				}
3037 
3038 				conn_ack(conn, + len);
3039 				tcp_out(conn, ACK);
3040 			} else {
3041 				verdict = NET_OK;
3042 			}
3043 
3044 			/* ACK for SYN | ACK has been received. This signals that
3045 			 * the connection is making "forward progress".
3046 			 */
3047 			tcp_nbr_reachability_hint(conn);
3048 		}
3049 		break;
3050 	case TCP_SYN_SENT:
3051 		/* If we are in SYN SENT and receive only a SYN without an
3052 		 * ACK, shouldn't we go to the SYN RECEIVED state? See
3053 		 * Figure 6 of RFC 793.
3054 		 */
3055 		if (FL(&fl, &, SYN | ACK, th && th_ack(th) == conn->seq)) {
3056 			tcp_send_timer_cancel(conn);
3057 			conn_ack(conn, th_seq(th) + 1);
3058 			if (len) {
3059 				verdict = tcp_data_get(conn, pkt, &len);
3060 				if (verdict == NET_OK) {
3061 					/* net_pkt owned by the recv fifo now */
3062 					pkt = NULL;
3063 				}
3064 
3065 				conn_ack(conn, + len);
3066 			} else {
3067 				verdict = NET_OK;
3068 			}
3069 
3070 			next = TCP_ESTABLISHED;
3071 			net_context_set_state(conn->context,
3072 					      NET_CONTEXT_CONNECTED);
3073 			tcp_ca_init(conn);
3074 			tcp_out(conn, ACK);
3075 			keep_alive_timer_restart(conn);
3076 
3077 			/* The connection semaphore is released *after*
3078 			 * we have changed the connection state. This way
3079 			 * the application can send data and it is queued
3080 			 * properly even if this thread is running in lower
3081 			 * priority.
3082 			 */
3083 			connection_ok = true;
3084 
3085 			/* ACK for SYN has been received. This signals that
3086 			 * the connection is making "forward progress".
3087 			 */
3088 			tcp_nbr_reachability_hint(conn);
3089 		} else if (pkt) {
3090 			net_tcp_reply_rst(pkt);
3091 		}
3092 
3093 		break;
3094 	case TCP_ESTABLISHED:
3095 		/* full-close */
3096 		if (th && FL(&fl, &, FIN, th_seq(th) == conn->ack)) {
3097 			bool acked = false;
3098 
3099 			if (len) {
3100 				verdict = tcp_data_get(conn, pkt, &len);
3101 				if (verdict == NET_OK) {
3102 					/* net_pkt owned by the recv fifo now */
3103 					pkt = NULL;
3104 				}
3105 			} else {
3106 				verdict = NET_OK;
3107 			}
3108 
3109 			conn_ack(conn, + len + 1);
3110 			keep_alive_timer_stop(conn);
3111 
3112 			if (FL(&fl, &, ACK)) {
3113 				acked = true;
3114 
3115 				if (net_tcp_seq_cmp(th_ack(th), conn->seq) > 0) {
3116 					uint32_t len_acked = th_ack(th) - conn->seq;
3117 
3118 					conn_seq(conn, + len_acked);
3119 				}
3120 			}
3121 
3122 			if (acked) {
3123 				tcp_out(conn, FIN | ACK);
3124 				conn_seq(conn, + 1);
3125 				tcp_setup_last_ack_timer(conn);
3126 				next = TCP_LAST_ACK;
3127 			} else {
3128 				tcp_out(conn, ACK);
3129 				next = TCP_CLOSE_WAIT;
3130 			}
3131 
3132 			break;
3133 		}
3134 
3135 		/* Whatever we've received, we know that the peer is alive, so
3136 		 * reset the keepalive timer.
3137 		 */
3138 		keep_alive_timer_restart(conn);
3139 
3140 #ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
3141 		if (th && (net_tcp_seq_cmp(th_ack(th), conn->seq) == 0)) {
3142 			/* Only increment the duplicate ACK count if there is pending data */
3143 			if (conn->send_data_total > 0) {
3144 				/* A payload could also be present; only ACKs without payload count */
3145 				if (len == 0) {
3146 					/* Increment the duplicate ACK counter,
3147 					 * but cap its value
3148 					 */
3149 					conn->dup_ack_cnt = MIN(conn->dup_ack_cnt + 1,
3150 						DUPLICATE_ACK_RETRANSMIT_TRHESHOLD + 1);
3151 					tcp_ca_dup_ack(conn);
3152 				}
3153 			} else {
3154 				conn->dup_ack_cnt = 0;
3155 			}
3156 
3157 			/* Only do fast retransmit when not already in a resend state */
3158 			if ((conn->data_mode == TCP_DATA_MODE_SEND) &&
3159 			    (conn->dup_ack_cnt == DUPLICATE_ACK_RETRANSMIT_TRHESHOLD)) {
3160 				/* Apply a fast retransmit */
3161 				int temp_unacked_len = conn->unacked_len;
3162 
3163 				conn->unacked_len = 0;
3164 
3165 				(void)tcp_send_data(conn);
3166 
3167 				/* Restore the current transmission */
3168 				conn->unacked_len = temp_unacked_len;
3169 
3170 				tcp_ca_fast_retransmit(conn);
3171 				if (tcp_window_full(conn)) {
3172 					(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
3173 				}
3174 			}
3175 		}
3176 #endif
3177 		NET_ASSERT((conn->send_data_total == 0) ||
3178 			   k_work_delayable_is_pending(&conn->send_data_timer),
3179 			   "conn: %p, Missing a subscription "
3180 				"of the send_data queue timer", conn);
3181 
3182 		if (th && (net_tcp_seq_cmp(th_ack(th), conn->seq) > 0)) {
3183 			uint32_t len_acked = th_ack(th) - conn->seq;
3184 
3185 			NET_DBG("conn: %p len_acked=%u", conn, len_acked);
3186 
3187 			if ((conn->send_data_total < len_acked) ||
3188 					(tcp_pkt_pull(conn->send_data,
3189 						      len_acked) < 0)) {
3190 				NET_ERR("conn: %p, Invalid len_acked=%u "
3191 					"(total=%zu)", conn, len_acked,
3192 					conn->send_data_total);
3193 				net_stats_update_tcp_seg_drop(conn->iface);
3194 				tcp_out(conn, RST);
3195 				do_close = true;
3196 				close_status = -ECONNRESET;
3197 				break;
3198 			}
3199 
3200 #ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
3201 			/* New segment, reset duplicate ack counter */
3202 			conn->dup_ack_cnt = 0;
3203 #endif
3204 			tcp_ca_pkts_acked(conn, len_acked);
3205 
3206 			conn->send_data_total -= len_acked;
3207 			if (conn->unacked_len < len_acked) {
3208 				conn->unacked_len = 0;
3209 			} else {
3210 				conn->unacked_len -= len_acked;
3211 			}
3212 
3213 			if (!tcp_window_full(conn)) {
3214 				k_sem_give(&conn->tx_sem);
3215 			}
3216 
3217 			conn_seq(conn, + len_acked);
3218 			net_stats_update_tcp_seg_recv(conn->iface);
3219 
3220 			/* Receipt of an acknowledgment that covers a sequence number
3221 			 * not previously acknowledged indicates that the connection
3222 			 * is making "forward progress".
3223 			 */
3224 			tcp_nbr_reachability_hint(conn);
3225 
3226 			conn_send_data_dump(conn);
3227 
3228 			conn->send_data_retries = 0;
3229 			if (conn->data_mode == TCP_DATA_MODE_RESEND) {
3230 				conn->unacked_len = 0;
3231 				tcp_derive_rto(conn);
3232 			}
3233 			conn->data_mode = TCP_DATA_MODE_SEND;
3234 			if (conn->send_data_total > 0) {
3235 				k_work_reschedule_for_queue(&tcp_work_q, &conn->send_data_timer,
3236 					    K_MSEC(TCP_RTO_MS));
3237 			}
3238 
3239 			/* We are closing the connection, send a FIN to peer */
3240 			if (conn->in_close && conn->send_data_total == 0) {
3241 				tcp_send_timer_cancel(conn);
3242 				next = TCP_FIN_WAIT_1;
3243 
3244 				k_work_reschedule_for_queue(&tcp_work_q,
3245 							    &conn->fin_timer,
3246 							    FIN_TIMEOUT);
3247 
3248 				tcp_out(conn, FIN | ACK);
3249 				conn_seq(conn, + 1);
3250 				verdict = NET_OK;
3251 				keep_alive_timer_stop(conn);
3252 				break;
3253 			}
3254 
3255 			ret = tcp_send_queued_data(conn);
3256 			if (ret < 0 && ret != -ENOBUFS) {
3257 				tcp_out(conn, RST);
3258 				do_close = true;
3259 				close_status = ret;
3260 				verdict = NET_OK;
3261 				break;
3262 			}
3263 
3264 			if (tcp_window_full(conn)) {
3265 				(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
3266 			}
3267 		}
3268 
3269 		if (th) {
3270 			if (th_seq(th) == conn->ack) {
3271 				if (len > 0) {
3272 					bool psh = FL(&fl, &, PSH);
3273 
3274 					verdict = tcp_data_received(conn, pkt, &len, psh);
3275 					if (verdict == NET_OK) {
3276 						/* net_pkt owned by the recv fifo now */
3277 						pkt = NULL;
3278 					}
3279 				} else {
3280 					/* ACK, no data */
3281 					verdict = NET_OK;
3282 				}
3283 			} else if (net_tcp_seq_greater(conn->ack, th_seq(th))) {
3284 				/* This should handle the acknowledgements of keep-alive
3285 				 * packets and retransmitted data.
3286 				 * RISK:
3287 				 * There is a tiny risk of creating an ACK loop this way when
3288 				 * both ends of the connection are out of order due to packet
3289 				 * loss in a simultaneous bidirectional data flow.
3290 				 */
3291 				tcp_out(conn, ACK); /* peer has resent */
3292 
3293 				net_stats_update_tcp_seg_ackerr(conn->iface);
3294 				verdict = NET_OK;
3295 			} else if (CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT) {
3296 				tcp_out_of_order_data(conn, pkt, len,
3297 						      th_seq(th));
3298 				/* Send out a duplicate ACK */
3299 				if ((len > 0) || FL(&fl, &, FIN)) {
3300 					tcp_out(conn, ACK);
3301 				}
3302 
3303 				verdict = NET_OK;
3304 			}
3305 		}
3306 
3307 		/* Check if there is possibly any data left to retransmit */
3308 		if (conn->send_data_total == 0) {
3309 			conn->send_data_retries = 0;
3310 			k_work_cancel_delayable(&conn->send_data_timer);
3311 		}
3312 
3313 		/* A lot could have happened to the transmission window; check the situation here */
3314 		if (tcp_window_full(conn)) {
3315 			(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
3316 		} else {
3317 			k_sem_give(&conn->tx_sem);
3318 		}
3319 
3320 		break;
3321 	case TCP_CLOSE_WAIT:
3322 		tcp_out(conn, FIN);
3323 		conn_seq(conn, + 1);
3324 		next = TCP_LAST_ACK;
3325 		tcp_setup_last_ack_timer(conn);
3326 		break;
3327 	case TCP_LAST_ACK:
3328 		if (th && FL(&fl, ==, ACK, th_ack(th) == conn->seq)) {
3329 			tcp_send_timer_cancel(conn);
3330 			do_close = true;
3331 			verdict = NET_OK;
3332 			close_status = 0;
3333 
3334 			/* Remove the last-ACK timer if we received the ACK in time */
3335 			tcp_cancel_last_ack_timer(conn);
3336 		}
3337 		break;
3338 	case TCP_CLOSED:
3339 		break;
3340 	case TCP_FIN_WAIT_1:
3341 		/*
3342 		 * FIN1:
3343 		 * Acknowledge path and sequence path are independent, treat them that way
3344 		 * The table of incoming messages and their destination states:
3345 		 * -   & -   -> TCP_FIN_WAIT_1
3346 		 * FIN & -   -> TCP_CLOSING
3347 		 * -   & ACK -> TCP_FIN_WAIT_2
3348 		 * FIN & ACK -> TCP_TIME_WAIT
3349 		 */
3350 		if (th) {
3351 			bool fin_acked = false;
3352 
3353 			if (tcp_compute_new_length(conn, th, len, false) > 0) {
3354 				/* We do not implement half-closed sockets, therefore
3355 				 * we cannot accept new data after sending our FIN. As
3356 				 * we are in sequence, we can send a reset now.
3357 				 */
3358 				net_stats_update_tcp_seg_drop(conn->iface);
3359 
3360 				next = tcp_enter_time_wait(conn);
3361 
3362 				tcp_out(conn, RST);
3363 				break;
3364 			}
3365 			if (FL(&fl, &, ACK, th_ack(th) == conn->seq)) {
3366 				NET_DBG("conn %p: FIN acknowledged, going to FIN_WAIT_2 "
3367 					"state seq %u, ack %u"
3368 					, conn, conn->seq, conn->ack);
3369 				tcp_send_timer_cancel(conn);
3370 				fin_acked = true;
3371 				next = TCP_FIN_WAIT_2;
3372 				verdict = NET_OK;
3373 			}
3374 
3375 			/*
3376 			 * There can also be data in the message, so compute with the length
3377 			 * of the packet to check the sequence number of the FIN flag with the ACK
3378 			 */
3379 			if (FL(&fl, &, FIN, net_tcp_seq_cmp(th_seq(th) + len, conn->ack) == 0)) {
3380 				conn_ack(conn, + 1);
3381 
3382 				/* The state path depends on whether the ACK has been received */
3383 				if (fin_acked) {
3384 					/* Already acknowledged, we can go further */
3385 					NET_DBG("conn %p: FIN received, going to TIME WAIT", conn);
3386 
3387 					next = tcp_enter_time_wait(conn);
3388 
3389 					tcp_out(conn, ACK);
3390 				} else {
3391 					/* FIN not yet acknowledged, waiting for the ACK in CLOSING
3392 					 */
3393 					NET_DBG("conn %p: FIN received, going to CLOSING as no "
3394 						"ACK has been received", conn);
3395 					tcp_send_timer_cancel(conn);
3396 					tcp_out_ext(conn, FIN | ACK, NULL, conn->seq - 1);
3397 					next = TCP_CLOSING;
3398 				}
3399 				verdict = NET_OK;
3400 			} else {
3401 				if (len > 0) {
3402 					if (fin_acked) {
3403 						/* Send out a duplicate ACK */
3404 						tcp_send_timer_cancel(conn);
3405 						tcp_out(conn, ACK);
3406 					} else {
3407 						/* In FIN1 state,
3408 						 * send out a duplicate ACK with the pending FIN
3409 						 * flag
3410 						 */
3411 						tcp_send_timer_cancel(conn);
3412 						tcp_out_ext(conn, FIN | ACK, NULL, conn->seq - 1);
3413 					}
3414 					verdict = NET_OK;
3415 				}
3416 			}
3417 		}
3418 		break;
3419 	case TCP_FIN_WAIT_2:
3420 		/*
3421 		 * FIN2:
3422 		 * Only FIN is relevant in this state, as our FIN was already acknowledged
3423 		 * -   -> TCP_FIN_WAIT_2
3424 		 * FIN -> TCP_TIME_WAIT
3425 		 */
3426 		if (th) {
3427 			/* No tcp_send_timer_cancel call required here, as it has been called
3428 			 * before entering this state, which is only reachable through the
3429 			 * tcp_enter_time_wait function.
3430 			 */
3431 
3432 			/* Compute if there is new data after our close */
3433 			if (tcp_compute_new_length(conn, th, len, false) > 0) {
3434 				/* We do not implement half-closed sockets, therefore
3435 				 * we cannot accept new data after sending our FIN. As
3436 				 * we are in sequence, we can send a reset now.
3437 				 */
3438 				net_stats_update_tcp_seg_drop(conn->iface);
3439 
3440 				next = tcp_enter_time_wait(conn);
3441 
3442 				tcp_out(conn, RST);
3443 				break;
3444 			}
3445 			/*
3446 			 * There can also be data in the message, so compute with the length
3447 			 * of the packet to check the sequence number of the FIN flag with the ACK
3448 			 */
3449 			if (FL(&fl, &, FIN, net_tcp_seq_cmp(th_seq(th) + len, conn->ack) == 0)) {
3450 				conn_ack(conn, + 1);
3451 				NET_DBG("conn %p: FIN received, going to TIME WAIT", conn);
3452 
3453 				next = tcp_enter_time_wait(conn);
3454 
3455 				verdict = NET_OK;
3456 				tcp_out(conn, ACK);
3457 			} else {
3458 				if (len > 0) {
3459 					/* Send out a duplicate ACK */
3460 					tcp_out(conn, ACK);
3461 					verdict = NET_OK;
3462 				}
3463 			}
3464 		}
3465 		break;
3466 	case TCP_CLOSING:
3467 		if (th) {
3468 			bool fin_acked = false;
3469 
3470 			/*
3471 			 * Closing:
3472 			 * Our FIN has to be acknowledged
3473 			 * -   -> TCP_CLOSING
3474 			 * ACK -> TCP_TIME_WAIT
3475 			 */
3476 			int32_t new_len = tcp_compute_new_length(conn, th, len, true);
3477 
3478 			if (new_len > 0) {
3479 				/* This should not happen here, as no data can be sent after
3480 				 * the FIN flag has been sent.
3481 				 */
3482 				NET_ERR("conn: %p, new bytes %d during CLOSING state "
3483 					"sending reset", conn, new_len);
3484 				net_stats_update_tcp_seg_drop(conn->iface);
3485 
3486 				next = tcp_enter_time_wait(conn);
3487 
3488 				tcp_out(conn, RST);
3489 				break;
3490 			}
3491 
3492 			if (FL(&fl, &, ACK, th_ack(th) == conn->seq)) {
3493 				NET_DBG("conn %p: FIN acknowledged, going to TIME WAIT "
3494 					"state seq %u, ack %u"
3495 					, conn, conn->seq, conn->ack);
3496 
3497 				next = tcp_enter_time_wait(conn);
3498 				fin_acked = true;
3499 
3500 				verdict = NET_OK;
3501 			}
3502 
3503 			/*
3504 			 * There can also be data in the message, so compute with the length
3505 			 * of the packet to check with the ack
3506 			 * Since the conn->ack was already incremented in TCP_FIN_WAIT_1
3507 			 * add 1 in the comparison sequence
3508 			 */
3509 			if ((FL(&fl, &, FIN,
3510 				net_tcp_seq_cmp(th_seq(th) + len + 1, conn->ack) == 0)) ||
3511 			    (len > 0)) {
3512 				tcp_send_timer_cancel(conn);
3513 				if (fin_acked) {
3514 					/* Send out a duplicate ACK */
3515 					tcp_out(conn, ACK);
3516 				} else {
3517 					/* Send out a duplicate ACK, with the pending FIN
3518 					 * flag
3519 					 */
3520 					tcp_out_ext(conn, FIN | ACK, NULL, conn->seq - 1);
3521 				}
3522 				verdict = NET_OK;
3523 			}
3524 		}
3525 		break;
3526 	case TCP_TIME_WAIT:
3527 		if (th) {
3528 			int32_t new_len = tcp_compute_new_length(conn, th, len, true);
3529 
3530 			/* No tcp_send_timer_cancel call required here, as it has been called
3531 			 * before entering this state, which is only reachable through the
3532 			 * tcp_enter_time_wait function.
3533 			 */
3534 
3535 			if (new_len > 0) {
3536 				/* This should not happen here, as no data can be sent after
3537 				 * the FIN flag has been sent.
3538 				 */
3539 				NET_ERR("conn: %p, new bytes %d during TIME-WAIT state "
3540 					"sending reset", conn, new_len);
3541 				net_stats_update_tcp_seg_drop(conn->iface);
3542 
3543 				tcp_out(conn, RST);
3544 			} else {
3545 				/* Acknowledge any FIN attempts, in case retransmission took
3546 				 * place.
3547 				 */
3548 				if ((FL(&fl, &, FIN,
3549 					net_tcp_seq_cmp(th_seq(th) + 1, conn->ack) == 0)) ||
3550 				    (len > 0)) {
3551 					tcp_out(conn, ACK);
3552 					verdict = NET_OK;
3553 				}
3554 			}
3555 		}
3556 		break;
3557 	default:
3558 		NET_ASSERT(false, "%s is unimplemented",
3559 			   tcp_state_to_str(conn->state, true));
3560 	}
3561 
3562 out:
3563 	if (pkt) {
3564 		if (verdict == NET_OK) {
3565 			net_pkt_unref(pkt);
3566 		}
3567 
3568 		pkt = NULL;
3569 	}
3570 
3571 	if (next) {
3572 		th = NULL;
3573 		conn_state(conn, next);
3574 		next = 0;
3575 
3576 		if (connection_ok) {
3577 			conn->in_connect = false;
3578 			if (conn->connect_cb) {
3579 				conn->connect_cb(conn->context, 0, conn->context->user_data);
3580 
3581 				/* Make sure the connect_cb is only called once. */
3582 				conn->connect_cb = NULL;
3583 			}
3584 
3585 			k_sem_give(&conn->connect_sem);
3586 		}
3587 
3588 		goto next_state;
3589 	}
3590 
3591 	if (conn->context) {
3592 		/* If the conn->context is not set, then the connection was
3593 		 * already closed.
3594 		 */
3595 		conn_handler = (struct net_conn *)conn->context->conn_handler;
3596 	}
3597 
3598 	recv_user_data = conn->recv_user_data;
3599 	recv_data_fifo = &conn->recv_data;
3600 
3601 	k_mutex_unlock(&conn->lock);
3602 
3603 	/* Pass all the received data stored in the recv fifo to the application.
3604 	 * This is done this way so that we do not hold the connection lock
3605 	 * while calling into the application.
3606 	 */
3607 	while (conn_handler && atomic_get(&conn->ref_count) > 0 &&
3608 	       (recv_pkt = k_fifo_get(recv_data_fifo, K_NO_WAIT)) != NULL) {
3609 		if (net_context_packet_received(conn_handler, recv_pkt, NULL,
3610 						NULL, recv_user_data) ==
3611 		    NET_DROP) {
3612 			/* Application is no longer there, unref the pkt */
3613 			tcp_pkt_unref(recv_pkt);
3614 		}
3615 	}
3616 
3617 	/* Make sure we close the connection only once by checking connection
3618 	 * state.
3619 	 */
3620 	if (do_close && conn->state != TCP_UNUSED && conn->state != TCP_CLOSED) {
3621 		tcp_conn_close(conn, close_status);
3622 	}
3623 
3624 	return verdict;
3625 }
3626 
3627 /* Active connection close: send FIN and go to FIN_WAIT_1 state */
3628 int net_tcp_put(struct net_context *context)
3629 {
3630 	struct tcp *conn = context->tcp;
3631 
3632 	if (!conn) {
3633 		return -ENOENT;
3634 	}
3635 
3636 	k_mutex_lock(&conn->lock, K_FOREVER);
3637 
3638 	NET_DBG("%s", conn ? tcp_conn_state(conn, NULL) : "");
3639 	NET_DBG("context %p %s", context,
3640 		({ const char *state = net_context_state(context);
3641 					state ? state : "<unknown>"; }));
3642 
3643 	if (conn->state == TCP_ESTABLISHED ||
3644 	    conn->state == TCP_SYN_RECEIVED) {
3645 		/* Send all remaining data if possible. */
3646 		if (conn->send_data_total > 0) {
3647 			NET_DBG("conn %p pending %zu bytes", conn,
3648 				conn->send_data_total);
3649 			conn->in_close = true;
3650 
3651 			/* Allow one more RTO period for the remaining data to be sent.
3652 			 */
3653 			k_work_reschedule_for_queue(&tcp_work_q,
3654 						    &conn->send_data_timer,
3655 						    K_MSEC(TCP_RTO_MS));
3656 		} else {
3657 			int ret;
3658 
3659 			NET_DBG("TCP connection in %s close, "
3660 				"not disposing yet (waiting %dms)",
3661 				"active", tcp_max_timeout_ms);
3662 			k_work_reschedule_for_queue(&tcp_work_q,
3663 						    &conn->fin_timer,
3664 						    FIN_TIMEOUT);
3665 
3666 			ret = tcp_out_ext(conn, FIN | ACK, NULL,
3667 					  conn->seq + conn->unacked_len);
3668 			if (ret == 0) {
3669 				conn_seq(conn, + 1);
3670 			}
3671 
3672 			conn_state(conn, TCP_FIN_WAIT_1);
3673 
3674 			keep_alive_timer_stop(conn);
3675 		}
3676 	} else if (conn->in_connect) {
3677 		conn->in_connect = false;
3678 		k_sem_reset(&conn->connect_sem);
3679 	}
3680 
3681 	k_mutex_unlock(&conn->lock);
3682 
3683 	tcp_conn_unref(conn);
3684 
3685 	return 0;
3686 }
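
/* Illustrative usage sketch (not part of the build; the context pointer is
 * assumed to come from the caller's own setup): an application-facing layer
 * triggers the active close like this and relies on the TCP work queue to
 * complete the FIN handshake asynchronously:
 *
 *	int ret = net_tcp_put(context);
 *
 * A return of -ENOENT means no TCP connection was attached to the context;
 * otherwise the call returns immediately while the close proceeds in the
 * background.
 */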
3687 
3688 int net_tcp_listen(struct net_context *context)
3689 {
3690 	/* When created, TCP connections are in the TCP_LISTEN state */
3691 	net_context_set_state(context, NET_CONTEXT_LISTENING);
3692 
3693 	return 0;
3694 }
3695 
3696 int net_tcp_update_recv_wnd(struct net_context *context, int32_t delta)
3697 {
3698 	struct tcp *conn = context->tcp;
3699 	int ret;
3700 
3701 	if (!conn) {
3702 		NET_ERR("context->tcp == NULL");
3703 		return -EPROTOTYPE;
3704 	}
3705 
3706 	k_mutex_lock(&conn->lock, K_FOREVER);
3707 
3708 	ret = tcp_update_recv_wnd((struct tcp *)context->tcp, delta);
3709 
3710 	k_mutex_unlock(&conn->lock);
3711 
3712 	return ret;
3713 }
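
/* Illustrative sketch (assumption: the application has just consumed "len"
 * bytes from its receive queue and wants to re-open the advertised receive
 * window by the same amount):
 *
 *	int ret = net_tcp_update_recv_wnd(context, (int32_t)len);
 *
 * A positive delta grows the advertised window, a negative one shrinks it;
 * -EPROTOTYPE is returned if no TCP connection is attached to the context.
 */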
3714 
3715 int net_tcp_queue(struct net_context *context, const void *data, size_t len,
3716 		  const struct msghdr *msg)
3717 {
3718 	struct tcp *conn = context->tcp;
3719 	size_t queued_len = 0;
3720 	int ret = 0;
3721 
3722 	if (!conn || conn->state != TCP_ESTABLISHED) {
3723 		return -ENOTCONN;
3724 	}
3725 
3726 	k_mutex_lock(&conn->lock, K_FOREVER);
3727 
3728 	/* If there is no space to transmit, try at a later time.
3729 	 * The ZWP will make sure the window becomes available at
3730 	 * some point in time.
3731 	 */
3732 	if (tcp_window_full(conn)) {
3733 		ret = -EAGAIN;
3734 		goto out;
3735 	}
3736 
3737 	if (msg) {
3738 		len = 0;
3739 
3740 		for (int i = 0; i < msg->msg_iovlen; i++) {
3741 			len += msg->msg_iov[i].iov_len;
3742 		}
3743 	}
3744 
3745 	/* Queue no more than the TX window permits. It is guaranteed at this
3746 	 * point that conn->send_data_total is less than conn->send_win, as it
3747 	 * was verified by the tcp_window_full() check above. As the connection
3748 	 * mutex is held, these values cannot change in the meantime.
3749 	 */
3750 	len = MIN(conn->send_win - conn->send_data_total, len);
3751 
3752 	if (msg) {
3753 		for (int i = 0; i < msg->msg_iovlen; i++) {
3754 			int iovlen = MIN(msg->msg_iov[i].iov_len, len);
3755 
3756 			ret = tcp_pkt_append(conn->send_data,
3757 					     msg->msg_iov[i].iov_base,
3758 					     iovlen);
3759 			if (ret < 0) {
3760 				if (queued_len == 0) {
3761 					goto out;
3762 				} else {
3763 					break;
3764 				}
3765 			}
3766 
3767 			queued_len += iovlen;
3768 			len -= iovlen;
3769 
3770 			if (len == 0) {
3771 				break;
3772 			}
3773 		}
3774 	} else {
3775 		ret = tcp_pkt_append(conn->send_data, data, len);
3776 		if (ret < 0) {
3777 			goto out;
3778 		}
3779 
3780 		queued_len = len;
3781 	}
3782 
3783 	conn->send_data_total += queued_len;
3784 
3785 	/* Successfully queued data for transmission. Even if there is a
3786 	 * transmit failure now (out-of-buffers case), it can be ignored: the
3787 	 * retransmit timer will take care of retransmitting the queued data.
3788 	 */
3789 	ret = tcp_send_queued_data(conn);
3790 	if (ret < 0 && ret != -ENOBUFS) {
3791 		tcp_conn_close(conn, ret);
3792 		goto out;
3793 	}
3794 
3795 	if (tcp_window_full(conn)) {
3796 		(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
3797 	}
3798 
3799 	ret = queued_len;
3800 out:
3801 	k_mutex_unlock(&conn->lock);
3802 
3803 	return ret;
3804 }
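
/* Illustrative sketch (not part of the build): net_tcp_queue() only queues
 * up to the free TX window and returns the number of bytes accepted, so a
 * caller is expected to loop over partial writes and, on -EAGAIN, wait for
 * window space, e.g. on the semaphore from net_tcp_tx_sem_get(). The
 * "data"/"data_len" names are assumptions for the example:
 *
 *	size_t off = 0;
 *
 *	while (off < data_len) {
 *		int ret = net_tcp_queue(context, data + off,
 *					data_len - off, NULL);
 *
 *		if (ret == -EAGAIN) {
 *			k_sem_take(net_tcp_tx_sem_get(context), K_FOREVER);
 *			continue;
 *		}
 *		if (ret < 0) {
 *			break;
 *		}
 *		off += ret;
 *	}
 */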
3805 
3806 /* net context is about to send out queued data - inform caller only */
3807 int net_tcp_send_data(struct net_context *context, net_context_send_cb_t cb,
3808 		      void *user_data)
3809 {
3810 	if (cb) {
3811 		cb(context, 0, user_data);
3812 	}
3813 
3814 	return 0;
3815 }
3816 
3817 /* When connect() is called on a TCP socket, register the socket with the
3818  * connection layer for incoming traffic and provide the TCP receive
3819  * function, which in turn calls tcp_in() to deliver packets to the stack.
3820  */
3821 int net_tcp_connect(struct net_context *context,
3822 		    const struct sockaddr *remote_addr,
3823 		    struct sockaddr *local_addr,
3824 		    uint16_t remote_port, uint16_t local_port,
3825 		    k_timeout_t timeout, net_context_connect_cb_t cb,
3826 		    void *user_data)
3827 {
3828 	struct tcp *conn;
3829 	int ret = 0;
3830 
3831 	NET_DBG("context: %p, local: %s, remote: %s", context,
3832 		net_sprint_addr(local_addr->sa_family,
3833 				(const void *)&net_sin(local_addr)->sin_addr),
3834 		net_sprint_addr(remote_addr->sa_family,
3835 				(const void *)&net_sin(remote_addr)->sin_addr));
3836 
3837 	conn = context->tcp;
3838 	conn->iface = net_context_get_iface(context);
3839 	tcp_derive_rto(conn);
3840 
3841 	switch (net_context_get_family(context)) {
3842 		const struct in_addr *ip4;
3843 		const struct in6_addr *ip6;
3844 
3845 	case AF_INET:
3846 		if (!IS_ENABLED(CONFIG_NET_IPV4)) {
3847 			ret = -EINVAL;
3848 			goto out;
3849 		}
3850 
3851 		memset(&conn->src, 0, sizeof(struct sockaddr_in));
3852 		memset(&conn->dst, 0, sizeof(struct sockaddr_in));
3853 
3854 		conn->src.sa.sa_family = AF_INET;
3855 		conn->dst.sa.sa_family = AF_INET;
3856 
3857 		conn->dst.sin.sin_port = remote_port;
3858 		conn->src.sin.sin_port = local_port;
3859 
3860 		/* We have to select the source address here, as
3861 		 * net_context_create_ipv4_new() is not called in the packet
3862 		 * output chain.
3863 		 */
3864 		if (net_ipv4_is_addr_unspecified(
3865 			&net_sin(local_addr)->sin_addr)) {
3866 			ip4 = net_if_ipv4_select_src_addr(
3867 				net_context_get_iface(context),
3868 				&net_sin(remote_addr)->sin_addr);
3869 			net_ipaddr_copy(&conn->src.sin.sin_addr, ip4);
3870 		} else {
3871 			net_ipaddr_copy(&conn->src.sin.sin_addr,
3872 					&net_sin(local_addr)->sin_addr);
3873 		}
3874 		net_ipaddr_copy(&conn->dst.sin.sin_addr,
3875 				&net_sin(remote_addr)->sin_addr);
3876 		break;
3877 
3878 	case AF_INET6:
3879 		if (!IS_ENABLED(CONFIG_NET_IPV6)) {
3880 			ret = -EINVAL;
3881 			goto out;
3882 		}
3883 
3884 		memset(&conn->src, 0, sizeof(struct sockaddr_in6));
3885 		memset(&conn->dst, 0, sizeof(struct sockaddr_in6));
3886 
3887 		conn->src.sin6.sin6_family = AF_INET6;
3888 		conn->dst.sin6.sin6_family = AF_INET6;
3889 
3890 		conn->dst.sin6.sin6_port = remote_port;
3891 		conn->src.sin6.sin6_port = local_port;
3892 
3893 		if (net_ipv6_is_addr_unspecified(
3894 			&net_sin6(local_addr)->sin6_addr)) {
3895 			ip6 = net_if_ipv6_select_src_addr(
3896 				net_context_get_iface(context),
3897 				&net_sin6(remote_addr)->sin6_addr);
3898 			net_ipaddr_copy(&conn->src.sin6.sin6_addr, ip6);
3899 		} else {
3900 			net_ipaddr_copy(&conn->src.sin6.sin6_addr,
3901 					&net_sin6(local_addr)->sin6_addr);
3902 		}
3903 		net_ipaddr_copy(&conn->dst.sin6.sin6_addr,
3904 				&net_sin6(remote_addr)->sin6_addr);
3905 		break;
3906 
3907 	default:
3908 		ret = -EPROTONOSUPPORT;
3909 	}
3910 
3911 	if (!(IS_ENABLED(CONFIG_NET_TEST_PROTOCOL) ||
3912 	      IS_ENABLED(CONFIG_NET_TEST))) {
3913 		conn->seq = tcp_init_isn(&conn->src.sa, &conn->dst.sa);
3914 	}
3915 
3916 	NET_DBG("conn: %p src: %s, dst: %s", conn,
3917 		net_sprint_addr(conn->src.sa.sa_family,
3918 				(const void *)&conn->src.sin.sin_addr),
3919 		net_sprint_addr(conn->dst.sa.sa_family,
3920 				(const void *)&conn->dst.sin.sin_addr));
3921 
3922 	net_context_set_state(context, NET_CONTEXT_CONNECTING);
3923 
3924 	ret = net_conn_register(net_context_get_proto(context),
3925 				net_context_get_family(context),
3926 				remote_addr, local_addr,
3927 				ntohs(remote_port), ntohs(local_port),
3928 				context, tcp_recv, context,
3929 				&context->conn_handler);
3930 	if (ret < 0) {
3931 		goto out;
3932 	}
3933 
3934 	net_if_addr_ref(conn->iface, conn->src.sa.sa_family,
3935 			conn->src.sa.sa_family == AF_INET ?
3936 			(const void *)&conn->src.sin.sin_addr :
3937 			(const void *)&conn->src.sin6.sin6_addr);
3938 	conn->addr_ref_done = true;
3939 
3940 	conn->connect_cb = cb;
3941 	context->user_data = user_data;
3942 
3943 	/* Feeding a NULL (nonexistent) packet with no flags set into tcp_in()
3944 	 * below starts the handshake that establishes the TCP connection.
3945 	 */
3946 	conn->in_connect = !IS_ENABLED(CONFIG_NET_TEST_PROTOCOL);
3947 
3948 	/* The ref will make sure that if the connection is closed in tcp_in(),
3949 	 * we do not access already freed connection.
3950 	 */
3951 	tcp_conn_ref(conn);
3952 	(void)tcp_in(conn, NULL);
3953 
3954 	if (!IS_ENABLED(CONFIG_NET_TEST_PROTOCOL)) {
3955 		if (conn->state == TCP_UNUSED || conn->state == TCP_CLOSED) {
3956 			if (conn->rst_received) {
3957 				ret = -ECONNREFUSED;
3958 			} else {
3959 				ret = -ENOTCONN;
3960 			}
3961 			goto out_unref;
3962 		} else if ((K_TIMEOUT_EQ(timeout, K_NO_WAIT)) &&
3963 			   conn->state != TCP_ESTABLISHED) {
3964 			ret = -EINPROGRESS;
3965 			goto out_unref;
3966 		} else if (k_sem_take(&conn->connect_sem, timeout) != 0 &&
3967 			   conn->state != TCP_ESTABLISHED) {
3968 			if (conn->in_connect) {
3969 				conn->in_connect = false;
3970 				tcp_conn_close(conn, -ETIMEDOUT);
3971 			}
3972 
3973 			if (conn->rst_received) {
3974 				ret = -ECONNREFUSED;
3975 			} else {
3976 				ret = -ETIMEDOUT;
3977 			}
3978 			goto out_unref;
3979 		}
3980 		conn->in_connect = false;
3981 	}
3982 
3983 out_unref:
3984 	tcp_conn_unref(conn);
3985 
3986 out:
3987 	NET_DBG("conn: %p, ret=%d", conn, ret);
3988 
3989 	return ret;
3990 }
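
/* Illustrative sketch (assumptions: IPv4, peer port 4242, a 5 second limit;
 * the peer address itself is omitted for brevity). Ports are passed in
 * network byte order, as the function stores them verbatim in the endpoint
 * structures:
 *
 *	struct sockaddr_in peer = {
 *		.sin_family = AF_INET,
 *		.sin_port = htons(4242),
 *	};
 *	struct sockaddr_in local = { .sin_family = AF_INET };
 *
 *	int ret = net_tcp_connect(context, (struct sockaddr *)&peer,
 *				  (struct sockaddr *)&local, peer.sin_port,
 *				  local.sin_port, K_SECONDS(5), NULL, NULL);
 *
 * With K_NO_WAIT the call instead returns -EINPROGRESS while the handshake
 * is still in progress, matching non-blocking socket semantics.
 */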
3991 
3992 int net_tcp_accept(struct net_context *context, net_tcp_accept_cb_t cb,
3993 		   void *user_data)
3994 {
3995 	struct tcp *conn = context->tcp;
3996 	struct sockaddr local_addr = { };
3997 	uint16_t local_port, remote_port;
3998 
3999 	if (!conn) {
4000 		return -EINVAL;
4001 	}
4002 
4003 	NET_DBG("context: %p, tcp: %p, cb: %p", context, conn, cb);
4004 
4005 	if (conn->state != TCP_LISTEN) {
4006 		return -EINVAL;
4007 	}
4008 
4009 	conn->accept_cb = cb;
4010 	local_addr.sa_family = net_context_get_family(context);
4011 
4012 	switch (local_addr.sa_family) {
4013 		struct sockaddr_in *in;
4014 		struct sockaddr_in6 *in6;
4015 
4016 	case AF_INET:
4017 		if (!IS_ENABLED(CONFIG_NET_IPV4)) {
4018 			return -EINVAL;
4019 		}
4020 
4021 		in = (struct sockaddr_in *)&local_addr;
4022 
4023 		if (net_sin_ptr(&context->local)->sin_addr) {
4024 			net_ipaddr_copy(&in->sin_addr,
4025 					net_sin_ptr(&context->local)->sin_addr);
4026 		}
4027 
4028 		in->sin_port =
4029 			net_sin((struct sockaddr *)&context->local)->sin_port;
4030 		local_port = ntohs(in->sin_port);
4031 		remote_port = ntohs(net_sin(&context->remote)->sin_port);
4032 
4033 		break;
4034 
4035 	case AF_INET6:
4036 		if (!IS_ENABLED(CONFIG_NET_IPV6)) {
4037 			return -EINVAL;
4038 		}
4039 
4040 		in6 = (struct sockaddr_in6 *)&local_addr;
4041 
4042 		if (net_sin6_ptr(&context->local)->sin6_addr) {
4043 			net_ipaddr_copy(&in6->sin6_addr,
4044 				net_sin6_ptr(&context->local)->sin6_addr);
4045 		}
4046 
4047 		in6->sin6_port =
4048 			net_sin6((struct sockaddr *)&context->local)->sin6_port;
4049 		local_port = ntohs(in6->sin6_port);
4050 		remote_port = ntohs(net_sin6(&context->remote)->sin6_port);
4051 
4052 		break;
4053 
4054 	default:
4055 		return -EINVAL;
4056 	}
4057 
4058 	context->user_data = user_data;
4059 
4060 	/* Remove the temporary connection handler and register a proper
4061 	 * one now that we have an established connection.
4062 	 */
4063 	net_conn_unregister(context->conn_handler);
4064 
4065 	return net_conn_register(net_context_get_proto(context),
4066 				 local_addr.sa_family,
4067 				 context->flags & NET_CONTEXT_REMOTE_ADDR_SET ?
4068 				 &context->remote : NULL,
4069 				 &local_addr,
4070 				 remote_port, local_port,
4071 				 context, tcp_recv, context,
4072 				 &context->conn_handler);
4073 }
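
/* Illustrative sketch (hypothetical callback; its signature is assumed to
 * follow the net_tcp_accept_cb_t typedef): a listening context arms the
 * accept path as follows, and the callback fires once a handshake
 * completes:
 *
 *	static void accept_cb(struct net_context *new_ctx,
 *			      struct sockaddr *addr, socklen_t addrlen,
 *			      int status, void *user_data)
 *	{
 *		...
 *	}
 *
 *	net_tcp_listen(context);
 *	net_tcp_accept(context, accept_cb, NULL);
 */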
4074 
4075 int net_tcp_recv(struct net_context *context, net_context_recv_cb_t cb,
4076 		 void *user_data)
4077 {
4078 	struct tcp *conn = context->tcp;
4079 
4080 	NET_DBG("context: %p, cb: %p, user_data: %p", context, cb, user_data);
4081 
4082 	context->recv_cb = cb;
4083 
4084 	if (conn) {
4085 		conn->recv_user_data = user_data;
4086 	}
4087 
4088 	return 0;
4089 }
4090 
4091 int net_tcp_finalize(struct net_pkt *pkt, bool force_chksum)
4092 {
4093 	NET_PKT_DATA_ACCESS_DEFINE(tcp_access, struct net_tcp_hdr);
4094 	struct net_tcp_hdr *tcp_hdr;
4095 	enum net_if_checksum_type type = net_pkt_family(pkt) == AF_INET6 ?
4096 		NET_IF_CHECKSUM_IPV6_TCP : NET_IF_CHECKSUM_IPV4_TCP;
4097 
4098 	tcp_hdr = (struct net_tcp_hdr *)net_pkt_get_data(pkt, &tcp_access);
4099 	if (!tcp_hdr) {
4100 		return -ENOBUFS;
4101 	}
4102 
4103 	tcp_hdr->chksum = 0U;
4104 
4105 	if (net_if_need_calc_tx_checksum(net_pkt_iface(pkt), type) || force_chksum) {
4106 		tcp_hdr->chksum = net_calc_chksum_tcp(pkt);
4107 		net_pkt_set_chksum_done(pkt, true);
4108 	}
4109 
4110 	return net_pkt_set_data(pkt, &tcp_access);
4111 }
4112 
4113 struct net_tcp_hdr *net_tcp_input(struct net_pkt *pkt,
4114 				  struct net_pkt_data_access *tcp_access)
4115 {
4116 	struct net_tcp_hdr *tcp_hdr;
4117 	enum net_if_checksum_type type = net_pkt_family(pkt) == AF_INET6 ?
4118 		NET_IF_CHECKSUM_IPV6_TCP : NET_IF_CHECKSUM_IPV4_TCP;
4119 
4120 	if (IS_ENABLED(CONFIG_NET_TCP_CHECKSUM) &&
4121 	    (net_if_need_calc_rx_checksum(net_pkt_iface(pkt), type) ||
4122 	     net_pkt_is_ip_reassembled(pkt)) &&
4123 	    net_calc_chksum_tcp(pkt) != 0U) {
4124 		NET_DBG("DROP: checksum mismatch");
4125 		goto drop;
4126 	}
4127 
4128 	tcp_hdr = (struct net_tcp_hdr *)net_pkt_get_data(pkt, tcp_access);
4129 	if (tcp_hdr && !net_pkt_set_data(pkt, tcp_access)) {
4130 		return tcp_hdr;
4131 	}
4132 
4133 drop:
4134 	net_stats_update_tcp_seg_chkerr(net_pkt_iface(pkt));
4135 	return NULL;
4136 }
4137 
4138 #if defined(CONFIG_NET_TEST_PROTOCOL)
4139 static enum net_verdict tcp_input(struct net_conn *net_conn,
4140 				  struct net_pkt *pkt,
4141 				  union net_ip_header *ip,
4142 				  union net_proto_header *proto,
4143 				  void *user_data)
4144 {
4145 	struct tcphdr *th = th_get(pkt);
4146 	enum net_verdict verdict = NET_DROP;
4147 
4148 	if (th) {
4149 		struct tcp *conn = tcp_conn_search(pkt);
4150 
4151 		if (conn == NULL && SYN == th_flags(th)) {
4152 			struct net_context *context =
4153 				tcp_calloc(1, sizeof(struct net_context));
4154 			net_tcp_get(context);
4155 			net_context_set_family(context, net_pkt_family(pkt));
4156 			conn = context->tcp;
4157 			tcp_endpoint_set(&conn->dst, pkt, TCP_EP_SRC);
4158 			tcp_endpoint_set(&conn->src, pkt, TCP_EP_DST);
4159 			/* Take an extra reference; the sanity check suite
4160 			 * will delete the connection explicitly.
4161 			 */
4162 			tcp_conn_ref(conn);
4163 		}
4164 
4165 		if (conn) {
4166 			conn->iface = pkt->iface;
4167 			verdict = tcp_in(conn, pkt);
4168 		}
4169 	}
4170 
4171 	return verdict;
4172 }
4173 
4174 static size_t tp_tcp_recv_cb(struct tcp *conn, struct net_pkt *pkt)
4175 {
4176 	ssize_t len = tcp_data_len(pkt);
4177 	struct net_pkt *up = tcp_pkt_clone(pkt);
4178 
4179 	NET_DBG("pkt: %p, len: %zu", pkt, net_pkt_get_len(pkt));
4180 
4181 	net_pkt_cursor_init(up);
4182 	net_pkt_set_overwrite(up, true);
4183 
4184 	net_pkt_pull(up, net_pkt_get_len(up) - len);
4185 
4186 	for (struct net_buf *buf = pkt->buffer; buf != NULL; buf = buf->frags) {
4187 		net_tcp_queue(conn->context, buf->data, buf->len, NULL);
4188 	}
4189 
4190 	return len;
4191 }
4192 
4193 static ssize_t tp_tcp_recv(int fd, void *buf, size_t len, int flags)
4194 {
4195 	return 0;
4196 }
4197 
4198 static void tp_init(struct tcp *conn, struct tp *tp)
4199 {
4200 	struct tp out = {
4201 		.msg = "",
4202 		.status = "",
4203 		.state = tcp_state_to_str(conn->state, true),
4204 		.seq = conn->seq,
4205 		.ack = conn->ack,
4206 		.rcv = "",
4207 		.data = "",
4208 		.op = "",
4209 	};
4210 
4211 	*tp = out;
4212 }
4213 
4214 static void tcp_to_json(struct tcp *conn, void *data, size_t *data_len)
4215 {
4216 	struct tp tp;
4217 
4218 	tp_init(conn, &tp);
4219 
4220 	tp_encode(&tp, data, data_len);
4221 }
4222 
4223 enum net_verdict tp_input(struct net_conn *net_conn,
4224 			  struct net_pkt *pkt,
4225 			  union net_ip_header *ip_hdr,
4226 			  union net_proto_header *proto,
4227 			  void *user_data)
4228 {
4229 	struct net_udp_hdr *uh = net_udp_get_hdr(pkt, NULL);
4230 	size_t data_len = ntohs(uh->len) - sizeof(*uh);
4231 	struct tcp *conn = tcp_conn_search(pkt);
4232 	size_t json_len = 0;
4233 	struct tp *tp;
4234 	struct tp_new *tp_new;
4235 	enum tp_type type;
4236 	bool responded = false;
4237 	static char buf[512];
4238 	enum net_verdict verdict = NET_DROP;
4239 
4240 	net_pkt_cursor_init(pkt);
4241 	net_pkt_set_overwrite(pkt, true);
4242 	net_pkt_skip(pkt, net_pkt_ip_hdr_len(pkt) +
4243 		     net_pkt_ip_opts_len(pkt) + sizeof(*uh));
4244 	net_pkt_read(pkt, buf, data_len);
4245 	buf[data_len] = '\0';
4246 	data_len += 1;
4247 
4248 	type = json_decode_msg(buf, data_len);
4249 
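	/* Re-read the raw payload below; the JSON decode step above may have
	 * modified the buffer in place.
	 */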
4250 	data_len = ntohs(uh->len) - sizeof(*uh);
4251 
4252 	net_pkt_cursor_init(pkt);
4253 	net_pkt_set_overwrite(pkt, true);
4254 	net_pkt_skip(pkt, net_pkt_ip_hdr_len(pkt) +
4255 		     net_pkt_ip_opts_len(pkt) + sizeof(*uh));
4256 	net_pkt_read(pkt, buf, data_len);
4257 	buf[data_len] = '\0';
4258 	data_len += 1;
4259 
4260 	switch (type) {
4261 	case TP_CONFIG_REQUEST:
4262 		tp_new = json_to_tp_new(buf, data_len);
4263 		break;
4264 	default:
4265 		tp = json_to_tp(buf, data_len);
4266 		break;
4267 	}
4268 
4269 	switch (type) {
4270 	case TP_COMMAND:
4271 		if (is("CONNECT", tp->op)) {
4272 			tp_output(pkt->family, pkt->iface, buf, 1);
4273 			responded = true;
4274 			{
4275 				struct net_context *context = tcp_calloc(1,
4276 						sizeof(struct net_context));
4277 				net_tcp_get(context);
4278 				net_context_set_family(context,
4279 						       net_pkt_family(pkt));
4280 				conn = context->tcp;
4281 				tcp_endpoint_set(&conn->dst, pkt, TCP_EP_SRC);
4282 				tcp_endpoint_set(&conn->src, pkt, TCP_EP_DST);
4283 				conn->iface = pkt->iface;
4284 				tcp_conn_ref(conn);
4285 			}
4286 			conn->seq = tp->seq;
4287 			verdict = tcp_in(conn, NULL);
4288 		}
4289 		if (is("CLOSE", tp->op)) {
4290 			tp_trace = false;
4291 			{
4292 				struct net_context *context;
4293 
4294 				conn = (void *)sys_slist_peek_head(&tcp_conns);
4295 				context = conn->context;
4296 				while (tcp_conn_close(conn, 0))
4297 					;
4298 				tcp_free(context);
4299 			}
4300 			tp_mem_stat();
4301 			tp_nbuf_stat();
4302 			tp_pkt_stat();
4303 			tp_seq_stat();
4304 		}
4305 		if (is("CLOSE2", tp->op)) {
4306 			struct tcp *conn =
4307 				(void *)sys_slist_peek_head(&tcp_conns);
4308 			net_tcp_put(conn->context);
4309 		}
4310 		if (is("RECV", tp->op)) {
4311 #define HEXSTR_SIZE 64
4312 			char hexstr[HEXSTR_SIZE];
4313 			ssize_t len = tp_tcp_recv(0, buf, sizeof(buf), 0);
4314 
4315 			tp_init(conn, tp);
4316 			bin2hex(buf, len, hexstr, HEXSTR_SIZE);
4317 			tp->data = hexstr;
4318 			NET_DBG("%zd = tcp_recv(\"%s\")", len, tp->data);
4319 			json_len = sizeof(buf);
4320 			tp_encode(tp, buf, &json_len);
4321 		}
4322 		if (is("SEND", tp->op)) {
4323 			ssize_t len = tp_str_to_hex(buf, sizeof(buf), tp->data);
4324 			struct tcp *conn =
4325 				(void *)sys_slist_peek_head(&tcp_conns);
4326 
4327 			tp_output(pkt->family, pkt->iface, buf, 1);
4328 			responded = true;
4329 			NET_DBG("tcp_send(\"%s\")", tp->data);
4330 			{
4331 				net_tcp_queue(conn->context, buf, len, NULL);
4332 			}
4333 		}
4334 		break;
4335 	case TP_CONFIG_REQUEST:
4336 		tp_new_find_and_apply(tp_new, "tcp_rto", &tcp_rto, TP_INT);
4337 		tp_new_find_and_apply(tp_new, "tcp_retries", &tcp_retries,
4338 					TP_INT);
4339 		tp_new_find_and_apply(tp_new, "tcp_window", &tcp_rx_window,
4340 					TP_INT);
4341 		tp_new_find_and_apply(tp_new, "tp_trace", &tp_trace, TP_BOOL);
4342 		break;
4343 	case TP_INTROSPECT_REQUEST:
4344 		json_len = sizeof(buf);
4345 		conn = (void *)sys_slist_peek_head(&tcp_conns);
4346 		tcp_to_json(conn, buf, &json_len);
4347 		break;
4348 	case TP_DEBUG_STOP:
4349 	case TP_DEBUG_CONTINUE:
4350 		tp_state = tp->type;
4351 		break;
4352 	default:
4353 		NET_ASSERT(false, "Unimplemented tp command: %s", tp->msg);
4354 	}
4355 
4356 	if (json_len) {
4357 		tp_output(pkt->family, pkt->iface, buf, json_len);
4358 	} else if ((TP_CONFIG_REQUEST == type || TP_COMMAND == type)
4359 			&& responded == false) {
4360 		tp_output(pkt->family, pkt->iface, buf, 1);
4361 	}
4362 
4363 	return verdict;
4364 }
4365 
4366 static void test_cb_register(sa_family_t family, uint8_t proto, uint16_t remote_port,
4367 			     uint16_t local_port, net_conn_cb_t cb)
4368 {
4369 	struct net_conn_handle *conn_handle = NULL;
4370 	const struct sockaddr addr = { .sa_family = family, };
4371 
4372 	int ret = net_conn_register(proto,
4373 				    family,
4374 				    &addr,	/* remote address */
4375 				    &addr,	/* local address */
4376 				    local_port,
4377 				    remote_port,
4378 				    NULL,
4379 				    cb,
4380 				    NULL,	/* user_data */
4381 				    &conn_handle);
4382 	if (ret < 0) {
4383 		NET_ERR("net_conn_register(): %d", ret);
4384 	}
4385 }
4386 #endif /* CONFIG_NET_TEST_PROTOCOL */
4387 
4388 void net_tcp_foreach(net_tcp_cb_t cb, void *user_data)
4389 {
4390 	struct tcp *conn;
4391 	struct tcp *tmp;
4392 
4393 	k_mutex_lock(&tcp_lock, K_FOREVER);
4394 
4395 	SYS_SLIST_FOR_EACH_CONTAINER_SAFE(&tcp_conns, conn, tmp, next) {
4396 		if (atomic_get(&conn->ref_count) > 0) {
4397 			k_mutex_unlock(&tcp_lock);
4398 			cb(conn, user_data);
4399 			k_mutex_lock(&tcp_lock, K_FOREVER);
4400 		}
4401 	}
4402 
4403 	k_mutex_unlock(&tcp_lock);
4404 }
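
/* Illustrative sketch (hypothetical callback): counting live connections
 * with net_tcp_foreach() could look like this:
 *
 *	static void count_cb(struct tcp *conn, void *user_data)
 *	{
 *		(*(int *)user_data)++;
 *	}
 *
 *	int count = 0;
 *
 *	net_tcp_foreach(count_cb, &count);
 *
 * Note that tcp_lock is dropped around each callback invocation, so the
 * callback may safely call back into the TCP stack.
 */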
4405 
4406 static uint16_t get_ipv6_destination_mtu(struct net_if *iface,
4407 					 const struct in6_addr *dest)
4408 {
4409 #if defined(CONFIG_NET_IPV6_PMTU)
4410 	int mtu = net_pmtu_get_mtu((struct sockaddr *)&(struct sockaddr_in6){
4411 			.sin6_family = AF_INET6,
4412 			.sin6_addr = *dest });
4413 
4414 	if (mtu < 0) {
4415 		if (iface != NULL) {
4416 			return net_if_get_mtu(iface);
4417 		}
4418 
4419 		return NET_IPV6_MTU;
4420 	}
4421 
4422 	return (uint16_t)mtu;
4423 #else
4424 	if (iface != NULL) {
4425 		return net_if_get_mtu(iface);
4426 	}
4427 
4428 	return NET_IPV6_MTU;
4429 #endif /* CONFIG_NET_IPV6_PMTU */
4430 }
4431 
4432 static uint16_t get_ipv4_destination_mtu(struct net_if *iface,
4433 					 const struct in_addr *dest)
4434 {
4435 #if defined(CONFIG_NET_IPV4_PMTU)
4436 	int mtu = net_pmtu_get_mtu((struct sockaddr *)&(struct sockaddr_in){
4437 			.sin_family = AF_INET,
4438 			.sin_addr = *dest });
4439 
4440 	if (mtu < 0) {
4441 		if (iface != NULL) {
4442 			return net_if_get_mtu(iface);
4443 		}
4444 
4445 		return NET_IPV4_MTU;
4446 	}
4447 
4448 	return (uint16_t)mtu;
4449 #else
4450 	if (iface != NULL) {
4451 		return net_if_get_mtu(iface);
4452 	}
4453 
4454 	return NET_IPV4_MTU;
4455 #endif /* CONFIG_NET_IPV4_PMTU */
4456 }
4457 
4458 uint16_t net_tcp_get_supported_mss(const struct tcp *conn)
4459 {
4460 	sa_family_t family = net_context_get_family(conn->context);
4461 
4462 	if (IS_ENABLED(CONFIG_NET_IPV4) && family == AF_INET) {
4463 		struct net_if *iface = net_context_get_iface(conn->context);
4464 		uint16_t dest_mtu;
4465 
4466 		dest_mtu = get_ipv4_destination_mtu(iface, &conn->dst.sin.sin_addr);
4467 
4468 		/* Derive the MSS from the destination MTU minus the TCP/IP header size */
4469 		return dest_mtu - NET_IPV4TCPH_LEN;
4470 
4471 	} else if (IS_ENABLED(CONFIG_NET_IPV6) && family == AF_INET6) {
4472 		struct net_if *iface = net_context_get_iface(conn->context);
4473 		uint16_t dest_mtu;
4474 
4475 		dest_mtu = get_ipv6_destination_mtu(iface, &conn->dst.sin6.sin6_addr);
4476 
4477 		/* Derive the MSS from the destination MTU minus the TCP/IP header size */
4478 		return dest_mtu - NET_IPV6TCPH_LEN;
4479 	}
4480 
4481 	return 0;
4482 }
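
/* Worked example: with a standard 1500 byte Ethernet MTU and no PMTU entry,
 * the supported MSS is 1500 - NET_IPV4TCPH_LEN (20 B IPv4 + 20 B TCP = 40)
 * = 1460 for IPv4, and 1500 - NET_IPV6TCPH_LEN (40 B IPv6 + 20 B TCP = 60)
 * = 1440 for IPv6.
 */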
4483 
4484 #if defined(CONFIG_NET_TEST)
4485 struct testing_user_data {
4486 	struct sockaddr remote;
4487 	uint16_t mtu;
4488 };
4489 
4490 static void testing_find_conn(struct tcp *conn, void *user_data)
4491 {
4492 	struct testing_user_data *data = user_data;
4493 
4494 	if (IS_ENABLED(CONFIG_NET_IPV6) && data->remote.sa_family == AF_INET6 &&
4495 	    net_ipv6_addr_cmp(&conn->dst.sin6.sin6_addr,
4496 			      &net_sin6(&data->remote)->sin6_addr)) {
4497 		if (data->mtu > 0) {
4498 			/* Set it only once */
4499 			return;
4500 		}
4501 
4502 		NET_DBG("Found connection %p mtu %u", conn,
4503 			net_tcp_get_supported_mss(conn) + NET_IPV6TCPH_LEN);
4504 		data->mtu = net_tcp_get_supported_mss(conn) + NET_IPV6TCPH_LEN;
4505 		return;
4506 	}
4507 
4508 	if (IS_ENABLED(CONFIG_NET_IPV4) && data->remote.sa_family == AF_INET &&
4509 	    net_ipv4_addr_cmp(&conn->dst.sin.sin_addr,
4510 			      &net_sin(&data->remote)->sin_addr)) {
4511 		if (data->mtu > 0) {
4512 			/* Set it only once */
4513 			return;
4514 		}
4515 
4516 		NET_DBG("Found connection %p mtu %u", conn,
4517 			net_tcp_get_supported_mss(conn) + NET_IPV4TCPH_LEN);
4518 		data->mtu = net_tcp_get_supported_mss(conn) + NET_IPV4TCPH_LEN;
4519 		return;
4520 	}
4521 }
4522 
4523 uint16_t net_tcp_get_mtu(struct sockaddr *dst)
4524 {
4525 	struct testing_user_data data = {
4526 		.remote = *dst,
4527 		.mtu = 0,
4528 	};
4529 
4530 	net_tcp_foreach(testing_find_conn, &data);
4531 
4532 	return data.mtu;
4533 }
4534 #endif /* CONFIG_NET_TEST */
4535 
4536 int net_tcp_set_option(struct net_context *context,
4537 		       enum tcp_conn_option option,
4538 		       const void *value, size_t len)
4539 {
4540 	int ret = 0;
4541 
4542 	NET_ASSERT(context);
4543 
4544 	struct tcp *conn = context->tcp;
4545 
4546 	NET_ASSERT(conn);
4547 
4548 	k_mutex_lock(&conn->lock, K_FOREVER);
4549 
4550 	switch (option) {
4551 	case TCP_OPT_NODELAY:
4552 		ret = set_tcp_nodelay(conn, value, len);
4553 		break;
4554 	case TCP_OPT_KEEPALIVE:
4555 		ret = set_tcp_keep_alive(conn, value, len);
4556 		break;
4557 	case TCP_OPT_KEEPIDLE:
4558 		ret = set_tcp_keep_idle(conn, value, len);
4559 		break;
4560 	case TCP_OPT_KEEPINTVL:
4561 		ret = set_tcp_keep_intvl(conn, value, len);
4562 		break;
4563 	case TCP_OPT_KEEPCNT:
4564 		ret = set_tcp_keep_cnt(conn, value, len);
4565 		break;
4566 	}
4567 
4568 	k_mutex_unlock(&conn->lock);
4569 
4570 	return ret;
4571 }
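
/* Illustrative sketch: disabling Nagle's algorithm on a connected context.
 * The int-valued flag mirrors the setsockopt(TCP_NODELAY) convention and is
 * an assumption of this example:
 *
 *	int one = 1;
 *
 *	(void)net_tcp_set_option(context, TCP_OPT_NODELAY,
 *				 &one, sizeof(one));
 */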
4572 
4573 int net_tcp_get_option(struct net_context *context,
4574 		       enum tcp_conn_option option,
4575 		       void *value, size_t *len)
4576 {
4577 	int ret = 0;
4578 
4579 	NET_ASSERT(context);
4580 
4581 	struct tcp *conn = context->tcp;
4582 
4583 	NET_ASSERT(conn);
4584 
4585 	k_mutex_lock(&conn->lock, K_FOREVER);
4586 
4587 	switch (option) {
4588 	case TCP_OPT_NODELAY:
4589 		ret = get_tcp_nodelay(conn, value, len);
4590 		break;
4591 	case TCP_OPT_KEEPALIVE:
4592 		ret = get_tcp_keep_alive(conn, value, len);
4593 		break;
4594 	case TCP_OPT_KEEPIDLE:
4595 		ret = get_tcp_keep_idle(conn, value, len);
4596 		break;
4597 	case TCP_OPT_KEEPINTVL:
4598 		ret = get_tcp_keep_intvl(conn, value, len);
4599 		break;
4600 	case TCP_OPT_KEEPCNT:
4601 		ret = get_tcp_keep_cnt(conn, value, len);
4602 		break;
4603 	}
4604 
4605 	k_mutex_unlock(&conn->lock);
4606 
4607 	return ret;
4608 }
4609 
4610 const char *net_tcp_state_str(enum tcp_state state)
4611 {
4612 	return tcp_state_to_str(state, false);
4613 }
4614 
4615 struct k_sem *net_tcp_tx_sem_get(struct net_context *context)
4616 {
4617 	struct tcp *conn = context->tcp;
4618 
4619 	return &conn->tx_sem;
4620 }
4621 
4622 struct k_sem *net_tcp_conn_sem_get(struct net_context *context)
4623 {
4624 	struct tcp *conn = context->tcp;
4625 
4626 	return &conn->connect_sem;
4627 }
4628 
4629 void net_tcp_init(void)
4630 {
4631 	int i;
4632 	int rto;
4633 #if defined(CONFIG_NET_TEST_PROTOCOL)
4634 	/* Register inputs for TTCN-3 based TCP sanity check */
4635 	test_cb_register(AF_INET,  IPPROTO_TCP, 4242, 4242, tcp_input);
4636 	test_cb_register(AF_INET6, IPPROTO_TCP, 4242, 4242, tcp_input);
4637 	test_cb_register(AF_INET,  IPPROTO_UDP, 4242, 4242, tp_input);
4638 	test_cb_register(AF_INET6, IPPROTO_UDP, 4242, 4242, tp_input);
4639 
4640 	tcp_recv_cb = tp_tcp_recv_cb;
4641 #endif
4642 
4643 #if defined(CONFIG_NET_TC_THREAD_COOPERATIVE)
4644 #define THREAD_PRIORITY K_PRIO_COOP(CONFIG_NET_TCP_WORKER_PRIO)
4645 #else
4646 #define THREAD_PRIORITY K_PRIO_PREEMPT(CONFIG_NET_TCP_WORKER_PRIO)
4647 #endif
4648 
4649 	/* Use a private workqueue in order not to block the system work queue.
4650 	 */
4651 	k_work_queue_start(&tcp_work_q, work_q_stack,
4652 			   K_KERNEL_STACK_SIZEOF(work_q_stack), THREAD_PRIORITY,
4653 			   NULL);
4654 
4655 	/* Compute the largest possible retransmission timeout */
4656 	tcp_max_timeout_ms = 0;
4657 	rto = tcp_rto;
4658 	for (i = 0; i < tcp_retries; i++) {
4659 		tcp_max_timeout_ms += rto;
4660 		rto += rto >> 1;
4661 	}
4662 	/* Add one more initial RTO for the final timeout cycle */
4663 	tcp_max_timeout_ms += tcp_rto;
4664 
4665 	/* When CONFIG_NET_TCP_RANDOMIZED_RTO is active, the timeout can in the worst case be 1.5 times larger */
4666 	if (IS_ENABLED(CONFIG_NET_TCP_RANDOMIZED_RTO)) {
4667 		tcp_max_timeout_ms += tcp_max_timeout_ms >> 1;
4668 	}
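
	/* Worked example, assuming the default Kconfig values of a 200 ms
	 * initial RTO and 9 retries: the loop above accumulates
	 * 200 + 300 + 450 + 675 + 1012 + 1518 + 2277 + 3415 + 5122 = 14969 ms
	 * (each step multiplies the RTO by 1.5 using integer arithmetic), and
	 * the extra initial RTO for the final cycle gives 15169 ms. With
	 * randomized RTO this worst case grows by half again, to ~22.8 s.
	 */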
4669 
4670 	k_thread_name_set(&tcp_work_q.thread, "tcp_work");
4671 	NET_DBG("Workq started. Thread ID: %p", &tcp_work_q.thread);
4672 }
4673