1 /** @file
2  * @brief Network initialization
3  *
4  * Initialize the network IP stack. Create one thread for reading data
5  * from IP stack and passing that data to applications (Rx thread).
6  */
7 
8 /*
9  * Copyright (c) 2016 Intel Corporation
10  *
11  * SPDX-License-Identifier: Apache-2.0
12  */
13 
14 #include <zephyr/logging/log.h>
15 LOG_MODULE_REGISTER(net_core, CONFIG_NET_CORE_LOG_LEVEL);
16 
17 #include <zephyr/init.h>
18 #include <zephyr/kernel.h>
19 #include <zephyr/tracing/tracing.h>
20 #include <zephyr/toolchain.h>
21 #include <zephyr/linker/sections.h>
22 #include <string.h>
23 #include <errno.h>
24 
25 #include <zephyr/net/ipv4_autoconf.h>
26 #include <zephyr/net/net_if.h>
27 #include <zephyr/net/net_mgmt.h>
28 #include <zephyr/net/net_pkt.h>
29 #include <zephyr/net/net_core.h>
30 #include <zephyr/net/dns_resolve.h>
31 #include <zephyr/net/gptp.h>
32 #include <zephyr/net/websocket.h>
33 #include <zephyr/net/ethernet.h>
34 #include <zephyr/net/capture.h>
35 
36 #if defined(CONFIG_NET_LLDP)
37 #include <zephyr/net/lldp.h>
38 #endif
39 
40 #include "net_private.h"
41 #include "shell/net_shell.h"
42 
43 #include "pmtu.h"
44 
45 #include "icmpv6.h"
46 #include "ipv6.h"
47 
48 #include "icmpv4.h"
49 #include "ipv4.h"
50 
51 #include "dhcpv4/dhcpv4_internal.h"
52 #include "dhcpv6/dhcpv6_internal.h"
53 
54 #include "route.h"
55 
56 #include "packet_socket.h"
57 #include "canbus_socket.h"
58 
59 #include "connection.h"
60 #include "udp_internal.h"
61 #include "tcp_internal.h"
62 
63 #include "net_stats.h"
64 
65 #if defined(CONFIG_NET_NATIVE)
/* Run one received packet through the stack layers.
 *
 * Processing order:
 *   1. SOCK_RAW packet sockets (ETH_P_ALL) see the frame first.
 *   2. L2 processing via net_if_recv_data(), skipped for loopback
 *      packets and for locally routed reassembled packets that carry
 *      no link layer header.
 *   3. SOCK_DGRAM packet sockets (after L2 removed its header).
 *   4. IPPROTO_RAW packet sockets, then the IPv6/IPv4 input functions
 *      (or the CAN socket input for AF_CAN).
 *
 * The verdict tells the caller whether the packet was consumed
 * (NET_OK), should be fed back to the stack (NET_CONTINUE, used for
 * tunneling with virtual L2), or must be dropped (NET_DROP).
 */
static inline enum net_verdict process_data(struct net_pkt *pkt,
					    bool is_loopback)
{
	int ret;
	bool locally_routed = false;

	net_pkt_set_l2_processed(pkt, false);

	/* Initial call will forward packets to SOCK_RAW packet sockets. */
	ret = net_packet_socket_input(pkt, ETH_P_ALL);
	if (ret != NET_CONTINUE) {
		return ret;
	}

	/* If the packet is routed back to us when we have reassembled an IPv4 or IPv6 packet,
	 * then do not pass it to L2 as the packet does not have link layer headers in it.
	 */
	if (net_pkt_is_ip_reassembled(pkt)) {
		locally_routed = true;
	}

	/* If there is no data, then drop the packet. */
	if (!pkt->frags) {
		NET_DBG("Corrupted packet (frags %p)", pkt->frags);
		net_stats_update_processing_error(net_pkt_iface(pkt));

		return NET_DROP;
	}

	if (!is_loopback && !locally_routed) {
		ret = net_if_recv_data(net_pkt_iface(pkt), pkt);
		if (ret != NET_CONTINUE) {
			if (ret == NET_DROP) {
				NET_DBG("Packet %p discarded by L2", pkt);
				net_stats_update_processing_error(
							net_pkt_iface(pkt));
			}

			return ret;
		}
	}

	net_pkt_set_l2_processed(pkt, true);

	/* L2 has modified the buffer starting point, it is easier
	 * to re-initialize the cursor rather than updating it.
	 */
	net_pkt_cursor_init(pkt);

	if (IS_ENABLED(CONFIG_NET_SOCKETS_PACKET_DGRAM)) {
		/* Consecutive call will forward packets to SOCK_DGRAM packet sockets
		 * (after L2 removed header).
		 */
		ret = net_packet_socket_input(pkt, ETH_P_ALL);
		if (ret != NET_CONTINUE) {
			return ret;
		}
	}

	uint8_t family = net_pkt_family(pkt);

	if (IS_ENABLED(CONFIG_NET_IP) && (family == AF_INET || family == AF_INET6 ||
					  family == AF_UNSPEC || family == AF_PACKET)) {
		/* L2 processed, now we can pass IPPROTO_RAW to packet socket:
		 */
		ret = net_packet_socket_input(pkt, IPPROTO_RAW);
		if (ret != NET_CONTINUE) {
			return ret;
		}

		/* IP version and header length. The version nibble is read
		 * through the IPv6 header view regardless of family; only
		 * the top 4 bits (version) are examined here.
		 */
		uint8_t vtc_vhl = NET_IPV6_HDR(pkt)->vtc & 0xf0;

		if (IS_ENABLED(CONFIG_NET_IPV6) && vtc_vhl == 0x60) {
			return net_ipv6_input(pkt, is_loopback);
		} else if (IS_ENABLED(CONFIG_NET_IPV4) && vtc_vhl == 0x40) {
			return net_ipv4_input(pkt, is_loopback);
		}

		NET_DBG("Unknown IP family packet (0x%x)", NET_IPV6_HDR(pkt)->vtc & 0xf0);
		net_stats_update_ip_errors_protoerr(net_pkt_iface(pkt));
		net_stats_update_ip_errors_vhlerr(net_pkt_iface(pkt));
		return NET_DROP;
	} else if (IS_ENABLED(CONFIG_NET_SOCKETS_CAN) && family == AF_CAN) {
		return net_canbus_socket_input(pkt);
	}

	NET_DBG("Unknown protocol family packet (0x%x)", family);
	return NET_DROP;
}
156 
/* Drive a packet through process_data() until it reaches a final
 * verdict, unreferencing it when it is not consumed by the stack.
 */
static void processing_data(struct net_pkt *pkt, bool is_loopback)
{
	enum net_verdict verdict;

	/* With virtual L2 enabled, a NET_CONTINUE verdict means a
	 * tunneling packet was decapsulated and must be fed back to
	 * the stack, so keep iterating until a final verdict.
	 */
	do {
		verdict = process_data(pkt, is_loopback);
	} while (verdict == NET_CONTINUE && IS_ENABLED(CONFIG_NET_L2_VIRTUAL));

	if (verdict == NET_OK) {
		NET_DBG("Consumed pkt %p", pkt);
	} else {
		/* NET_DROP, or NET_CONTINUE without virtual L2 support. */
		NET_DBG("Dropping pkt %p", pkt);
		net_pkt_unref(pkt);
	}
}
182 
/* Things to setup after we are able to RX and TX */
static void net_post_init(void)
{
#if defined(CONFIG_NET_LLDP)
	/* Compiled in only when CONFIG_NET_LLDP is enabled. */
	net_lldp_init();
#endif
#if defined(CONFIG_NET_GPTP)
	/* Compiled in only when CONFIG_NET_GPTP is enabled. */
	net_gptp_init();
#endif
}
193 
copy_ll_addr(struct net_pkt * pkt)194 static inline void copy_ll_addr(struct net_pkt *pkt)
195 {
196 	memcpy(net_pkt_lladdr_src(pkt), net_pkt_lladdr_if(pkt),
197 	       sizeof(struct net_linkaddr));
198 	memcpy(net_pkt_lladdr_dst(pkt), net_pkt_lladdr_if(pkt),
199 	       sizeof(struct net_linkaddr));
200 }
201 
/* Check if the IPv{4|6} addresses are proper. As this can be expensive,
 * make this optional (CONFIG_NET_IP_ADDR_CHECK). We still check the
 * IPv4 TTL and IPv6 hop limit if the corresponding protocol family is
 * enabled.
 *
 * Return values:
 *    0  - packet is fine, continue sending via the interface
 *    1  - packet is destined to ourselves; the caller must loop it
 *         back to RX processing
 *   <0  - drop the packet; -ENOMSG means a silent drop because the
 *         TTL/hop limit was 0 (valid configuration, not an error)
 */
static inline int check_ip(struct net_pkt *pkt)
{
	uint8_t family;
	int ret;

	if (!IS_ENABLED(CONFIG_NET_IP)) {
		return 0;
	}

	family = net_pkt_family(pkt);
	ret = 0;

	if (IS_ENABLED(CONFIG_NET_IPV6) && family == AF_INET6) {
		/* Drop IPv6 packet if hop limit is 0 */
		if (NET_IPV6_HDR(pkt)->hop_limit == 0) {
			NET_DBG("DROP: IPv6 hop limit");
			ret = -ENOMSG; /* silently drop the pkt, not an error */
			goto drop;
		}

		/* Address sanity checks below are optional. */
		if (!IS_ENABLED(CONFIG_NET_IP_ADDR_CHECK)) {
			return 0;
		}

#if defined(CONFIG_NET_LOOPBACK)
		/* If loopback driver is enabled, then send packets to it
		 * as the address check is not needed.
		 */
		if (net_if_l2(net_pkt_iface(pkt)) == &NET_L2_GET_NAME(DUMMY)) {
			return 0;
		}
#endif
		/* An unspecified (::) destination is never valid. */
		if (net_ipv6_addr_cmp((struct in6_addr *)NET_IPV6_HDR(pkt)->dst,
				      net_ipv6_unspecified_address())) {
			NET_DBG("DROP: IPv6 dst address missing");
			ret = -EADDRNOTAVAIL;
			goto drop;
		}

		/* If the destination address is our own, then route it
		 * back to us (if it is not already forwarded).
		 */
		if ((net_ipv6_is_addr_loopback(
				(struct in6_addr *)NET_IPV6_HDR(pkt)->dst) ||
		    net_ipv6_is_my_addr(
				(struct in6_addr *)NET_IPV6_HDR(pkt)->dst)) &&
		    !net_pkt_forwarding(pkt)) {
			struct in6_addr addr;

			/* Swap the addresses so that in receiving side
			 * the packet is accepted.
			 */
			net_ipv6_addr_copy_raw((uint8_t *)&addr, NET_IPV6_HDR(pkt)->src);
			net_ipv6_addr_copy_raw(NET_IPV6_HDR(pkt)->src,
					       NET_IPV6_HDR(pkt)->dst);
			net_ipv6_addr_copy_raw(NET_IPV6_HDR(pkt)->dst, (uint8_t *)&addr);

			net_pkt_set_ll_proto_type(pkt, ETH_P_IPV6);
			copy_ll_addr(pkt);

			return 1;
		}

		/* If the destination address is interface local scope
		 * multicast address, then loop the data back to us.
		 * The FF01:: multicast addresses are only meant to be used
		 * in local host, so this is similar as how ::1 unicast
		 * addresses are handled. See RFC 3513 ch 2.7 for details.
		 */
		if (net_ipv6_is_addr_mcast_iface(
				(struct in6_addr *)NET_IPV6_HDR(pkt)->dst)) {
			NET_DBG("IPv6 interface scope mcast dst address");
			return 1;
		}

		/* The source check must be done after the destination check
		 * as having src ::1 is perfectly ok if dst is ::1 too.
		 */
		if (net_ipv6_is_addr_loopback(
				(struct in6_addr *)NET_IPV6_HDR(pkt)->src)) {
			NET_DBG("DROP: IPv6 loopback src address");
			ret = -EADDRNOTAVAIL;
			goto drop;
		}

	} else if (IS_ENABLED(CONFIG_NET_IPV4) && family == AF_INET) {
		/* Drop IPv4 packet if ttl is 0 */
		if (NET_IPV4_HDR(pkt)->ttl == 0) {
			NET_DBG("DROP: IPv4 ttl");
			ret = -ENOMSG; /* silently drop the pkt, not an error */
			goto drop;
		}

		/* Address sanity checks below are optional. */
		if (!IS_ENABLED(CONFIG_NET_IP_ADDR_CHECK)) {
			return 0;
		}

#if defined(CONFIG_NET_LOOPBACK)
		/* If loopback driver is enabled, then send packets to it
		 * as the address check is not needed.
		 */
		if (net_if_l2(net_pkt_iface(pkt)) == &NET_L2_GET_NAME(DUMMY)) {
			return 0;
		}
#endif
		/* An unspecified (0.0.0.0) destination is never valid. */
		if (net_ipv4_addr_cmp((struct in_addr *)NET_IPV4_HDR(pkt)->dst,
				      net_ipv4_unspecified_address())) {
			NET_DBG("DROP: IPv4 dst address missing");
			ret = -EADDRNOTAVAIL;
			goto drop;
		}

		/* If the destination address is our own, then route it
		 * back to us. Broadcast destinations are excluded so that
		 * they still go out on the wire.
		 */
		if (net_ipv4_is_addr_loopback((struct in_addr *)NET_IPV4_HDR(pkt)->dst) ||
		    (net_ipv4_is_addr_bcast(net_pkt_iface(pkt),
				     (struct in_addr *)NET_IPV4_HDR(pkt)->dst) == false &&
		     net_ipv4_is_my_addr((struct in_addr *)NET_IPV4_HDR(pkt)->dst))) {
			struct in_addr addr;

			/* Swap the addresses so that in receiving side
			 * the packet is accepted.
			 */
			net_ipv4_addr_copy_raw((uint8_t *)&addr, NET_IPV4_HDR(pkt)->src);
			net_ipv4_addr_copy_raw(NET_IPV4_HDR(pkt)->src,
					       NET_IPV4_HDR(pkt)->dst);
			net_ipv4_addr_copy_raw(NET_IPV4_HDR(pkt)->dst, (uint8_t *)&addr);

			net_pkt_set_ll_proto_type(pkt, ETH_P_IP);
			copy_ll_addr(pkt);

			return 1;
		}

		/* The source check must be done after the destination check
		 * as having src 127.0.0.0/8 is perfectly ok if dst is in
		 * localhost subnet too.
		 */
		if (net_ipv4_is_addr_loopback((struct in_addr *)NET_IPV4_HDR(pkt)->src)) {
			NET_DBG("DROP: IPv4 loopback src address");
			ret = -EADDRNOTAVAIL;
			goto drop;
		}
	}

	return ret;

drop:
	if (IS_ENABLED(CONFIG_NET_STATISTICS)) {
		if (family == AF_INET6) {
			net_stats_update_ipv6_drop(net_pkt_iface(pkt));
		} else {
			net_stats_update_ipv4_drop(net_pkt_iface(pkt));
		}
	}

	return ret;
}
365 
/* Try to send a fully constructed packet out via its interface within
 * the given timeout.
 *
 * Returns 0 on success (this also covers the silent-drop case when the
 * TTL/hop limit is 0, and the case where the packet was looped back to
 * our own RX path), or a negative errno on failure.
 *
 * Fix: the check_ip() error path used to "return status;" directly,
 * skipping SYS_PORT_TRACING_FUNC_EXIT() which every other path emits.
 * It now exits through the common "err" label so the tracing exit hook
 * fires on all paths.
 */
int net_try_send_data(struct net_pkt *pkt, k_timeout_t timeout)
{
	int status;
	int ret;

	SYS_PORT_TRACING_FUNC_ENTER(net, send_data, pkt);

	if (!pkt || !pkt->frags) {
		ret = -ENODATA;
		goto err;
	}

	if (!net_pkt_iface(pkt)) {
		ret = -EINVAL;
		goto err;
	}

	net_pkt_trim_buffer(pkt);
	net_pkt_cursor_init(pkt);

	status = check_ip(pkt);
	if (status < 0) {
		/* Special handling for ENOMSG which is returned if packet
		 * TTL is 0 or hop limit is 0. This is not an error as it is
		 * perfectly valid case to set the limit to 0. In this case
		 * we just silently drop the packet by returning 0.
		 */
		if (status == -ENOMSG) {
			ret = 0;
			goto err;
		}

		ret = status;
		goto err;
	} else if (status > 0) {
		/* Packet is destined back to us so send it directly
		 * to RX processing.
		 */
		NET_DBG("Loopback pkt %p back to us", pkt);
		processing_data(pkt, true);
		ret = 0;
		goto err;
	}

	if (net_if_try_send_data(net_pkt_iface(pkt), pkt, timeout) == NET_DROP) {
		ret = -EIO;
		goto err;
	}

	if (IS_ENABLED(CONFIG_NET_STATISTICS)) {
		switch (net_pkt_family(pkt)) {
		case AF_INET:
			net_stats_update_ipv4_sent(net_pkt_iface(pkt));
			break;
		case AF_INET6:
			net_stats_update_ipv6_sent(net_pkt_iface(pkt));
			break;
		default:
			/* Other families are not counted in IP stats. */
			break;
		}
	}

	ret = 0;

err:
	SYS_PORT_TRACING_FUNC_EXIT(net, send_data, pkt, ret);

	return ret;
}
432 
net_rx(struct net_if * iface,struct net_pkt * pkt)433 static void net_rx(struct net_if *iface, struct net_pkt *pkt)
434 {
435 	bool is_loopback = false;
436 	size_t pkt_len;
437 
438 	pkt_len = net_pkt_get_len(pkt);
439 
440 	NET_DBG("Received pkt %p len %zu", pkt, pkt_len);
441 
442 	net_stats_update_bytes_recv(iface, pkt_len);
443 
444 	if (IS_ENABLED(CONFIG_NET_LOOPBACK)) {
445 #ifdef CONFIG_NET_L2_DUMMY
446 		if (net_if_l2(iface) == &NET_L2_GET_NAME(DUMMY)) {
447 			is_loopback = true;
448 		}
449 #endif
450 	}
451 
452 	processing_data(pkt, is_loopback);
453 
454 	net_print_statistics();
455 	net_pkt_print();
456 }
457 
/* Process a single received packet in the current thread context.
 * Called either directly from net_queue_rx() (high priority packets or
 * no RX queues) or from an RX traffic class queue handler.
 */
void net_process_rx_packet(struct net_pkt *pkt)
{
	/* Timestamp the start of RX processing for the statistics. */
	net_pkt_set_rx_stats_tick(pkt, k_cycle_get_32());

	/* Offer the packet to the capture subsystem before processing. */
	net_capture_pkt(net_pkt_iface(pkt), pkt);

	net_rx(net_pkt_iface(pkt), pkt);
}
466 
net_queue_rx(struct net_if * iface,struct net_pkt * pkt)467 static void net_queue_rx(struct net_if *iface, struct net_pkt *pkt)
468 {
469 	size_t len = net_pkt_get_len(pkt);
470 	uint8_t prio = net_pkt_priority(pkt);
471 	uint8_t tc = net_rx_priority2tc(prio);
472 
473 #if NET_TC_RX_COUNT > 1
474 	NET_DBG("TC %d with prio %d pkt %p", tc, prio, pkt);
475 #endif
476 
477 	if ((IS_ENABLED(CONFIG_NET_TC_RX_SKIP_FOR_HIGH_PRIO) &&
478 	     prio >= NET_PRIORITY_CA) || NET_TC_RX_COUNT == 0) {
479 		net_process_rx_packet(pkt);
480 	} else {
481 		if (net_tc_submit_to_rx_queue(tc, pkt) != NET_OK) {
482 			goto drop;
483 		}
484 	}
485 
486 	net_stats_update_tc_recv_pkt(iface, tc);
487 	net_stats_update_tc_recv_bytes(iface, tc, len);
488 	net_stats_update_tc_recv_priority(iface, tc, prio);
489 	return;
490 
491 drop:
492 	net_pkt_unref(pkt);
493 	net_stats_update_tc_recv_dropped(iface, tc);
494 	return;
495 }
496 
497 /* Called by driver when a packet has been received */
net_recv_data(struct net_if * iface,struct net_pkt * pkt)498 int net_recv_data(struct net_if *iface, struct net_pkt *pkt)
499 {
500 	int ret;
501 
502 	SYS_PORT_TRACING_FUNC_ENTER(net, recv_data, iface, pkt);
503 
504 	if (!pkt || !iface) {
505 		ret = -EINVAL;
506 		goto err;
507 	}
508 
509 	if (net_pkt_is_empty(pkt)) {
510 		ret = -ENODATA;
511 		goto err;
512 	}
513 
514 	if (!net_if_flag_is_set(iface, NET_IF_UP)) {
515 		ret = -ENETDOWN;
516 		goto err;
517 	}
518 
519 	net_pkt_set_overwrite(pkt, true);
520 	net_pkt_cursor_init(pkt);
521 
522 	NET_DBG("prio %d iface %p pkt %p len %zu", net_pkt_priority(pkt),
523 		iface, pkt, net_pkt_get_len(pkt));
524 
525 	if (IS_ENABLED(CONFIG_NET_ROUTING)) {
526 		net_pkt_set_orig_iface(pkt, iface);
527 	}
528 
529 	net_pkt_set_iface(pkt, iface);
530 
531 	if (!net_pkt_filter_recv_ok(pkt)) {
532 		/* silently drop the packet */
533 		net_pkt_unref(pkt);
534 	} else {
535 		net_queue_rx(iface, pkt);
536 	}
537 
538 	ret = 0;
539 
540 err:
541 	SYS_PORT_TRACING_FUNC_EXIT(net, recv_data, iface, pkt, ret);
542 
543 	return ret;
544 }
545 
l3_init(void)546 static inline void l3_init(void)
547 {
548 	net_pmtu_init();
549 	net_icmpv4_init();
550 	net_icmpv6_init();
551 	net_ipv4_init();
552 	net_ipv6_init();
553 
554 	net_ipv4_autoconf_init();
555 
556 	if (IS_ENABLED(CONFIG_NET_UDP) ||
557 	    IS_ENABLED(CONFIG_NET_TCP) ||
558 	    IS_ENABLED(CONFIG_NET_SOCKETS_PACKET) ||
559 	    IS_ENABLED(CONFIG_NET_SOCKETS_CAN)) {
560 		net_conn_init();
561 	}
562 
563 	net_tcp_init();
564 
565 	net_route_init();
566 
567 	NET_DBG("Network L3 init done");
568 }
569 #else /* CONFIG_NET_NATIVE */
570 #define l3_init(...)
571 #define net_post_init(...)
/* Stub used when the native IP stack is disabled (CONFIG_NET_NATIVE=n):
 * sending is not supported.
 */
int net_try_send_data(struct net_pkt *pkt, k_timeout_t timeout)
{
	ARG_UNUSED(timeout);
	ARG_UNUSED(pkt);

	return -ENOTSUP;
}
net_recv_data(struct net_if * iface,struct net_pkt * pkt)579 int net_recv_data(struct net_if *iface, struct net_pkt *pkt)
580 {
581 	ARG_UNUSED(iface);
582 	ARG_UNUSED(pkt);
583 
584 	return -ENOTSUP;
585 }
586 #endif /* CONFIG_NET_NATIVE */
587 
/* Bring up the RX side of the stack. The sequence is strict: interfaces
 * are initialized first, then the RX traffic class queues, then the
 * interfaces are taken up, and finally the components that require a
 * working interface.
 */
static void init_rx_queues(void)
{
	/* Starting TX side. The ordering is important here and the TX
	 * can only be started when RX side is ready to receive packets.
	 */
	net_if_init();

	net_tc_rx_init();

	/* This will take the interface up and start everything. */
	net_if_post_init();

	/* Things to init after network interface is working */
	net_post_init();
}
603 
/* Initialize the network services (DHCP, DNS, mDNS, websocket, CoAP,
 * net shell, socket services).
 *
 * Returns 0 on success, or the first non-zero error code from the
 * DHCPv4/DHCPv6 client initialization. The remaining service init
 * functions do not report errors here.
 *
 * Fix: the two DHCP error checks used inconsistent styles
 * ("if (status)" vs "if (status != 0)"); they now match, and the
 * function returns an explicit 0 on the success path instead of a
 * variable that is known to be zero at that point.
 */
static inline int services_init(void)
{
	int status;

	socket_service_init();

	status = net_dhcpv4_init();
	if (status != 0) {
		return status;
	}

	status = net_dhcpv6_init();
	if (status != 0) {
		return status;
	}

	net_dhcpv4_server_init();

	dns_dispatcher_init();
	dns_init_resolver();
	mdns_init_responder();

	websocket_init();

	net_coap_init();

	net_shell_init();

	return 0;
}
634 
/* One-time network subsystem initialization entry point, registered via
 * SYS_INIT() below and run at POST_KERNEL level with priority
 * CONFIG_NET_INIT_PRIO.
 *
 * Returns the result of services_init() (0 on success).
 */
static int net_init(void)
{
	net_hostname_init();

	NET_DBG("Priority %d", CONFIG_NET_INIT_PRIO);

	/* net_pkt must be ready before anything can allocate packets. */
	net_pkt_init();

	net_context_init();

	/* L3 init; compiled to a no-op when CONFIG_NET_NATIVE is off. */
	l3_init();

	net_mgmt_event_init();

	/* Bring up interfaces and the RX traffic class queues. */
	init_rx_queues();

	return services_init();
}
653 
654 SYS_INIT(net_init, POST_KERNEL, CONFIG_NET_INIT_PRIO);
655