/*
 * Copyright (c) 2018 Intel Corporation
 * Copyright (c) 2022 Jamie McCrae
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/logging/log.h>
LOG_MODULE_DECLARE(net_ipv4, CONFIG_NET_IPV4_LOG_LEVEL);

#include <errno.h>
#include <zephyr/net/net_core.h>
#include <zephyr/net/net_pkt.h>
#include <zephyr/net/net_stats.h>
#include <zephyr/net/net_context.h>
#include <zephyr/net/net_mgmt.h>
#include <zephyr/random/rand32.h>
#include "net_private.h"
#include "connection.h"
#include "icmpv4.h"
#include "udp_internal.h"
#include "tcp_internal.h"
#include "ipv4.h"
#include "route.h"
#include "net_stats.h"

/* Timeout for various buffer allocations in this file. */
#define NET_BUF_TIMEOUT K_MSEC(100)

static void reassembly_timeout(struct k_work *work);

static struct net_ipv4_reassembly reassembly[CONFIG_NET_IPV4_FRAGMENT_MAX_COUNT];

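/* Find an active reassembly slot matching this fragment's id/src/dst/protocol
 * tuple, or claim a free slot (one whose timer is not running) and start its
 * reassembly timeout. Returns NULL when every slot is busy with some other
 * datagram.
 */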
static struct net_ipv4_reassembly *reassembly_get(uint16_t id, struct in_addr *src,
						  struct in_addr *dst, uint8_t protocol)
{
	int i, avail = -1;

	for (i = 0; i < CONFIG_NET_IPV4_FRAGMENT_MAX_COUNT; i++) {
		if (k_work_delayable_remaining_get(&reassembly[i].timer) &&
		    reassembly[i].id == id &&
		    net_ipv4_addr_cmp(src, &reassembly[i].src) &&
		    net_ipv4_addr_cmp(dst, &reassembly[i].dst) &&
		    reassembly[i].protocol == protocol) {
			return &reassembly[i];
		}

		if (k_work_delayable_remaining_get(&reassembly[i].timer)) {
			continue;
		}

		if (avail < 0) {
			avail = i;
		}
	}

	if (avail < 0) {
		return NULL;
	}

	k_work_reschedule(&reassembly[avail].timer, K_SECONDS(CONFIG_NET_IPV4_FRAGMENT_TIMEOUT));

	net_ipaddr_copy(&reassembly[avail].src, src);
	net_ipaddr_copy(&reassembly[avail].dst, dst);

	reassembly[avail].protocol = protocol;
	reassembly[avail].id = id;

	return &reassembly[avail];
}

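/* Tear down the reassembly slot matching id/src/dst: stop its timer, release
 * all fragment packets buffered in it and mark the slot as free. Returns true
 * if a matching slot was found and cleared.
 */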
static bool reassembly_cancel(uint32_t id, struct in_addr *src, struct in_addr *dst)
{
	int i, j;

	LOG_DBG("Cancel 0x%x", id);

	for (i = 0; i < CONFIG_NET_IPV4_FRAGMENT_MAX_COUNT; i++) {
		int32_t remaining;

		if (reassembly[i].id != id ||
		    !net_ipv4_addr_cmp(src, &reassembly[i].src) ||
		    !net_ipv4_addr_cmp(dst, &reassembly[i].dst)) {
			continue;
		}

		remaining = k_ticks_to_ms_ceil32(
			k_work_delayable_remaining_get(&reassembly[i].timer));
		k_work_cancel_delayable(&reassembly[i].timer);

		LOG_DBG("IPv4 reassembly id 0x%x remaining %d ms", reassembly[i].id, remaining);

		reassembly[i].id = 0U;

		for (j = 0; j < CONFIG_NET_IPV4_FRAGMENT_MAX_PKT; j++) {
			if (!reassembly[i].pkt[j]) {
				continue;
			}

			LOG_DBG("[%d] IPv4 reassembly pkt %p %zd bytes data", j,
				reassembly[i].pkt[j], net_pkt_get_len(reassembly[i].pkt[j]));

			net_pkt_unref(reassembly[i].pkt[j]);
			reassembly[i].pkt[j] = NULL;
		}

		return true;
	}

	return false;
}

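/* Log the state of a reassembly slot (id, addresses and remaining timeout)
 * with a caller-supplied prefix.
 */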
static void reassembly_info(char *str, struct net_ipv4_reassembly *reass)
{
	LOG_DBG("%s id 0x%x src %s dst %s remain %d ms", str, reass->id,
		net_sprint_ipv4_addr(&reass->src),
		net_sprint_ipv4_addr(&reass->dst),
		k_ticks_to_ms_ceil32(
			k_work_delayable_remaining_get(&reass->timer)));
}

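/* Delayable work handler invoked when a reassembly slot times out before all
 * fragments have arrived. Reports the failure to the sender when possible and
 * discards the partially reassembled datagram.
 */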
static void reassembly_timeout(struct k_work *work)
{
	struct net_ipv4_reassembly *reass =
		CONTAINER_OF(work, struct net_ipv4_reassembly, timer);

	reassembly_info("Reassembly cancelled", reass);

	/* Send an ICMPv4 Time Exceeded only if we received the first fragment */
	if (reass->pkt[0] && net_pkt_ipv4_fragment_offset(reass->pkt[0]) == 0) {
		net_icmpv4_send_error(reass->pkt[0], NET_ICMPV4_TIME_EXCEEDED,
				      NET_ICMPV4_TIME_EXCEEDED_FRAGMENT_REASSEMBLY_TIME);
	}

	reassembly_cancel(reass->id, &reass->src, &reass->dst);
}

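/* Join all buffered fragments of a slot into the first packet: strip the IPv4
 * header from each subsequent fragment, chain its buffers onto the first one,
 * then patch the total length, fragment offset and checksum of the combined
 * header before feeding the packet back into the IP stack.
 */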
static void reassemble_packet(struct net_ipv4_reassembly *reass)
{
	NET_PKT_DATA_ACCESS_CONTIGUOUS_DEFINE(ipv4_access, struct net_ipv4_hdr);
	struct net_ipv4_hdr *ipv4_hdr;
	struct net_pkt *pkt;
	struct net_buf *last;
	int i;

	k_work_cancel_delayable(&reass->timer);

	NET_ASSERT(reass->pkt[0]);

	last = net_buf_frag_last(reass->pkt[0]->buffer);

	/* We start from the 2nd fragment, which is then appended to the first one */
	for (i = 1; i < CONFIG_NET_IPV4_FRAGMENT_MAX_PKT; i++) {
		pkt = reass->pkt[i];
		if (!pkt) {
			break;
		}

		net_pkt_cursor_init(pkt);

		/* Get rid of the IPv4 header which is at the beginning of the fragment. */
		ipv4_hdr = (struct net_ipv4_hdr *)net_pkt_get_data(pkt, &ipv4_access);
		if (!ipv4_hdr) {
			goto error;
		}

		LOG_DBG("Removing %d bytes from start of pkt %p", net_pkt_ip_hdr_len(pkt),
			pkt->buffer);

		if (net_pkt_pull(pkt, net_pkt_ip_hdr_len(pkt))) {
			LOG_ERR("Failed to pull headers");
			reassembly_cancel(reass->id, &reass->src, &reass->dst);
			return;
		}

		/* Attach the data to the previous packet */
		last->frags = pkt->buffer;
		last = net_buf_frag_last(pkt->buffer);

		pkt->buffer = NULL;
		reass->pkt[i] = NULL;

		net_pkt_unref(pkt);
	}

	pkt = reass->pkt[0];
	reass->pkt[0] = NULL;

	/* Update the header details for the packet */
	net_pkt_cursor_init(pkt);

	ipv4_hdr = (struct net_ipv4_hdr *)net_pkt_get_data(pkt, &ipv4_access);
	if (!ipv4_hdr) {
		goto error;
	}

	/* Fix the total length, offset and checksum of the IPv4 packet */
	ipv4_hdr->len = htons(net_pkt_get_len(pkt));
	ipv4_hdr->offset[0] = 0;
	ipv4_hdr->offset[1] = 0;
	ipv4_hdr->chksum = 0;
	ipv4_hdr->chksum = net_calc_chksum_ipv4(pkt);

	net_pkt_set_data(pkt, &ipv4_access);

	LOG_DBG("New pkt %p IPv4 len is %d bytes", pkt, net_pkt_get_len(pkt));

	/* We need to use the queue when feeding the packet back into the
	 * IP stack as we might run out of stack if we call processing_data()
	 * directly. As the packet does not contain a link layer header, we
	 * MUST NOT pass it to L2, so there is a special check for that in
	 * process_data() when handling the packet.
	 */
	if (net_recv_data(net_pkt_iface(pkt), pkt) >= 0) {
		return;
	}

error:
	net_pkt_unref(pkt);
}

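/* Invoke the given callback for every reassembly slot that is currently in
 * use, i.e. whose timeout timer is still running.
 */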
void net_ipv4_frag_foreach(net_ipv4_frag_cb_t cb, void *user_data)
{
	int i;

	for (i = 0; i < CONFIG_NET_IPV4_FRAGMENT_MAX_COUNT; i++) {
		if (!k_work_delayable_remaining_get(&reassembly[i].timer)) {
			continue;
		}

		cb(&reassembly[i], user_data);
	}
}

/* Verify that all fragments have been received and are in the correct order.
 * Return:
 * - a negative value if the fragments are erroneous and must be dropped
 * - zero if we are expecting more fragments
 * - a positive value if we can proceed with the reassembly
 */
static int fragments_are_ready(struct net_ipv4_reassembly *reass)
{
	unsigned int expected_offset = 0;
	bool more = true;
	int i;

	/* Fragments can arrive in any order, for example in reverse order:
	 * 1 -> Fragment3(M=0, offset=x2)
	 * 2 -> Fragment2(M=1, offset=x1)
	 * 3 -> Fragment1(M=1, offset=0)
	 * We have to test several requirements before proceeding with the reassembly:
	 * - We received the first fragment (Fragment Offset is 0)
	 * - All intermediate fragments are contiguous
	 * - The More bit of the last fragment is 0
	 */
	for (i = 0; i < CONFIG_NET_IPV4_FRAGMENT_MAX_PKT; i++) {
		struct net_pkt *pkt = reass->pkt[i];
		unsigned int offset;
		int payload_len;

		if (!pkt) {
			break;
		}

		offset = net_pkt_ipv4_fragment_offset(pkt);

		if (offset < expected_offset) {
			/* Overlapping or duplicated, drop it */
			return -EBADMSG;
		} else if (offset != expected_offset) {
			/* Not contiguous, let's wait for more fragments */
			return 0;
		}

		payload_len = net_pkt_get_len(pkt) - net_pkt_ip_hdr_len(pkt);

		if (payload_len < 0) {
			return -EBADMSG;
		}

		expected_offset += payload_len;
		more = net_pkt_ipv4_fragment_more(pkt);
	}

	if (more) {
		return 0;
	}

	return 1;
}

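/* Make room at index pos in the fragment array by shifting the entries
 * between pos and the next free index up by one element. Returns -ENOMEM if
 * the array is already full.
 */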
static int shift_packets(struct net_ipv4_reassembly *reass, int pos)
{
	int i;

	for (i = pos + 1; i < CONFIG_NET_IPV4_FRAGMENT_MAX_PKT; i++) {
		if (!reass->pkt[i]) {
			LOG_DBG("Moving [%d] %p (offset 0x%x) to [%d]", pos, reass->pkt[pos],
				net_pkt_ipv4_fragment_offset(reass->pkt[pos]), pos + 1);

			/* pkt[i] is free, so shift everything between [pos] and [i - 1]
			 * up by one element
			 */
			memmove(&reass->pkt[pos + 1], &reass->pkt[pos],
				sizeof(void *) * (i - pos));

			/* pkt[pos] is now free */
			reass->pkt[pos] = NULL;

			return 0;
		}
	}

	/* We do not have any free space left in the array */
	return -ENOMEM;
}

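/* Handle a received IPv4 fragment: find or allocate a reassembly slot for it,
 * insert the packet into the slot in fragment offset order, and trigger the
 * actual reassembly once all fragments of the datagram have arrived. Returns
 * NET_OK when the packet was consumed (stored or reassembled) and NET_DROP
 * when the caller must discard it.
 */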
enum net_verdict net_ipv4_handle_fragment_hdr(struct net_pkt *pkt, struct net_ipv4_hdr *hdr)
{
	struct net_ipv4_reassembly *reass = NULL;
	uint16_t flag;
	bool found;
	uint8_t more;
	uint16_t id;
	int ret;
	int i;

	flag = ntohs(*((uint16_t *)&hdr->offset));
	id = ntohs(*((uint16_t *)&hdr->id));

	reass = reassembly_get(id, (struct in_addr *)hdr->src,
			       (struct in_addr *)hdr->dst, hdr->proto);
	if (!reass) {
		LOG_ERR("Cannot get reassembly slot, dropping pkt %p", pkt);
		goto drop;
	}

	more = (flag & NET_IPV4_MORE_FRAG_MASK) ? true : false;
	net_pkt_set_ipv4_fragment_flags(pkt, flag);

	if (more && (net_pkt_get_len(pkt) - net_pkt_ip_hdr_len(pkt)) % 8) {
		/* Fragment length is not a multiple of 8, discard the packet and
		 * send a bad IP header error.
		 */
		net_icmpv4_send_error(pkt, NET_ICMPV4_BAD_IP_HEADER,
				      NET_ICMPV4_BAD_IP_HEADER_LENGTH);
		goto drop;
	}

	/* The fragments might come in the wrong order, so place them in the
	 * reassembly chain in the correct order.
	 */
	for (i = 0, found = false; i < CONFIG_NET_IPV4_FRAGMENT_MAX_PKT; i++) {
		if (reass->pkt[i]) {
			if (net_pkt_ipv4_fragment_offset(reass->pkt[i]) <
			    net_pkt_ipv4_fragment_offset(pkt)) {
				continue;
			}

			/* Make room for this fragment. If there is no room, the
			 * whole reassembly is discarded.
			 */
			if (shift_packets(reass, i)) {
				break;
			}
		}

		LOG_DBG("Storing pkt %p to slot %d offset %d", pkt, i,
			net_pkt_ipv4_fragment_offset(pkt));
		reass->pkt[i] = pkt;
		found = true;

		break;
	}

	if (!found) {
		/* We could not add this fragment into our saved fragment list. The
		 * whole packet must be discarded at this point.
		 */
		LOG_ERR("No slots available for 0x%x", reass->id);
		net_pkt_unref(pkt);
		goto drop;
	}

	ret = fragments_are_ready(reass);
	if (ret < 0) {
		LOG_ERR("Reassembled IPv4 verify failed, dropping id %u", reass->id);

		/* Let the caller release the already inserted pkt */
		if (i < CONFIG_NET_IPV4_FRAGMENT_MAX_PKT) {
			reass->pkt[i] = NULL;
		}

		net_pkt_unref(pkt);
		goto drop;
	} else if (ret == 0) {
		reassembly_info("Reassembly nth pkt", reass);

		LOG_DBG("More fragments to be received");
		goto accept;
	}

	reassembly_info("Reassembly last pkt", reass);

	/* The last fragment was received, reassemble the packet */
	reassemble_packet(reass);

accept:
	return NET_OK;

drop:
	if (reass) {
		if (reassembly_cancel(reass->id, &reass->src, &reass->dst)) {
			return NET_OK;
		}
	}

	return NET_DROP;
}

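/* Allocate a new packet for a single fragment, copy the original IPv4 header
 * and fit_len bytes of payload starting at frag_offset into it, rewrite the
 * id, fragment offset, more-fragments flag, total length and checksum fields,
 * and hand the fragment over to the network for sending.
 */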
static int send_ipv4_fragment(struct net_pkt *pkt, uint16_t rand_id, uint16_t fit_len,
			      uint16_t frag_offset, bool final)
{
	int ret = -ENOBUFS;
	struct net_pkt *frag_pkt;
	struct net_pkt_cursor cur;
	struct net_pkt_cursor cur_pkt;
	uint16_t offset_pkt;

	frag_pkt = net_pkt_alloc_with_buffer(net_pkt_iface(pkt), fit_len +
					     net_pkt_ip_hdr_len(pkt),
					     AF_INET, 0, NET_BUF_TIMEOUT);
	if (!frag_pkt) {
		return -ENOMEM;
	}

	net_pkt_cursor_init(frag_pkt);
	net_pkt_cursor_backup(pkt, &cur_pkt);
	net_pkt_cursor_backup(frag_pkt, &cur);

	/* Copy the original IPv4 header to the fragment packet */
	if (net_pkt_copy(frag_pkt, pkt, net_pkt_ip_hdr_len(pkt))) {
		goto fail;
	}

	net_pkt_cursor_restore(pkt, &cur_pkt);

	/* Copy the payload part of this fragment from the original packet */
	if (net_pkt_skip(pkt, (frag_offset + net_pkt_ip_hdr_len(pkt))) ||
	    net_pkt_copy(frag_pkt, pkt, fit_len)) {
		goto fail;
	}

	net_pkt_cursor_restore(frag_pkt, &cur);
	net_pkt_cursor_restore(pkt, &cur_pkt);

	net_pkt_set_ip_hdr_len(frag_pkt, net_pkt_ip_hdr_len(pkt));

	net_pkt_set_overwrite(frag_pkt, true);
	net_pkt_cursor_init(frag_pkt);

	/* Update the header of the fragment packet */
	NET_PKT_DATA_ACCESS_DEFINE(ipv4_access, struct net_ipv4_hdr);
	struct net_ipv4_hdr *ipv4_hdr;

	ipv4_hdr = (struct net_ipv4_hdr *)net_pkt_get_data(frag_pkt, &ipv4_access);
	if (!ipv4_hdr) {
		/* Go through fail so that frag_pkt is unreffed and not leaked;
		 * ret still holds -ENOBUFS here.
		 */
		goto fail;
	}

	memcpy(ipv4_hdr->id, &rand_id, sizeof(rand_id));
	offset_pkt = frag_offset / 8;

	if (!final) {
		offset_pkt |= NET_IPV4_MORE_FRAG_MASK;
	}

	sys_put_be16(offset_pkt, ipv4_hdr->offset);
	ipv4_hdr->len = htons((fit_len + net_pkt_ip_hdr_len(pkt)));

	ipv4_hdr->chksum = 0;
	if (net_if_need_calc_tx_checksum(net_pkt_iface(frag_pkt))) {
		ipv4_hdr->chksum = net_calc_chksum_ipv4(frag_pkt);
	}

	net_pkt_set_data(frag_pkt, &ipv4_access);

	net_pkt_set_overwrite(frag_pkt, false);
	net_pkt_cursor_restore(frag_pkt, &cur);

	/* If everything has been OK so far, we can send the packet. */
	ret = net_send_data(frag_pkt);
	if (ret < 0) {
		goto fail;
	}

	/* Let this packet be sent and hopefully it will release memory that can
	 * be utilized for the next IPv4 fragment.
	 */
	k_yield();

	return 0;

fail:
	LOG_ERR("Cannot send fragment (%d)", ret);
	net_pkt_unref(frag_pkt);

	return ret;
}

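/* Split a too-large IPv4 packet into MTU-sized fragments and send each of
 * them in turn. Fails with -EPERM when the Don't Fragment flag is set in the
 * header.
 */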
int net_ipv4_send_fragmented_pkt(struct net_if *iface, struct net_pkt *pkt,
				 uint16_t pkt_len, uint16_t mtu)
{
	uint16_t frag_offset = 0;
	uint16_t flag;
	int fit_len;
	int ret;
	struct net_ipv4_hdr *frag_hdr;

	NET_PKT_DATA_ACCESS_DEFINE(frag_access, struct net_ipv4_hdr);
	frag_hdr = (struct net_ipv4_hdr *)net_pkt_get_data(pkt, &frag_access);
	if (!frag_hdr) {
		return -EINVAL;
	}

	/* Check if the DF (Don't Fragment) flag is set; if so, we cannot fragment
	 * the packet.
	 */
	flag = ntohs(*((uint16_t *)&frag_hdr->offset));

	if (flag & NET_IPV4_DO_NOT_FRAG_MASK) {
		/* This packet cannot be fragmented */
		return -EPERM;
	}

	/* Generate a random ID to be used for packet identification, ensuring
	 * that it is not 0.
	 */
	uint16_t rand_id = (uint16_t)sys_rand32_get();

	if (rand_id == 0) {
		rand_id = 1;
	}

	/* Calculate the maximum payload that can fit into each packet after the
	 * IPv4 header. Offsets are multiples of 8, therefore round down to the
	 * nearest 8-byte boundary.
	 */
	fit_len = (mtu - net_pkt_ip_hdr_len(pkt)) / 8;

	if (fit_len <= 0) {
		LOG_ERR("No room for IPv4 payload MTU %d hdrs_len %d", mtu,
			net_pkt_ip_hdr_len(pkt));
		return -EINVAL;
	}

	fit_len *= 8;

	pkt_len -= net_pkt_ip_hdr_len(pkt);

	while (frag_offset < pkt_len) {
		bool final = false;

		if ((frag_offset + fit_len) >= pkt_len) {
			final = true;
			fit_len = (pkt_len - frag_offset);
		}

		ret = send_ipv4_fragment(pkt, rand_id, fit_len, frag_offset, final);
		if (ret < 0) {
			return ret;
		}

		frag_offset += fit_len;
	}

	return 0;
}

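/* TX-path hook that decides whether an outgoing IPv4 packet needs to be
 * fragmented. Packets that fit within the interface MTU, or that have already
 * been fragmented (non-zero ID field), pass through unchanged; oversized
 * packets are split and their fragments sent individually, in which case the
 * original packet is consumed here and NET_CONTINUE is returned.
 */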
enum net_verdict net_ipv4_prepare_for_send(struct net_pkt *pkt)
{
	NET_PKT_DATA_ACCESS_CONTIGUOUS_DEFINE(ipv4_access, struct net_ipv4_hdr);
	struct net_ipv4_hdr *ip_hdr;
	int ret;

	NET_ASSERT(pkt && pkt->buffer);

	ip_hdr = (struct net_ipv4_hdr *)net_pkt_get_data(pkt, &ipv4_access);
	if (!ip_hdr) {
		return NET_DROP;
	}

	/* If we have already fragmented the packet, the ID field will contain a
	 * non-zero value and we can skip the other checks.
	 */
	if (ip_hdr->id[0] == 0 && ip_hdr->id[1] == 0) {
		uint16_t mtu = net_if_get_mtu(net_pkt_iface(pkt));
		size_t pkt_len = net_pkt_get_len(pkt);

		mtu = MAX(NET_IPV4_MTU, mtu);

		if (pkt_len > mtu) {
			ret = net_ipv4_send_fragmented_pkt(net_pkt_iface(pkt), pkt, pkt_len, mtu);

			if (ret < 0) {
				LOG_DBG("Cannot fragment IPv4 pkt (%d)", ret);

				if (ret == -ENOMEM || ret == -ENOBUFS || ret == -EPERM) {
					/* Try to send the packet anyway if we could not
					 * allocate enough network packets, or if the
					 * Don't Fragment flag is set, and hope that the
					 * original large packet can be sent OK.
					 */
					goto ignore_frag_error;
				} else {
					/* Other error, drop the packet */
					return NET_DROP;
				}
			}

			/* We "fake" the sending of the packet here so that
			 * tcp.c:tcp_retry_expired() will increase the ref count when
			 * re-sending the packet. This is crucial to do here; without
			 * it, already freed memory would be accessed later.
			 */
			if (IS_ENABLED(CONFIG_NET_TCP)) {
				net_pkt_set_sent(pkt, true);
			}

			/* We need to unref here because we simulate the packet being sent. */
			net_pkt_unref(pkt);

			/* No need to continue with the sending as the packet is now
			 * split and its fragments will be sent separately to the
			 * network.
			 */
			return NET_CONTINUE;
		}
	}

ignore_frag_error:

	return NET_OK;
}

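/* Initialise the delayable timeout work item of every reassembly slot at
 * runtime.
 */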
void net_ipv4_setup_fragment_buffers(void)
{
	/* Static initialisation does not work here because of the array, so we
	 * must do it at runtime.
	 */
	for (int i = 0; i < CONFIG_NET_IPV4_FRAGMENT_MAX_COUNT; i++) {
		k_work_init_delayable(&reassembly[i].timer, reassembly_timeout);
	}
}