1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	IPv6 output functions
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on linux/net/ipv4/ip_output.c
10  *
11  *	Changes:
12  *	A.N.Kuznetsov	:	arithmetics in fragmentation.
13  *				extension headers are implemented.
14  *				route changes now work.
15  *				ip6_forward does not confuse sniffers.
16  *				etc.
17  *
18  *      H. von Brand    :       Added missing #include <linux/string.h>
19  *	Imran Patel	:	frag id should be in NBO
20  *      Kazunori MIYAZAWA @USAGI
21  *			:       add ip6_append_data and related functions
22  *				for datagram xmit
23  */
24 
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37 
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41 
42 #include <net/sock.h>
43 #include <net/snmp.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 #include <net/ip_tunnels.h>
58 
ip6_finish_output2(struct net * net,struct sock * sk,struct sk_buff * skb)59 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
60 {
61 	struct dst_entry *dst = skb_dst(skb);
62 	struct net_device *dev = dst->dev;
63 	const struct in6_addr *nexthop;
64 	struct neighbour *neigh;
65 	int ret;
66 
67 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
68 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
69 
70 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
71 		    ((mroute6_is_socket(net, skb) &&
72 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
73 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
74 					 &ipv6_hdr(skb)->saddr))) {
75 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
76 
77 			/* Do not check for IFF_ALLMULTI; multicast routing
78 			   is not supported in any case.
79 			 */
80 			if (newskb)
81 				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
82 					net, sk, newskb, NULL, newskb->dev,
83 					dev_loopback_xmit);
84 
85 			if (ipv6_hdr(skb)->hop_limit == 0) {
86 				IP6_INC_STATS(net, idev,
87 					      IPSTATS_MIB_OUTDISCARDS);
88 				kfree_skb(skb);
89 				return 0;
90 			}
91 		}
92 
93 		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
94 
95 		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
96 		    IPV6_ADDR_SCOPE_NODELOCAL &&
97 		    !(dev->flags & IFF_LOOPBACK)) {
98 			kfree_skb(skb);
99 			return 0;
100 		}
101 	}
102 
103 	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
104 		int res = lwtunnel_xmit(skb);
105 
106 		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
107 			return res;
108 	}
109 
110 	rcu_read_lock_bh();
111 	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
112 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
113 	if (unlikely(!neigh))
114 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
115 	if (!IS_ERR(neigh)) {
116 		sock_confirm_neigh(skb, neigh);
117 		ret = neigh_output(neigh, skb, false);
118 		rcu_read_unlock_bh();
119 		return ret;
120 	}
121 	rcu_read_unlock_bh();
122 
123 	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
124 	kfree_skb(skb);
125 	return -EINVAL;
126 }
127 
__ip6_finish_output(struct net * net,struct sock * sk,struct sk_buff * skb)128 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
129 {
130 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
131 	/* Policy lookup after SNAT yielded a new policy */
132 	if (skb_dst(skb)->xfrm) {
133 		IPCB(skb)->flags |= IPSKB_REROUTED;
134 		return dst_output(net, sk, skb);
135 	}
136 #endif
137 
138 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
139 	    dst_allfrag(skb_dst(skb)) ||
140 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
141 		return ip6_fragment(net, sk, skb, ip6_finish_output2);
142 	else
143 		return ip6_finish_output2(net, sk, skb);
144 }
145 
ip6_finish_output(struct net * net,struct sock * sk,struct sk_buff * skb)146 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
147 {
148 	int ret;
149 
150 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
151 	switch (ret) {
152 	case NET_XMIT_SUCCESS:
153 		return __ip6_finish_output(net, sk, skb);
154 	case NET_XMIT_CN:
155 		return __ip6_finish_output(net, sk, skb) ? : ret;
156 	default:
157 		kfree_skb(skb);
158 		return ret;
159 	}
160 }
161 
ip6_output(struct net * net,struct sock * sk,struct sk_buff * skb)162 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
163 {
164 	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
165 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
166 
167 	skb->protocol = htons(ETH_P_IPV6);
168 	skb->dev = dev;
169 
170 	if (unlikely(idev->cnf.disable_ipv6)) {
171 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
172 		kfree_skb(skb);
173 		return 0;
174 	}
175 
176 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
177 			    net, sk, skb, indev, dev,
178 			    ip6_finish_output,
179 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
180 }
181 
ip6_autoflowlabel(struct net * net,const struct ipv6_pinfo * np)182 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
183 {
184 	if (!np->autoflowlabel_set)
185 		return ip6_default_np_autolabel(net);
186 	else
187 		return np->autoflowlabel;
188 }
189 
190 /*
191  * xmit an sk_buff (used by TCP, SCTP and DCCP)
192  * Note : socket lock is not held for SYNACK packets, but might be modified
193  * by calls to skb_set_owner_w() and ipv6_local_error(),
194  * which are using proper atomic operations or spinlocks.
195  */
ip6_xmit(const struct sock * sk,struct sk_buff * skb,struct flowi6 * fl6,__u32 mark,struct ipv6_txoptions * opt,int tclass,u32 priority)196 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
197 	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
198 {
199 	struct net *net = sock_net(sk);
200 	const struct ipv6_pinfo *np = inet6_sk(sk);
201 	struct in6_addr *first_hop = &fl6->daddr;
202 	struct dst_entry *dst = skb_dst(skb);
203 	unsigned int head_room;
204 	struct ipv6hdr *hdr;
205 	u8  proto = fl6->flowi6_proto;
206 	int seg_len = skb->len;
207 	int hlimit = -1;
208 	u32 mtu;
209 
210 	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
211 	if (opt)
212 		head_room += opt->opt_nflen + opt->opt_flen;
213 
214 	if (unlikely(skb_headroom(skb) < head_room)) {
215 		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
216 		if (!skb2) {
217 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
218 				      IPSTATS_MIB_OUTDISCARDS);
219 			kfree_skb(skb);
220 			return -ENOBUFS;
221 		}
222 		if (skb->sk)
223 			skb_set_owner_w(skb2, skb->sk);
224 		consume_skb(skb);
225 		skb = skb2;
226 	}
227 
228 	if (opt) {
229 		seg_len += opt->opt_nflen + opt->opt_flen;
230 
231 		if (opt->opt_flen)
232 			ipv6_push_frag_opts(skb, opt, &proto);
233 
234 		if (opt->opt_nflen)
235 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
236 					     &fl6->saddr);
237 	}
238 
239 	skb_push(skb, sizeof(struct ipv6hdr));
240 	skb_reset_network_header(skb);
241 	hdr = ipv6_hdr(skb);
242 
243 	/*
244 	 *	Fill in the IPv6 header
245 	 */
246 	if (np)
247 		hlimit = np->hop_limit;
248 	if (hlimit < 0)
249 		hlimit = ip6_dst_hoplimit(dst);
250 
251 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
252 				ip6_autoflowlabel(net, np), fl6));
253 
254 	hdr->payload_len = htons(seg_len);
255 	hdr->nexthdr = proto;
256 	hdr->hop_limit = hlimit;
257 
258 	hdr->saddr = fl6->saddr;
259 	hdr->daddr = *first_hop;
260 
261 	skb->protocol = htons(ETH_P_IPV6);
262 	skb->priority = priority;
263 	skb->mark = mark;
264 
265 	mtu = dst_mtu(dst);
266 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
267 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
268 			      IPSTATS_MIB_OUT, skb->len);
269 
270 		/* if egress device is enslaved to an L3 master device pass the
271 		 * skb to its handler for processing
272 		 */
273 		skb = l3mdev_ip6_out((struct sock *)sk, skb);
274 		if (unlikely(!skb))
275 			return 0;
276 
277 		/* hooks should never assume socket lock is held.
278 		 * we promote our socket to non const
279 		 */
280 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
281 			       net, (struct sock *)sk, skb, NULL, dst->dev,
282 			       dst_output);
283 	}
284 
285 	skb->dev = dst->dev;
286 	/* ipv6_local_error() does not require socket lock,
287 	 * we promote our socket to non const
288 	 */
289 	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
290 
291 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
292 	kfree_skb(skb);
293 	return -EMSGSIZE;
294 }
295 EXPORT_SYMBOL(ip6_xmit);
296 
ip6_call_ra_chain(struct sk_buff * skb,int sel)297 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
298 {
299 	struct ip6_ra_chain *ra;
300 	struct sock *last = NULL;
301 
302 	read_lock(&ip6_ra_lock);
303 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
304 		struct sock *sk = ra->sk;
305 		if (sk && ra->sel == sel &&
306 		    (!sk->sk_bound_dev_if ||
307 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
308 			struct ipv6_pinfo *np = inet6_sk(sk);
309 
310 			if (np && np->rtalert_isolate &&
311 			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
312 				continue;
313 			}
314 			if (last) {
315 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
316 				if (skb2)
317 					rawv6_rcv(last, skb2);
318 			}
319 			last = sk;
320 		}
321 	}
322 
323 	if (last) {
324 		rawv6_rcv(last, skb);
325 		read_unlock(&ip6_ra_lock);
326 		return 1;
327 	}
328 	read_unlock(&ip6_ra_lock);
329 	return 0;
330 }
331 
ip6_forward_proxy_check(struct sk_buff * skb)332 static int ip6_forward_proxy_check(struct sk_buff *skb)
333 {
334 	struct ipv6hdr *hdr = ipv6_hdr(skb);
335 	u8 nexthdr = hdr->nexthdr;
336 	__be16 frag_off;
337 	int offset;
338 
339 	if (ipv6_ext_hdr(nexthdr)) {
340 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
341 		if (offset < 0)
342 			return 0;
343 	} else
344 		offset = sizeof(struct ipv6hdr);
345 
346 	if (nexthdr == IPPROTO_ICMPV6) {
347 		struct icmp6hdr *icmp6;
348 
349 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
350 					 offset + 1 - skb->data)))
351 			return 0;
352 
353 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
354 
355 		switch (icmp6->icmp6_type) {
356 		case NDISC_ROUTER_SOLICITATION:
357 		case NDISC_ROUTER_ADVERTISEMENT:
358 		case NDISC_NEIGHBOUR_SOLICITATION:
359 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
360 		case NDISC_REDIRECT:
361 			/* For reaction involving unicast neighbor discovery
362 			 * message destined to the proxied address, pass it to
363 			 * input function.
364 			 */
365 			return 1;
366 		default:
367 			break;
368 		}
369 	}
370 
371 	/*
372 	 * The proxying router can't forward traffic sent to a link-local
373 	 * address, so signal the sender and discard the packet. This
374 	 * behavior is clarified by the MIPv6 specification.
375 	 */
376 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
377 		dst_link_failure(skb);
378 		return -1;
379 	}
380 
381 	return 0;
382 }
383 
ip6_forward_finish(struct net * net,struct sock * sk,struct sk_buff * skb)384 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
385 				     struct sk_buff *skb)
386 {
387 	struct dst_entry *dst = skb_dst(skb);
388 
389 	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
390 	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
391 
392 #ifdef CONFIG_NET_SWITCHDEV
393 	if (skb->offload_l3_fwd_mark) {
394 		consume_skb(skb);
395 		return 0;
396 	}
397 #endif
398 
399 	skb->tstamp = 0;
400 	return dst_output(net, sk, skb);
401 }
402 
ip6_pkt_too_big(const struct sk_buff * skb,unsigned int mtu)403 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
404 {
405 	if (skb->len <= mtu)
406 		return false;
407 
408 	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
409 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
410 		return true;
411 
412 	if (skb->ignore_df)
413 		return false;
414 
415 	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
416 		return false;
417 
418 	return true;
419 }
420 
ip6_forward(struct sk_buff * skb)421 int ip6_forward(struct sk_buff *skb)
422 {
423 	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
424 	struct dst_entry *dst = skb_dst(skb);
425 	struct ipv6hdr *hdr = ipv6_hdr(skb);
426 	struct inet6_skb_parm *opt = IP6CB(skb);
427 	struct net *net = dev_net(dst->dev);
428 	u32 mtu;
429 
430 	if (net->ipv6.devconf_all->forwarding == 0)
431 		goto error;
432 
433 	if (skb->pkt_type != PACKET_HOST)
434 		goto drop;
435 
436 	if (unlikely(skb->sk))
437 		goto drop;
438 
439 	if (skb_warn_if_lro(skb))
440 		goto drop;
441 
442 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
443 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
444 		goto drop;
445 	}
446 
447 	skb_forward_csum(skb);
448 
449 	/*
450 	 *	We DO NOT make any processing on
451 	 *	RA packets, pushing them to user level AS IS
452 	 *	without ane WARRANTY that application will be able
453 	 *	to interpret them. The reason is that we
454 	 *	cannot make anything clever here.
455 	 *
456 	 *	We are not end-node, so that if packet contains
457 	 *	AH/ESP, we cannot make anything.
458 	 *	Defragmentation also would be mistake, RA packets
459 	 *	cannot be fragmented, because there is no warranty
460 	 *	that different fragments will go along one path. --ANK
461 	 */
462 	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
463 		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
464 			return 0;
465 	}
466 
467 	/*
468 	 *	check and decrement ttl
469 	 */
470 	if (hdr->hop_limit <= 1) {
471 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
472 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
473 
474 		kfree_skb(skb);
475 		return -ETIMEDOUT;
476 	}
477 
478 	/* XXX: idev->cnf.proxy_ndp? */
479 	if (net->ipv6.devconf_all->proxy_ndp &&
480 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
481 		int proxied = ip6_forward_proxy_check(skb);
482 		if (proxied > 0)
483 			return ip6_input(skb);
484 		else if (proxied < 0) {
485 			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
486 			goto drop;
487 		}
488 	}
489 
490 	if (!xfrm6_route_forward(skb)) {
491 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
492 		goto drop;
493 	}
494 	dst = skb_dst(skb);
495 
496 	/* IPv6 specs say nothing about it, but it is clear that we cannot
497 	   send redirects to source routed frames.
498 	   We don't send redirects to frames decapsulated from IPsec.
499 	 */
500 	if (IP6CB(skb)->iif == dst->dev->ifindex &&
501 	    opt->srcrt == 0 && !skb_sec_path(skb)) {
502 		struct in6_addr *target = NULL;
503 		struct inet_peer *peer;
504 		struct rt6_info *rt;
505 
506 		/*
507 		 *	incoming and outgoing devices are the same
508 		 *	send a redirect.
509 		 */
510 
511 		rt = (struct rt6_info *) dst;
512 		if (rt->rt6i_flags & RTF_GATEWAY)
513 			target = &rt->rt6i_gateway;
514 		else
515 			target = &hdr->daddr;
516 
517 		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
518 
519 		/* Limit redirects both by destination (here)
520 		   and by source (inside ndisc_send_redirect)
521 		 */
522 		if (inet_peer_xrlim_allow(peer, 1*HZ))
523 			ndisc_send_redirect(skb, target);
524 		if (peer)
525 			inet_putpeer(peer);
526 	} else {
527 		int addrtype = ipv6_addr_type(&hdr->saddr);
528 
529 		/* This check is security critical. */
530 		if (addrtype == IPV6_ADDR_ANY ||
531 		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
532 			goto error;
533 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
534 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
535 				    ICMPV6_NOT_NEIGHBOUR, 0);
536 			goto error;
537 		}
538 	}
539 
540 	mtu = ip6_dst_mtu_forward(dst);
541 	if (mtu < IPV6_MIN_MTU)
542 		mtu = IPV6_MIN_MTU;
543 
544 	if (ip6_pkt_too_big(skb, mtu)) {
545 		/* Again, force OUTPUT device used as source address */
546 		skb->dev = dst->dev;
547 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
548 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
549 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
550 				IPSTATS_MIB_FRAGFAILS);
551 		kfree_skb(skb);
552 		return -EMSGSIZE;
553 	}
554 
555 	if (skb_cow(skb, dst->dev->hard_header_len)) {
556 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
557 				IPSTATS_MIB_OUTDISCARDS);
558 		goto drop;
559 	}
560 
561 	hdr = ipv6_hdr(skb);
562 
563 	/* Mangling hops number delayed to point after skb COW */
564 
565 	hdr->hop_limit--;
566 
567 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
568 		       net, NULL, skb, skb->dev, dst->dev,
569 		       ip6_forward_finish);
570 
571 error:
572 	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
573 drop:
574 	kfree_skb(skb);
575 	return -EINVAL;
576 }
577 
ip6_copy_metadata(struct sk_buff * to,struct sk_buff * from)578 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
579 {
580 	to->pkt_type = from->pkt_type;
581 	to->priority = from->priority;
582 	to->protocol = from->protocol;
583 	skb_dst_drop(to);
584 	skb_dst_set(to, dst_clone(skb_dst(from)));
585 	to->dev = from->dev;
586 	to->mark = from->mark;
587 
588 	skb_copy_hash(to, from);
589 
590 #ifdef CONFIG_NET_SCHED
591 	to->tc_index = from->tc_index;
592 #endif
593 	nf_copy(to, from);
594 	skb_ext_copy(to, from);
595 	skb_copy_secmark(to, from);
596 }
597 
/*
 * Prepare the head skb of a frag_list for in-place fragmentation.
 *
 * @skb:     packet whose frag_list members will become the fragments
 * @hlen:    length of the headers that every fragment has to carry
 * @prevhdr: location of the "next header" byte that must announce the
 *           fragment header
 * @nexthdr: protocol value to store in the fragment header
 * @frag_id: fragment identification
 * @iter:    iterator state filled in for the following fragments
 *
 * Saves a copy of the headers in iter->tmp_hdr (to be freed by the
 * caller), detaches the frag_list into the iterator, inserts a fragment
 * header with the "more fragments" bit into the head skb and trims the head
 * skb's length fields to its own data.
 *
 * Returns 0 on success or -ENOMEM if the header copy cannot be allocated.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int head_len;
	struct frag_hdr *fh;

	/* BUILD HEADER: patch the next-header byte first so that the saved
	 * header copy already announces the fragment header.
	 */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	/* Move the fragment list from the skb into the iterator. */
	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->hlen = hlen;
	iter->nexthdr = nexthdr;
	iter->frag_id = frag_id;
	iter->offset = 0;

	/* Open a gap for the fragment header between the saved headers
	 * and the payload, then put the headers back in front of it.
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	/* The head now only accounts for its own data.  data_len has to be
	 * updated before len because skb_headlen() is derived from both.
	 */
	head_len = skb_pagelen(skb);
	skb->data_len = head_len - skb_headlen(skb);
	skb->len = head_len;
	ipv6_hdr(skb)->payload_len = htons(head_len - sizeof(struct ipv6hdr));

	return 0;
}
637 EXPORT_SYMBOL(ip6_fraglist_init);
638 
ip6_fraglist_prepare(struct sk_buff * skb,struct ip6_fraglist_iter * iter)639 void ip6_fraglist_prepare(struct sk_buff *skb,
640 			  struct ip6_fraglist_iter *iter)
641 {
642 	struct sk_buff *frag = iter->frag;
643 	unsigned int hlen = iter->hlen;
644 	struct frag_hdr *fh;
645 
646 	frag->ip_summed = CHECKSUM_NONE;
647 	skb_reset_transport_header(frag);
648 	fh = __skb_push(frag, sizeof(struct frag_hdr));
649 	__skb_push(frag, hlen);
650 	skb_reset_network_header(frag);
651 	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
652 	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
653 	fh->nexthdr = iter->nexthdr;
654 	fh->reserved = 0;
655 	fh->frag_off = htons(iter->offset);
656 	if (frag->next)
657 		fh->frag_off |= htons(IP6_MF);
658 	fh->identification = iter->frag_id;
659 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
660 	ip6_copy_metadata(frag, skb);
661 }
662 EXPORT_SYMBOL(ip6_fraglist_prepare);
663 
/*
 * Initialise the state used by ip6_frag_next() to cut @skb into newly
 * allocated fragments: the header description (@prevhdr, @nexthdr,
 * @frag_id, @hlen), the per-fragment payload limit (@mtu), the head and
 * tail room to reserve in each fragment, and the copy cursor, which starts
 * right after the @hlen header bytes with skb->len - @hlen bytes left.
 */
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	/* Header description shared by every fragment. */
	state->hlen = hlen;
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	/* Buffer geometry of each fragment. */
	state->mtu = mtu;
	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	/* Copy cursor. */
	state->ptr = hlen;		/* Where to start from */
	state->left = skb->len - hlen;	/* Bytes still to be sent */
	state->offset = 0;
}
683 EXPORT_SYMBOL(ip6_frag_init);
684 
ip6_frag_next(struct sk_buff * skb,struct ip6_frag_state * state)685 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
686 {
687 	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
688 	struct sk_buff *frag;
689 	struct frag_hdr *fh;
690 	unsigned int len;
691 
692 	len = state->left;
693 	/* IF: it doesn't fit, use 'mtu' - the data space left */
694 	if (len > state->mtu)
695 		len = state->mtu;
696 	/* IF: we are not sending up to and including the packet end
697 	   then align the next start on an eight byte boundary */
698 	if (len < state->left)
699 		len &= ~7;
700 
701 	/* Allocate buffer */
702 	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
703 			 state->hroom + state->troom, GFP_ATOMIC);
704 	if (!frag)
705 		return ERR_PTR(-ENOMEM);
706 
707 	/*
708 	 *	Set up data on packet
709 	 */
710 
711 	ip6_copy_metadata(frag, skb);
712 	skb_reserve(frag, state->hroom);
713 	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
714 	skb_reset_network_header(frag);
715 	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
716 	frag->transport_header = (frag->network_header + state->hlen +
717 				  sizeof(struct frag_hdr));
718 
719 	/*
720 	 *	Charge the memory for the fragment to any owner
721 	 *	it might possess
722 	 */
723 	if (skb->sk)
724 		skb_set_owner_w(frag, skb->sk);
725 
726 	/*
727 	 *	Copy the packet header into the new buffer.
728 	 */
729 	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
730 
731 	fragnexthdr_offset = skb_network_header(frag);
732 	fragnexthdr_offset += prevhdr - skb_network_header(skb);
733 	*fragnexthdr_offset = NEXTHDR_FRAGMENT;
734 
735 	/*
736 	 *	Build fragment header.
737 	 */
738 	fh->nexthdr = state->nexthdr;
739 	fh->reserved = 0;
740 	fh->identification = state->frag_id;
741 
742 	/*
743 	 *	Copy a block of the IP datagram.
744 	 */
745 	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
746 			     len));
747 	state->left -= len;
748 
749 	fh->frag_off = htons(state->offset);
750 	if (state->left > 0)
751 		fh->frag_off |= htons(IP6_MF);
752 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
753 
754 	state->ptr += len;
755 	state->offset += len;
756 
757 	return frag;
758 }
759 EXPORT_SYMBOL(ip6_frag_next);
760 
ip6_fragment(struct net * net,struct sock * sk,struct sk_buff * skb,int (* output)(struct net *,struct sock *,struct sk_buff *))761 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
762 		 int (*output)(struct net *, struct sock *, struct sk_buff *))
763 {
764 	struct sk_buff *frag;
765 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
766 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
767 				inet6_sk(skb->sk) : NULL;
768 	struct ip6_frag_state state;
769 	unsigned int mtu, hlen, nexthdr_offset;
770 	ktime_t tstamp = skb->tstamp;
771 	int hroom, err = 0;
772 	__be32 frag_id;
773 	u8 *prevhdr, nexthdr = 0;
774 
775 	err = ip6_find_1stfragopt(skb, &prevhdr);
776 	if (err < 0)
777 		goto fail;
778 	hlen = err;
779 	nexthdr = *prevhdr;
780 	nexthdr_offset = prevhdr - skb_network_header(skb);
781 
782 	mtu = ip6_skb_dst_mtu(skb);
783 
784 	/* We must not fragment if the socket is set to force MTU discovery
785 	 * or if the skb it not generated by a local socket.
786 	 */
787 	if (unlikely(!skb->ignore_df && skb->len > mtu))
788 		goto fail_toobig;
789 
790 	if (IP6CB(skb)->frag_max_size) {
791 		if (IP6CB(skb)->frag_max_size > mtu)
792 			goto fail_toobig;
793 
794 		/* don't send fragments larger than what we received */
795 		mtu = IP6CB(skb)->frag_max_size;
796 		if (mtu < IPV6_MIN_MTU)
797 			mtu = IPV6_MIN_MTU;
798 	}
799 
800 	if (np && np->frag_size < mtu) {
801 		if (np->frag_size)
802 			mtu = np->frag_size;
803 	}
804 	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
805 		goto fail_toobig;
806 	mtu -= hlen + sizeof(struct frag_hdr);
807 
808 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
809 				    &ipv6_hdr(skb)->saddr);
810 
811 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
812 	    (err = skb_checksum_help(skb)))
813 		goto fail;
814 
815 	prevhdr = skb_network_header(skb) + nexthdr_offset;
816 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
817 	if (skb_has_frag_list(skb)) {
818 		unsigned int first_len = skb_pagelen(skb);
819 		struct ip6_fraglist_iter iter;
820 		struct sk_buff *frag2;
821 
822 		if (first_len - hlen > mtu ||
823 		    ((first_len - hlen) & 7) ||
824 		    skb_cloned(skb) ||
825 		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
826 			goto slow_path;
827 
828 		skb_walk_frags(skb, frag) {
829 			/* Correct geometry. */
830 			if (frag->len > mtu ||
831 			    ((frag->len & 7) && frag->next) ||
832 			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
833 				goto slow_path_clean;
834 
835 			/* Partially cloned skb? */
836 			if (skb_shared(frag))
837 				goto slow_path_clean;
838 
839 			BUG_ON(frag->sk);
840 			if (skb->sk) {
841 				frag->sk = skb->sk;
842 				frag->destructor = sock_wfree;
843 			}
844 			skb->truesize -= frag->truesize;
845 		}
846 
847 		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
848 					&iter);
849 		if (err < 0)
850 			goto fail;
851 
852 		for (;;) {
853 			/* Prepare header of the next frame,
854 			 * before previous one went down. */
855 			if (iter.frag)
856 				ip6_fraglist_prepare(skb, &iter);
857 
858 			skb->tstamp = tstamp;
859 			err = output(net, sk, skb);
860 			if (!err)
861 				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
862 					      IPSTATS_MIB_FRAGCREATES);
863 
864 			if (err || !iter.frag)
865 				break;
866 
867 			skb = ip6_fraglist_next(&iter);
868 		}
869 
870 		kfree(iter.tmp_hdr);
871 
872 		if (err == 0) {
873 			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
874 				      IPSTATS_MIB_FRAGOKS);
875 			return 0;
876 		}
877 
878 		kfree_skb_list(iter.frag);
879 
880 		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
881 			      IPSTATS_MIB_FRAGFAILS);
882 		return err;
883 
884 slow_path_clean:
885 		skb_walk_frags(skb, frag2) {
886 			if (frag2 == frag)
887 				break;
888 			frag2->sk = NULL;
889 			frag2->destructor = NULL;
890 			skb->truesize += frag2->truesize;
891 		}
892 	}
893 
894 slow_path:
895 	/*
896 	 *	Fragment the datagram.
897 	 */
898 
899 	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
900 		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
901 		      &state);
902 
903 	/*
904 	 *	Keep copying data until we run out.
905 	 */
906 
907 	while (state.left > 0) {
908 		frag = ip6_frag_next(skb, &state);
909 		if (IS_ERR(frag)) {
910 			err = PTR_ERR(frag);
911 			goto fail;
912 		}
913 
914 		/*
915 		 *	Put this fragment into the sending queue.
916 		 */
917 		frag->tstamp = tstamp;
918 		err = output(net, sk, frag);
919 		if (err)
920 			goto fail;
921 
922 		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
923 			      IPSTATS_MIB_FRAGCREATES);
924 	}
925 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
926 		      IPSTATS_MIB_FRAGOKS);
927 	consume_skb(skb);
928 	return err;
929 
930 fail_toobig:
931 	if (skb->sk && dst_allfrag(skb_dst(skb)))
932 		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
933 
934 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
935 	err = -EMSGSIZE;
936 
937 fail:
938 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
939 		      IPSTATS_MIB_FRAGFAILS);
940 	kfree_skb(skb);
941 	return err;
942 }
943 
ip6_rt_check(const struct rt6key * rt_key,const struct in6_addr * fl_addr,const struct in6_addr * addr_cache)944 static inline int ip6_rt_check(const struct rt6key *rt_key,
945 			       const struct in6_addr *fl_addr,
946 			       const struct in6_addr *addr_cache)
947 {
948 	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
949 		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
950 }
951 
ip6_sk_dst_check(struct sock * sk,struct dst_entry * dst,const struct flowi6 * fl6)952 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
953 					  struct dst_entry *dst,
954 					  const struct flowi6 *fl6)
955 {
956 	struct ipv6_pinfo *np = inet6_sk(sk);
957 	struct rt6_info *rt;
958 
959 	if (!dst)
960 		goto out;
961 
962 	if (dst->ops->family != AF_INET6) {
963 		dst_release(dst);
964 		return NULL;
965 	}
966 
967 	rt = (struct rt6_info *)dst;
968 	/* Yes, checking route validity in not connected
969 	 * case is not very simple. Take into account,
970 	 * that we do not support routing by source, TOS,
971 	 * and MSG_DONTROUTE		--ANK (980726)
972 	 *
973 	 * 1. ip6_rt_check(): If route was host route,
974 	 *    check that cached destination is current.
975 	 *    If it is network route, we still may
976 	 *    check its validity using saved pointer
977 	 *    to the last used address: daddr_cache.
978 	 *    We do not want to save whole address now,
979 	 *    (because main consumer of this service
980 	 *    is tcp, which has not this problem),
981 	 *    so that the last trick works only on connected
982 	 *    sockets.
983 	 * 2. oif also should be the same.
984 	 */
985 	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
986 #ifdef CONFIG_IPV6_SUBTREES
987 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
988 #endif
989 	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
990 	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
991 		dst_release(dst);
992 		dst = NULL;
993 	}
994 
995 out:
996 	return dst;
997 }
998 
/*
 * ip6_dst_lookup_tail - resolve the route for @fl6, selecting a source
 * address first when the flow does not carry one.
 *
 * @net: network namespace used for all lookups
 * @sk:  socket whose source-address preferences are honoured; may be NULL,
 *       in which case no preferences (0) are used
 * @dst: in/out route pointer.  A non-NULL *@dst on entry is used as the
 *       initial route instead of performing the first lookup.  On success
 *       it holds the resulting route, on failure it is released and set
 *       to NULL.
 * @fl6: flow to resolve; fl6->saddr is filled in when it was unspecified
 *
 * Returns 0 on success, otherwise the error taken from the route
 * (dst->error), from ip6_route_get_saddr(), or -EAFNOSUPPORT for an
 * unsupported v4-mapped source/destination combination.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		/* An erroneous route contributes no 'from' info below. */
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		/* Only this path requests RT6_LOOKUP_F_IFACE for a bound oif. */
		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	/* No usable route yet: look it up, now with the source address set. */
	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	/* -EINVAL is only a local marker here; it is never returned as such. */
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/*
	 * A v4-mapped source is only accepted together with a v4-mapped or
	 * unspecified destination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	/* Drop the route reference and hand back NULL on every failure. */
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
1116 
1117 /**
1118  *	ip6_dst_lookup - perform route lookup on flow
1119  *	@net: Network namespace to perform lookup in
1120  *	@sk: socket which provides route info
1121  *	@dst: pointer to dst_entry * for result
1122  *	@fl6: flow to lookup
1123  *
1124  *	This function performs a route lookup on the given flow.
1125  *
1126  *	It returns zero on success, or a standard errno code on error.
1127  */
ip6_dst_lookup(struct net * net,struct sock * sk,struct dst_entry ** dst,struct flowi6 * fl6)1128 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1129 		   struct flowi6 *fl6)
1130 {
1131 	*dst = NULL;
1132 	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1133 }
1134 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1135 
1136 /**
1137  *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1138  *	@net: Network namespace to perform lookup in
1139  *	@sk: socket which provides route info
1140  *	@fl6: flow to lookup
1141  *	@final_dst: final destination address for ipsec lookup
1142  *
1143  *	This function performs a route lookup on the given flow.
1144  *
1145  *	It returns a valid dst pointer on success, or a pointer encoded
1146  *	error code.
1147  */
ip6_dst_lookup_flow(struct net * net,const struct sock * sk,struct flowi6 * fl6,const struct in6_addr * final_dst)1148 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1149 				      const struct in6_addr *final_dst)
1150 {
1151 	struct dst_entry *dst = NULL;
1152 	int err;
1153 
1154 	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1155 	if (err)
1156 		return ERR_PTR(err);
1157 	if (final_dst)
1158 		fl6->daddr = *final_dst;
1159 
1160 	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1161 }
1162 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1163 
1164 /**
1165  *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1166  *	@sk: socket which provides the dst cache and route info
1167  *	@fl6: flow to lookup
1168  *	@final_dst: final destination address for ipsec lookup
1169  *	@connected: whether @sk is connected or not
1170  *
1171  *	This function performs a route lookup on the given flow with the
1172  *	possibility of using the cached route in the socket if it is valid.
1173  *	It will take the socket dst lock when operating on the dst cache.
1174  *	As a result, this function can only be used in process context.
1175  *
1176  *	In addition, for a connected socket, cache the dst in the socket
1177  *	if the current cache is not valid.
1178  *
1179  *	It returns a valid dst pointer on success, or a pointer encoded
1180  *	error code.
1181  */
ip6_sk_dst_lookup_flow(struct sock * sk,struct flowi6 * fl6,const struct in6_addr * final_dst,bool connected)1182 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1183 					 const struct in6_addr *final_dst,
1184 					 bool connected)
1185 {
1186 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1187 
1188 	dst = ip6_sk_dst_check(sk, dst, fl6);
1189 	if (dst)
1190 		return dst;
1191 
1192 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1193 	if (connected && !IS_ERR(dst))
1194 		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1195 
1196 	return dst;
1197 }
1198 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1199 
1200 /**
1201  *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
1202  *      @skb: Packet for which lookup is done
1203  *      @dev: Tunnel device
1204  *      @net: Network namespace of tunnel device
1205  *      @sock: Socket which provides route info
1206  *      @saddr: Memory to store the src ip address
1207  *      @info: Tunnel information
1208  *      @protocol: IP protocol
1209  *      @use_cache: Flag to enable cache usage
1210  *      This function performs a route lookup on a tunnel
1211  *
1212  *      It returns a valid dst pointer and stores src address to be used in
1213  *      tunnel in param saddr on success, else a pointer encoded error code.
1214  */
1215 
ip6_dst_lookup_tunnel(struct sk_buff * skb,struct net_device * dev,struct net * net,struct socket * sock,struct in6_addr * saddr,const struct ip_tunnel_info * info,u8 protocol,bool use_cache)1216 struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1217 					struct net_device *dev,
1218 					struct net *net,
1219 					struct socket *sock,
1220 					struct in6_addr *saddr,
1221 					const struct ip_tunnel_info *info,
1222 					u8 protocol,
1223 					bool use_cache)
1224 {
1225 	struct dst_entry *dst = NULL;
1226 #ifdef CONFIG_DST_CACHE
1227 	struct dst_cache *dst_cache;
1228 #endif
1229 	struct flowi6 fl6;
1230 	__u8 prio;
1231 
1232 #ifdef CONFIG_DST_CACHE
1233 	dst_cache = (struct dst_cache *)&info->dst_cache;
1234 	if (use_cache) {
1235 		dst = dst_cache_get_ip6(dst_cache, saddr);
1236 		if (dst)
1237 			return dst;
1238 	}
1239 #endif
1240 	memset(&fl6, 0, sizeof(fl6));
1241 	fl6.flowi6_mark = skb->mark;
1242 	fl6.flowi6_proto = protocol;
1243 	fl6.daddr = info->key.u.ipv6.dst;
1244 	fl6.saddr = info->key.u.ipv6.src;
1245 	prio = info->key.tos;
1246 	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
1247 					  info->key.label);
1248 
1249 	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1250 					      NULL);
1251 	if (IS_ERR(dst)) {
1252 		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1253 		return ERR_PTR(-ENETUNREACH);
1254 	}
1255 	if (dst->dev == dev) { /* is this necessary? */
1256 		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1257 		dst_release(dst);
1258 		return ERR_PTR(-ELOOP);
1259 	}
1260 #ifdef CONFIG_DST_CACHE
1261 	if (use_cache)
1262 		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1263 #endif
1264 	*saddr = fl6.saddr;
1265 	return dst;
1266 }
1267 EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1268 
/*
 * Duplicate an IPv6 option header.  The copy covers (hdrlen + 1) * 8 bytes.
 * Returns NULL when @src is NULL or when the allocation fails.
 */
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}
1274 
/*
 * Duplicate an IPv6 routing header.  The copy covers (hdrlen + 1) * 8 bytes.
 * Returns NULL when @src is NULL or when the allocation fails.
 */
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}
1280 
ip6_append_data_mtu(unsigned int * mtu,int * maxfraglen,unsigned int fragheaderlen,struct sk_buff * skb,struct rt6_info * rt,unsigned int orig_mtu)1281 static void ip6_append_data_mtu(unsigned int *mtu,
1282 				int *maxfraglen,
1283 				unsigned int fragheaderlen,
1284 				struct sk_buff *skb,
1285 				struct rt6_info *rt,
1286 				unsigned int orig_mtu)
1287 {
1288 	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1289 		if (!skb) {
1290 			/* first fragment, reserve header_len */
1291 			*mtu = orig_mtu - rt->dst.header_len;
1292 
1293 		} else {
1294 			/*
1295 			 * this fragment is not first, the headers
1296 			 * space is regarded as data space.
1297 			 */
1298 			*mtu = orig_mtu;
1299 		}
1300 		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1301 			      + fragheaderlen - sizeof(struct frag_hdr);
1302 	}
1303 }
1304 
ip6_setup_cork(struct sock * sk,struct inet_cork_full * cork,struct inet6_cork * v6_cork,struct ipcm6_cookie * ipc6,struct rt6_info * rt,struct flowi6 * fl6)1305 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1306 			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1307 			  struct rt6_info *rt, struct flowi6 *fl6)
1308 {
1309 	struct ipv6_pinfo *np = inet6_sk(sk);
1310 	unsigned int mtu;
1311 	struct ipv6_txoptions *opt = ipc6->opt;
1312 
1313 	/*
1314 	 * setup for corking
1315 	 */
1316 	if (opt) {
1317 		if (WARN_ON(v6_cork->opt))
1318 			return -EINVAL;
1319 
1320 		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1321 		if (unlikely(!v6_cork->opt))
1322 			return -ENOBUFS;
1323 
1324 		v6_cork->opt->tot_len = sizeof(*opt);
1325 		v6_cork->opt->opt_flen = opt->opt_flen;
1326 		v6_cork->opt->opt_nflen = opt->opt_nflen;
1327 
1328 		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1329 						    sk->sk_allocation);
1330 		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1331 			return -ENOBUFS;
1332 
1333 		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1334 						    sk->sk_allocation);
1335 		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1336 			return -ENOBUFS;
1337 
1338 		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1339 						   sk->sk_allocation);
1340 		if (opt->hopopt && !v6_cork->opt->hopopt)
1341 			return -ENOBUFS;
1342 
1343 		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1344 						    sk->sk_allocation);
1345 		if (opt->srcrt && !v6_cork->opt->srcrt)
1346 			return -ENOBUFS;
1347 
1348 		/* need source address above miyazawa*/
1349 	}
1350 	dst_hold(&rt->dst);
1351 	cork->base.dst = &rt->dst;
1352 	cork->fl.u.ip6 = *fl6;
1353 	v6_cork->hop_limit = ipc6->hlimit;
1354 	v6_cork->tclass = ipc6->tclass;
1355 	if (rt->dst.flags & DST_XFRM_TUNNEL)
1356 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1357 		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1358 	else
1359 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1360 			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1361 	if (np->frag_size < mtu) {
1362 		if (np->frag_size)
1363 			mtu = np->frag_size;
1364 	}
1365 	if (mtu < IPV6_MIN_MTU)
1366 		return -EINVAL;
1367 	cork->base.fragsize = mtu;
1368 	cork->base.gso_size = ipc6->gso_size;
1369 	cork->base.tx_flags = 0;
1370 	cork->base.mark = ipc6->sockc.mark;
1371 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1372 
1373 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1374 		cork->base.flags |= IPCORK_ALLFRAG;
1375 	cork->base.length = 0;
1376 
1377 	cork->base.transmit_time = ipc6->sockc.transmit_time;
1378 
1379 	return 0;
1380 }
1381 
/*
 * __ip6_append_data - append data to the skbs pending on @queue.
 *
 * @sk:          sending socket
 * @fl6:         flow of the datagram, used when reporting size errors
 * @queue:       queue of pending skbs; new skbs are added at its tail
 * @cork:        corking state (route, fragment size, gso size, length, ...)
 * @v6_cork:     IPv6 corking state; provides the tx options
 * @pfrag:       page fragment used when data is appended as page frags
 * @getfrag:     callback that copies @len bytes at @offset of @from to @to;
 *               a negative return is treated as a fault
 * @from:        opaque cookie handed to @getfrag
 * @length:      number of bytes to append
 * @transhdrlen: number of bytes at the front of the first skb's payload
 *               that are reserved rather than copied by @getfrag; counted
 *               in @length
 * @flags:       MSG_* flags of the send call
 * @ipc6:        per-call control data; only ->dontfrag is read here
 *
 * Data is first added to the tail skb while it has room; otherwise a new
 * skb is allocated, and bytes of the previous skb beyond maxfraglen are
 * moved into the new one.
 *
 * Returns 0 on success.  Returns -EMSGSIZE when the data cannot be sent
 * with the current MTU (the error is also reported to the socket), and
 * -ENOBUFS, -EINVAL, -EFAULT, -ENOMEM or the allocator's error on other
 * failures.  On the "error" path cork->length is reduced by the bytes that
 * were not consumed and IPSTATS_MIB_OUTDISCARDS is incremented.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	/*
	 * Nothing queued yet: the first skb also accounts for the fragmentable
	 * extension headers and for the extra header room of the route.
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	/* With a gso size set, size against IP6_MAX_MTU and use paged skbs. */
	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Headers present in every fragment, and the resulting size limit. */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* dontfrag on UDP/RAW: report the path MTU instead of fragmenting. */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			/* Data will be copied after all; clear the zerocopy mark. */
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			/*
			 * Size of the linear area: a full MTU when more data is
			 * expected and the device cannot do scatter-gather, the
			 * whole fragment when not paged, otherwise at most
			 * MAX_HEADER with the rest accounted as paged data.
			 */
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			/* Bytes to fetch through getfrag() into the linear area. */
			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* Allocate only while within twice the send buffer. */
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/*
				 * Move the bytes beyond maxfraglen from the previous
				 * skb into this one, fix up both checksums and trim
				 * the previous skb.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			/* These only apply to the first skb of the datagram. */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			/* No scatter-gather: copy into the skb's tailroom. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			/* Copy into the page fragment and attach it as a frag. */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	/* Charge everything accumulated above to the socket in one go. */
	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	/* Only the bytes actually queued remain accounted in the cork. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
1725 
ip6_append_data(struct sock * sk,int getfrag (void * from,char * to,int offset,int len,int odd,struct sk_buff * skb),void * from,int length,int transhdrlen,struct ipcm6_cookie * ipc6,struct flowi6 * fl6,struct rt6_info * rt,unsigned int flags)1726 int ip6_append_data(struct sock *sk,
1727 		    int getfrag(void *from, char *to, int offset, int len,
1728 				int odd, struct sk_buff *skb),
1729 		    void *from, int length, int transhdrlen,
1730 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1731 		    struct rt6_info *rt, unsigned int flags)
1732 {
1733 	struct inet_sock *inet = inet_sk(sk);
1734 	struct ipv6_pinfo *np = inet6_sk(sk);
1735 	int exthdrlen;
1736 	int err;
1737 
1738 	if (flags&MSG_PROBE)
1739 		return 0;
1740 	if (skb_queue_empty(&sk->sk_write_queue)) {
1741 		/*
1742 		 * setup for corking
1743 		 */
1744 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1745 				     ipc6, rt, fl6);
1746 		if (err)
1747 			return err;
1748 
1749 		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1750 		length += exthdrlen;
1751 		transhdrlen += exthdrlen;
1752 	} else {
1753 		fl6 = &inet->cork.fl.u.ip6;
1754 		transhdrlen = 0;
1755 	}
1756 
1757 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1758 				 &np->cork, sk_page_frag(sk), getfrag,
1759 				 from, length, transhdrlen, flags, ipc6);
1760 }
1761 EXPORT_SYMBOL_GPL(ip6_append_data);
1762 
ip6_cork_release(struct inet_cork_full * cork,struct inet6_cork * v6_cork)1763 static void ip6_cork_release(struct inet_cork_full *cork,
1764 			     struct inet6_cork *v6_cork)
1765 {
1766 	if (v6_cork->opt) {
1767 		kfree(v6_cork->opt->dst0opt);
1768 		kfree(v6_cork->opt->dst1opt);
1769 		kfree(v6_cork->opt->hopopt);
1770 		kfree(v6_cork->opt->srcrt);
1771 		kfree(v6_cork->opt);
1772 		v6_cork->opt = NULL;
1773 	}
1774 
1775 	if (cork->base.dst) {
1776 		dst_release(cork->base.dst);
1777 		cork->base.dst = NULL;
1778 		cork->base.flags &= ~IPCORK_ALLFRAG;
1779 	}
1780 	memset(&cork->fl, 0, sizeof(cork->fl));
1781 }
1782 
__ip6_make_skb(struct sock * sk,struct sk_buff_head * queue,struct inet_cork_full * cork,struct inet6_cork * v6_cork)1783 struct sk_buff *__ip6_make_skb(struct sock *sk,
1784 			       struct sk_buff_head *queue,
1785 			       struct inet_cork_full *cork,
1786 			       struct inet6_cork *v6_cork)
1787 {
1788 	struct sk_buff *skb, *tmp_skb;
1789 	struct sk_buff **tail_skb;
1790 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1791 	struct ipv6_pinfo *np = inet6_sk(sk);
1792 	struct net *net = sock_net(sk);
1793 	struct ipv6hdr *hdr;
1794 	struct ipv6_txoptions *opt = v6_cork->opt;
1795 	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1796 	struct flowi6 *fl6 = &cork->fl.u.ip6;
1797 	unsigned char proto = fl6->flowi6_proto;
1798 
1799 	skb = __skb_dequeue(queue);
1800 	if (!skb)
1801 		goto out;
1802 	tail_skb = &(skb_shinfo(skb)->frag_list);
1803 
1804 	/* move skb->data to ip header from ext header */
1805 	if (skb->data < skb_network_header(skb))
1806 		__skb_pull(skb, skb_network_offset(skb));
1807 	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1808 		__skb_pull(tmp_skb, skb_network_header_len(skb));
1809 		*tail_skb = tmp_skb;
1810 		tail_skb = &(tmp_skb->next);
1811 		skb->len += tmp_skb->len;
1812 		skb->data_len += tmp_skb->len;
1813 		skb->truesize += tmp_skb->truesize;
1814 		tmp_skb->destructor = NULL;
1815 		tmp_skb->sk = NULL;
1816 	}
1817 
1818 	/* Allow local fragmentation. */
1819 	skb->ignore_df = ip6_sk_ignore_df(sk);
1820 
1821 	*final_dst = fl6->daddr;
1822 	__skb_pull(skb, skb_network_header_len(skb));
1823 	if (opt && opt->opt_flen)
1824 		ipv6_push_frag_opts(skb, opt, &proto);
1825 	if (opt && opt->opt_nflen)
1826 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1827 
1828 	skb_push(skb, sizeof(struct ipv6hdr));
1829 	skb_reset_network_header(skb);
1830 	hdr = ipv6_hdr(skb);
1831 
1832 	ip6_flow_hdr(hdr, v6_cork->tclass,
1833 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1834 					ip6_autoflowlabel(net, np), fl6));
1835 	hdr->hop_limit = v6_cork->hop_limit;
1836 	hdr->nexthdr = proto;
1837 	hdr->saddr = fl6->saddr;
1838 	hdr->daddr = *final_dst;
1839 
1840 	skb->priority = sk->sk_priority;
1841 	skb->mark = cork->base.mark;
1842 
1843 	skb->tstamp = cork->base.transmit_time;
1844 
1845 	skb_dst_set(skb, dst_clone(&rt->dst));
1846 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1847 	if (proto == IPPROTO_ICMPV6) {
1848 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1849 
1850 		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1851 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1852 	}
1853 
1854 	ip6_cork_release(cork, v6_cork);
1855 out:
1856 	return skb;
1857 }
1858 
ip6_send_skb(struct sk_buff * skb)1859 int ip6_send_skb(struct sk_buff *skb)
1860 {
1861 	struct net *net = sock_net(skb->sk);
1862 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1863 	int err;
1864 
1865 	err = ip6_local_out(net, skb->sk, skb);
1866 	if (err) {
1867 		if (err > 0)
1868 			err = net_xmit_errno(err);
1869 		if (err)
1870 			IP6_INC_STATS(net, rt->rt6i_idev,
1871 				      IPSTATS_MIB_OUTDISCARDS);
1872 	}
1873 
1874 	return err;
1875 }
1876 
/*
 * ip6_push_pending_frames - build and send the socket's pending datagram.
 *
 * Returns 0 when ip6_finish_skb() yields no packet, otherwise the result
 * of ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1888 
__ip6_flush_pending_frames(struct sock * sk,struct sk_buff_head * queue,struct inet_cork_full * cork,struct inet6_cork * v6_cork)1889 static void __ip6_flush_pending_frames(struct sock *sk,
1890 				       struct sk_buff_head *queue,
1891 				       struct inet_cork_full *cork,
1892 				       struct inet6_cork *v6_cork)
1893 {
1894 	struct sk_buff *skb;
1895 
1896 	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1897 		if (skb_dst(skb))
1898 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1899 				      IPSTATS_MIB_OUTDISCARDS);
1900 		kfree_skb(skb);
1901 	}
1902 
1903 	ip6_cork_release(cork, v6_cork);
1904 }
1905 
ip6_flush_pending_frames(struct sock * sk)1906 void ip6_flush_pending_frames(struct sock *sk)
1907 {
1908 	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1909 				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1910 }
1911 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1912 
ip6_make_skb(struct sock * sk,int getfrag (void * from,char * to,int offset,int len,int odd,struct sk_buff * skb),void * from,int length,int transhdrlen,struct ipcm6_cookie * ipc6,struct flowi6 * fl6,struct rt6_info * rt,unsigned int flags,struct inet_cork_full * cork)1913 struct sk_buff *ip6_make_skb(struct sock *sk,
1914 			     int getfrag(void *from, char *to, int offset,
1915 					 int len, int odd, struct sk_buff *skb),
1916 			     void *from, int length, int transhdrlen,
1917 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1918 			     struct rt6_info *rt, unsigned int flags,
1919 			     struct inet_cork_full *cork)
1920 {
1921 	struct inet6_cork v6_cork;
1922 	struct sk_buff_head queue;
1923 	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1924 	int err;
1925 
1926 	if (flags & MSG_PROBE)
1927 		return NULL;
1928 
1929 	__skb_queue_head_init(&queue);
1930 
1931 	cork->base.flags = 0;
1932 	cork->base.addr = 0;
1933 	cork->base.opt = NULL;
1934 	cork->base.dst = NULL;
1935 	v6_cork.opt = NULL;
1936 	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1937 	if (err) {
1938 		ip6_cork_release(cork, &v6_cork);
1939 		return ERR_PTR(err);
1940 	}
1941 	if (ipc6->dontfrag < 0)
1942 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1943 
1944 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1945 				&current->task_frag, getfrag, from,
1946 				length + exthdrlen, transhdrlen + exthdrlen,
1947 				flags, ipc6);
1948 	if (err) {
1949 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1950 		return ERR_PTR(err);
1951 	}
1952 
1953 	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1954 }
1955