1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/udp.h>
45 #include <net/dst_metadata.h>
46 
47 #if IS_ENABLED(CONFIG_IPV6)
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52 
/* Compute the hash-table index for a (key, remote address) pair: the two
 * values are XORed together and reduced to IP_TNL_HASH_BITS bits by hash_32().
 */
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	u32 folded = (__force u32)key ^ (__force u32)remote;

	return hash_32(folded, IP_TNL_HASH_BITS);
}
58 
ip_tunnel_key_match(const struct ip_tunnel_parm * p,__be16 flags,__be32 key)59 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60 				__be16 flags, __be32 key)
61 {
62 	if (p->i_flags & TUNNEL_KEY) {
63 		if (flags & TUNNEL_KEY)
64 			return key == p->i_key;
65 		else
66 			/* key expected, none present */
67 			return false;
68 	} else
69 		return !(flags & TUNNEL_KEY);
70 }
71 
/* Fallback tunnel: no source, no destination, no key, no options.

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
/* @itn:    tunnel table to search
 * @link:   value compared against each tunnel's parms.link
 * @flags:  TUNNEL_* flags describing the packet (key presence)
 * @remote: address compared against the tunnels' configured daddr
 * @local:  address compared against the tunnels' configured saddr
 * @key:    tunnel key taken from the packet
 *
 * Four passes are made, from most to least specific.  In every pass only
 * devices that are IFF_UP are considered; a tunnel whose parms.link equals
 * @link is returned at once, any other match is only remembered as a
 * candidate.  If no pass returns, the result is, in order: the candidate,
 * the collect_md tunnel (if up), the fallback tunnel (if up), or NULL.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *best = NULL;
	struct net_device *fb_dev;
	struct hlist_head *bucket;
	struct ip_tunnel *t;

	/* Passes 1 and 2 walk the bucket selected by (key, remote). */
	bucket = &itn->tunnels[ip_tunnel_hash(key, remote)];

	/* Pass 1: both saddr and daddr equal the given addresses. */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		if (local != t->parms.iph.saddr)
			continue;
		if (remote != t->parms.iph.daddr)
			continue;
		if (!(t->dev->flags & IFF_UP))
			continue;
		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;

		/* Unlike the later passes, a later match replaces an
		 * earlier candidate here.
		 */
		best = t;
	}

	/* Pass 2: daddr matches and saddr is unset (zero). */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		if (remote != t->parms.iph.daddr)
			continue;
		if (t->parms.iph.saddr != 0)
			continue;
		if (!(t->dev->flags & IFF_UP))
			continue;
		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		if (!best)
			best = t;
	}

	/* Passes 3 and 4 walk the bucket selected by (key, 0). */
	bucket = &itn->tunnels[ip_tunnel_hash(key, 0)];

	/* Pass 3: saddr matches with daddr unset, or @local is a multicast
	 * address equal to the tunnel's daddr.
	 */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		bool local_only = local == t->parms.iph.saddr &&
				  t->parms.iph.daddr == 0;
		bool mcast_dst = local == t->parms.iph.daddr &&
				 ipv4_is_multicast(local);

		if (!local_only && !mcast_dst)
			continue;
		if (!(t->dev->flags & IFF_UP))
			continue;
		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		if (!best)
			best = t;
	}

	/* Pass 4: neither address configured.  The key is compared directly
	 * against i_key unless @flags contains TUNNEL_NO_KEY.
	 */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		if (!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key)
			continue;
		if (t->parms.iph.saddr != 0 || t->parms.iph.daddr != 0)
			continue;
		if (!(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		if (!best)
			best = t;
	}

	if (best)
		return best;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && (t->dev->flags & IFF_UP))
		return t;

	fb_dev = READ_ONCE(itn->fb_tunnel_dev);
	if (fb_dev && (fb_dev->flags & IFF_UP))
		return netdev_priv(fb_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
173 
ip_bucket(struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms)174 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
175 				    struct ip_tunnel_parm *parms)
176 {
177 	unsigned int h;
178 	__be32 remote;
179 	__be32 i_key = parms->i_key;
180 
181 	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
182 		remote = parms->iph.daddr;
183 	else
184 		remote = 0;
185 
186 	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
187 		i_key = 0;
188 
189 	h = ip_tunnel_hash(i_key, remote);
190 	return &itn->tunnels[h];
191 }
192 
ip_tunnel_add(struct ip_tunnel_net * itn,struct ip_tunnel * t)193 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
194 {
195 	struct hlist_head *head = ip_bucket(itn, &t->parms);
196 
197 	if (t->collect_md)
198 		rcu_assign_pointer(itn->collect_md_tun, t);
199 	hlist_add_head_rcu(&t->hash_node, head);
200 }
201 
ip_tunnel_del(struct ip_tunnel_net * itn,struct ip_tunnel * t)202 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
203 {
204 	if (t->collect_md)
205 		rcu_assign_pointer(itn->collect_md_tun, NULL);
206 	hlist_del_init_rcu(&t->hash_node);
207 }
208 
ip_tunnel_find(struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms,int type)209 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
210 					struct ip_tunnel_parm *parms,
211 					int type)
212 {
213 	__be32 remote = parms->iph.daddr;
214 	__be32 local = parms->iph.saddr;
215 	__be32 key = parms->i_key;
216 	__be16 flags = parms->i_flags;
217 	int link = parms->link;
218 	struct ip_tunnel *t = NULL;
219 	struct hlist_head *head = ip_bucket(itn, parms);
220 
221 	hlist_for_each_entry_rcu(t, head, hash_node) {
222 		if (local == t->parms.iph.saddr &&
223 		    remote == t->parms.iph.daddr &&
224 		    link == t->parms.link &&
225 		    type == t->dev->type &&
226 		    ip_tunnel_key_match(&t->parms, flags, key))
227 			break;
228 	}
229 	return t;
230 }
231 
__ip_tunnel_create(struct net * net,const struct rtnl_link_ops * ops,struct ip_tunnel_parm * parms)232 static struct net_device *__ip_tunnel_create(struct net *net,
233 					     const struct rtnl_link_ops *ops,
234 					     struct ip_tunnel_parm *parms)
235 {
236 	int err;
237 	struct ip_tunnel *tunnel;
238 	struct net_device *dev;
239 	char name[IFNAMSIZ];
240 
241 	err = -E2BIG;
242 	if (parms->name[0]) {
243 		if (!dev_valid_name(parms->name))
244 			goto failed;
245 		strlcpy(name, parms->name, IFNAMSIZ);
246 	} else {
247 		if (strlen(ops->kind) > (IFNAMSIZ - 3))
248 			goto failed;
249 		strcpy(name, ops->kind);
250 		strcat(name, "%d");
251 	}
252 
253 	ASSERT_RTNL();
254 	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
255 	if (!dev) {
256 		err = -ENOMEM;
257 		goto failed;
258 	}
259 	dev_net_set(dev, net);
260 
261 	dev->rtnl_link_ops = ops;
262 
263 	tunnel = netdev_priv(dev);
264 	tunnel->parms = *parms;
265 	tunnel->net = net;
266 
267 	err = register_netdevice(dev);
268 	if (err)
269 		goto failed_free;
270 
271 	return dev;
272 
273 failed_free:
274 	free_netdev(dev);
275 failed:
276 	return ERR_PTR(err);
277 }
278 
ip_tunnel_bind_dev(struct net_device * dev)279 static int ip_tunnel_bind_dev(struct net_device *dev)
280 {
281 	struct net_device *tdev = NULL;
282 	struct ip_tunnel *tunnel = netdev_priv(dev);
283 	const struct iphdr *iph;
284 	int hlen = LL_MAX_HEADER;
285 	int mtu = ETH_DATA_LEN;
286 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
287 
288 	iph = &tunnel->parms.iph;
289 
290 	/* Guess output device to choose reasonable mtu and needed_headroom */
291 	if (iph->daddr) {
292 		struct flowi4 fl4;
293 		struct rtable *rt;
294 
295 		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
296 				    iph->saddr, tunnel->parms.o_key,
297 				    RT_TOS(iph->tos), tunnel->parms.link,
298 				    tunnel->fwmark, 0);
299 		rt = ip_route_output_key(tunnel->net, &fl4);
300 
301 		if (!IS_ERR(rt)) {
302 			tdev = rt->dst.dev;
303 			ip_rt_put(rt);
304 		}
305 		if (dev->type != ARPHRD_ETHER)
306 			dev->flags |= IFF_POINTOPOINT;
307 
308 		dst_cache_reset(&tunnel->dst_cache);
309 	}
310 
311 	if (!tdev && tunnel->parms.link)
312 		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
313 
314 	if (tdev) {
315 		hlen = tdev->hard_header_len + tdev->needed_headroom;
316 		mtu = min(tdev->mtu, IP_MAX_MTU);
317 	}
318 
319 	dev->needed_headroom = t_hlen + hlen;
320 	mtu -= (dev->hard_header_len + t_hlen);
321 
322 	if (mtu < IPV4_MIN_MTU)
323 		mtu = IPV4_MIN_MTU;
324 
325 	return mtu;
326 }
327 
ip_tunnel_create(struct net * net,struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms)328 static struct ip_tunnel *ip_tunnel_create(struct net *net,
329 					  struct ip_tunnel_net *itn,
330 					  struct ip_tunnel_parm *parms)
331 {
332 	struct ip_tunnel *nt;
333 	struct net_device *dev;
334 	int t_hlen;
335 	int mtu;
336 	int err;
337 
338 	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
339 	if (IS_ERR(dev))
340 		return ERR_CAST(dev);
341 
342 	mtu = ip_tunnel_bind_dev(dev);
343 	err = dev_set_mtu(dev, mtu);
344 	if (err)
345 		goto err_dev_set_mtu;
346 
347 	nt = netdev_priv(dev);
348 	t_hlen = nt->hlen + sizeof(struct iphdr);
349 	dev->min_mtu = ETH_MIN_MTU;
350 	dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
351 	ip_tunnel_add(itn, nt);
352 	return nt;
353 
354 err_dev_set_mtu:
355 	unregister_netdevice(dev);
356 	return ERR_PTR(err);
357 }
358 
/* Common receive path for a packet that has been matched to @tunnel.
 *
 * @tunnel:        tunnel the packet belongs to
 * @skb:           the packet; always consumed (delivered or freed)
 * @tpi:           parsed tunnel header info; flags and seq are used here
 * @tun_dst:       optional metadata dst, attached to @skb on delivery and
 *                 released on drop
 * @log_ecn_error: log (rate-limited) when ECN decapsulation reports an error
 *
 * Returns 0 in every case.  Drops are visible only through the rx error
 * counters of the tunnel device.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	bool pkt_csum, tun_csum;
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must agree between packet and configuration. */
	pkt_csum = !!(tpi->flags & TUNNEL_CSUM);
	tun_csum = !!(tunnel->parms.i_flags & TUNNEL_CSUM);
	if (pkt_csum != tun_csum) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		/* Sequencing is required: reject packets without a sequence
		 * number and packets older than the next expected one.
		 */
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno &&
		     (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		/* A result above 1 is fatal for the packet; 1 is only logged. */
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type != ARPHRD_ETHER) {
		skb->dev = tunnel->dev;
	} else {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
427 
ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops * ops,unsigned int num)428 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
429 			    unsigned int num)
430 {
431 	if (num >= MAX_IPTUN_ENCAP_OPS)
432 		return -ERANGE;
433 
434 	return !cmpxchg((const struct ip_tunnel_encap_ops **)
435 			&iptun_encaps[num],
436 			NULL, ops) ? 0 : -1;
437 }
438 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
439 
ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops * ops,unsigned int num)440 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
441 			    unsigned int num)
442 {
443 	int ret;
444 
445 	if (num >= MAX_IPTUN_ENCAP_OPS)
446 		return -ERANGE;
447 
448 	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
449 		       &iptun_encaps[num],
450 		       ops, NULL) == ops) ? 0 : -1;
451 
452 	synchronize_net();
453 
454 	return ret;
455 }
456 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
457 
ip_tunnel_encap_setup(struct ip_tunnel * t,struct ip_tunnel_encap * ipencap)458 int ip_tunnel_encap_setup(struct ip_tunnel *t,
459 			  struct ip_tunnel_encap *ipencap)
460 {
461 	int hlen;
462 
463 	memset(&t->encap, 0, sizeof(t->encap));
464 
465 	hlen = ip_encap_hlen(ipencap);
466 	if (hlen < 0)
467 		return hlen;
468 
469 	t->encap.type = ipencap->type;
470 	t->encap.sport = ipencap->sport;
471 	t->encap.dport = ipencap->dport;
472 	t->encap.flags = ipencap->flags;
473 
474 	t->encap_hlen = hlen;
475 	t->hlen = t->encap_hlen + t->tun_hlen;
476 
477 	return 0;
478 }
479 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
480 
tnl_update_pmtu(struct net_device * dev,struct sk_buff * skb,struct rtable * rt,__be16 df,const struct iphdr * inner_iph,int tunnel_hlen,__be32 dst,bool md)481 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
482 			    struct rtable *rt, __be16 df,
483 			    const struct iphdr *inner_iph,
484 			    int tunnel_hlen, __be32 dst, bool md)
485 {
486 	struct ip_tunnel *tunnel = netdev_priv(dev);
487 	int pkt_size;
488 	int mtu;
489 
490 	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
491 	pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;
492 
493 	if (df)
494 		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
495 					- sizeof(struct iphdr) - tunnel_hlen;
496 	else
497 		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
498 
499 	if (skb_valid_dst(skb))
500 		skb_dst_update_pmtu_no_confirm(skb, mtu);
501 
502 	if (skb->protocol == htons(ETH_P_IP)) {
503 		if (!skb_is_gso(skb) &&
504 		    (inner_iph->frag_off & htons(IP_DF)) &&
505 		    mtu < pkt_size) {
506 			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
507 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
508 			return -E2BIG;
509 		}
510 	}
511 #if IS_ENABLED(CONFIG_IPV6)
512 	else if (skb->protocol == htons(ETH_P_IPV6)) {
513 		struct rt6_info *rt6;
514 		__be32 daddr;
515 
516 		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
517 					   NULL;
518 		daddr = md ? dst : tunnel->parms.iph.daddr;
519 
520 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
521 			   mtu >= IPV6_MIN_MTU) {
522 			if ((daddr && !ipv4_is_multicast(daddr)) ||
523 			    rt6->rt6i_dst.plen == 128) {
524 				rt6->rt6i_flags |= RTF_MODIFIED;
525 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
526 			}
527 		}
528 
529 		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
530 					mtu < pkt_size) {
531 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
532 			return -E2BIG;
533 		}
534 	}
535 #endif
536 	return 0;
537 }
538 
/* Transmit @skb through tunnel device @dev using per-packet tunnel
 * metadata instead of the device's configured parameters.
 *
 * @skb:         packet to send; always consumed (transmitted or freed)
 * @dev:         tunnel device
 * @proto:       IP protocol number for the outer header
 * @tunnel_hlen: tunnel header length, passed on to tnl_update_pmtu()
 *
 * The outer addresses, TOS, TTL and tunnel id are taken from the skb's TX
 * tunnel info.  A key TOS of 1 and a key TTL of 0 mean "take the value from
 * the inner header" (TTL falls back to the route's hop limit for non-IP
 * payloads).  Devices with an encapsulation type other than
 * TUNNEL_ENCAP_NONE are rejected.
 *
 * Errors are reported only through dev->stats (tx_errors, tx_dropped,
 * tx_carrier_errors, collisions).
 */
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		/* Inherit TOS / traffic class from the inner header. */
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	/* Refuse a route that loops back into this very device. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	/* When the metadata does not force DF, inherit it from the inner
	 * IPv4 header, as ip_tunnel_xmit() does.  Otherwise an inner packet
	 * with DF set would be sent with a fragmentable outer header and
	 * path MTU discovery would not work across the tunnel.
	 */
	if (!df && skb->protocol == htons(ETH_P_IP))
		df = inner_iph->frag_off & htons(IP_DF);

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
631 
ip_tunnel_xmit(struct sk_buff * skb,struct net_device * dev,const struct iphdr * tnl_params,u8 protocol)632 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
633 		    const struct iphdr *tnl_params, u8 protocol)
634 {
635 	struct ip_tunnel *tunnel = netdev_priv(dev);
636 	struct ip_tunnel_info *tun_info = NULL;
637 	const struct iphdr *inner_iph;
638 	unsigned int max_headroom;	/* The extra header space needed */
639 	struct rtable *rt = NULL;		/* Route to the other host */
640 	bool use_cache = false;
641 	struct flowi4 fl4;
642 	bool md = false;
643 	bool connected;
644 	u8 tos, ttl;
645 	__be32 dst;
646 	__be16 df;
647 
648 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
649 	connected = (tunnel->parms.iph.daddr != 0);
650 
651 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
652 
653 	dst = tnl_params->daddr;
654 	if (dst == 0) {
655 		/* NBMA tunnel */
656 
657 		if (!skb_dst(skb)) {
658 			dev->stats.tx_fifo_errors++;
659 			goto tx_error;
660 		}
661 
662 		tun_info = skb_tunnel_info(skb);
663 		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
664 		    ip_tunnel_info_af(tun_info) == AF_INET &&
665 		    tun_info->key.u.ipv4.dst) {
666 			dst = tun_info->key.u.ipv4.dst;
667 			md = true;
668 			connected = true;
669 		}
670 		else if (skb->protocol == htons(ETH_P_IP)) {
671 			rt = skb_rtable(skb);
672 			dst = rt_nexthop(rt, inner_iph->daddr);
673 		}
674 #if IS_ENABLED(CONFIG_IPV6)
675 		else if (skb->protocol == htons(ETH_P_IPV6)) {
676 			const struct in6_addr *addr6;
677 			struct neighbour *neigh;
678 			bool do_tx_error_icmp;
679 			int addr_type;
680 
681 			neigh = dst_neigh_lookup(skb_dst(skb),
682 						 &ipv6_hdr(skb)->daddr);
683 			if (!neigh)
684 				goto tx_error;
685 
686 			addr6 = (const struct in6_addr *)&neigh->primary_key;
687 			addr_type = ipv6_addr_type(addr6);
688 
689 			if (addr_type == IPV6_ADDR_ANY) {
690 				addr6 = &ipv6_hdr(skb)->daddr;
691 				addr_type = ipv6_addr_type(addr6);
692 			}
693 
694 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
695 				do_tx_error_icmp = true;
696 			else {
697 				do_tx_error_icmp = false;
698 				dst = addr6->s6_addr32[3];
699 			}
700 			neigh_release(neigh);
701 			if (do_tx_error_icmp)
702 				goto tx_error_icmp;
703 		}
704 #endif
705 		else
706 			goto tx_error;
707 
708 		if (!md)
709 			connected = false;
710 	}
711 
712 	tos = tnl_params->tos;
713 	if (tos & 0x1) {
714 		tos &= ~0x1;
715 		if (skb->protocol == htons(ETH_P_IP)) {
716 			tos = inner_iph->tos;
717 			connected = false;
718 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
719 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
720 			connected = false;
721 		}
722 	}
723 
724 	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
725 			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
726 			    tunnel->fwmark, skb_get_hash(skb));
727 
728 	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
729 		goto tx_error;
730 
731 	if (connected && md) {
732 		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
733 		if (use_cache)
734 			rt = dst_cache_get_ip4(&tun_info->dst_cache,
735 					       &fl4.saddr);
736 	} else {
737 		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
738 						&fl4.saddr) : NULL;
739 	}
740 
741 	if (!rt) {
742 		rt = ip_route_output_key(tunnel->net, &fl4);
743 
744 		if (IS_ERR(rt)) {
745 			dev->stats.tx_carrier_errors++;
746 			goto tx_error;
747 		}
748 		if (use_cache)
749 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
750 					  fl4.saddr);
751 		else if (!md && connected)
752 			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
753 					  fl4.saddr);
754 	}
755 
756 	if (rt->dst.dev == dev) {
757 		ip_rt_put(rt);
758 		dev->stats.collisions++;
759 		goto tx_error;
760 	}
761 
762 	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
763 			    0, 0, false)) {
764 		ip_rt_put(rt);
765 		goto tx_error;
766 	}
767 
768 	if (tunnel->err_count > 0) {
769 		if (time_before(jiffies,
770 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
771 			tunnel->err_count--;
772 
773 			dst_link_failure(skb);
774 		} else
775 			tunnel->err_count = 0;
776 	}
777 
778 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
779 	ttl = tnl_params->ttl;
780 	if (ttl == 0) {
781 		if (skb->protocol == htons(ETH_P_IP))
782 			ttl = inner_iph->ttl;
783 #if IS_ENABLED(CONFIG_IPV6)
784 		else if (skb->protocol == htons(ETH_P_IPV6))
785 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
786 #endif
787 		else
788 			ttl = ip4_dst_hoplimit(&rt->dst);
789 	}
790 
791 	df = tnl_params->frag_off;
792 	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
793 		df |= (inner_iph->frag_off&htons(IP_DF));
794 
795 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
796 			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
797 	if (max_headroom > dev->needed_headroom)
798 		dev->needed_headroom = max_headroom;
799 
800 	if (skb_cow_head(skb, dev->needed_headroom)) {
801 		ip_rt_put(rt);
802 		dev->stats.tx_dropped++;
803 		kfree_skb(skb);
804 		return;
805 	}
806 
807 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
808 		      df, !net_eq(tunnel->net, dev_net(dev)));
809 	return;
810 
811 #if IS_ENABLED(CONFIG_IPV6)
812 tx_error_icmp:
813 	dst_link_failure(skb);
814 #endif
815 tx_error:
816 	dev->stats.tx_errors++;
817 	kfree_skb(skb);
818 }
819 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
820 
/* Apply new parameters @p (and @fwmark) to the existing tunnel @t / @dev.
 *
 * @set_mtu: when the link or fwmark changed and the device was re-bound,
 *           also store the recomputed MTU in dev->mtu.
 *
 * The tunnel's dst cache is always reset and a netdev state change
 * notification is sent.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	bool rebind;

	/* The hash bucket depends on daddr and i_key (see ip_bucket()), so
	 * the tunnel is unhashed while those fields change and rehashed
	 * afterwards.
	 */
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	rebind = t->parms.link != p->link || t->fwmark != fwmark;
	if (rebind) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;

		/* Re-binding also refreshes needed_headroom, so it is done
		 * even when the MTU itself is not applied.
		 */
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}

	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}
855 
ip_tunnel_ctl(struct net_device * dev,struct ip_tunnel_parm * p,int cmd)856 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
857 {
858 	int err = 0;
859 	struct ip_tunnel *t = netdev_priv(dev);
860 	struct net *net = t->net;
861 	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
862 
863 	switch (cmd) {
864 	case SIOCGETTUNNEL:
865 		if (dev == itn->fb_tunnel_dev) {
866 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
867 			if (!t)
868 				t = netdev_priv(dev);
869 		}
870 		memcpy(p, &t->parms, sizeof(*p));
871 		break;
872 
873 	case SIOCADDTUNNEL:
874 	case SIOCCHGTUNNEL:
875 		err = -EPERM;
876 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
877 			goto done;
878 		if (p->iph.ttl)
879 			p->iph.frag_off |= htons(IP_DF);
880 		if (!(p->i_flags & VTI_ISVTI)) {
881 			if (!(p->i_flags & TUNNEL_KEY))
882 				p->i_key = 0;
883 			if (!(p->o_flags & TUNNEL_KEY))
884 				p->o_key = 0;
885 		}
886 
887 		t = ip_tunnel_find(itn, p, itn->type);
888 
889 		if (cmd == SIOCADDTUNNEL) {
890 			if (!t) {
891 				t = ip_tunnel_create(net, itn, p);
892 				err = PTR_ERR_OR_ZERO(t);
893 				break;
894 			}
895 
896 			err = -EEXIST;
897 			break;
898 		}
899 		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
900 			if (t) {
901 				if (t->dev != dev) {
902 					err = -EEXIST;
903 					break;
904 				}
905 			} else {
906 				unsigned int nflags = 0;
907 
908 				if (ipv4_is_multicast(p->iph.daddr))
909 					nflags = IFF_BROADCAST;
910 				else if (p->iph.daddr)
911 					nflags = IFF_POINTOPOINT;
912 
913 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
914 					err = -EINVAL;
915 					break;
916 				}
917 
918 				t = netdev_priv(dev);
919 			}
920 		}
921 
922 		if (t) {
923 			err = 0;
924 			ip_tunnel_update(itn, t, dev, p, true, 0);
925 		} else {
926 			err = -ENOENT;
927 		}
928 		break;
929 
930 	case SIOCDELTUNNEL:
931 		err = -EPERM;
932 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
933 			goto done;
934 
935 		if (dev == itn->fb_tunnel_dev) {
936 			err = -ENOENT;
937 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
938 			if (!t)
939 				goto done;
940 			err = -EPERM;
941 			if (t == netdev_priv(itn->fb_tunnel_dev))
942 				goto done;
943 			dev = t->dev;
944 		}
945 		unregister_netdevice(dev);
946 		err = 0;
947 		break;
948 
949 	default:
950 		err = -EINVAL;
951 	}
952 
953 done:
954 	return err;
955 }
956 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
957 
ip_tunnel_ioctl(struct net_device * dev,struct ifreq * ifr,int cmd)958 int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
959 {
960 	struct ip_tunnel_parm p;
961 	int err;
962 
963 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
964 		return -EFAULT;
965 	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
966 	if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
967 		return -EFAULT;
968 	return err;
969 }
970 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
971 
/* Set the MTU of tunnel device @dev.
 *
 * @new_mtu: requested MTU
 * @strict:  if true, a request above the tunnel's maximum is rejected;
 *           if false, it is silently clamped to the maximum.
 *
 * The maximum is IP_MAX_MTU minus the device's hard header length and the
 * tunnel plus IPv4 header lengths.  Returns 0 on success, or -EINVAL if
 * @new_mtu is below ETH_MIN_MTU or (in strict mode) above the maximum.
 */
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu && strict)
		return -EINVAL;

	dev->mtu = new_mtu > max_mtu ? max_mtu : new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
992 
ip_tunnel_change_mtu(struct net_device * dev,int new_mtu)993 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
994 {
995 	return __ip_tunnel_change_mtu(dev, new_mtu, true);
996 }
997 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
998 
ip_tunnel_dev_free(struct net_device * dev)999 static void ip_tunnel_dev_free(struct net_device *dev)
1000 {
1001 	struct ip_tunnel *tunnel = netdev_priv(dev);
1002 
1003 	gro_cells_destroy(&tunnel->gro_cells);
1004 	dst_cache_destroy(&tunnel->dst_cache);
1005 	free_percpu(dev->tstats);
1006 }
1007 
ip_tunnel_dellink(struct net_device * dev,struct list_head * head)1008 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1009 {
1010 	struct ip_tunnel *tunnel = netdev_priv(dev);
1011 	struct ip_tunnel_net *itn;
1012 
1013 	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1014 
1015 	if (itn->fb_tunnel_dev != dev) {
1016 		ip_tunnel_del(itn, netdev_priv(dev));
1017 		unregister_netdevice_queue(dev, head);
1018 	}
1019 }
1020 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1021 
ip_tunnel_get_link_net(const struct net_device * dev)1022 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1023 {
1024 	struct ip_tunnel *tunnel = netdev_priv(dev);
1025 
1026 	return tunnel->net;
1027 }
1028 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1029 
ip_tunnel_get_iflink(const struct net_device * dev)1030 int ip_tunnel_get_iflink(const struct net_device *dev)
1031 {
1032 	struct ip_tunnel *tunnel = netdev_priv(dev);
1033 
1034 	return tunnel->parms.link;
1035 }
1036 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1037 
ip_tunnel_init_net(struct net * net,unsigned int ip_tnl_net_id,struct rtnl_link_ops * ops,char * devname)1038 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1039 				  struct rtnl_link_ops *ops, char *devname)
1040 {
1041 	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1042 	struct ip_tunnel_parm parms;
1043 	unsigned int i;
1044 
1045 	itn->rtnl_link_ops = ops;
1046 	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1047 		INIT_HLIST_HEAD(&itn->tunnels[i]);
1048 
1049 	if (!ops || !net_has_fallback_tunnels(net)) {
1050 		struct ip_tunnel_net *it_init_net;
1051 
1052 		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1053 		itn->type = it_init_net->type;
1054 		itn->fb_tunnel_dev = NULL;
1055 		return 0;
1056 	}
1057 
1058 	memset(&parms, 0, sizeof(parms));
1059 	if (devname)
1060 		strlcpy(parms.name, devname, IFNAMSIZ);
1061 
1062 	rtnl_lock();
1063 	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1064 	/* FB netdevice is special: we have one, and only one per netns.
1065 	 * Allowing to move it to another netns is clearly unsafe.
1066 	 */
1067 	if (!IS_ERR(itn->fb_tunnel_dev)) {
1068 		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1069 		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1070 		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1071 		itn->type = itn->fb_tunnel_dev->type;
1072 	}
1073 	rtnl_unlock();
1074 
1075 	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1076 }
1077 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1078 
ip_tunnel_destroy(struct net * net,struct ip_tunnel_net * itn,struct list_head * head,struct rtnl_link_ops * ops)1079 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1080 			      struct list_head *head,
1081 			      struct rtnl_link_ops *ops)
1082 {
1083 	struct net_device *dev, *aux;
1084 	int h;
1085 
1086 	for_each_netdev_safe(net, dev, aux)
1087 		if (dev->rtnl_link_ops == ops)
1088 			unregister_netdevice_queue(dev, head);
1089 
1090 	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1091 		struct ip_tunnel *t;
1092 		struct hlist_node *n;
1093 		struct hlist_head *thead = &itn->tunnels[h];
1094 
1095 		hlist_for_each_entry_safe(t, n, thead, hash_node)
1096 			/* If dev is in the same netns, it has already
1097 			 * been added to the list by the previous loop.
1098 			 */
1099 			if (!net_eq(dev_net(t->dev), net))
1100 				unregister_netdevice_queue(t->dev, head);
1101 	}
1102 }
1103 
ip_tunnel_delete_nets(struct list_head * net_list,unsigned int id,struct rtnl_link_ops * ops)1104 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1105 			   struct rtnl_link_ops *ops)
1106 {
1107 	struct ip_tunnel_net *itn;
1108 	struct net *net;
1109 	LIST_HEAD(list);
1110 
1111 	rtnl_lock();
1112 	list_for_each_entry(net, net_list, exit_list) {
1113 		itn = net_generic(net, id);
1114 		ip_tunnel_destroy(net, itn, &list, ops);
1115 	}
1116 	unregister_netdevice_many(&list);
1117 	rtnl_unlock();
1118 }
1119 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1120 
/* Register @dev as a new tunnel configured from @p and @fwmark.
 *
 * Fails with -EEXIST when the device is a collect_md tunnel and the netns
 * already has one, or, for other tunnels, when ip_tunnel_find() returns a
 * tunnel for @p and the device type.
 *
 * After registration the MTU is taken from ip_tunnel_bind_dev(), unless
 * @tb carries IFLA_MTU, in which case the MTU already present on the
 * device is clamped between ETH_MIN_MTU and
 * IP_MAX_MTU - hard_header_len - hlen - sizeof(struct iphdr).
 *
 * Returns 0 on success or a negative errno.  If setting the MTU fails the
 * device is unregistered again before the error is returned.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn = net_generic(net, nt->ip_tnl_net_id);
	int mtu;
	int err;

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else if (ip_tunnel_find(itn, p, dev->type)) {
		return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;

	err = register_netdevice(dev);
	if (err)
		return err;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int limit = IP_MAX_MTU - dev->hard_header_len - nt->hlen;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
			    (unsigned int)(limit - sizeof(struct iphdr)));
	}

	err = dev_set_mtu(dev, mtu);
	if (err) {
		/* Undo the registration done above. */
		unregister_netdevice(dev);
		return err;
	}

	ip_tunnel_add(itn, nt);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1172 
/* Apply the parameters @p and @fwmark to the existing tunnel device @dev.
 *
 * Returns -EINVAL for the fallback device, -EEXIST when ip_tunnel_find()
 * returns a tunnel for @p that belongs to another device, and -EINVAL
 * when no tunnel is found and, for a non-Ethernet device, the
 * point-to-point/broadcast flags implied by the new destination address
 * differ from the ones currently set on @dev.  Otherwise the tunnel is
 * updated through ip_tunnel_update() and 0 is returned; the MTU flag
 * passed to it is true only when @tb has no IFLA_MTU.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *self = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(self->net,
						self->ip_tnl_net_id);
	struct ip_tunnel *target;

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	target = ip_tunnel_find(itn, p, dev->type);
	if (target && target->dev != dev)
		return -EEXIST;

	if (!target) {
		target = self;

		if (dev->type != ARPHRD_ETHER) {
			const unsigned int mask = IFF_POINTOPOINT | IFF_BROADCAST;
			unsigned int wanted = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				wanted = IFF_BROADCAST;
			else if (p->iph.daddr)
				wanted = IFF_POINTOPOINT;

			/* The link kind of the device must not change. */
			if ((dev->flags ^ wanted) & mask)
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, target, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1210 
ip_tunnel_init(struct net_device * dev)1211 int ip_tunnel_init(struct net_device *dev)
1212 {
1213 	struct ip_tunnel *tunnel = netdev_priv(dev);
1214 	struct iphdr *iph = &tunnel->parms.iph;
1215 	int err;
1216 
1217 	dev->needs_free_netdev = true;
1218 	dev->priv_destructor = ip_tunnel_dev_free;
1219 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1220 	if (!dev->tstats)
1221 		return -ENOMEM;
1222 
1223 	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1224 	if (err) {
1225 		free_percpu(dev->tstats);
1226 		return err;
1227 	}
1228 
1229 	err = gro_cells_init(&tunnel->gro_cells, dev);
1230 	if (err) {
1231 		dst_cache_destroy(&tunnel->dst_cache);
1232 		free_percpu(dev->tstats);
1233 		return err;
1234 	}
1235 
1236 	tunnel->dev = dev;
1237 	tunnel->net = dev_net(dev);
1238 	strcpy(tunnel->parms.name, dev->name);
1239 	iph->version		= 4;
1240 	iph->ihl		= 5;
1241 
1242 	if (tunnel->collect_md)
1243 		netif_keep_dst(dev);
1244 	return 0;
1245 }
1246 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1247 
ip_tunnel_uninit(struct net_device * dev)1248 void ip_tunnel_uninit(struct net_device *dev)
1249 {
1250 	struct ip_tunnel *tunnel = netdev_priv(dev);
1251 	struct net *net = tunnel->net;
1252 	struct ip_tunnel_net *itn;
1253 
1254 	itn = net_generic(net, tunnel->ip_tnl_net_id);
1255 	ip_tunnel_del(itn, netdev_priv(dev));
1256 	if (itn->fb_tunnel_dev == dev)
1257 		WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1258 
1259 	dst_cache_reset(&tunnel->dst_cache);
1260 }
1261 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1262 
1263 /* Do least required initialization, rest of init is done in tunnel_init call */
ip_tunnel_setup(struct net_device * dev,unsigned int net_id)1264 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1265 {
1266 	struct ip_tunnel *tunnel = netdev_priv(dev);
1267 	tunnel->ip_tnl_net_id = net_id;
1268 }
1269 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1270 
1271 MODULE_LICENSE("GPL");
1272