// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_log.h>

#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/route.h>
#include <net/ip.h>

#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>

#include <linux/ipv6.h>
#include <linux/in6.h>
#include <net/ipv6.h>
#include <net/inet_frag.h>

extern unsigned int nf_conntrack_net_id;

static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;

static DEFINE_MUTEX(nf_ct_proto_mutex);

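/*
 * Small wrappers around the per-net sysctl API, compiled in only when
 * CONFIG_SYSCTL is set.  The table passed in is owned by the caller and is
 * freed on the final unregister (see nf_ct_unregister_sysctl() below).
 */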
#ifdef CONFIG_SYSCTL
static int
nf_ct_register_sysctl(struct net *net,
		      struct ctl_table_header **header,
		      const char *path,
		      struct ctl_table *table)
{
	if (*header == NULL) {
		*header = register_net_sysctl(net, path, table);
		if (*header == NULL)
			return -ENOMEM;
	}

	return 0;
}

static void
nf_ct_unregister_sysctl(struct ctl_table_header **header,
			struct ctl_table **table,
			unsigned int users)
{
	if (users > 0)
		return;

	unregister_net_sysctl_table(*header);
	kfree(*table);
	*header = NULL;
	*table = NULL;
}

__printf(5, 6)
void nf_l4proto_log_invalid(const struct sk_buff *skb,
			    struct net *net,
			    u16 pf, u8 protonum,
			    const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	/* Log only if logging is enabled for this protocol or for all
	 * protocols (IPPROTO_RAW acts as the "any protocol" wildcard).
	 */
	if (net->ct.sysctl_log_invalid != protonum &&
	    net->ct.sysctl_log_invalid != IPPROTO_RAW)
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
		      "nf_ct_proto_%d: %pV ", protonum, &vaf);
	va_end(args);
}
EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);

__printf(3, 4)
void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
			       const struct nf_conn *ct,
			       const char *fmt, ...)
{
	struct va_format vaf;
	struct net *net;
	va_list args;

	net = nf_ct_net(ct);
	if (likely(net->ct.sysctl_log_invalid == 0))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
			       nf_ct_protonum(ct), "%pV", &vaf);
	va_end(args);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
#endif

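/*
 * Look up the l4 tracker for an (l3proto, l4proto) pair.  Readers run under
 * RCU; unknown or not-yet-registered protocols fall back to the generic
 * tracker, so callers never see a NULL pointer.
 */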
const struct nf_conntrack_l4proto *
__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
{
	if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
		return &nf_conntrack_l4proto_generic;

	return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
}
EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);

const struct nf_conntrack_l4proto *
nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
{
	const struct nf_conntrack_l4proto *p;

	rcu_read_lock();
	p = __nf_ct_l4proto_find(l3num, l4num);
	if (!try_module_get(p->me))
		p = &nf_conntrack_l4proto_generic;
	rcu_read_unlock();

	return p;
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);

void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
{
	module_put(p->me);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);

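/*
 * Callback for nf_ct_iterate_destroy(): returns true for conntrack entries
 * that belong to the protocol being unregistered so they get removed.
 */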
static int kill_l4proto(struct nf_conn *i, void *data)
{
	const struct nf_conntrack_l4proto *l4proto;

	l4proto = data;
	return nf_ct_protonum(i) == l4proto->l4proto &&
	       nf_ct_l3num(i) == l4proto->l3proto;
}

static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
				const struct nf_conntrack_l4proto *l4proto)
{
	if (l4proto->get_net_proto) {
		/* statically built-in protocols use static per-net */
		return l4proto->get_net_proto(net);
	} else if (l4proto->net_id) {
		/* ... and loadable protocols use dynamic per-net */
		return net_generic(net, *l4proto->net_id);
	}
	return NULL;
}

static
int nf_ct_l4proto_register_sysctl(struct net *net,
				  struct nf_proto_net *pn,
				  const struct nf_conntrack_l4proto *l4proto)
{
	int err = 0;

#ifdef CONFIG_SYSCTL
	if (pn->ctl_table != NULL) {
		err = nf_ct_register_sysctl(net,
					    &pn->ctl_table_header,
					    "net/netfilter",
					    pn->ctl_table);
		if (err < 0) {
			if (!pn->users) {
				kfree(pn->ctl_table);
				pn->ctl_table = NULL;
			}
		}
	}
#endif /* CONFIG_SYSCTL */
	return err;
}

static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
				struct nf_proto_net *pn,
				const struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_SYSCTL
	if (pn->ctl_table_header != NULL)
		nf_ct_unregister_sysctl(&pn->ctl_table_header,
					&pn->ctl_table,
					pn->users);
#endif /* CONFIG_SYSCTL */
}

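/*
 * A minimal usage sketch for a loadable l4 tracker module (the names
 * nf_conntrack_l4proto_foo and nf_conntrack_foo_init are made up for
 * illustration):
 *
 *	static int __init nf_conntrack_foo_init(void)
 *	{
 *		return nf_ct_l4proto_register_one(&nf_conntrack_l4proto_foo);
 *	}
 *
 * and, from its per-netns init callback:
 *
 *	err = nf_ct_l4proto_pernet_register_one(net, &nf_conntrack_l4proto_foo);
 *
 * Teardown happens in reverse via nf_ct_l4proto_unregister_one() and
 * nf_ct_l4proto_pernet_unregister_one().
 */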
/* FIXME: Allow NULL functions and sub in pointers to generic for
   them. --RR */
int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
{
	int ret = 0;

	if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
		return -EBUSY;

	if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
	    (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
		return -EINVAL;

	mutex_lock(&nf_ct_proto_mutex);
	if (!nf_ct_protos[l4proto->l3proto]) {
		/* l3proto may be loaded later. */
		struct nf_conntrack_l4proto __rcu **proto_array;
		int i;

		proto_array =
			kmalloc_array(MAX_NF_CT_PROTO,
				      sizeof(struct nf_conntrack_l4proto *),
				      GFP_KERNEL);
		if (proto_array == NULL) {
			ret = -ENOMEM;
			goto out_unlock;
		}

		for (i = 0; i < MAX_NF_CT_PROTO; i++)
			RCU_INIT_POINTER(proto_array[i],
					 &nf_conntrack_l4proto_generic);

		/* Before making proto_array visible to lockless readers,
		 * we must make sure its content is committed to memory.
		 */
		smp_wmb();

		nf_ct_protos[l4proto->l3proto] = proto_array;
	} else if (rcu_dereference_protected(
			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
			lockdep_is_held(&nf_ct_proto_mutex)
			) != &nf_conntrack_l4proto_generic) {
		ret = -EBUSY;
		goto out_unlock;
	}

	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
			   l4proto);
out_unlock:
	mutex_unlock(&nf_ct_proto_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);

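/*
 * Per-netns part of the registration: run the tracker's init_net() callback
 * (if any), hook up its sysctl table and take a reference on the per-net
 * state so the sysctl table survives until the last user is gone.
 */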
int nf_ct_l4proto_pernet_register_one(struct net *net,
				const struct nf_conntrack_l4proto *l4proto)
{
	int ret = 0;
	struct nf_proto_net *pn = NULL;

	if (l4proto->init_net) {
		ret = l4proto->init_net(net, l4proto->l3proto);
		if (ret < 0)
			goto out;
	}

	pn = nf_ct_l4proto_net(net, l4proto);
	if (pn == NULL)
		goto out;

	ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto);
	if (ret < 0)
		goto out;

	pn->users++;
out:
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);

static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
	BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));

	BUG_ON(rcu_dereference_protected(
			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
			lockdep_is_held(&nf_ct_proto_mutex)
			) != l4proto);
	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
			   &nf_conntrack_l4proto_generic);
}

void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
	mutex_lock(&nf_ct_proto_mutex);
	__nf_ct_l4proto_unregister_one(l4proto);
	mutex_unlock(&nf_ct_proto_mutex);

	synchronize_net();
	/* Remove all conntrack entries for this protocol */
	nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);

void nf_ct_l4proto_pernet_unregister_one(struct net *net,
				const struct nf_conntrack_l4proto *l4proto)
{
	struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);

	if (pn == NULL)
		return;

	pn->users--;
	nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);

static void
nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
			 unsigned int num_proto)
{
	int i;

	mutex_lock(&nf_ct_proto_mutex);
	for (i = 0; i < num_proto; i++)
		__nf_ct_l4proto_unregister_one(l4proto[i]);
	mutex_unlock(&nf_ct_proto_mutex);

	synchronize_net();

	for (i = 0; i < num_proto; i++)
		nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]);
}

static int
nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
		       unsigned int num_proto)
{
	int ret = -EINVAL, ver;
	unsigned int i;

	for (i = 0; i < num_proto; i++) {
		ret = nf_ct_l4proto_register_one(l4proto[i]);
		if (ret < 0)
			break;
	}
	if (i != num_proto) {
		ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
		pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
		       ver, l4proto[i]->l4proto);
		nf_ct_l4proto_unregister(l4proto, i);
	}
	return ret;
}

int nf_ct_l4proto_pernet_register(struct net *net,
				  const struct nf_conntrack_l4proto *const l4proto[],
				  unsigned int num_proto)
{
	int ret = -EINVAL;
	unsigned int i;

	for (i = 0; i < num_proto; i++) {
		ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]);
		if (ret < 0)
			break;
	}
	if (i != num_proto) {
		pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
		       l4proto[i]->l4proto,
		       l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
		nf_ct_l4proto_pernet_unregister(net, l4proto, i);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);

void nf_ct_l4proto_pernet_unregister(struct net *net,
				const struct nf_conntrack_l4proto *const l4proto[],
				unsigned int num_proto)
{
	while (num_proto-- != 0)
		nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);

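/*
 * Netfilter hook callbacks shared by the IPv4 conntrack hooks below: helpers
 * run late (LOCAL_IN / POST_ROUTING), and confirmation of a new conntrack
 * entry happens last, once the packet has passed all other hooks.
 */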
static unsigned int ipv4_helper(void *priv,
				struct sk_buff *skb,
				const struct nf_hook_state *state)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	const struct nf_conn_help *help;
	const struct nf_conntrack_helper *helper;

	/* This is where we call the helper: as the packet goes out. */
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
		return NF_ACCEPT;

	help = nfct_help(ct);
	if (!help)
		return NF_ACCEPT;

	/* rcu_read_lock()ed by nf_hook_thresh */
	helper = rcu_dereference(help->helper);
	if (!helper)
		return NF_ACCEPT;

	return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
			    ct, ctinfo);
}

static unsigned int ipv4_confirm(void *priv,
				 struct sk_buff *skb,
				 const struct nf_hook_state *state)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
		goto out;

	/* adjust seqs for loopback traffic only in outgoing direction */
	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
	    !nf_is_loopback_packet(skb)) {
		if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
			return NF_DROP;
		}
	}
out:
	/* We've seen it coming out the other side: confirm it */
	return nf_conntrack_confirm(skb);
}

static unsigned int ipv4_conntrack_in(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}

static unsigned int ipv4_conntrack_local(void *priv,
					 struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
		enum ip_conntrack_info ctinfo;
		struct nf_conn *tmpl;

		tmpl = nf_ct_get(skb, &ctinfo);
		if (tmpl && nf_ct_is_template(tmpl)) {
			/* when skipping ct, clear templates to avoid fooling
			 * later targets/matches
			 */
			skb->_nfct = 0;
			nf_ct_put(tmpl);
		}
		return NF_ACCEPT;
	}

	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}

/* Connection tracking may drop packets, but never alters them, so
 * make it the first hook.
 */
static const struct nf_hook_ops ipv4_conntrack_ops[] = {
	{
		.hook		= ipv4_conntrack_in,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		.hook		= ipv4_conntrack_local,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		.hook		= ipv4_helper,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
	},
	{
		.hook		= ipv4_confirm,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
	{
		.hook		= ipv4_helper,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
	},
	{
		.hook		= ipv4_confirm,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};

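/*
 * Userspace sketch (illustrative, not part of this file): a transparent
 * proxy that has accepted a REDIRECTed TCP connection can recover the
 * original destination with
 *
 *	struct sockaddr_in dst;
 *	socklen_t len = sizeof(dst);
 *
 *	if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &dst, &len) == 0)
 *		connect_upstream(&dst);	// connect_upstream() is made up
 *
 * which ends up in getorigdst() below.
 */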
/* Fast function for those who don't want to parse /proc (and I don't
 * blame them).
 * Reversing the socket's dst/src point of view gives us the reply
 * mapping.
 */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct nf_conntrack_tuple_hash *h;
	struct nf_conntrack_tuple tuple;

	memset(&tuple, 0, sizeof(tuple));

	lock_sock(sk);
	tuple.src.u3.ip = inet->inet_rcv_saddr;
	tuple.src.u.tcp.port = inet->inet_sport;
	tuple.dst.u3.ip = inet->inet_daddr;
	tuple.dst.u.tcp.port = inet->inet_dport;
	tuple.src.l3num = PF_INET;
	tuple.dst.protonum = sk->sk_protocol;
	release_sock(sk);

	/* We only do TCP and SCTP at the moment: is there a better way? */
	if (tuple.dst.protonum != IPPROTO_TCP &&
	    tuple.dst.protonum != IPPROTO_SCTP) {
		pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
		return -ENOPROTOOPT;
	}

	if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
		pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
			 *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
	if (h) {
		struct sockaddr_in sin;
		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);

		sin.sin_family = AF_INET;
		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.u.tcp.port;
		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.u3.ip;
		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

		pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
			 &sin.sin_addr.s_addr, ntohs(sin.sin_port));
		nf_ct_put(ct);
		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
			return -EFAULT;
		else
			return 0;
	}
	pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
		 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
		 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
	return -ENOENT;
}

static struct nf_sockopt_ops so_getorigdst = {
	.pf		= PF_INET,
	.get_optmin	= SO_ORIGINAL_DST,
	.get_optmax	= SO_ORIGINAL_DST + 1,
	.get		= getorigdst,
	.owner		= THIS_MODULE,
};

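/*
 * IPv6 counterpart of the lookup above: userspace queries it with
 * getsockopt(fd, SOL_IPV6, IP6T_SO_ORIGINAL_DST, ...) and gets a
 * struct sockaddr_in6 back.
 */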
#if IS_ENABLED(CONFIG_IPV6)
static int
ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
	const struct ipv6_pinfo *inet6 = inet6_sk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct nf_conntrack_tuple_hash *h;
	struct sockaddr_in6 sin6;
	struct nf_conn *ct;
	__be32 flow_label;
	int bound_dev_if;

	lock_sock(sk);
	tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
	tuple.src.u.tcp.port = inet->inet_sport;
	tuple.dst.u3.in6 = sk->sk_v6_daddr;
	tuple.dst.u.tcp.port = inet->inet_dport;
	tuple.dst.protonum = sk->sk_protocol;
	bound_dev_if = sk->sk_bound_dev_if;
	flow_label = inet6->flow_label;
	release_sock(sk);

	if (tuple.dst.protonum != IPPROTO_TCP &&
	    tuple.dst.protonum != IPPROTO_SCTP)
		return -ENOPROTOOPT;

	if (*len < 0 || (unsigned int)*len < sizeof(sin6))
		return -EINVAL;

	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
	if (!h) {
		pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
			 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
			 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
		return -ENOENT;
	}

	ct = nf_ct_tuplehash_to_ctrack(h);

	sin6.sin6_family = AF_INET6;
	sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
	sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
	memcpy(&sin6.sin6_addr,
	       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
	       sizeof(sin6.sin6_addr));

	nf_ct_put(ct);
	sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
	return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
}

static struct nf_sockopt_ops so_getorigdst6 = {
	.pf		= NFPROTO_IPV6,
	.get_optmin	= IP6T_SO_ORIGINAL_DST,
	.get_optmax	= IP6T_SO_ORIGINAL_DST + 1,
	.get		= ipv6_getorigdst,
	.owner		= THIS_MODULE,
};

static unsigned int ipv6_confirm(void *priv,
				 struct sk_buff *skb,
				 const struct nf_hook_state *state)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
	int protoff;
	__be16 frag_off;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
		goto out;

	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
				   &frag_off);
	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
		pr_debug("proto header not found\n");
		goto out;
	}

	/* adjust seqs for loopback traffic only in outgoing direction */
	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
	    !nf_is_loopback_packet(skb)) {
		if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
			return NF_DROP;
		}
	}
out:
	/* We've seen it coming out the other side: confirm it */
	return nf_conntrack_confirm(skb);
}

static unsigned int ipv6_conntrack_in(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}

static unsigned int ipv6_conntrack_local(void *priv,
					 struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}

static unsigned int ipv6_helper(void *priv,
				struct sk_buff *skb,
				const struct nf_hook_state *state)
{
	struct nf_conn *ct;
	const struct nf_conn_help *help;
	const struct nf_conntrack_helper *helper;
	enum ip_conntrack_info ctinfo;
	__be16 frag_off;
	int protoff;
	u8 nexthdr;

	/* This is where we call the helper: as the packet goes out. */
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
		return NF_ACCEPT;

	help = nfct_help(ct);
	if (!help)
		return NF_ACCEPT;
	/* rcu_read_lock()ed by nf_hook_thresh */
	helper = rcu_dereference(help->helper);
	if (!helper)
		return NF_ACCEPT;

	nexthdr = ipv6_hdr(skb)->nexthdr;
	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
				   &frag_off);
	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
		pr_debug("proto header not found\n");
		return NF_ACCEPT;
	}

	return helper->help(skb, protoff, ct, ctinfo);
}

static const struct nf_hook_ops ipv6_conntrack_ops[] = {
	{
		.hook		= ipv6_conntrack_in,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP6_PRI_CONNTRACK,
	},
	{
		.hook		= ipv6_conntrack_local,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP6_PRI_CONNTRACK,
	},
	{
		.hook		= ipv6_helper,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
	},
	{
		.hook		= ipv6_confirm,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP6_PRI_LAST,
	},
	{
		.hook		= ipv6_helper,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
	},
	{
		.hook		= ipv6_confirm,
		.pf		= NFPROTO_IPV6,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP6_PRI_LAST - 1,
	},
};
#endif

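/*
 * Called via nf_ct_iterate_cleanup_net() right after the conntrack hooks for
 * a family have been registered: resetting td_maxwin on already-established
 * TCP flows relaxes window tracking for entries whose state may have gone
 * stale while no hooks were in place.
 */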
static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
{
	u8 nfproto = (unsigned long)_nfproto;

	if (nf_ct_l3num(ct) != nfproto)
		return 0;

	if (nf_ct_protonum(ct) == IPPROTO_TCP &&
	    ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED) {
		ct->proto.tcp.seen[0].td_maxwin = 0;
		ct->proto.tcp.seen[1].td_maxwin = 0;
	}

	return 0;
}

static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
	bool fixup_needed = false;
	int err = 0;

	mutex_lock(&nf_ct_proto_mutex);

	switch (nfproto) {
	case NFPROTO_IPV4:
		cnet->users4++;
		if (cnet->users4 > 1)
			goto out_unlock;
		err = nf_defrag_ipv4_enable(net);
		if (err) {
			cnet->users4 = 0;
			goto out_unlock;
		}

		err = nf_register_net_hooks(net, ipv4_conntrack_ops,
					    ARRAY_SIZE(ipv4_conntrack_ops));
		if (err)
			cnet->users4 = 0;
		else
			fixup_needed = true;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case NFPROTO_IPV6:
		cnet->users6++;
		if (cnet->users6 > 1)
			goto out_unlock;
		err = nf_defrag_ipv6_enable(net);
		if (err < 0) {
			cnet->users6 = 0;
			goto out_unlock;
		}

		err = nf_register_net_hooks(net, ipv6_conntrack_ops,
					    ARRAY_SIZE(ipv6_conntrack_ops));
		if (err)
			cnet->users6 = 0;
		else
			fixup_needed = true;
		break;
#endif
	default:
		err = -EPROTO;
		break;
	}
 out_unlock:
	mutex_unlock(&nf_ct_proto_mutex);

	if (fixup_needed)
		nf_ct_iterate_cleanup_net(net, nf_ct_tcp_fixup,
					  (void *)(unsigned long)nfproto, 0, 0);

	return err;
}

static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
{
	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);

	mutex_lock(&nf_ct_proto_mutex);
	switch (nfproto) {
	case NFPROTO_IPV4:
		if (cnet->users4 && (--cnet->users4 == 0))
			nf_unregister_net_hooks(net, ipv4_conntrack_ops,
						ARRAY_SIZE(ipv4_conntrack_ops));
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case NFPROTO_IPV6:
		if (cnet->users6 && (--cnet->users6 == 0))
			nf_unregister_net_hooks(net, ipv6_conntrack_ops,
						ARRAY_SIZE(ipv6_conntrack_ops));
		break;
#endif
	}

	mutex_unlock(&nf_ct_proto_mutex);
}

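/*
 * Usage sketch (illustrative): users of conntrack, e.g. an iptables match or
 * target, pin the hooks for a family from their checkentry path with
 *
 *	err = nf_ct_netns_get(par->net, par->family);
 *
 * and release them again with nf_ct_netns_put() on destroy.  NFPROTO_INET is
 * expanded into both the IPv4 and IPv6 families.
 */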
int nf_ct_netns_get(struct net *net, u8 nfproto)
{
	int err;

	if (nfproto == NFPROTO_INET) {
		err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
		if (err < 0)
			goto err1;
		err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
		if (err < 0)
			goto err2;
	} else {
		err = nf_ct_netns_do_get(net, nfproto);
		if (err < 0)
			goto err1;
	}
	return 0;

err2:
	nf_ct_netns_put(net, NFPROTO_IPV4);
err1:
	return err;
}
EXPORT_SYMBOL_GPL(nf_ct_netns_get);

void nf_ct_netns_put(struct net *net, uint8_t nfproto)
{
	if (nfproto == NFPROTO_INET) {
		nf_ct_netns_do_put(net, NFPROTO_IPV4);
		nf_ct_netns_do_put(net, NFPROTO_IPV6);
	} else {
		nf_ct_netns_do_put(net, nfproto);
	}
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);

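/*
 * Trackers that are built into nf_conntrack itself; DCCP, SCTP and UDP-lite
 * are optional and only included when the corresponding Kconfig option is
 * enabled.  Loadable trackers (e.g. GRE) register themselves separately via
 * nf_ct_l4proto_register_one().
 */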
static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
	&nf_conntrack_l4proto_tcp4,
	&nf_conntrack_l4proto_udp4,
	&nf_conntrack_l4proto_icmp,
#ifdef CONFIG_NF_CT_PROTO_DCCP
	&nf_conntrack_l4proto_dccp4,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
	&nf_conntrack_l4proto_sctp4,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
	&nf_conntrack_l4proto_udplite4,
#endif
#if IS_ENABLED(CONFIG_IPV6)
	&nf_conntrack_l4proto_tcp6,
	&nf_conntrack_l4proto_udp6,
	&nf_conntrack_l4proto_icmpv6,
#ifdef CONFIG_NF_CT_PROTO_DCCP
	&nf_conntrack_l4proto_dccp6,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
	&nf_conntrack_l4proto_sctp6,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
	&nf_conntrack_l4proto_udplite6,
#endif
#endif /* CONFIG_IPV6 */
};

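/*
 * Module-wide init: register the SO_ORIGINAL_DST sockopt handlers first,
 * then the built-in l4 trackers; on failure everything registered so far is
 * rolled back in reverse order.
 */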
int nf_conntrack_proto_init(void)
{
	int ret = 0;

	ret = nf_register_sockopt(&so_getorigdst);
	if (ret < 0)
		return ret;

#if IS_ENABLED(CONFIG_IPV6)
	ret = nf_register_sockopt(&so_getorigdst6);
	if (ret < 0)
		goto cleanup_sockopt;
#endif
	ret = nf_ct_l4proto_register(builtin_l4proto,
				     ARRAY_SIZE(builtin_l4proto));
	if (ret < 0)
		goto cleanup_sockopt2;

	return ret;
cleanup_sockopt2:
#if IS_ENABLED(CONFIG_IPV6)
	nf_unregister_sockopt(&so_getorigdst6);
cleanup_sockopt:
#endif
	nf_unregister_sockopt(&so_getorigdst);
	return ret;
}

void nf_conntrack_proto_fini(void)
{
	unsigned int i;

	nf_unregister_sockopt(&so_getorigdst);
#if IS_ENABLED(CONFIG_IPV6)
	nf_unregister_sockopt(&so_getorigdst6);
#endif
	/* No need to call nf_ct_l4proto_unregister(), the register
	 * tables are freed here anyway.
	 */
	for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
		kfree(nf_ct_protos[i]);
}

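/*
 * Per-netns init: set up the generic tracker's per-net data and sysctls
 * before the built-in trackers, since the generic tracker is the fallback
 * that every lookup may return.
 */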
int nf_conntrack_proto_pernet_init(struct net *net)
{
	int err;
	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
					&nf_conntrack_l4proto_generic);

	err = nf_conntrack_l4proto_generic.init_net(net,
					nf_conntrack_l4proto_generic.l3proto);
	if (err < 0)
		return err;
	err = nf_ct_l4proto_register_sysctl(net,
					    pn,
					    &nf_conntrack_l4proto_generic);
	if (err < 0)
		return err;

	err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
					    ARRAY_SIZE(builtin_l4proto));
	if (err < 0) {
		nf_ct_l4proto_unregister_sysctl(net, pn,
						&nf_conntrack_l4proto_generic);
		return err;
	}

	pn->users++;
	return 0;
}

void nf_conntrack_proto_pernet_fini(struct net *net)
{
	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
					&nf_conntrack_l4proto_generic);

	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
					ARRAY_SIZE(builtin_l4proto));
	pn->users--;
	nf_ct_l4proto_unregister_sysctl(net,
					pn,
					&nf_conntrack_l4proto_generic);
}

module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);

MODULE_ALIAS("ip_conntrack");
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
MODULE_LICENSE("GPL");