1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <linux/types.h>
3 #include <linux/ip.h>
4 #include <linux/netfilter.h>
5 #include <linux/netfilter_ipv6.h>
6 #include <linux/netfilter_bridge.h>
7 #include <linux/module.h>
8 #include <linux/skbuff.h>
9 #include <linux/icmp.h>
10 #include <linux/sysctl.h>
11 #include <net/route.h>
12 #include <net/ip.h>
13 
14 #include <net/netfilter/nf_conntrack.h>
15 #include <net/netfilter/nf_conntrack_core.h>
16 #include <net/netfilter/nf_conntrack_helper.h>
17 #include <net/netfilter/nf_conntrack_bridge.h>
18 
19 #include <linux/netfilter/nf_tables.h>
20 #include <net/netfilter/nf_tables.h>
21 
22 #include "../br_private.h"
23 
24 /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
25  * has been linearized or cloned.
26  */
nf_br_ip_fragment(struct net * net,struct sock * sk,struct sk_buff * skb,struct nf_bridge_frag_data * data,int (* output)(struct net *,struct sock * sk,const struct nf_bridge_frag_data * data,struct sk_buff *))27 static int nf_br_ip_fragment(struct net *net, struct sock *sk,
28 			     struct sk_buff *skb,
29 			     struct nf_bridge_frag_data *data,
30 			     int (*output)(struct net *, struct sock *sk,
31 					   const struct nf_bridge_frag_data *data,
32 					   struct sk_buff *))
33 {
34 	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
35 	unsigned int hlen, ll_rs, mtu;
36 	ktime_t tstamp = skb->tstamp;
37 	struct ip_frag_state state;
38 	struct iphdr *iph;
39 	int err;
40 
41 	/* for offloaded checksums cleanup checksum before fragmentation */
42 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
43 	    (err = skb_checksum_help(skb)))
44 		goto blackhole;
45 
46 	iph = ip_hdr(skb);
47 
48 	/*
49 	 *	Setup starting values
50 	 */
51 
52 	hlen = iph->ihl * 4;
53 	frag_max_size -= hlen;
54 	ll_rs = LL_RESERVED_SPACE(skb->dev);
55 	mtu = skb->dev->mtu;
56 
57 	if (skb_has_frag_list(skb)) {
58 		unsigned int first_len = skb_pagelen(skb);
59 		struct ip_fraglist_iter iter;
60 		struct sk_buff *frag;
61 
62 		if (first_len - hlen > mtu ||
63 		    skb_headroom(skb) < ll_rs)
64 			goto blackhole;
65 
66 		if (skb_cloned(skb))
67 			goto slow_path;
68 
69 		skb_walk_frags(skb, frag) {
70 			if (frag->len > mtu ||
71 			    skb_headroom(frag) < hlen + ll_rs)
72 				goto blackhole;
73 
74 			if (skb_shared(frag))
75 				goto slow_path;
76 		}
77 
78 		ip_fraglist_init(skb, iph, hlen, &iter);
79 
80 		for (;;) {
81 			if (iter.frag)
82 				ip_fraglist_prepare(skb, &iter);
83 
84 			skb->tstamp = tstamp;
85 			err = output(net, sk, data, skb);
86 			if (err || !iter.frag)
87 				break;
88 
89 			skb = ip_fraglist_next(&iter);
90 		}
91 		return err;
92 	}
93 slow_path:
94 	/* This is a linearized skbuff, the original geometry is lost for us.
95 	 * This may also be a clone skbuff, we could preserve the geometry for
96 	 * the copies but probably not worth the effort.
97 	 */
98 	ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);
99 
100 	while (state.left > 0) {
101 		struct sk_buff *skb2;
102 
103 		skb2 = ip_frag_next(skb, &state);
104 		if (IS_ERR(skb2)) {
105 			err = PTR_ERR(skb2);
106 			goto blackhole;
107 		}
108 
109 		skb2->tstamp = tstamp;
110 		err = output(net, sk, data, skb2);
111 		if (err)
112 			goto blackhole;
113 	}
114 	consume_skb(skb);
115 	return err;
116 
117 blackhole:
118 	kfree_skb(skb);
119 	return 0;
120 }
121 
122 /* ip_defrag() expects IPCB() in place. */
static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
			   size_t inet_skb_parm_size)
{
	unsigned char *cb_area = skb->cb;

	/* Stash the bridge control block, then zero the area that
	 * ip_defrag()/nf_ipv6_br_defrag() will use as IPCB()/IP6CB().
	 */
	memcpy(cb, cb_area, sizeof(struct br_input_skb_cb));
	memset(cb_area, 0, inet_skb_parm_size);
}
129 
static void br_skb_cb_restore(struct sk_buff *skb,
			      const struct br_input_skb_cb *cb,
			      u16 fragsz)
{
	/* Put the saved bridge control block back and record the largest
	 * fragment size observed during reassembly for later refragmentation.
	 */
	memcpy(skb->cb, cb, sizeof(struct br_input_skb_cb));
	BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
}
137 
nf_ct_br_defrag4(struct sk_buff * skb,const struct nf_hook_state * state)138 static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
139 				     const struct nf_hook_state *state)
140 {
141 	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
142 	enum ip_conntrack_info ctinfo;
143 	struct br_input_skb_cb cb;
144 	const struct nf_conn *ct;
145 	int err;
146 
147 	if (!ip_is_fragment(ip_hdr(skb)))
148 		return NF_ACCEPT;
149 
150 	ct = nf_ct_get(skb, &ctinfo);
151 	if (ct)
152 		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
153 
154 	br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
155 	local_bh_disable();
156 	err = ip_defrag(state->net, skb,
157 			IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
158 	local_bh_enable();
159 	if (!err) {
160 		br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
161 		skb->ignore_df = 1;
162 		return NF_ACCEPT;
163 	}
164 
165 	return NF_STOLEN;
166 }
167 
nf_ct_br_defrag6(struct sk_buff * skb,const struct nf_hook_state * state)168 static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
169 				     const struct nf_hook_state *state)
170 {
171 	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
172 	enum ip_conntrack_info ctinfo;
173 	struct br_input_skb_cb cb;
174 	const struct nf_conn *ct;
175 	int err;
176 
177 	ct = nf_ct_get(skb, &ctinfo);
178 	if (ct)
179 		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
180 
181 	br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));
182 
183 	err = nf_ipv6_br_defrag(state->net, skb,
184 				IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
185 	/* queued */
186 	if (err == -EINPROGRESS)
187 		return NF_STOLEN;
188 
189 	br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
190 	return err == 0 ? NF_ACCEPT : NF_DROP;
191 }
192 
nf_ct_br_ip_check(const struct sk_buff * skb)193 static int nf_ct_br_ip_check(const struct sk_buff *skb)
194 {
195 	const struct iphdr *iph;
196 	int nhoff, len;
197 
198 	nhoff = skb_network_offset(skb);
199 	iph = ip_hdr(skb);
200 	if (iph->ihl < 5 ||
201 	    iph->version != 4)
202 		return -1;
203 
204 	len = ntohs(iph->tot_len);
205 	if (skb->len < nhoff + len ||
206 	    len < (iph->ihl * 4))
207                 return -1;
208 
209 	return 0;
210 }
211 
nf_ct_br_ipv6_check(const struct sk_buff * skb)212 static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
213 {
214 	const struct ipv6hdr *hdr;
215 	int nhoff, len;
216 
217 	nhoff = skb_network_offset(skb);
218 	hdr = ipv6_hdr(skb);
219 	if (hdr->version != 6)
220 		return -1;
221 
222 	len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
223 	if (skb->len < len)
224 		return -1;
225 
226 	return 0;
227 }
228 
/* PRE_ROUTING bridge hook: validate and defragment IPv4/IPv6 packets, then
 * hand them to the conntrack core with the hook state's pf switched to the
 * matching L3 protocol family.
 */
static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nf_hook_state bridge_state = *state;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u32 len;
	int ret;

	ct = nf_ct_get(skb, &ctinfo);
	/* Already tracked (and not merely a conntrack template), or
	 * explicitly marked untracked: nothing for us to do.
	 */
	if ((ct && !nf_ct_is_template(ct)) ||
	    ctinfo == IP_CT_UNTRACKED)
		return NF_ACCEPT;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			return NF_ACCEPT;

		/* Trim link-layer padding beyond the IP total length. */
		len = ntohs(ip_hdr(skb)->tot_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ip_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV4;
		ret = nf_ct_br_defrag4(skb, &bridge_state);
		break;
	case htons(ETH_P_IPV6):
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			return NF_ACCEPT;

		/* Trim link-layer padding beyond header + payload length. */
		len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ipv6_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV6;
		ret = nf_ct_br_defrag6(skb, &bridge_state);
		break;
	default:
		/* Non-IP traffic: mark untracked so we skip it next time. */
		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
		return NF_ACCEPT;
	}

	/* Defrag stole (queued) or dropped the packet. */
	if (ret != NF_ACCEPT)
		return ret;

	return nf_conntrack_in(skb, &bridge_state);
}
282 
nf_ct_bridge_frag_save(struct sk_buff * skb,struct nf_bridge_frag_data * data)283 static void nf_ct_bridge_frag_save(struct sk_buff *skb,
284 				   struct nf_bridge_frag_data *data)
285 {
286 	if (skb_vlan_tag_present(skb)) {
287 		data->vlan_present = true;
288 		data->vlan_tci = skb->vlan_tci;
289 		data->vlan_proto = skb->vlan_proto;
290 	} else {
291 		data->vlan_present = false;
292 	}
293 	skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
294 }
295 
296 static unsigned int
nf_ct_bridge_refrag(struct sk_buff * skb,const struct nf_hook_state * state,int (* output)(struct net *,struct sock * sk,const struct nf_bridge_frag_data * data,struct sk_buff *))297 nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
298 		    int (*output)(struct net *, struct sock *sk,
299 				  const struct nf_bridge_frag_data *data,
300 				  struct sk_buff *))
301 {
302 	struct nf_bridge_frag_data data;
303 
304 	if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
305 		return NF_ACCEPT;
306 
307 	nf_ct_bridge_frag_save(skb, &data);
308 	switch (skb->protocol) {
309 	case htons(ETH_P_IP):
310 		nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
311 		break;
312 	case htons(ETH_P_IPV6):
313 		nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
314 		break;
315 	default:
316 		WARN_ON_ONCE(1);
317 		return NF_DROP;
318 	}
319 
320 	return NF_STOLEN;
321 }
322 
323 /* Actually only slow path refragmentation needs this. */
nf_ct_bridge_frag_restore(struct sk_buff * skb,const struct nf_bridge_frag_data * data)324 static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
325 				     const struct nf_bridge_frag_data *data)
326 {
327 	int err;
328 
329 	err = skb_cow_head(skb, ETH_HLEN);
330 	if (err) {
331 		kfree_skb(skb);
332 		return -ENOMEM;
333 	}
334 	if (data->vlan_present)
335 		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
336 	else if (skb_vlan_tag_present(skb))
337 		__vlan_hwaccel_clear_tag(skb);
338 
339 	skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
340 	skb_reset_mac_header(skb);
341 
342 	return 0;
343 }
344 
/* Per-fragment output callback: restore the L2 header, then push the
 * fragment out through the bridge transmit path.
 */
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int ret = nf_ct_bridge_frag_restore(skb, data);

	if (ret < 0)
		return ret;

	return br_dev_queue_push_xmit(net, sk, skb);
}
357 
nf_ct_bridge_confirm(struct sk_buff * skb)358 static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
359 {
360 	enum ip_conntrack_info ctinfo;
361 	struct nf_conn *ct;
362 	int protoff;
363 
364 	ct = nf_ct_get(skb, &ctinfo);
365 	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
366 		return nf_conntrack_confirm(skb);
367 
368 	switch (skb->protocol) {
369 	case htons(ETH_P_IP):
370 		protoff = skb_network_offset(skb) + ip_hdrlen(skb);
371 		break;
372 	case htons(ETH_P_IPV6): {
373 		 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
374 		__be16 frag_off;
375 
376 		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
377 					   &frag_off);
378 		if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
379 			return nf_conntrack_confirm(skb);
380 		}
381 		break;
382 	default:
383 		return NF_ACCEPT;
384 	}
385 	return nf_confirm(skb, protoff, ct, ctinfo);
386 }
387 
nf_ct_bridge_post(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)388 static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
389 				      const struct nf_hook_state *state)
390 {
391 	int ret;
392 
393 	ret = nf_ct_bridge_confirm(skb);
394 	if (ret != NF_ACCEPT)
395 		return ret;
396 
397 	return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
398 }
399 
/* Bridge family hooks: defragment + track at PRE_ROUTING, confirm +
 * refragment at POST_ROUTING, using the standard conntrack priorities.
 */
static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
	{
		.hook		= nf_ct_bridge_pre,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		.hook		= nf_ct_bridge_post,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};
414 
/* Registration descriptor handed to the conntrack core so it can install
 * our hooks on demand and pin this module while in use.
 */
static struct nf_ct_bridge_info bridge_info = {
	.ops		= nf_ct_bridge_hook_ops,
	.ops_size	= ARRAY_SIZE(nf_ct_bridge_hook_ops),
	.me		= THIS_MODULE,
};
420 
/* Module init: hand our hook set to the conntrack core. */
static int __init nf_conntrack_l3proto_bridge_init(void)
{
	nf_ct_bridge_register(&bridge_info);

	return 0;
}
427 
/* Module exit: withdraw our hook set from the conntrack core. */
static void __exit nf_conntrack_l3proto_bridge_fini(void)
{
	nf_ct_bridge_unregister(&bridge_info);
}
432 
module_init(nf_conntrack_l3proto_bridge_init);
module_exit(nf_conntrack_l3proto_bridge_fini);

/* Allow auto-loading via "nf_conntrack-7" (AF_BRIDGE) lookups. */
MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
MODULE_LICENSE("GPL");
438