/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"
#include "hash_func01.h"

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
} cpu_map SEC(".maps");
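/* Note: struct bpf_cpumap_val carries the cpumap kthread queue size and,
 * optionally, a second-level XDP program to run on the remote CPU.  The map
 * is left unsized here, so user space is expected to set max_entries (and
 * fill in the per-CPU values) before load.
 */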

/* Set of maps controlling available CPUs, and for iterating through
 * selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
} cpus_available SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");

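/* Device map used by the second-level (cpumap) redirect program below to
 * forward the frame out of an egress interface configured by user space.
 */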
struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(struct bpf_devmap_val));
	__uint(max_entries, 1);
} tx_port SEC(".maps");

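/* Source MAC used by the devmap egress program below; expected to be filled
 * in by user space (e.g. with the egress device's MAC address).
 */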
char tx_mac_addr[ETH_ALEN];

/* Helper parse functions */

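/* parse_eth(): bounds-check the Ethernet header, skip up to two VLAN tags,
 * and hand back the ethertype (host byte order) plus the L3 header offset.
 */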
static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = bpf_ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = bpf_ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

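/* Program 0: redirect every frame to the first CPU listed in cpus_available
 * without reading any packet data (baseline cost of the cpumap redirect).
 */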
SEC("xdp")
int  xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

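/* Program 1: same single-CPU redirect as above, but read the ethertype
 * first, so the cost of touching packet data shows up in the comparison.
 * Frames with non-802.3 ethertypes are dropped.
 */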
SEC("xdp")
int  xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;
	u16 eth_type;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
		NO_TEAR_INC(rec->dropped);
		return XDP_DROP;
	}

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

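/* Program 2: spread frames round-robin over the cpus_available array.
 * The iterator lives in a per-CPU array, so each RX CPU cycles through
 * the destination CPUs independently.
 */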
SEC("xdp")
int  xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

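/* Program 3: steer by L4 protocol so each traffic class lands on its own
 * CPU: TCP -> cpus_available[0], UDP -> [1], ICMP/ICMPv6 -> [2],
 * ARP and everything else -> [0].
 */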
SEC("xdp")
int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

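/* Program 4: same protocol split as above, plus a crude DDoS filter that
 * drops UDP packets to destination port 9 (the pktgen/discard port).
 */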
SEC("xdp")
int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;
	u16 dest_port;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			NO_TEAR_INC(rec->dropped);
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

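/* The two helpers below hash the IP source/destination pair with
 * SuperFastHash (hash_func01.h), seeded with INITVAL plus the L4 protocol.
 * Because the addresses are summed before hashing, the result is symmetric:
 * both directions of a flow map to the same CPU.
 */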
static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash  = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hits
 * the same CPU.
 */
SEC("xdp")
int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key0 = 0;
	u32 *cpu_max;
	u32 cpu_hash;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

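/* Second-level (cpumap) programs: these run on the remote CPU when the
 * cpumap kthread dequeues a frame, and are attached from user space via
 * bpf_cpumap_val.bpf_prog.  "redirect" swaps the MAC addresses and forwards
 * the frame out of tx_port; "pass" and "drop" just return that verdict.
 */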
SEC("xdp_cpumap/redirect")
int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	swap_src_dst_mac(data);
	return bpf_redirect_map(&tx_port, 0, 0);
}

SEC("xdp_cpumap/pass")
int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
	return XDP_PASS;
}

SEC("xdp_cpumap/drop")
int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
	return XDP_DROP;
}

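/* Devmap egress program: runs on the egress device just before transmit and
 * rewrites the Ethernet source MAC with the tx_mac_addr supplied by
 * user space.
 */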
SEC("xdp_devmap/egress")
int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	__builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";