/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"
#include "hash_func01.h"

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
} cpu_map SEC(".maps");
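
/* For reference, a sketch of the cpumap value layout (from the bpf UAPI
 * this sample builds against):
 *
 *	struct bpf_cpumap_val {
 *		__u32 qsize;		// kthread queue size on the remote CPU
 *		union {
 *			int   fd;	// prog fd on map write (-1 = none)
 *			__u32 id;	// prog id on map read
 *		} bpf_prog;
 *	};
 *
 * max_entries is intentionally left unset above; userspace sizes this map
 * (and cpus_available below) before load.
 */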

/* Set of maps controlling the available CPUs, and for iterating through
 * selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
} cpus_available SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");

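/* Per-CPU iterator state for the round-robin program below; a PERCPU_ARRAY
 * lets each CPU advance its own cursor without atomics or cache-line
 * bouncing (so the rotation is per-RX-CPU, not globally strict round-robin).
 */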
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(struct bpf_devmap_val));
	__uint(max_entries, 1);
} tx_port SEC(".maps");
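
/* A minimal userspace sketch (an assumption about the loader side, using
 * plain libbpf calls; names like skel/n_cpus/prog_fd and the qsize value
 * are illustrative, not part of this file) of how these maps might be
 * sized and populated:
 *
 *	bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus);
 *	bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus);
 *	// ... then, after loading the skeleton:
 *	struct bpf_cpumap_val val = { .qsize = 2048, .bpf_prog.fd = prog_fd };
 *	for (u32 i = 0; i < n_cpus; i++) {
 *		bpf_map_update_elem(cpu_map_fd, &i, &val, BPF_ANY);
 *		bpf_map_update_elem(cpus_available_fd, &i, &i, BPF_ANY);
 *	}
 *	u32 key0 = 0;
 *	bpf_map_update_elem(cpus_count_fd, &key0, &n_cpus, BPF_ANY);
 */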

char tx_mac_addr[ETH_ALEN];

/* Helper parse functions */

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = bpf_ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (iph->protocol != IPPROTO_UDP)
		return 0;

	/* Assumes no IP options: UDP header follows the 20-byte IP header */
	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = bpf_ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;
	u16 eth_type;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
		NO_TEAR_INC(rec->dropped);
		return XDP_DROP;
	}

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!parse_eth(eth, data_end, &eth_proto, &l3_offset))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;
	u16 dest_port;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!parse_eth(eth, data_end, &eth_proto, &l3_offset))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			NO_TEAR_INC(rec->dropped);
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval (an arbitrary prime; 15485863 is the 1,000,000th prime) */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash  = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The hashing
 * scheme is symmetric: the hash input is the sum of the addresses, and
 * addition commutes, so swapping IP src/dst still hits the same CPU.
 */
SEC("xdp")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key0 = 0;
	u32 *cpu_max;
	u32 cpu_hash;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!parse_eth(eth, data_end, &eth_proto, &l3_offset))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

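/* Second-stage programs: when the bpf_cpumap_val written into cpu_map
 * carries a valid bpf_prog.fd, the cpumap kthread on the destination CPU
 * runs that XDP program on each redirected frame. The attach wiring lives
 * on the userspace side of this sample, not shown here.
 */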
SEC("xdp_cpumap/redirect")
int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	swap_src_dst_mac(data);
	return bpf_redirect_map(&tx_port, 0, 0);
}

SEC("xdp_cpumap/pass")
int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
	return XDP_PASS;
}

SEC("xdp_cpumap/drop")
int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
	return XDP_DROP;
}

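/* Egress program: runs on the devmap (tx_port) side just before transmit
 * when the bpf_devmap_val carries a program fd. Userspace is presumably
 * expected to fill tx_mac_addr with the egress device's MAC address (an
 * assumption about the loader, mirroring how the maps are sized there).
 */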
SEC("xdp_devmap/egress")
int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	__builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
