1 #include <stddef.h>
2 #include <inttypes.h>
3 #include <errno.h>
4 #include <linux/seg6_local.h>
5 #include <linux/bpf.h>
6 #include "bpf_helpers.h"
7 #include "bpf_endian.h"
8 
/*
 * bpf_printk() - printf-like convenience wrapper for bpf_trace_printk().
 *
 * The format string is first copied into a local char array; that array and
 * its size (terminating NUL included) are handed to bpf_trace_printk(),
 * followed by any additional arguments. The whole statement expression
 * evaluates to the value returned by bpf_trace_printk().
 */
#define bpf_printk(fmt, ...)					\
({								\
	char ____fmt[] = fmt;					\
	bpf_trace_printk(____fmt, sizeof(____fmt),		\
			 ##__VA_ARGS__);			\
})
15 
/*
 * Packet parsing helper: move the parsing cursor forward.
 *
 * cursor_advance(cur, len) evaluates to the value @cur had on entry and, as a
 * side effect, advances @cur by @len bytes. @cur must be a modifiable lvalue
 * (the file uses a void pointer, relying on GNU void-pointer arithmetic).
 * No bounds checking is done here; callers validate the returned pointer.
 */
#define cursor_advance(_cursor, _len) \
	({ void *_prev = (_cursor); (_cursor) += (_len); _prev; })
19 
/* Bit 4 of the SRH flags byte; set by add_egr_x and required by pop_egr. */
#define SR6_FLAG_ALERT (1 << 4)

/*
 * 64-bit host <-> network byte order conversion built on the 32-bit helpers.
 *
 * When bpf_htonl(1) == 1 the host is already big-endian and the value is
 * returned as is. Otherwise the two 32-bit halves are byte-swapped
 * individually and exchanged. Note that @x is evaluated more than once, so
 * it must not have side effects.
 */
#define htonll(x)							\
	((bpf_htonl(1)) == 1 ?						\
		(x) :							\
		((uint64_t)bpf_htonl((x) & 0xFFFFFFFF) << 32) |		\
			bpf_htonl((x) >> 32))

#define ntohll(x)							\
	((bpf_ntohl(1)) == 1 ?						\
		(x) :							\
		((uint64_t)bpf_ntohl((x) & 0xFFFFFFFF) << 32) |		\
			bpf_ntohl((x) >> 32))

/* Attribute applied to every on-wire header struct in this file. */
#define BPF_PACKET_HEADER __attribute__((packed))
27 
/*
 * Fixed IPv6 header as laid over the start of the packet (40 bytes).
 *
 * In this file the struct is only used for its size and for next_header.
 * NOTE(review): the allocation order of the three bit-fields is
 * implementation-defined; get_srh() reads the version nibble from the raw
 * first byte rather than through `ver`.
 */
struct ip6_t {
	/* First 32-bit word: version, traffic class and flow label. */
	unsigned int ver:4;
	unsigned int priority:8;
	unsigned int flow_label:20;
	unsigned short payload_len;
	/* Type of the header that follows; get_srh() requires 43 here. */
	unsigned char next_header;
	unsigned char hop_limit;
	/* Source address, upper and lower 64 bits. */
	unsigned long long src_hi;
	unsigned long long src_lo;
	/* Destination address, upper and lower 64 bits. */
	unsigned long long dst_hi;
	unsigned long long dst_lo;
} BPF_PACKET_HEADER;
40 
/*
 * A 128-bit IPv6 address split into two 64-bit halves.
 *
 * Users in this file convert each half with htonll()/ntohll() when building
 * or inspecting addresses that live in packet data.
 */
struct ip6_addr_t {
	unsigned long long hi; /* first 8 bytes of the address */
	unsigned long long lo; /* last 8 bytes of the address */
} BPF_PACKET_HEADER;
45 
46 struct ip6_srh_t {
47 	unsigned char nexthdr;
48 	unsigned char hdrlen;
49 	unsigned char type;
50 	unsigned char segments_left;
51 	unsigned char first_segment;
52 	unsigned char flags;
53 	unsigned short tag;
54 
55 	struct ip6_addr_t segments[0];
56 } BPF_PACKET_HEADER;
57 
/*
 * Generic SRH TLV header.
 *
 * The code in this file computes the total size of a TLV as
 * sizeof(struct sr6_tlv_t) + len, i.e. @len counts only the bytes that
 * follow the two-byte type/len prefix.
 */
struct sr6_tlv_t {
	unsigned char type;
	unsigned char len;
	/*
	 * C99 flexible array member instead of the GNU zero-length array;
	 * sizeof(struct sr6_tlv_t) stays 2.
	 */
	unsigned char value[];
} BPF_PACKET_HEADER;
63 
get_srh(struct __sk_buff * skb)64 __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
65 {
66 	void *cursor, *data_end;
67 	struct ip6_srh_t *srh;
68 	struct ip6_t *ip;
69 	uint8_t *ipver;
70 
71 	data_end = (void *)(long)skb->data_end;
72 	cursor = (void *)(long)skb->data;
73 	ipver = (uint8_t *)cursor;
74 
75 	if ((void *)ipver + sizeof(*ipver) > data_end)
76 		return NULL;
77 
78 	if ((*ipver >> 4) != 6)
79 		return NULL;
80 
81 	ip = cursor_advance(cursor, sizeof(*ip));
82 	if ((void *)ip + sizeof(*ip) > data_end)
83 		return NULL;
84 
85 	if (ip->next_header != 43)
86 		return NULL;
87 
88 	srh = cursor_advance(cursor, sizeof(*srh));
89 	if ((void *)srh + sizeof(*srh) > data_end)
90 		return NULL;
91 
92 	if (srh->type != 4)
93 		return NULL;
94 
95 	return srh;
96 }
97 
98 __attribute__((always_inline))
update_tlv_pad(struct __sk_buff * skb,uint32_t new_pad,uint32_t old_pad,uint32_t pad_off)99 int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
100 		   uint32_t old_pad, uint32_t pad_off)
101 {
102 	int err;
103 
104 	if (new_pad != old_pad) {
105 		err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
106 					  (int) new_pad - (int) old_pad);
107 		if (err)
108 			return err;
109 	}
110 
111 	if (new_pad > 0) {
112 		char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
113 					0, 0, 0};
114 		struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
115 
116 		pad_tlv->type = SR6_TLV_PADDING;
117 		pad_tlv->len = new_pad - 2;
118 
119 		err = bpf_lwt_seg6_store_bytes(skb, pad_off,
120 					       (void *)pad_tlv_buf, new_pad);
121 		if (err)
122 			return err;
123 	}
124 
125 	return 0;
126 }
127 
128 __attribute__((always_inline))
is_valid_tlv_boundary(struct __sk_buff * skb,struct ip6_srh_t * srh,uint32_t * tlv_off,uint32_t * pad_size,uint32_t * pad_off)129 int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
130 			  uint32_t *tlv_off, uint32_t *pad_size,
131 			  uint32_t *pad_off)
132 {
133 	uint32_t srh_off, cur_off;
134 	int offset_valid = 0;
135 	int err;
136 
137 	srh_off = (char *)srh - (char *)(long)skb->data;
138 	// cur_off = end of segments, start of possible TLVs
139 	cur_off = srh_off + sizeof(*srh) +
140 		sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
141 
142 	*pad_off = 0;
143 
144 	// we can only go as far as ~10 TLVs due to the BPF max stack size
145 	#pragma clang loop unroll(full)
146 	for (int i = 0; i < 10; i++) {
147 		struct sr6_tlv_t tlv;
148 
149 		if (cur_off == *tlv_off)
150 			offset_valid = 1;
151 
152 		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
153 			break;
154 
155 		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
156 		if (err)
157 			return err;
158 
159 		if (tlv.type == SR6_TLV_PADDING) {
160 			*pad_size = tlv.len + sizeof(tlv);
161 			*pad_off = cur_off;
162 
163 			if (*tlv_off == srh_off) {
164 				*tlv_off = cur_off;
165 				offset_valid = 1;
166 			}
167 			break;
168 
169 		} else if (tlv.type == SR6_TLV_HMAC) {
170 			break;
171 		}
172 
173 		cur_off += sizeof(tlv) + tlv.len;
174 	} // we reached the padding or HMAC TLVs, or the end of the SRH
175 
176 	if (*pad_off == 0)
177 		*pad_off = cur_off;
178 
179 	if (*tlv_off == -1)
180 		*tlv_off = cur_off;
181 	else if (!offset_valid)
182 		return -EINVAL;
183 
184 	return 0;
185 }
186 
187 __attribute__((always_inline))
add_tlv(struct __sk_buff * skb,struct ip6_srh_t * srh,uint32_t tlv_off,struct sr6_tlv_t * itlv,uint8_t tlv_size)188 int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
189 	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
190 {
191 	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
192 	uint8_t len_remaining, new_pad;
193 	uint32_t pad_off = 0;
194 	uint32_t pad_size = 0;
195 	uint32_t partial_srh_len;
196 	int err;
197 
198 	if (tlv_off != -1)
199 		tlv_off += srh_off;
200 
201 	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
202 		return -EINVAL;
203 
204 	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
205 	if (err)
206 		return err;
207 
208 	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
209 	if (err)
210 		return err;
211 
212 	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
213 	if (err)
214 		return err;
215 
216 	// the following can't be moved inside update_tlv_pad because the
217 	// bpf verifier has some issues with it
218 	pad_off += sizeof(*itlv) + itlv->len;
219 	partial_srh_len = pad_off - srh_off;
220 	len_remaining = partial_srh_len % 8;
221 	new_pad = 8 - len_remaining;
222 
223 	if (new_pad == 1) // cannot pad for 1 byte only
224 		new_pad = 9;
225 	else if (new_pad == 8)
226 		new_pad = 0;
227 
228 	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
229 }
230 
231 __attribute__((always_inline))
delete_tlv(struct __sk_buff * skb,struct ip6_srh_t * srh,uint32_t tlv_off)232 int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
233 	       uint32_t tlv_off)
234 {
235 	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
236 	uint8_t len_remaining, new_pad;
237 	uint32_t partial_srh_len;
238 	uint32_t pad_off = 0;
239 	uint32_t pad_size = 0;
240 	struct sr6_tlv_t tlv;
241 	int err;
242 
243 	tlv_off += srh_off;
244 
245 	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
246 	if (err)
247 		return err;
248 
249 	err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
250 	if (err)
251 		return err;
252 
253 	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
254 	if (err)
255 		return err;
256 
257 	pad_off -= sizeof(tlv) + tlv.len;
258 	partial_srh_len = pad_off - srh_off;
259 	len_remaining = partial_srh_len % 8;
260 	new_pad = 8 - len_remaining;
261 	if (new_pad == 1) // cannot pad for 1 byte only
262 		new_pad = 9;
263 	else if (new_pad == 8)
264 		new_pad = 0;
265 
266 	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
267 }
268 
269 __attribute__((always_inline))
has_egr_tlv(struct __sk_buff * skb,struct ip6_srh_t * srh)270 int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
271 {
272 	int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
273 		((srh->first_segment + 1) << 4);
274 	struct sr6_tlv_t tlv;
275 
276 	if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
277 		return 0;
278 
279 	if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
280 		struct ip6_addr_t egr_addr;
281 
282 		if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
283 			return 0;
284 
285 		// check if egress TLV value is correct
286 		if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
287 				ntohll(egr_addr.lo) == 0x4)
288 			return 1;
289 	}
290 
291 	return 0;
292 }
293 
294 // This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
295 // fd00::4
296 SEC("encap_srh")
__encap_srh(struct __sk_buff * skb)297 int __encap_srh(struct __sk_buff *skb)
298 {
299 	unsigned long long hi = 0xfd00000000000000;
300 	struct ip6_addr_t *seg;
301 	struct ip6_srh_t *srh;
302 	char srh_buf[72]; // room for 4 segments
303 	int err;
304 
305 	srh = (struct ip6_srh_t *)srh_buf;
306 	srh->nexthdr = 0;
307 	srh->hdrlen = 8;
308 	srh->type = 4;
309 	srh->segments_left = 3;
310 	srh->first_segment = 3;
311 	srh->flags = 0;
312 	srh->tag = 0;
313 
314 	seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
315 
316 	#pragma clang loop unroll(full)
317 	for (unsigned long long lo = 0; lo < 4; lo++) {
318 		seg->lo = htonll(4 - lo);
319 		seg->hi = htonll(hi);
320 		seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
321 	}
322 
323 	err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
324 	if (err)
325 		return BPF_DROP;
326 
327 	return BPF_REDIRECT;
328 }
329 
// Add an Egress TLV fd00::4, add the flag A,
// and apply End.X action to fc42::1
332 SEC("add_egr_x")
__add_egr_x(struct __sk_buff * skb)333 int __add_egr_x(struct __sk_buff *skb)
334 {
335 	unsigned long long hi = 0xfc42000000000000;
336 	unsigned long long lo = 0x1;
337 	struct ip6_srh_t *srh = get_srh(skb);
338 	uint8_t new_flags = SR6_FLAG_ALERT;
339 	struct ip6_addr_t addr;
340 	int err, offset;
341 
342 	if (srh == NULL)
343 		return BPF_DROP;
344 
345 	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
346 			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
347 
348 	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
349 		      (struct sr6_tlv_t *)&tlv, 20);
350 	if (err)
351 		return BPF_DROP;
352 
353 	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
354 	err = bpf_lwt_seg6_store_bytes(skb, offset,
355 				       (void *)&new_flags, sizeof(new_flags));
356 	if (err)
357 		return BPF_DROP;
358 
359 	addr.lo = htonll(lo);
360 	addr.hi = htonll(hi);
361 	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
362 				  (void *)&addr, sizeof(addr));
363 	if (err)
364 		return BPF_DROP;
365 	return BPF_REDIRECT;
366 }
367 
// Pop the Egress TLV, reset the flags, change the tag to 2442 and finally do a
// simple End action
370 SEC("pop_egr")
__pop_egr(struct __sk_buff * skb)371 int __pop_egr(struct __sk_buff *skb)
372 {
373 	struct ip6_srh_t *srh = get_srh(skb);
374 	uint16_t new_tag = bpf_htons(2442);
375 	uint8_t new_flags = 0;
376 	int err, offset;
377 
378 	if (srh == NULL)
379 		return BPF_DROP;
380 
381 	if (srh->flags != SR6_FLAG_ALERT)
382 		return BPF_DROP;
383 
384 	if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
385 		return BPF_DROP;
386 
387 	if (!has_egr_tlv(skb, srh))
388 		return BPF_DROP;
389 
390 	err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
391 	if (err)
392 		return BPF_DROP;
393 
394 	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
395 	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
396 				     sizeof(new_flags)))
397 		return BPF_DROP;
398 
399 	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
400 	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
401 				     sizeof(new_tag)))
402 		return BPF_DROP;
403 
404 	return BPF_OK;
405 }
406 
// Inspect if the Egress TLV and flag have been removed, if the tag is correct,
// then apply an End.T action to reach the last segment
409 SEC("inspect_t")
__inspect_t(struct __sk_buff * skb)410 int __inspect_t(struct __sk_buff *skb)
411 {
412 	struct ip6_srh_t *srh = get_srh(skb);
413 	int table = 117;
414 	int err;
415 
416 	if (srh == NULL)
417 		return BPF_DROP;
418 
419 	if (srh->flags != 0)
420 		return BPF_DROP;
421 
422 	if (srh->tag != bpf_htons(2442))
423 		return BPF_DROP;
424 
425 	if (srh->hdrlen != 8) // 4 segments
426 		return BPF_DROP;
427 
428 	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
429 				  (void *)&table, sizeof(table));
430 
431 	if (err)
432 		return BPF_DROP;
433 
434 	return BPF_REDIRECT;
435 }
436 
437 char __license[] SEC("license") = "GPL";
438