1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Cloudflare Ltd.
3 // Copyright (c) 2020 Isovalent, Inc.
4 
5 #include <stddef.h>
6 #include <stdbool.h>
7 #include <string.h>
8 #include <linux/bpf.h>
9 #include <linux/if_ether.h>
10 #include <linux/in.h>
11 #include <linux/ip.h>
12 #include <linux/ipv6.h>
13 #include <linux/pkt_cls.h>
14 #include <linux/tcp.h>
15 #include <sys/socket.h>
16 #include <bpf/bpf_helpers.h>
17 #include <bpf/bpf_endian.h>
18 
/* Value for the 'pinning' field below: pin the map under
 * /sys/fs/bpf/tc/globals/<map name>.
 */
#define PIN_GLOBAL_NS 2

/* Sockmap with a single slot (key 0) from which handle_udp() and
 * handle_tcp() fetch the fallback socket for packets they steer.
 *
 * The anonymous struct must match the struct bpf_elf_map layout from
 * iproute2, so do not reorder, resize or drop fields (including the ones
 * left at zero here).
 *
 * NOTE(review): nothing in this file writes to the map; presumably the
 * userspace side of the test inserts the socket - confirm against the loader.
 */
struct {
	__u32 type;
	__u32 size_key;
	__u32 size_value;
	__u32 max_elem;
	__u32 flags;
	__u32 id;
	__u32 pinning;
} server_map SEC("maps") = {
	.type = BPF_MAP_TYPE_SOCKMAP,
	.size_key = sizeof(int),
	.size_value  = sizeof(__u64),
	.max_elem = 1,			/* only key 0 is ever looked up */
	.pinning = PIN_GLOBAL_NS,
};
38 
/* Object metadata emitted into the "version" and "license" ELF sections. */
int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
41 
42 /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
43 static inline struct bpf_sock_tuple *
get_tuple(struct __sk_buff * skb,bool * ipv4,bool * tcp)44 get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
45 {
46 	void *data_end = (void *)(long)skb->data_end;
47 	void *data = (void *)(long)skb->data;
48 	struct bpf_sock_tuple *result;
49 	struct ethhdr *eth;
50 	__u64 tuple_len;
51 	__u8 proto = 0;
52 	__u64 ihl_len;
53 
54 	eth = (struct ethhdr *)(data);
55 	if (eth + 1 > data_end)
56 		return NULL;
57 
58 	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
59 		struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
60 
61 		if (iph + 1 > data_end)
62 			return NULL;
63 		if (iph->ihl != 5)
64 			/* Options are not supported */
65 			return NULL;
66 		ihl_len = iph->ihl * 4;
67 		proto = iph->protocol;
68 		*ipv4 = true;
69 		result = (struct bpf_sock_tuple *)&iph->saddr;
70 	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
71 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
72 
73 		if (ip6h + 1 > data_end)
74 			return NULL;
75 		ihl_len = sizeof(*ip6h);
76 		proto = ip6h->nexthdr;
77 		*ipv4 = false;
78 		result = (struct bpf_sock_tuple *)&ip6h->saddr;
79 	} else {
80 		return (struct bpf_sock_tuple *)data;
81 	}
82 
83 	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
84 		return NULL;
85 
86 	*tcp = (proto == IPPROTO_TCP);
87 	return result;
88 }
89 
90 static inline int
handle_udp(struct __sk_buff * skb,struct bpf_sock_tuple * tuple,bool ipv4)91 handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
92 {
93 	struct bpf_sock_tuple ln = {0};
94 	struct bpf_sock *sk;
95 	const int zero = 0;
96 	size_t tuple_len;
97 	__be16 dport;
98 	int ret;
99 
100 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
101 	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
102 		return TC_ACT_SHOT;
103 
104 	sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
105 	if (sk)
106 		goto assign;
107 
108 	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
109 	if (dport != bpf_htons(4321))
110 		return TC_ACT_OK;
111 
112 	sk = bpf_map_lookup_elem(&server_map, &zero);
113 	if (!sk)
114 		return TC_ACT_SHOT;
115 
116 assign:
117 	ret = bpf_sk_assign(skb, sk, 0);
118 	bpf_sk_release(sk);
119 	return ret;
120 }
121 
122 static inline int
handle_tcp(struct __sk_buff * skb,struct bpf_sock_tuple * tuple,bool ipv4)123 handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
124 {
125 	struct bpf_sock_tuple ln = {0};
126 	struct bpf_sock *sk;
127 	const int zero = 0;
128 	size_t tuple_len;
129 	__be16 dport;
130 	int ret;
131 
132 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
133 	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
134 		return TC_ACT_SHOT;
135 
136 	sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
137 	if (sk) {
138 		if (sk->state != BPF_TCP_LISTEN)
139 			goto assign;
140 		bpf_sk_release(sk);
141 	}
142 
143 	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
144 	if (dport != bpf_htons(4321))
145 		return TC_ACT_OK;
146 
147 	sk = bpf_map_lookup_elem(&server_map, &zero);
148 	if (!sk)
149 		return TC_ACT_SHOT;
150 
151 	if (sk->state != BPF_TCP_LISTEN) {
152 		bpf_sk_release(sk);
153 		return TC_ACT_SHOT;
154 	}
155 
156 assign:
157 	ret = bpf_sk_assign(skb, sk, 0);
158 	bpf_sk_release(sk);
159 	return ret;
160 }
161 
162 SEC("classifier/sk_assign_test")
bpf_sk_assign_test(struct __sk_buff * skb)163 int bpf_sk_assign_test(struct __sk_buff *skb)
164 {
165 	struct bpf_sock_tuple *tuple, ln = {0};
166 	bool ipv4 = false;
167 	bool tcp = false;
168 	int tuple_len;
169 	int ret = 0;
170 
171 	tuple = get_tuple(skb, &ipv4, &tcp);
172 	if (!tuple)
173 		return TC_ACT_SHOT;
174 
175 	/* Note that the verifier socket return type for bpf_skc_lookup_tcp()
176 	 * differs from bpf_sk_lookup_udp(), so even though the C-level type is
177 	 * the same here, if we try to share the implementations they will
178 	 * fail to verify because we're crossing pointer types.
179 	 */
180 	if (tcp)
181 		ret = handle_tcp(skb, tuple, ipv4);
182 	else
183 		ret = handle_udp(skb, tuple, ipv4);
184 
185 	return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
186 }
187