1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2018 Facebook
3 
4 #include <string.h>
5 
6 #include <linux/stddef.h>
7 #include <linux/bpf.h>
8 #include <linux/in.h>
9 #include <linux/in6.h>
10 #include <sys/socket.h>
11 #include <netinet/tcp.h>
12 #include <linux/if.h>
13 #include <errno.h>
14 
15 #include <bpf/bpf_helpers.h>
16 #include <bpf/bpf_endian.h>
17 
18 #define SRC_REWRITE_IP4		0x7f000004U
19 #define DST_REWRITE_IP4		0x7f000001U
20 #define DST_REWRITE_PORT4	4444
21 
22 #ifndef TCP_CA_NAME_MAX
23 #define TCP_CA_NAME_MAX 16
24 #endif
25 
26 #ifndef TCP_NOTSENT_LOWAT
27 #define TCP_NOTSENT_LOWAT 25
28 #endif
29 
30 #ifndef IFNAMSIZ
31 #define IFNAMSIZ 16
32 #endif
33 
34 int _version SEC("version") = 1;
35 
36 __attribute__ ((noinline))
do_bind(struct bpf_sock_addr * ctx)37 int do_bind(struct bpf_sock_addr *ctx)
38 {
39 	struct sockaddr_in sa = {};
40 
41 	sa.sin_family = AF_INET;
42 	sa.sin_port = bpf_htons(0);
43 	sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
44 
45 	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
46 		return 0;
47 
48 	return 1;
49 }
50 
verify_cc(struct bpf_sock_addr * ctx,char expected[TCP_CA_NAME_MAX])51 static __inline int verify_cc(struct bpf_sock_addr *ctx,
52 			      char expected[TCP_CA_NAME_MAX])
53 {
54 	char buf[TCP_CA_NAME_MAX];
55 	int i;
56 
57 	if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
58 		return 1;
59 
60 	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
61 		if (buf[i] != expected[i])
62 			return 1;
63 		if (buf[i] == 0)
64 			break;
65 	}
66 
67 	return 0;
68 }
69 
set_cc(struct bpf_sock_addr * ctx)70 static __inline int set_cc(struct bpf_sock_addr *ctx)
71 {
72 	char reno[TCP_CA_NAME_MAX] = "reno";
73 	char cubic[TCP_CA_NAME_MAX] = "cubic";
74 
75 	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
76 		return 1;
77 	if (verify_cc(ctx, reno))
78 		return 1;
79 
80 	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
81 		return 1;
82 	if (verify_cc(ctx, cubic))
83 		return 1;
84 
85 	return 0;
86 }
87 
bind_to_device(struct bpf_sock_addr * ctx)88 static __inline int bind_to_device(struct bpf_sock_addr *ctx)
89 {
90 	char veth1[IFNAMSIZ] = "test_sock_addr1";
91 	char veth2[IFNAMSIZ] = "test_sock_addr2";
92 	char missing[IFNAMSIZ] = "nonexistent_dev";
93 	char del_bind[IFNAMSIZ] = "";
94 
95 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
96 				&veth1, sizeof(veth1)))
97 		return 1;
98 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
99 				&veth2, sizeof(veth2)))
100 		return 1;
101 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
102 				&missing, sizeof(missing)) != -ENODEV)
103 		return 1;
104 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
105 				&del_bind, sizeof(del_bind)))
106 		return 1;
107 
108 	return 0;
109 }
110 
set_keepalive(struct bpf_sock_addr * ctx)111 static __inline int set_keepalive(struct bpf_sock_addr *ctx)
112 {
113 	int zero = 0, one = 1;
114 
115 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
116 		return 1;
117 	if (ctx->type == SOCK_STREAM) {
118 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
119 			return 1;
120 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
121 			return 1;
122 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
123 			return 1;
124 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
125 			return 1;
126 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
127 			return 1;
128 	}
129 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
130 		return 1;
131 
132 	return 0;
133 }
134 
set_notsent_lowat(struct bpf_sock_addr * ctx)135 static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
136 {
137 	int lowat = 65535;
138 
139 	if (ctx->type == SOCK_STREAM) {
140 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
141 			return 1;
142 	}
143 
144 	return 0;
145 }
146 
147 SEC("cgroup/connect4")
connect_v4_prog(struct bpf_sock_addr * ctx)148 int connect_v4_prog(struct bpf_sock_addr *ctx)
149 {
150 	struct bpf_sock_tuple tuple = {};
151 	struct bpf_sock *sk;
152 
153 	/* Verify that new destination is available. */
154 	memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr));
155 	memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport));
156 
157 	tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
158 	tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
159 
160 	/* Bind to device and unbind it. */
161 	if (bind_to_device(ctx))
162 		return 0;
163 
164 	if (set_keepalive(ctx))
165 		return 0;
166 
167 	if (set_notsent_lowat(ctx))
168 		return 0;
169 
170 	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
171 		return 0;
172 	else if (ctx->type == SOCK_STREAM)
173 		sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
174 				       BPF_F_CURRENT_NETNS, 0);
175 	else
176 		sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4),
177 				       BPF_F_CURRENT_NETNS, 0);
178 
179 	if (!sk)
180 		return 0;
181 
182 	if (sk->src_ip4 != tuple.ipv4.daddr ||
183 	    sk->src_port != DST_REWRITE_PORT4) {
184 		bpf_sk_release(sk);
185 		return 0;
186 	}
187 
188 	bpf_sk_release(sk);
189 
190 	/* Rewrite congestion control. */
191 	if (ctx->type == SOCK_STREAM && set_cc(ctx))
192 		return 0;
193 
194 	/* Rewrite destination. */
195 	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
196 	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
197 
198 	return do_bind(ctx) ? 1 : 0;
199 }
200 
/* License string emitted into the "license" ELF section. */
char _license[] SEC("license") = "GPL";
202