1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3 /*
 * Test suite for lwt BPF programs that reroute packets.
 * The tests check not only that these programs work as expected in the
 * normal case, but also that they handle abnormal situations gracefully.
 * This test suite currently covers only the lwt_xmit hook; lwt_in tests
 * have not been implemented.
9 *
10 * WARNING
11 * -------
12 * This test suite can crash the kernel, thus should be run in a VM.
13 *
14 * Setup:
15 * ---------
16 * all tests are performed in a single netns. A lwt encap route is setup for
17 * each subtest:
18 *
19 * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err
20 *
21 * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains
22 * a single test program entry. This program sets packet mark by last byte of
23 * the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb
24 * mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped
 * to avoid a routing loop. We do not use a generated BPF skeleton because
 * attaching lwt programs is not supported by libbpf yet.
27 *
28 * The test program will bring up a tun device, and sets up the following
29 * routes:
30 *
31 * ip rule add pref 100 from all fwmark <tun_index> lookup 100
32 * ip route add table 100 default dev tun0
33 *
 * For normal testing, a ping command is run in the test netns:
 *
 * ping 10.0.0.<tun_index> -c 1 -W 1 -s 100
37 *
38 * For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP
39 * socket will try to overflow the fq queue and trigger qdisc drop error.
40 *
41 * Scenarios:
42 * --------------------------------
43 * 1. Reroute to a running tun device
 * 2. Reroute to a device whose qdisc drops packets
45 *
46 * For case 1, ping packets should be received by the tun device.
47 *
48 * For case 2, force UDP packets to overflow fq limit. As long as kernel
49 * is not crashed, it is considered successful.
50 */
51 #include "lwt_helpers.h"
52 #include "network_helpers.h"
53 #include <linux/net_tstamp.h>
54
/* BPF object file named in the lwt encap route that setup() installs */
#define BPF_OBJECT "test_lwt_reroute.bpf.o"
/* Address that setup() adds to lo as a /32 */
#define LOCAL_SRC "10.0.0.1"
/* Destination prefix covered by the lwt encap route */
#define TEST_CIDR "10.0.0.0/24"
/* NOTE(review): XMIT_HOOK and XMIT_SECTION are not referenced by the code
 * visible in this file; setup() spells both values out literally.
 */
#define XMIT_HOOK "xmit"
#define XMIT_SECTION "lwt_xmit"
/* Nanoseconds per second, used when building the SCM_TXTIME timestamp */
#define NSEC_PER_SEC 1000000000ULL
61
62 /* send a ping to be rerouted to the target device */
ping_once(const char * ip)63 static void ping_once(const char *ip)
64 {
65 /* We won't get a reply. Don't fail here */
66 SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1",
67 ip, ICMP_PAYLOAD_SIZE);
68 }
69
70 /* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop
71 * error. This is done via TX tstamp to force buffering delayed packets.
72 */
overflow_fq(int snd_target,const char * target_ip)73 static int overflow_fq(int snd_target, const char *target_ip)
74 {
75 struct sockaddr_in addr = {
76 .sin_family = AF_INET,
77 .sin_port = htons(1234),
78 };
79
80 char data_buf[8]; /* only #pkts matter, so use a random small buffer */
81 char control_buf[CMSG_SPACE(sizeof(uint64_t))];
82 struct iovec iov = {
83 .iov_base = data_buf,
84 .iov_len = sizeof(data_buf),
85 };
86 int err = -1;
87 int s = -1;
88 struct sock_txtime txtime_on = {
89 .clockid = CLOCK_MONOTONIC,
90 .flags = 0,
91 };
92 struct msghdr msg = {
93 .msg_name = &addr,
94 .msg_namelen = sizeof(addr),
95 .msg_control = control_buf,
96 .msg_controllen = sizeof(control_buf),
97 .msg_iovlen = 1,
98 .msg_iov = &iov,
99 };
100 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
101
102 memset(data_buf, 0, sizeof(data_buf));
103
104 s = socket(AF_INET, SOCK_DGRAM, 0);
105 if (!ASSERT_GE(s, 0, "socket"))
106 goto out;
107
108 err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on));
109 if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)"))
110 goto out;
111
112 err = inet_pton(AF_INET, target_ip, &addr.sin_addr);
113 if (!ASSERT_EQ(err, 1, "inet_pton"))
114 goto out;
115
116 while (snd_target > 0) {
117 struct timespec now;
118
119 memset(control_buf, 0, sizeof(control_buf));
120 cmsg->cmsg_type = SCM_TXTIME;
121 cmsg->cmsg_level = SOL_SOCKET;
122 cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t));
123
124 err = clock_gettime(CLOCK_MONOTONIC, &now);
125 if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) {
126 err = -1;
127 goto out;
128 }
129
130 *(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC +
131 now.tv_nsec;
132
133 /* we will intentionally send more than fq limit, so ignore
134 * the error here.
135 */
136 sendmsg(s, &msg, MSG_NOSIGNAL);
137 snd_target--;
138 }
139
140 /* no kernel crash so far is considered success */
141 err = 0;
142
143 out:
144 if (s >= 0)
145 close(s);
146
147 return err;
148 }
149
setup(const char * tun_dev)150 static int setup(const char *tun_dev)
151 {
152 int target_index = -1;
153 int tap_fd = -1;
154
155 tap_fd = open_tuntap(tun_dev, false);
156 if (!ASSERT_GE(tap_fd, 0, "open_tun"))
157 return -1;
158
159 target_index = if_nametoindex(tun_dev);
160 if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
161 return -1;
162
163 SYS(fail, "ip link add link_err type dummy");
164 SYS(fail, "ip link set lo up");
165 SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
166 SYS(fail, "ip link set link_err up");
167 SYS(fail, "ip link set %s up", tun_dev);
168
169 SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit",
170 TEST_CIDR, BPF_OBJECT);
171
172 SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100",
173 target_index);
174 SYS(fail, "ip route add t 100 default dev %s", tun_dev);
175
176 return tap_fd;
177
178 fail:
179 if (tap_fd >= 0)
180 close(tap_fd);
181 return -1;
182 }
183
test_lwt_reroute_normal_xmit(void)184 static void test_lwt_reroute_normal_xmit(void)
185 {
186 const char *tun_dev = "tun0";
187 int tun_fd = -1;
188 int ifindex = -1;
189 char ip[256];
190 struct timeval timeo = {
191 .tv_sec = 0,
192 .tv_usec = 250000,
193 };
194
195 tun_fd = setup(tun_dev);
196 if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
197 return;
198
199 ifindex = if_nametoindex(tun_dev);
200 if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
201 return;
202
203 snprintf(ip, 256, "10.0.0.%d", ifindex);
204
205 /* ping packets should be received by the tun device */
206 ping_once(ip);
207
208 if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1,
209 "wait_for_packet"))
210 log_err("%s xmit", __func__);
211 }
212
213 /*
214 * Test the failure case when the skb is dropped at the qdisc. This is a
215 * regression prevention at the xmit hook only.
216 */
test_lwt_reroute_qdisc_dropped(void)217 static void test_lwt_reroute_qdisc_dropped(void)
218 {
219 const char *tun_dev = "tun0";
220 int tun_fd = -1;
221 int ifindex = -1;
222 char ip[256];
223
224 tun_fd = setup(tun_dev);
225 if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
226 goto fail;
227
228 SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev);
229
230 ifindex = if_nametoindex(tun_dev);
231 if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
232 return;
233
234 snprintf(ip, 256, "10.0.0.%d", ifindex);
235 ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq");
236
237 fail:
238 if (tun_fd >= 0)
239 close(tun_fd);
240 }
241
test_lwt_reroute_run(void * arg)242 static void *test_lwt_reroute_run(void *arg)
243 {
244 netns_delete();
245 RUN_TEST(lwt_reroute_normal_xmit);
246 RUN_TEST(lwt_reroute_qdisc_dropped);
247 return NULL;
248 }
249
test_lwt_reroute(void)250 void test_lwt_reroute(void)
251 {
252 pthread_t test_thread;
253 int err;
254
255 /* Run the tests in their own thread to isolate the namespace changes
256 * so they do not affect the environment of other tests.
257 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
258 */
259 err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL);
260 if (ASSERT_OK(err, "pthread_create"))
261 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
262 }
263