1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3#
4# Check that route PMTU values match expectations, and that initial device MTU
5# values are assigned correctly
6#
7# Tests currently implemented:
8#
9# - pmtu_vti4_exception
10#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
11#	namespaces with matching endpoints. Check that route exception is not
12#	created if link layer MTU is not exceeded, then exceed it and check that
13#	exception is created with the expected PMTU. The approach described
14#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
15#	changes alone won't affect PMTU
16#
17# - pmtu_vti6_exception
18#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
19#	namespaces with matching endpoints. Check that route exception is
20#	created by exceeding link layer MTU with ping to other endpoint. Then
21#	decrease and increase MTU of tunnel, checking that route exception PMTU
22#	changes accordingly
23#
24# - pmtu_vti4_default_mtu
25#	Set up vti4 tunnel on top of veth, in two namespaces with matching
26#	endpoints. Check that MTU assigned to vti interface is the MTU of the
27#	lower layer (veth) minus additional lower layer headers (zero, for veth)
28#	minus IPv4 header length
29#
30# - pmtu_vti6_default_mtu
31#	Same as above, for IPv6
32#
33# - pmtu_vti4_link_add_mtu
34#	Set up vti4 interface passing MTU value at link creation, check MTU is
35#	configured, and that link is not created with invalid MTU values
36#
37# - pmtu_vti6_link_add_mtu
38#	Same as above, for IPv6
39#
40# - pmtu_vti6_link_change_mtu
41#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
42#	and check that configured MTU is used on link creation and changes, and
43#	that MTU is properly calculated instead when MTU is not configured from
44#	userspace
45
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Some systems don't have a ping6 binary anymore: fall back to plain ping in
# that case. The lookup uses the POSIX "command -v" builtin, so it doesn't
# depend on the external "which" utility being installed. ${ping6} is left
# empty if neither binary is found.
ping6=$(command -v ping6 2> /dev/null || command -v ping 2> /dev/null)

# List of tests, one per line: name of the test (run as test_<name>), then
# tabs, then the description printed in the report. Descriptions contain
# spaces, so tabs are what separates the two fields.
tests="
	pmtu_vti6_exception		vti6: PMTU exceptions
	pmtu_vti4_exception		vti4: PMTU exceptions
	pmtu_vti4_default_mtu		vti4: default MTU assignment
	pmtu_vti6_default_mtu		vti6: default MTU assignment
	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes"

# Randomly named namespaces, and command prefixes to run commands inside them
NS_A="ns-$(mktemp -u XXXXXX)"
NS_B="ns-$(mktemp -u XXXXXX)"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"

# Addresses of the veth pair, used as tunnel endpoints
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
veth6_mask="64"

# Addresses assigned to the vti/vti6 interfaces
vti4_a_addr="192.168.2.1"
vti4_b_addr="192.168.2.2"
vti4_mask="24"
vti6_a_addr="fd00:2::a"
vti6_b_addr="fd00:2::b"
vti6_mask="64"

# Addresses of the dummy interfaces used by pmtu_vti6_link_change_mtu
dummy6_0_addr="fc00:1000::0"
dummy6_1_addr="fc00:1001::0"
dummy6_mask="64"

# cleanup() is a no-op while this is 1; setup() resets it to 0
cleanup_done=1
# Error messages accumulated by err(), printed by err_flush()
err_buf=
86
# err() - Queue an error message for later display
# $1:	message, stored in err_buf followed by a newline
err() {
	err_buf=${err_buf}${1}'
'
}
91
# err_flush() - Print messages queued by err() to standard output, then empty
#		the buffer
err_flush() {
	# printf with a fixed format prints the buffer verbatim. "echo -n" is not
	# portable across /bin/sh implementations, and might interpret a
	# leading dash or backslashes in the buffer instead of printing them.
	printf '%s' "${err_buf}"
	err_buf=
}
96
# setup_namespaces() - Create the two network namespaces used by the tests
# Return: 1 if the first namespace can't be added, otherwise the exit status
#	  of adding the second one
setup_namespaces() {
	if ! ip netns add ${NS_A}; then
		return 1
	fi

	ip netns add ${NS_B}
}
101
# setup_veth() - Create a veth pair across the two namespaces, assign IPv4 and
#		 IPv6 addresses to both ends and bring them up
# Return: 1 if the veth pair can't be created
setup_veth() {
	# Both ends are created in the first namespace...
	if ! ${ns_a} ip link add veth_a type veth peer name veth_b; then
		return 1
	fi
	# ...then one of them is moved to the second one
	${ns_a} ip link set veth_b netns "${NS_B}"

	# IPv4 addresses
	${ns_a} ip addr add "${veth4_a_addr}/${veth4_mask}" dev veth_a
	${ns_b} ip addr add "${veth4_b_addr}/${veth4_mask}" dev veth_b

	# IPv6 addresses
	${ns_a} ip addr add "${veth6_a_addr}/${veth6_mask}" dev veth_a
	${ns_b} ip addr add "${veth6_b_addr}/${veth6_mask}" dev veth_b

	${ns_a} ip link set veth_a up
	${ns_b} ip link set veth_b up
}
115
# setup_vti() - Create a pair of vti or vti6 interfaces, one per namespace,
#		with mirrored endpoints and key 10, assign addresses to them
#		and bring them up
# $1:	IP version, 6 selects link type vti6, anything else vti
# $2:	Tunnel endpoint address in the first namespace
# $3:	Tunnel endpoint address in the second namespace
# $4:	Address of the tunnel interface in the first namespace
# $5:	Address of the tunnel interface in the second namespace
# $6:	Prefix length for tunnel interface addresses
# Return: 1 if the first tunnel interface can't be created
setup_vti() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"
	vti_a_addr="${4}"
	vti_b_addr="${5}"
	vti_mask=${6}

	if [ ${proto} -eq 6 ]; then
		vti_type="vti6"
	else
		vti_type="vti"
	fi

	if ! ${ns_a} ip link add "vti${proto}_a" type "${vti_type}" \
		local "${veth_a_addr}" remote "${veth_b_addr}" key 10; then
		return 1
	fi
	${ns_b} ip link add "vti${proto}_b" type "${vti_type}" \
		local "${veth_b_addr}" remote "${veth_a_addr}" key 10

	${ns_a} ip addr add "${vti_a_addr}/${vti_mask}" dev "vti${proto}_a"
	${ns_b} ip addr add "${vti_b_addr}/${vti_mask}" dev "vti${proto}_b"

	${ns_a} ip link set "vti${proto}_a" up
	${ns_b} ip link set "vti${proto}_b" up

	sleep 1
}
137
# setup_vti4() - Set up vti interfaces using the IPv4 veth addresses as
#		 endpoints, and the IPv4 vti addresses and mask
setup_vti4() {
	setup_vti 4 \
		"${veth4_a_addr}" "${veth4_b_addr}" \
		"${vti4_a_addr}" "${vti4_b_addr}" \
		"${vti4_mask}"
}
141
# setup_vti6() - Set up vti6 interfaces using the IPv6 veth addresses as
#		 endpoints, and the IPv6 vti addresses and mask
setup_vti6() {
	setup_vti 6 \
		"${veth6_a_addr}" "${veth6_b_addr}" \
		"${vti6_a_addr}" "${vti6_b_addr}" \
		"${vti6_mask}"
}
145
# setup_xfrm() - Add ESP tunnel mode xfrm states and policies, in both
#		 namespaces, for traffic between the two given endpoints
# $1:	IP version, passed to ip as -4 or -6
# $2:	Tunnel endpoint address in the first namespace
# $3:	Tunnel endpoint address in the second namespace
# Return: 1 if the first state can't be added
setup_xfrm() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"

	# First namespace: one state per direction, then out and in policies
	# matching mark 10
	if ! ${ns_a} ip -${proto} xfrm state add \
		src "${veth_a_addr}" dst "${veth_b_addr}" spi 0x1000 proto esp \
		aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
		mode tunnel; then
		return 1
	fi
	${ns_a} ip -${proto} xfrm state add \
		src "${veth_b_addr}" dst "${veth_a_addr}" spi 0x1001 proto esp \
		aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
		mode tunnel
	${ns_a} ip -${proto} xfrm policy add dir out mark 10 \
		tmpl src "${veth_a_addr}" dst "${veth_b_addr}" proto esp mode tunnel
	${ns_a} ip -${proto} xfrm policy add dir in mark 10 \
		tmpl src "${veth_b_addr}" dst "${veth_a_addr}" proto esp mode tunnel

	# Second namespace: same states, policies with swapped directions
	${ns_b} ip -${proto} xfrm state add \
		src "${veth_a_addr}" dst "${veth_b_addr}" spi 0x1000 proto esp \
		aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
		mode tunnel
	${ns_b} ip -${proto} xfrm state add \
		src "${veth_b_addr}" dst "${veth_a_addr}" spi 0x1001 proto esp \
		aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
		mode tunnel
	${ns_b} ip -${proto} xfrm policy add dir out mark 10 \
		tmpl src "${veth_b_addr}" dst "${veth_a_addr}" proto esp mode tunnel
	${ns_b} ip -${proto} xfrm policy add dir in mark 10 \
		tmpl src "${veth_a_addr}" dst "${veth_b_addr}" proto esp mode tunnel
}
161
# setup_xfrm4() - Set up xfrm states and policies between IPv4 veth addresses
setup_xfrm4() {
	setup_xfrm 4 \
		"${veth4_a_addr}" "${veth4_b_addr}"
}
165
# setup_xfrm6() - Set up xfrm states and policies between IPv6 veth addresses
setup_xfrm6() {
	setup_xfrm 6 \
		"${veth6_a_addr}" "${veth6_b_addr}"
}
169
# setup() - Run the setup_<name> function for each argument, in order
# $@:	Names of the components to set up, e.g. namespaces, veth, vti4
# Return: ${ksft_skip} if not running as root, 1 as soon as one setup step
#	  fails, 0 otherwise
setup() {
	if [ "$(id -u)" -ne 0 ]; then
		echo "  need to run as root"
		return $ksft_skip
	fi

	# From now on there might be something to clean up
	cleanup_done=0
	for arg in "$@"; do
		if ! eval setup_${arg}; then
			echo "  ${arg} not supported"
			return 1
		fi
	done
}
178
# cleanup() - Delete both namespaces, unless that was done already
cleanup() {
	if [ ${cleanup_done} -eq 1 ]; then
		return
	fi

	# Namespaces might not exist if setup failed early, hide errors
	ip netns del ${NS_A} 2> /dev/null
	ip netns del ${NS_B} 2> /dev/null

	cleanup_done=1
}
185
# mtu() - Set the MTU of a device
# $1:	Command prefix used to run ip, e.g. "ip netns exec <namespace>"
# $2:	Device name
# $3:	MTU value
mtu() {
	ns_cmd="${1}"; dev="${2}"; mtu="${3}"

	# ${ns_cmd} is left unquoted on purpose: it holds several words
	${ns_cmd} ip link set dev "${dev}" mtu "${mtu}"
}
193
# mtu_parse() - Print the word following the first "mtu" keyword, if any
# $1:	Text to scan, split into words by the shell
mtu_parse() {
	input="${1}"

	next=0
	for i in ${input}; do
		# Previous word was "mtu": this one is the value, we're done
		if [ ${next} -eq 1 ]; then
			echo "${i}"
			return
		fi
		[ "${i}" = "mtu" ] && next=1
	done
}
203
# link_get() - Print "ip link show" output for a device
# $1:	Command prefix used to run ip, e.g. "ip netns exec <namespace>"
# $2:	Device name
link_get() {
	ns_cmd="${1}"; name="${2}"

	${ns_cmd} ip link show dev "${name}"
}
210
# link_get_mtu() - Print the MTU of a device, parsed from link_get() output
# $1:	Command prefix used to run ip, e.g. "ip netns exec <namespace>"
# $2:	Device name
link_get_mtu() {
	ns_cmd="${1}"; name="${2}"

	mtu_parse "$(link_get "${ns_cmd}" "${name}")"
}
217
# route_get_dst_exception() - Print "ip route get" output for a destination
# $1:	Command prefix used to run ip, e.g. "ip netns exec <namespace>"
# $2:	Destination address
route_get_dst_exception() {
	ns_cmd="${1}"; dst="${2}"

	${ns_cmd} ip route get "${dst}"
}
224
# route_get_dst_pmtu_from_exception() - Print the MTU reported by "ip route
#		get" for a destination, nothing if no MTU is reported
# $1:	Command prefix used to run ip, e.g. "ip netns exec <namespace>"
# $2:	Destination address
route_get_dst_pmtu_from_exception() {
	ns_cmd="${1}"; dst="${2}"

	mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}")"
}
231
# test_pmtu_vti4_exception() - Check that a route exception appears on a vti
#		tunnel only once the link layer MTU is exceeded, and that it
#		carries the expected PMTU
# Return: 0 on success, 1 on failure, 2 if setup failed
test_pmtu_vti4_exception() {
	if ! setup namespaces veth vti4 xfrm4; then
		return 2
	fi

	veth_mtu=1500
	vti_mtu=$((veth_mtu - 20))

	# Bytes taken away by ESP with RFC 4106: SPI (4), sequence number (4),
	# IV (8), ICV (16), pad length (1), next header (1)
	esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1))
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a ${veth_mtu}
	mtu "${ns_b}" veth_b ${veth_mtu}
	mtu "${ns_a}" vti4_a ${vti_mtu}
	mtu "${ns_b}" vti4_b ${vti_mtu}

	# A packet that still fits the link layer MTU must not cause an
	# exception to be created
	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
	if [ -n "${pmtu}" ]; then
		err "  unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
		return 1
	fi

	# One byte more than that, and the exception must be there
	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
	if [ -z "${pmtu}" ]; then
		err "  exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
		return 1
	fi

	# Finally, the PMTU stored in the exception must be the expected one
	if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
		err "  wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
		return 1
	fi
}
270
# test_pmtu_vti6_exception() - Check that a route exception is created on a
#		vti6 tunnel by exceeding the link layer MTU, and that its PMTU
#		follows decreases and increases of the tunnel MTU
# Return: 0 on success, 1 on failure, 2 if setup failed
test_pmtu_vti6_exception() {
	setup namespaces veth vti6 xfrm6 || return 2
	fail=0

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if [ "${pmtu}" = "" ]; then
		err "  tunnel exceeding link layer MTU didn't create route exception"
		return 1
	fi

	# Decrease tunnel MTU, check for PMTU decrease in route exception.
	# Values are compared as strings: a numeric test would error out on
	# an empty PMTU, and a missing exception would go unnoticed.
	mtu "${ns_a}" vti6_a 3000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if [ "${pmtu}" != "3000" ]; then
		err "  decreasing tunnel MTU didn't decrease route exception PMTU"
		fail=1
	fi

	# Increase tunnel MTU, check for PMTU increase in route exception
	mtu "${ns_a}" vti6_a 9000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if [ "${pmtu}" != "9000" ]; then
		err "  increasing tunnel MTU didn't increase route exception PMTU"
		fail=1
	fi

	return ${fail}
}
305
# test_pmtu_vti4_default_mtu() - Check that the default MTU of a vti
#		interface is the MTU of the underlying veth minus 20 bytes,
#		the IPv4 header length
# Return: 0 on success, 1 on failure, 2 if setup failed
test_pmtu_vti4_default_mtu() {
	if ! setup namespaces veth vti4; then
		return 2
	fi

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"

	# Difference between the two must be exactly one IPv4 header
	[ $((veth_mtu - vti4_mtu)) -eq 20 ] && return 0

	err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
	return 1
}
317
# test_pmtu_vti6_default_mtu() - Check that the default MTU of a vti6
#		interface is the MTU of the underlying veth minus 40 bytes,
#		the IPv6 header length
# Return: 0 on success, 1 on failure, 2 if setup failed
test_pmtu_vti6_default_mtu() {
	if ! setup namespaces veth vti6; then
		return 2
	fi

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"

	# Difference between the two must be exactly one IPv6 header
	[ $((veth_mtu - vti6_mtu)) -eq 40 ] && return 0

	err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
	return 1
}
329
# test_pmtu_vti4_link_add_mtu() - Check that a vti interface gets the MTU
#		passed on link creation, and that out-of-range values are
#		either rejected or adjusted to a value within bounds
# Return: 0 on success, 1 on failure, 2 if setup failed or vti is unsupported
test_pmtu_vti4_link_add_mtu() {
	if ! setup namespaces; then
		return 2
	fi

	# Probe for vti support with a plain link creation first
	if ! ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10; then
		err "  vti not supported"
		return 2
	fi
	${ns_a} ip link del vti4_a

	fail=0

	min=68
	max=$((65535 - 20))

	# Values just outside the range: creation can fail, or MTU can be
	# adjusted to a proper value
	for v in $((min - 1)) $((max + 1)); do
		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null || continue

		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
			err "  vti tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		${ns_a} ip link del vti4_a
	done

	# Values within the range, bounds included, must be used as they are
	for v in ${min} 1300 ${max}; do
		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		${ns_a} ip link del vti4_a

		[ "${mtu}" = "${v}" ] && continue
		err "  vti MTU ${mtu} doesn't match configured value ${v}"
		fail=1
	done

	return ${fail}
}
367
# test_pmtu_vti6_link_add_mtu() - Check that a vti6 interface gets the MTU
#		passed on link creation, and that out-of-range values are
#		either rejected or adjusted to a value within bounds
# Return: 0 on success, 1 on failure, 2 if setup failed or vti6 is unsupported
test_pmtu_vti6_link_add_mtu() {
	setup namespaces || return 2

	${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
	[ $? -ne 0 ] && err "  vti6 not supported" && return 2
	${ns_a} ip link del vti6_a

	fail=0

	min=68			# vti6 can carry IPv4 packets too
	max=$((65535 - 40))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
		# This can fail, or MTU can be adjusted to a proper value
		[ $? -ne 0 ] && continue
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		if [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			# Report the MTU the link actually got, as the vti4
			# test does, not the value that was requested
			err "  vti6 tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		${ns_a} ip link del vti6_a
	done

	# Now check valid values, bounds included
	for v in ${min} 1280 1300 ${max}; do
		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		${ns_a} ip link del vti6_a
		if [ "${mtu}" != "${v}" ]; then
			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
405
# test_pmtu_vti6_link_change_mtu() - Check that a vti6 interface uses the MTU
#		configured on link creation and on link changes, and that MTU
#		is derived from the underlying device when it's not configured
# Return: 0 on success, 1 on failure, 2 if setup failed or dummy interfaces
#	  are unsupported
test_pmtu_vti6_link_change_mtu() {
	setup namespaces || return 2

	${ns_a} ip link add dummy0 mtu 1500 type dummy
	[ $? -ne 0 ] && err "  dummy not supported" && return 2
	${ns_a} ip link add dummy1 mtu 3000 type dummy
	${ns_a} ip link set dummy0 up
	${ns_a} ip link set dummy1 up

	${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
	${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1

	fail=0

	# MTU values below are compared as strings: a numeric test would error
	# out if the MTU can't be read, for instance because the link was not
	# created, and the check would then be skipped instead of failing.

	# Create vti6 interface bound to device, passing MTU, check it
	${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1300" ]; then
		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
		fail=1
	fi

	# Move to another device with different MTU, without passing MTU, check
	# MTU is adjusted
	${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "$((3000 - 40))" ]; then
		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
		fail=1
	fi

	# Move it back, passing MTU, check MTU is not overridden
	${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1280" ]; then
		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
		fail=1
	fi

	return ${fail}
}
447
# Remove namespaces on any exit path of the main shell
trap cleanup EXIT

exitcode=0
desc=0

# ${tests} alternates test names and descriptions, separated by tabs and
# newlines, and descriptions contain spaces: split on tab and newline only.
# IFS is built with printf so that the tab doesn't depend on an invisible
# character in this file; the trailing "x", removed right away, keeps command
# substitution from stripping the newline.
IFS="$(printf '\t\nx')"
IFS="${IFS%x}"
for t in ${tests}; do
	# Words come in pairs: remember the name, the test is run once the
	# description is fetched, on the next iteration
	if [ ${desc} -eq 0 ]; then
		name="${t}"
		desc=1
		continue
	fi
	desc=0

	# Each test runs in a subshell, with default word splitting
	(
		unset IFS
		eval test_${name}
		ret=$?
		cleanup

		if [ $ret -eq 0 ]; then
			printf "TEST: %-60s  [ OK ]\n" "${t}"
		elif [ $ret -eq 1 ]; then
			printf "TEST: %-60s  [FAIL]\n" "${t}"
			err_flush
			exit 1
		elif [ $ret -eq 2 ]; then
			printf "TEST: %-60s  [SKIP]\n" "${t}"
			err_flush
		fi
	)
	# The subshell exits non-zero on failure only, skips don't count
	[ $? -ne 0 ] && exitcode=1
done

exit ${exitcode}
478