#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# Check that route PMTU values match expectations, and that initial device MTU
# values are assigned correctly
#
# Tests currently implemented:
#
# - pmtu_vti4_exception
#   Set up vti tunnel on top of veth, with xfrm states and policies, in two
#   namespaces with matching endpoints. Check that route exception is not
#   created if link layer MTU is not exceeded, then exceed it and check that
#   exception is created with the expected PMTU. The approach described
#   below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
#   changes alone won't affect PMTU
#
# - pmtu_vti6_exception
#   Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
#   namespaces with matching endpoints. Check that route exception is
#   created by exceeding link layer MTU with ping to other endpoint. Then
#   decrease and increase MTU of tunnel, checking that route exception PMTU
#   changes accordingly
#
# - pmtu_vti4_default_mtu
#   Set up vti4 tunnel on top of veth, in two namespaces with matching
#   endpoints. Check that MTU assigned to vti interface is the MTU of the
#   lower layer (veth) minus additional lower layer headers (zero, for veth)
#   minus IPv4 header length
#
# - pmtu_vti6_default_mtu
#   Same as above, for IPv6
#
# - pmtu_vti4_link_add_mtu
#   Set up vti4 interface passing MTU value at link creation, check MTU is
#   configured, and that link is not created with invalid MTU values
#
# - pmtu_vti6_link_add_mtu
#   Same as above, for IPv6
#
# - pmtu_vti6_link_change_mtu
#   Set up two dummy interfaces with different MTUs, create a vti6 tunnel
#   and check that configured MTU is used on link creation and changes, and
#   that MTU is properly calculated instead when MTU is not configured from
#   userspace

  # Kselftest framework requirement - SKIP code is 4.
  ksft_skip=4

  # Some systems don't have a ping6 binary anymore: fall back to plain ping.
  # 'command -v' is used for the lookup because, unlike 'which', it is
  # specified by POSIX.
  ping6=$(command -v ping6) || ping6=$(command -v ping)

  # Test list, one test per line: the test name (the function implementing it
  # is test_<name>), followed by a human-readable description.
  tests="
pmtu_vti6_exception vti6: PMTU exceptions
pmtu_vti4_exception vti4: PMTU exceptions
pmtu_vti4_default_mtu vti4: default MTU assignment
pmtu_vti6_default_mtu vti6: default MTU assignment
pmtu_vti4_link_add_mtu vti4: MTU setting on link creation
pmtu_vti6_link_add_mtu vti6: MTU setting on link creation
pmtu_vti6_link_change_mtu vti6: MTU changes on link changes"

  # Namespace names get a random suffix generated by mktemp
  NS_A="ns-$(mktemp -u XXXXXX)"
  NS_B="ns-$(mktemp -u XXXXXX)"

  # Command prefixes used to run a command inside each namespace. They are
  # expanded unquoted on purpose, so that they split into separate words.
  ns_a="ip netns exec ${NS_A}"
  ns_b="ip netns exec ${NS_B}"

  # Addresses assigned to the two ends of the veth pair
  veth4_a_addr="192.168.1.1"
  veth4_b_addr="192.168.1.2"
  veth4_mask="24"
  veth6_a_addr="fd00:1::a"
  veth6_b_addr="fd00:1::b"
  veth6_mask="64"

  # Addresses assigned to the two ends of the vti/vti6 tunnels
  vti4_a_addr="192.168.2.1"
  vti4_b_addr="192.168.2.2"
  vti4_mask="24"
  vti6_a_addr="fd00:2::a"
  vti6_b_addr="fd00:2::b"
  vti6_mask="64"

  # Addresses assigned to the dummy interfaces
  dummy6_0_addr="fc00:1000::0"
  dummy6_1_addr="fc00:1001::0"
  dummy6_mask="64"

  # 1: nothing to clean up; setup() sets it to 0, cleanup() back to 1
  cleanup_done=1

  # Error messages queued by err() until err_flush() prints them
  err_buf=
  # Queue an error message: append ${1}, followed by a newline, to err_buf.
  # Nothing is printed here, see err_flush().
  err() {
    err_buf="${err_buf}${1}
"
  }
  # Print all messages queued in err_buf, then empty the buffer.
  # printf is used instead of 'echo -n': POSIX leaves the handling of echo
  # options and backslashes implementation-defined, and this script runs
  # under /bin/sh.
  err_flush() {
    printf '%s' "${err_buf}"
    err_buf=
  }
  # Create the two network namespaces named by NS_A and NS_B.
  # Returns 1 if the first one can't be created, otherwise the exit status of
  # the second "ip netns add".
  setup_namespaces() {
    ip netns add "${NS_A}" || return 1
    ip netns add "${NS_B}"
  }
  # Create a veth pair with veth_a in namespace A and veth_b in namespace B,
  # assign the IPv4 and IPv6 veth addresses to both ends and bring them up.
  # Returns 1 if the veth pair can't be created, otherwise the exit status of
  # the last command.
  setup_veth() {
    ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
    ${ns_a} ip link set veth_b netns "${NS_B}"

    ${ns_a} ip addr add "${veth4_a_addr}/${veth4_mask}" dev veth_a
    ${ns_b} ip addr add "${veth4_b_addr}/${veth4_mask}" dev veth_b

    ${ns_a} ip addr add "${veth6_a_addr}/${veth6_mask}" dev veth_a
    ${ns_b} ip addr add "${veth6_b_addr}/${veth6_mask}" dev veth_b

    ${ns_a} ip link set veth_a up
    ${ns_b} ip link set veth_b up
  }
  # Create a vti (IPv4) or vti6 (IPv6) tunnel interface in each namespace,
  # assign addresses to both and bring them up.
  #
  # Arguments:
  #   $1  IP version: 6 selects link type "vti6", anything else "vti"
  #   $2  local tunnel endpoint in namespace A (remote in namespace B)
  #   $3  local tunnel endpoint in namespace B (remote in namespace A)
  #   $4  address of the tunnel interface in namespace A
  #   $5  address of the tunnel interface in namespace B
  #   $6  prefix length used for both tunnel addresses
  #
  # Returns 1 if the first tunnel can't be created.
  setup_vti() {
    proto=${1}
    veth_a_addr="${2}"
    veth_b_addr="${3}"
    vti_a_addr="${4}"
    vti_b_addr="${5}"
    vti_mask=${6}

    if [ "${proto}" -eq 6 ]; then
      vti_type="vti6"
    else
      vti_type="vti"
    fi

    ${ns_a} ip link add "vti${proto}_a" type "${vti_type}" \
      local "${veth_a_addr}" remote "${veth_b_addr}" key 10 || return 1
    ${ns_b} ip link add "vti${proto}_b" type "${vti_type}" \
      local "${veth_b_addr}" remote "${veth_a_addr}" key 10

    ${ns_a} ip addr add "${vti_a_addr}/${vti_mask}" dev "vti${proto}_a"
    ${ns_b} ip addr add "${vti_b_addr}/${vti_mask}" dev "vti${proto}_b"

    ${ns_a} ip link set "vti${proto}_a" up
    ${ns_b} ip link set "vti${proto}_b" up

    sleep 1
  }
  # Set up the IPv4 vti tunnel between the veth IPv4 addresses.
  setup_vti4() {
    setup_vti 4 "${veth4_a_addr}" "${veth4_b_addr}" \
      "${vti4_a_addr}" "${vti4_b_addr}" "${vti4_mask}"
  }
  # Set up the IPv6 vti6 tunnel between the veth IPv6 addresses.
  setup_vti6() {
    setup_vti 6 "${veth6_a_addr}" "${veth6_b_addr}" \
      "${vti6_a_addr}" "${vti6_b_addr}" "${vti6_mask}"
  }
  # Install matching ESP tunnel-mode xfrm states and policies in both
  # namespaces, for traffic between the two given endpoints.
  #
  # Arguments:
  #   $1  IP version, passed to ip as "-4" or "-6"
  #   $2  tunnel endpoint in namespace A
  #   $3  tunnel endpoint in namespace B
  #
  # Returns 1 if the first state can't be added.
  setup_xfrm() {
    proto=${1}
    veth_a_addr="${2}"
    veth_b_addr="${3}"

    # Same AEAD algorithm and key material for every state
    xfrm_aead="rfc4106(gcm(aes))"
    xfrm_key="0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f"

    ${ns_a} ip "-${proto}" xfrm state add \
      src "${veth_a_addr}" dst "${veth_b_addr}" spi 0x1000 \
      proto esp aead "${xfrm_aead}" "${xfrm_key}" 128 mode tunnel || return 1
    ${ns_a} ip "-${proto}" xfrm state add \
      src "${veth_b_addr}" dst "${veth_a_addr}" spi 0x1001 \
      proto esp aead "${xfrm_aead}" "${xfrm_key}" 128 mode tunnel
    ${ns_a} ip "-${proto}" xfrm policy add dir out mark 10 \
      tmpl src "${veth_a_addr}" dst "${veth_b_addr}" proto esp mode tunnel
    ${ns_a} ip "-${proto}" xfrm policy add dir in mark 10 \
      tmpl src "${veth_b_addr}" dst "${veth_a_addr}" proto esp mode tunnel

    ${ns_b} ip "-${proto}" xfrm state add \
      src "${veth_a_addr}" dst "${veth_b_addr}" spi 0x1000 \
      proto esp aead "${xfrm_aead}" "${xfrm_key}" 128 mode tunnel
    ${ns_b} ip "-${proto}" xfrm state add \
      src "${veth_b_addr}" dst "${veth_a_addr}" spi 0x1001 \
      proto esp aead "${xfrm_aead}" "${xfrm_key}" 128 mode tunnel
    ${ns_b} ip "-${proto}" xfrm policy add dir out mark 10 \
      tmpl src "${veth_b_addr}" dst "${veth_a_addr}" proto esp mode tunnel
    ${ns_b} ip "-${proto}" xfrm policy add dir in mark 10 \
      tmpl src "${veth_a_addr}" dst "${veth_b_addr}" proto esp mode tunnel
  }
  # Install xfrm states and policies between the veth IPv4 addresses.
  setup_xfrm4() {
    setup_xfrm 4 "${veth4_a_addr}" "${veth4_b_addr}"
  }
  # Install xfrm states and policies between the veth IPv6 addresses.
  setup_xfrm6() {
    setup_xfrm 6 "${veth6_a_addr}" "${veth6_b_addr}"
  }
  # Run the setup_<arg> function for every argument, in the given order.
  #
  # Returns ${ksft_skip} when not running as root, 1 (after printing which
  # step is not supported) as soon as a setup step fails, 0 otherwise.
  # cleanup_done is cleared before the first step runs, so that cleanup()
  # knows there is something to tear down.
  setup() {
    if [ "$(id -u)" -ne 0 ]; then
      echo " need to run as root"
      return "${ksft_skip}"
    fi

    cleanup_done=0
    for arg do
      # Plain indirect call: no eval needed to build the function name
      "setup_${arg}" || { echo " ${arg} not supported"; return 1; }
    done
  }
  # Delete both namespaces, unless cleanup_done says there is nothing left to
  # clean up. Errors from "ip netns del" are deliberately discarded: a
  # namespace might not exist if setup stopped early.
  cleanup() {
    [ "${cleanup_done}" -eq 1 ] && return
    ip netns del "${NS_A}" 2> /dev/null
    ip netns del "${NS_B}" 2> /dev/null
    cleanup_done=1
  }
  # Set the MTU of a device.
  #
  # Arguments:
  #   $1  command prefix selecting the namespace (split into words on purpose)
  #   $2  device name
  #   $3  MTU value
  #
  # The arguments are used directly, so that calling this function doesn't
  # overwrite the global variables "mtu", "dev" and "ns_cmd".
  mtu() {
    ${1} ip link set dev "${2}" mtu "${3}"
  }
  # Print the word following the first "mtu" word in ${1}.
  #
  # Returns 0 if a value was printed, 1 if ${1} has no "mtu" word followed by
  # another word (nothing is printed in that case).
  mtu_parse() {
    # Split the input into words, using the positional parameters as scratch
    # space instead of global variables
    set -- ${1}

    while [ $# -gt 1 ]; do
      if [ "${1}" = "mtu" ]; then
        # printf, not echo: the value must be printed as-is, whatever it is
        printf '%s\n' "${2}"
        return 0
      fi
      shift
    done

    return 1
  }
  # Print the "ip link show" output for a device.
  #
  # Arguments:
  #   $1  command prefix selecting the namespace (split into words on purpose)
  #   $2  device name
  link_get() {
    ${1} ip link show dev "${2}"
  }
  # Print the MTU of a device, as found by mtu_parse() in the link_get()
  # output. Prints nothing if no MTU is found.
  #
  # Arguments:
  #   $1  command prefix selecting the namespace
  #   $2  device name
  link_get_mtu() {
    mtu_parse "$(link_get "${1}" "${2}")"
  }
  # Print the "ip route get" output for a destination.
  #
  # Arguments:
  #   $1  command prefix selecting the namespace (split into words on purpose)
  #   $2  destination address
  route_get_dst_exception() {
    ${1} ip route get "${2}"
  }
  # Print the MTU found by mtu_parse() in the route_get_dst_exception() output
  # for a destination. Prints nothing if that output carries no MTU, which
  # the tests treat as "no route exception".
  #
  # Arguments:
  #   $1  command prefix selecting the namespace
  #   $2  destination address
  route_get_dst_pmtu_from_exception() {
    mtu_parse "$(route_get_dst_exception "${1}" "${2}")"
  }
  # vti4: check that no route exception is created as long as the link layer
  # MTU is not exceeded, and that one with the expected PMTU is created once
  # it is exceeded by one byte.
  #
  # Returns 0 on success, 1 on failure (details queued with err()), 2 if the
  # environment can't be set up.
  test_pmtu_vti4_exception() {
    setup namespaces veth vti4 xfrm4 || return 2

    veth_mtu=1500
    vti_mtu=$((veth_mtu - 20))

    #                                SPI   SN   IV  ICV   pad length   next header
    esp_payload_rfc4106=$((vti_mtu - 4   - 4  - 8 - 16  - 1          - 1))
    ping_payload=$((esp_payload_rfc4106 - 28))

    mtu "${ns_a}" veth_a "${veth_mtu}"
    mtu "${ns_b}" veth_b "${veth_mtu}"
    mtu "${ns_a}" vti4_a "${vti_mtu}"
    mtu "${ns_b}" vti4_b "${vti_mtu}"

    # Send DF packet without exceeding link layer MTU, check that no
    # exception is created
    ${ns_a} ping -q -M want -i 0.1 -w 2 -s "${ping_payload}" "${vti4_b_addr}" > /dev/null
    pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" "${vti4_b_addr}")"
    if [ "${pmtu}" != "" ]; then
      err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
      return 1
    fi

    # Now exceed link layer MTU by one byte, check that exception is created
    ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) "${vti4_b_addr}" > /dev/null
    pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" "${vti4_b_addr}")"
    if [ "${pmtu}" = "" ]; then
      err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
      return 1
    fi

    # ...with the right PMTU value
    if [ "${pmtu}" -ne "${esp_payload_rfc4106}" ]; then
      err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
      return 1
    fi
  }
  # vti6: check that exceeding the link layer MTU creates a route exception,
  # and that its PMTU follows decreases and increases of the tunnel MTU.
  #
  # Returns 0 on success, 1 on failure (details queued with err()), 2 if the
  # environment can't be set up.
  test_pmtu_vti6_exception() {
    setup namespaces veth vti6 xfrm6 || return 2
    fail=0

    # Create route exception by exceeding link layer MTU
    mtu "${ns_a}" veth_a 4000
    mtu "${ns_b}" veth_b 4000
    mtu "${ns_a}" vti6_a 5000
    mtu "${ns_b}" vti6_b 5000
    ${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 "${vti6_b_addr}" > /dev/null

    # Check that exception was created
    pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" "${vti6_b_addr}")"
    if [ "${pmtu}" = "" ]; then
      err " tunnel exceeding link layer MTU didn't create route exception"
      return 1
    fi

    # Decrease tunnel MTU, check for PMTU decrease in route exception.
    # The value is compared as a string: a missing PMTU would make a numeric
    # test fail with an error, which the if would treat as "condition false"
    # and let the check pass.
    mtu "${ns_a}" vti6_a 3000
    pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" "${vti6_b_addr}")"
    if [ "${pmtu}" != "3000" ]; then
      err " decreasing tunnel MTU didn't decrease route exception PMTU"
      fail=1
    fi

    # Increase tunnel MTU, check for PMTU increase in route exception
    mtu "${ns_a}" vti6_a 9000
    pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" "${vti6_b_addr}")"
    if [ "${pmtu}" != "9000" ]; then
      err " increasing tunnel MTU didn't increase route exception PMTU"
      fail=1
    fi

    return ${fail}
  }
  # vti4: check the MTU assigned by default to the tunnel interface.
  #
  # Returns 0 on success, 1 on failure (details queued with err()), 2 if the
  # environment can't be set up.
  test_pmtu_vti4_default_mtu() {
    setup namespaces veth vti4 || return 2

    # Check that MTU of vti device is MTU of veth minus IPv4 header length
    veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
    vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
    if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
      err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
      return 1
    fi
  }
  # vti6: check the MTU assigned by default to the tunnel interface.
  #
  # Returns 0 on success, 1 on failure (details queued with err()), 2 if the
  # environment can't be set up.
  test_pmtu_vti6_default_mtu() {
    setup namespaces veth vti6 || return 2

    # Check that MTU of vti device is MTU of veth minus IPv6 header length
    veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
    vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
    if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
      err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
      return 1
    fi
  }
  # vti4: check that an MTU passed at link creation is applied, and that no
  # link ends up with an MTU outside the valid range.
  #
  # Returns 0 on success, 1 on failure (details queued with err()), 2 if the
  # namespaces can't be set up or a vti link can't be created at all.
  test_pmtu_vti4_link_add_mtu() {
    setup namespaces || return 2

    if ! ${ns_a} ip link add vti4_a type vti local "${veth4_a_addr}" remote "${veth4_b_addr}" key 10; then
      err " vti not supported"
      return 2
    fi
    ${ns_a} ip link del vti4_a

    fail=0

    min=68
    max=$((65535 - 20))
    # Check invalid values first
    for v in $((min - 1)) $((max + 1)); do
      # This can fail, or MTU can be adjusted to a proper value
      ${ns_a} ip link add vti4_a mtu "${v}" type vti local "${veth4_a_addr}" remote "${veth4_b_addr}" key 10 2>/dev/null || continue
      mtu="$(link_get_mtu "${ns_a}" vti4_a)"
      if [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
        err " vti tunnel created with invalid MTU ${mtu}"
        fail=1
      fi
      ${ns_a} ip link del vti4_a
    done

    # Now check valid values
    for v in "${min}" 1300 "${max}"; do
      ${ns_a} ip link add vti4_a mtu "${v}" type vti local "${veth4_a_addr}" remote "${veth4_b_addr}" key 10
      mtu="$(link_get_mtu "${ns_a}" vti4_a)"
      ${ns_a} ip link del vti4_a
      if [ "${mtu}" != "${v}" ]; then
        err " vti MTU ${mtu} doesn't match configured value ${v}"
        fail=1
      fi
    done

    return ${fail}
  }
  # vti6: check that an MTU passed at link creation is applied, and that no
  # link ends up with an MTU outside the valid range.
  #
  # Returns 0 on success, 1 on failure (details queued with err()), 2 if the
  # namespaces can't be set up or a vti6 link can't be created at all.
  test_pmtu_vti6_link_add_mtu() {
    setup namespaces || return 2

    if ! ${ns_a} ip link add vti6_a type vti6 local "${veth6_a_addr}" remote "${veth6_b_addr}" key 10; then
      err " vti6 not supported"
      return 2
    fi
    ${ns_a} ip link del vti6_a

    fail=0

    min=68 # vti6 can carry IPv4 packets too
    max=$((65535 - 40))
    # Check invalid values first
    for v in $((min - 1)) $((max + 1)); do
      # This can fail, or MTU can be adjusted to a proper value
      ${ns_a} ip link add vti6_a mtu "${v}" type vti6 local "${veth6_a_addr}" remote "${veth6_b_addr}" key 10 2>/dev/null || continue
      mtu="$(link_get_mtu "${ns_a}" vti6_a)"
      if [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
        # Report the MTU that was actually set, as the vti4 test does
        err " vti6 tunnel created with invalid MTU ${mtu}"
        fail=1
      fi
      ${ns_a} ip link del vti6_a
    done

    # Now check valid values
    for v in "${min}" 1280 1300 "${max}"; do
      ${ns_a} ip link add vti6_a mtu "${v}" type vti6 local "${veth6_a_addr}" remote "${veth6_b_addr}" key 10
      mtu="$(link_get_mtu "${ns_a}" vti6_a)"
      ${ns_a} ip link del vti6_a
      if [ "${mtu}" != "${v}" ]; then
        err " vti6 MTU ${mtu} doesn't match configured value ${v}"
        fail=1
      fi
    done

    return ${fail}
  }
  # vti6: check that a configured MTU is used on link creation and on link
  # changes, and that the MTU is calculated from the underlying device when
  # it is not passed.
  #
  # MTU values are compared as strings: an empty value (MTU could not be
  # read) would make a numeric test fail with an error, which the if would
  # treat as "condition false" and let the check pass.
  #
  # Returns 0 on success, 1 on failure (details queued with err()), 2 if the
  # namespaces or the dummy device can't be set up.
  test_pmtu_vti6_link_change_mtu() {
    setup namespaces || return 2

    if ! ${ns_a} ip link add dummy0 mtu 1500 type dummy; then
      err " dummy not supported"
      return 2
    fi
    ${ns_a} ip link add dummy1 mtu 3000 type dummy
    ${ns_a} ip link set dummy0 up
    ${ns_a} ip link set dummy1 up

    ${ns_a} ip addr add "${dummy6_0_addr}/${dummy6_mask}" dev dummy0
    ${ns_a} ip addr add "${dummy6_1_addr}/${dummy6_mask}" dev dummy1

    fail=0

    # Create vti6 interface bound to device, passing MTU, check it
    ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote "${dummy6_0_addr}" local "${dummy6_0_addr}"
    mtu="$(link_get_mtu "${ns_a}" vti6_a)"
    if [ "${mtu}" != "1300" ]; then
      err " vti6 MTU ${mtu} doesn't match configured value 1300"
      fail=1
    fi

    # Move to another device with different MTU, without passing MTU, check
    # MTU is adjusted
    ${ns_a} ip link set vti6_a type vti6 remote "${dummy6_1_addr}" local "${dummy6_1_addr}"
    mtu="$(link_get_mtu "${ns_a}" vti6_a)"
    if [ "${mtu}" != "$((3000 - 40))" ]; then
      err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
      fail=1
    fi

    # Move it back, passing MTU, check MTU is not overridden
    ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote "${dummy6_0_addr}" local "${dummy6_0_addr}"
    mtu="$(link_get_mtu "${ns_a}" vti6_a)"
    if [ "${mtu}" != "1280" ]; then
      err " vti6 MTU ${mtu} doesn't match configured value 1280"
      fail=1
    fi

    return ${fail}
  }
  # Run cleanup() when the script exits
  trap cleanup EXIT

  exitcode=0

  # Split the test list on newlines only, so that every iteration gets one
  # complete "<name> <description>" line
  IFS="
"
  for t in ${tests}; do
    # Each test runs in its own subshell, so that the variables it changes
    # don't leak into the next test
    (
      # Back to default word splitting for the test itself
      unset IFS

      # The first word of the line is the test name, the remainder is the
      # description. Pathname expansion is disabled while the line is split.
      set -f
      set -- ${t}
      set +f
      [ $# -gt 0 ] || exit 0
      name="${1}"
      shift
      desc="${*}"

      "test_${name}"
      ret=$?
      cleanup

      if [ ${ret} -eq 0 ]; then
        printf "TEST: %-60s [ OK ]\n" "${desc}"
      elif [ ${ret} -eq 2 ]; then
        printf "TEST: %-60s [SKIP]\n" "${desc}"
        err_flush
      else
        # 1 is a regular failure; any other status (for instance a missing
        # test function) is unexpected, and reported as a failure too
        printf "TEST: %-60s [FAIL]\n" "${desc}"
        err_flush
        exit 1
      fi
    )
    [ $? -ne 0 ] && exitcode=1
  done

  exit ${exitcode}