/* net/mptcp/sockopt.c */
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2021, Red Hat.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <net/sock.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"

#define MIN_INFO_OPTLEN_SIZE		16
#define MIN_FULL_INFO_OPTLEN_SIZE	40
  16. static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
  17. {
  18. msk_owned_by_me(msk);
  19. if (likely(!__mptcp_check_fallback(msk)))
  20. return NULL;
  21. return msk->first;
  22. }
/* Compute the initial sockopt sequence value for @sk: the socket state
 * is stored in the high bits, the 24 low bits hold the running counter.
 */
static u32 sockopt_seq_reset(const struct sock *sk)
{
	sock_owned_by_me(sk);

	/* Highbits contain state. Allows to distinguish sockopt_seq
	 * of listener and established:
	 * s0 = new_listener()
	 * sockopt(s0) - seq is 1
	 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
	 * sockopt(s0) - seq increments to 2 on s0
	 * sockopt(s1) // seq increments to 2 on s1 (different option)
	 * new ssk completes join, inherits options from s0 // seq 2
	 * Needs sync from mptcp join logic, but ssk->seq == msk->seq
	 *
	 * Set High order bits to sk_state so ssk->seq == msk->seq test
	 * will fail.
	 */
	return (u32)sk->sk_state << 24u;
}
  41. static void sockopt_seq_inc(struct mptcp_sock *msk)
  42. {
  43. u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff;
  44. msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq;
  45. }
  46. static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
  47. unsigned int optlen, int *val)
  48. {
  49. if (optlen < sizeof(int))
  50. return -EINVAL;
  51. if (copy_from_sockptr(val, optval, sizeof(*val)))
  52. return -EFAULT;
  53. return 0;
  54. }
/* Mirror an already-applied SOL_SOCKET integer option from the msk onto
 * every established subflow, and bump the sockopt sequence so subflows
 * created later can be re-synced by the join path.
 */
static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;

	lock_sock(sk);
	sockopt_seq_inc(msk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow = lock_sock_fast(ssk);

		switch (optname) {
		case SO_DEBUG:
			sock_valbool_flag(ssk, SOCK_DBG, !!val);
			break;
		case SO_KEEPALIVE:
			if (ssk->sk_prot->keepalive)
				ssk->sk_prot->keepalive(ssk, !!val);
			sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
			break;
		case SO_PRIORITY:
			WRITE_ONCE(ssk->sk_priority, val);
			break;
		case SO_SNDBUF:
		case SO_SNDBUFFORCE:
			/* copy the (possibly clamped) msk value, not raw @val */
			ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
			mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
			break;
		case SO_RCVBUF:
		case SO_RCVBUFFORCE:
			ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
			break;
		case SO_MARK:
			if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
				WRITE_ONCE(ssk->sk_mark, sk->sk_mark);
				/* mark affects routing: drop the cached route */
				sk_dst_reset(ssk);
			}
			break;
		case SO_INCOMING_CPU:
			WRITE_ONCE(ssk->sk_incoming_cpu, val);
			break;
		}

		subflow->setsockopt_seq = msk->setsockopt_seq;
		unlock_sock_fast(ssk, slow);
	}

	release_sock(sk);
}
  102. static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val)
  103. {
  104. sockptr_t optval = KERNEL_SOCKPTR(&val);
  105. struct sock *sk = (struct sock *)msk;
  106. int ret;
  107. ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
  108. optval, sizeof(val));
  109. if (ret)
  110. return ret;
  111. mptcp_sol_socket_sync_intval(msk, optname, val);
  112. return 0;
  113. }
/* SO_INCOMING_CPU: store the value on the msk and mirror it on every
 * subflow via the generic sync helper.
 */
static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
{
	struct sock *sk = (struct sock *)msk;

	WRITE_ONCE(sk->sk_incoming_cpu, val);

	mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
}
  120. static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val)
  121. {
  122. sockptr_t optval = KERNEL_SOCKPTR(&val);
  123. struct mptcp_subflow_context *subflow;
  124. struct sock *sk = (struct sock *)msk;
  125. int ret;
  126. ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
  127. optval, sizeof(val));
  128. if (ret)
  129. return ret;
  130. lock_sock(sk);
  131. mptcp_for_each_subflow(msk, subflow) {
  132. struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
  133. bool slow = lock_sock_fast(ssk);
  134. sock_set_timestamp(sk, optname, !!val);
  135. unlock_sock_fast(ssk, slow);
  136. }
  137. release_sock(sk);
  138. return 0;
  139. }
/* Dispatch SOL_SOCKET options that carry a plain integer payload to the
 * appropriate msk-and-subflows handler.
 */
static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
					   sockptr_t optval,
					   unsigned int optlen)
{
	int val, ret;

	ret = mptcp_get_int_option(msk, optval, optlen, &val);
	if (ret)
		return ret;

	switch (optname) {
	case SO_KEEPALIVE:
	case SO_DEBUG:
	case SO_MARK:
	case SO_PRIORITY:
	case SO_SNDBUF:
	case SO_SNDBUFFORCE:
	case SO_RCVBUF:
	case SO_RCVBUFFORCE:
		return mptcp_sol_socket_intval(msk, optname, val);
	case SO_INCOMING_CPU:
		mptcp_so_incoming_cpu(msk, val);
		return 0;
	case SO_TIMESTAMP_OLD:
	case SO_TIMESTAMP_NEW:
	case SO_TIMESTAMPNS_OLD:
	case SO_TIMESTAMPNS_NEW:
		return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
	}

	return -ENOPROTOOPT;
}
  169. static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
  170. int optname,
  171. sockptr_t optval,
  172. unsigned int optlen)
  173. {
  174. struct mptcp_subflow_context *subflow;
  175. struct sock *sk = (struct sock *)msk;
  176. struct so_timestamping timestamping;
  177. int ret;
  178. if (optlen == sizeof(timestamping)) {
  179. if (copy_from_sockptr(&timestamping, optval,
  180. sizeof(timestamping)))
  181. return -EFAULT;
  182. } else if (optlen == sizeof(int)) {
  183. memset(&timestamping, 0, sizeof(timestamping));
  184. if (copy_from_sockptr(&timestamping.flags, optval, sizeof(int)))
  185. return -EFAULT;
  186. } else {
  187. return -EINVAL;
  188. }
  189. ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
  190. KERNEL_SOCKPTR(&timestamping),
  191. sizeof(timestamping));
  192. if (ret)
  193. return ret;
  194. lock_sock(sk);
  195. mptcp_for_each_subflow(msk, subflow) {
  196. struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
  197. bool slow = lock_sock_fast(ssk);
  198. sock_set_timestamping(sk, optname, timestamping);
  199. unlock_sock_fast(ssk, slow);
  200. }
  201. release_sock(sk);
  202. return 0;
  203. }
/* SO_LINGER: apply on the msk, then copy the resulting linger state
 * (flag and timeout) to every subflow.
 */
static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
					      unsigned int optlen)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	struct linger ling;
	sockptr_t kopt;
	int ret;

	if (optlen < sizeof(ling))
		return -EINVAL;

	if (copy_from_sockptr(&ling, optval, sizeof(ling)))
		return -EFAULT;

	kopt = KERNEL_SOCKPTR(&ling);
	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling));
	if (ret)
		return ret;

	lock_sock(sk);
	sockopt_seq_inc(msk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow = lock_sock_fast(ssk);

		if (!ling.l_onoff) {
			sock_reset_flag(ssk, SOCK_LINGER);
		} else {
			/* msk lingertime was already converted/clamped by
			 * sock_setsockopt() above, reuse it verbatim
			 */
			ssk->sk_lingertime = sk->sk_lingertime;
			sock_set_flag(ssk, SOCK_LINGER);
		}

		subflow->setsockopt_seq = msk->setsockopt_seq;
		unlock_sock_fast(ssk, slow);
	}

	release_sock(sk);
	return 0;
}
/* Top-level SOL_SOCKET dispatcher for MPTCP sockets: some options are
 * applied to the first subflow only (pre-connect), some to msk plus all
 * subflows, some to the msk only, and a few are accepted as no-ops.
 * Unsupported options return -EOPNOTSUPP.
 */
static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct sock *ssk;
	int ret;

	switch (optname) {
	case SO_REUSEPORT:
	case SO_REUSEADDR:
	case SO_BINDTODEVICE:
	case SO_BINDTOIFINDEX:
		/* these act on the initial subflow, then the result is
		 * copied back to the msk
		 */
		lock_sock(sk);
		ssk = __mptcp_nmpc_sk(msk);
		if (IS_ERR(ssk)) {
			release_sock(sk);
			return PTR_ERR(ssk);
		}

		ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen);
		if (ret == 0) {
			if (optname == SO_REUSEPORT)
				sk->sk_reuseport = ssk->sk_reuseport;
			else if (optname == SO_REUSEADDR)
				sk->sk_reuse = ssk->sk_reuse;
			else if (optname == SO_BINDTODEVICE)
				sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
			else if (optname == SO_BINDTOIFINDEX)
				sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
		}
		release_sock(sk);
		return ret;
	case SO_KEEPALIVE:
	case SO_PRIORITY:
	case SO_SNDBUF:
	case SO_SNDBUFFORCE:
	case SO_RCVBUF:
	case SO_RCVBUFFORCE:
	case SO_MARK:
	case SO_INCOMING_CPU:
	case SO_DEBUG:
	case SO_TIMESTAMP_OLD:
	case SO_TIMESTAMP_NEW:
	case SO_TIMESTAMPNS_OLD:
	case SO_TIMESTAMPNS_NEW:
		return mptcp_setsockopt_sol_socket_int(msk, optname, optval,
						       optlen);
	case SO_TIMESTAMPING_OLD:
	case SO_TIMESTAMPING_NEW:
		return mptcp_setsockopt_sol_socket_timestamping(msk, optname,
								optval, optlen);
	case SO_LINGER:
		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
	case SO_RCVLOWAT:
	case SO_RCVTIMEO_OLD:
	case SO_RCVTIMEO_NEW:
	case SO_SNDTIMEO_OLD:
	case SO_SNDTIMEO_NEW:
	case SO_BUSY_POLL:
	case SO_PREFER_BUSY_POLL:
	case SO_BUSY_POLL_BUDGET:
		/* No need to copy: only relevant for msk */
		return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
	case SO_NO_CHECK:
	case SO_DONTROUTE:
	case SO_BROADCAST:
	case SO_BSDCOMPAT:
	case SO_PASSCRED:
	case SO_PASSPIDFD:
	case SO_PASSSEC:
	case SO_RXQ_OVFL:
	case SO_WIFI_STATUS:
	case SO_NOFCS:
	case SO_SELECT_ERR_QUEUE:
		/* accepted but irrelevant for MPTCP: silently succeed */
		return 0;
	}

	/* SO_OOBINLINE is not supported, let's avoid the related mess
	 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
	 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
	 * we must be careful with subflows
	 *
	 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks
	 * explicitly the sk_protocol field
	 *
	 * SO_PEEK_OFF is unsupported, as it is for plain TCP
	 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows
	 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
	 * but likely needs careful design
	 *
	 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg
	 * SO_TXTIME is currently unsupported
	 */
	return -EOPNOTSUPP;
}
/* SOL_IPV6 handling: the supported options act on the initial subflow
 * (pre-connect) and the resulting state is mirrored back onto the msk.
 */
static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
			       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	int ret = -EOPNOTSUPP;
	struct sock *ssk;

	switch (optname) {
	case IPV6_V6ONLY:
	case IPV6_TRANSPARENT:
	case IPV6_FREEBIND:
		lock_sock(sk);
		ssk = __mptcp_nmpc_sk(msk);
		if (IS_ERR(ssk)) {
			release_sock(sk);
			return PTR_ERR(ssk);
		}

		ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen);
		if (ret != 0) {
			release_sock(sk);
			return ret;
		}

		sockopt_seq_inc(msk);

		/* copy the new subflow state back to the msk */
		switch (optname) {
		case IPV6_V6ONLY:
			sk->sk_ipv6only = ssk->sk_ipv6only;
			break;
		case IPV6_TRANSPARENT:
			inet_assign_bit(TRANSPARENT, sk,
					inet_test_bit(TRANSPARENT, ssk));
			break;
		case IPV6_FREEBIND:
			inet_assign_bit(FREEBIND, sk,
					inet_test_bit(FREEBIND, ssk));
			break;
		}

		release_sock(sk);
		break;
	}

	return ret;
}
/* Allow-list of (level, optname) pairs MPTCP knows how to handle (or
 * safely ignore). Anything not listed is rejected early by
 * mptcp_setsockopt() with -ENOPROTOOPT.
 */
static bool mptcp_supported_sockopt(int level, int optname)
{
	if (level == SOL_IP) {
		switch (optname) {
		/* should work fine */
		case IP_FREEBIND:
		case IP_TRANSPARENT:
		case IP_BIND_ADDRESS_NO_PORT:
		case IP_LOCAL_PORT_RANGE:

		/* the following are control cmsg related */
		case IP_PKTINFO:
		case IP_RECVTTL:
		case IP_RECVTOS:
		case IP_RECVOPTS:
		case IP_RETOPTS:
		case IP_PASSSEC:
		case IP_RECVORIGDSTADDR:
		case IP_CHECKSUM:
		case IP_RECVFRAGSIZE:

		/* common stuff that need some love */
		case IP_TOS:
		case IP_TTL:
		case IP_MTU_DISCOVER:
		case IP_RECVERR:

		/* possibly less common may deserve some love */
		case IP_MINTTL:

		/* the following is apparently a no-op for plain TCP */
		case IP_RECVERR_RFC4884:
			return true;
		}

		/* IP_OPTIONS is not supported, needs subflow care */
		/* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */
		/* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF,
		 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP,
		 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE,
		 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP,
		 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE,
		 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal
		 * with mcast stuff
		 */
		/* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */
		return false;
	}
	if (level == SOL_IPV6) {
		switch (optname) {
		case IPV6_V6ONLY:

		/* the following are control cmsg related */
		case IPV6_RECVPKTINFO:
		case IPV6_2292PKTINFO:
		case IPV6_RECVHOPLIMIT:
		case IPV6_2292HOPLIMIT:
		case IPV6_RECVRTHDR:
		case IPV6_2292RTHDR:
		case IPV6_RECVHOPOPTS:
		case IPV6_2292HOPOPTS:
		case IPV6_RECVDSTOPTS:
		case IPV6_2292DSTOPTS:
		case IPV6_RECVTCLASS:
		case IPV6_FLOWINFO:
		case IPV6_RECVPATHMTU:
		case IPV6_RECVORIGDSTADDR:
		case IPV6_RECVFRAGSIZE:

		/* the following ones need some love but are quite common */
		case IPV6_TCLASS:
		case IPV6_TRANSPARENT:
		case IPV6_FREEBIND:
		case IPV6_PKTINFO:
		case IPV6_2292PKTOPTIONS:
		case IPV6_UNICAST_HOPS:
		case IPV6_MTU_DISCOVER:
		case IPV6_MTU:
		case IPV6_RECVERR:
		case IPV6_FLOWINFO_SEND:
		case IPV6_FLOWLABEL_MGR:
		case IPV6_MINHOPCOUNT:
		case IPV6_DONTFRAG:
		case IPV6_AUTOFLOWLABEL:

		/* the following one is a no-op for plain TCP */
		case IPV6_RECVERR_RFC4884:
			return true;
		}

		/* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are
		 * not supported
		 */
		/* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF,
		 * IPV6_MULTICAST_IF, IPV6_ADDRFORM,
		 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST,
		 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP,
		 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP,
		 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER
		 * are not supported better not deal with mcast
		 */
		/* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */
		/* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */
		/* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */
		return false;
	}
	if (level == SOL_TCP) {
		switch (optname) {
		/* the following are no-op or should work just fine */
		case TCP_THIN_DUPACK:
		case TCP_DEFER_ACCEPT:

		/* the following need some love */
		case TCP_MAXSEG:
		case TCP_NODELAY:
		case TCP_THIN_LINEAR_TIMEOUTS:
		case TCP_CONGESTION:
		case TCP_CORK:
		case TCP_KEEPIDLE:
		case TCP_KEEPINTVL:
		case TCP_KEEPCNT:
		case TCP_SYNCNT:
		case TCP_SAVE_SYN:
		case TCP_LINGER2:
		case TCP_WINDOW_CLAMP:
		case TCP_QUICKACK:
		case TCP_USER_TIMEOUT:
		case TCP_TIMESTAMP:
		case TCP_NOTSENT_LOWAT:
		case TCP_TX_DELAY:
		case TCP_INQ:
		case TCP_FASTOPEN:
		case TCP_FASTOPEN_CONNECT:
		case TCP_FASTOPEN_KEY:
		case TCP_FASTOPEN_NO_COOKIE:
			return true;
		}

		/* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */

		/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
		 * TCP_REPAIR_WINDOW are not supported, better avoid this mess
		 */
	}
	return false;
}
/* TCP_CONGESTION: set the congestion control algorithm on every subflow;
 * on full success, record the name on the msk so new subflows inherit it.
 * Returns the first subflow error, if any.
 */
static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval,
					       unsigned int optlen)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	char name[TCP_CA_NAME_MAX];
	bool cap_net_admin;
	int ret;

	if (optlen < 1)
		return -EINVAL;

	/* copy at most TCP_CA_NAME_MAX - 1 chars, then NUL-terminate
	 * explicitly: strncpy_from_sockptr() returns the copied length
	 */
	ret = strncpy_from_sockptr(name, optval,
				   min_t(long, TCP_CA_NAME_MAX - 1, optlen));
	if (ret < 0)
		return -EFAULT;

	name[ret] = 0;

	cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN);

	ret = 0;
	lock_sock(sk);
	sockopt_seq_inc(msk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		int err;

		lock_sock(ssk);
		err = tcp_set_congestion_control(ssk, name, true, cap_net_admin);
		/* keep only the first error, but keep going on the others */
		if (err < 0 && ret == 0)
			ret = err;
		subflow->setsockopt_seq = msk->setsockopt_seq;
		release_sock(ssk);
	}

	if (ret == 0)
		strscpy(msk->ca_name, name, sizeof(msk->ca_name));

	release_sock(sk);
	return ret;
}
/* Apply @set_val(ssk, val) on every subflow; if all succeed, store @val
 * in the msk-level field @msk_val and bump the sockopt sequence.
 * Returns the first subflow error, 0 otherwise.
 *
 * NOTE(review): @max is currently unused here; range validation appears
 * to be delegated to @set_val — confirm this is intended.
 */
static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max,
				      int (*set_val)(struct sock *, int),
				      int *msk_val, int val)
{
	struct mptcp_subflow_context *subflow;
	int err = 0;

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		int ret;

		lock_sock(ssk);
		ret = set_val(ssk, val);
		err = err ? : ret;
		release_sock(ssk);
	}

	if (!err) {
		*msk_val = val;
		sockopt_seq_inc(msk);
	}

	return err;
}
/* TCP_CORK: record the flag on the msk and push it to every subflow.
 * Caller holds the msk socket lock.
 */
static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;

	sockopt_seq_inc(msk);
	msk->cork = !!val;
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		lock_sock(ssk);
		__tcp_sock_set_cork(ssk, !!val);
		release_sock(ssk);
	}
	/* un-corking: kick pending transmissions */
	if (!val)
		mptcp_check_and_set_pending(sk);

	return 0;
}
/* TCP_NODELAY: record the flag on the msk and push it to every subflow.
 * Caller holds the msk socket lock.
 */
static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;

	sockopt_seq_inc(msk);
	msk->nodelay = !!val;
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		lock_sock(ssk);
		__tcp_sock_set_nodelay(ssk, !!val);
		release_sock(ssk);
	}
	/* enabling nodelay: flush any delayed data */
	if (val)
		mptcp_check_and_set_pending(sk);

	return 0;
}
/* SOL_IP boolean/range options: apply on the msk first, then copy the
 * resulting state to the initial subflow (pre-connect only).
 */
static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct sock *ssk;
	int err;

	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
	if (err != 0)
		return err;

	lock_sock(sk);

	ssk = __mptcp_nmpc_sk(msk);
	if (IS_ERR(ssk)) {
		release_sock(sk);
		return PTR_ERR(ssk);
	}

	switch (optname) {
	case IP_FREEBIND:
		inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
		break;
	case IP_TRANSPARENT:
		inet_assign_bit(TRANSPARENT, ssk,
				inet_test_bit(TRANSPARENT, sk));
		break;
	case IP_BIND_ADDRESS_NO_PORT:
		inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk,
				inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
		break;
	case IP_LOCAL_PORT_RANGE:
		WRITE_ONCE(inet_sk(ssk)->local_port_range,
			   READ_ONCE(inet_sk(sk)->local_port_range));
		break;
	default:
		/* caller is expected to pass only the options above */
		release_sock(sk);
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}

	sockopt_seq_inc(msk);
	release_sock(sk);
	return 0;
}
/* IP_TOS: apply on the msk, then propagate the resulting (possibly
 * masked) TOS value to every subflow.
 */
static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	int err, val;

	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);

	if (err != 0)
		return err;

	lock_sock(sk);
	sockopt_seq_inc(msk);
	/* read back the value actually stored on the msk */
	val = READ_ONCE(inet_sk(sk)->tos);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow;

		slow = lock_sock_fast(ssk);
		__ip_sock_set_tos(ssk, val);
		unlock_sock_fast(ssk, slow);
	}
	release_sock(sk);

	return 0;
}
  651. static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
  652. sockptr_t optval, unsigned int optlen)
  653. {
  654. switch (optname) {
  655. case IP_FREEBIND:
  656. case IP_TRANSPARENT:
  657. case IP_BIND_ADDRESS_NO_PORT:
  658. case IP_LOCAL_PORT_RANGE:
  659. return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
  660. case IP_TOS:
  661. return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
  662. }
  663. return -EOPNOTSUPP;
  664. }
  665. static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
  666. sockptr_t optval, unsigned int optlen)
  667. {
  668. struct sock *sk = (struct sock *)msk;
  669. struct sock *ssk;
  670. int ret;
  671. /* Limit to first subflow, before the connection establishment */
  672. lock_sock(sk);
  673. ssk = __mptcp_nmpc_sk(msk);
  674. if (IS_ERR(ssk)) {
  675. ret = PTR_ERR(ssk);
  676. goto unlock;
  677. }
  678. ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
  679. unlock:
  680. release_sock(sk);
  681. return ret;
  682. }
/* SOL_TCP dispatcher: a few options are delegated to the first subflow,
 * the rest take an int payload and are applied to the msk and all
 * subflows under the msk socket lock.
 */
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
				    sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (void *)msk;
	int ret, val;

	switch (optname) {
	case TCP_ULP:
		/* setting another ULP on a subflow would break MPTCP */
		return -EOPNOTSUPP;
	case TCP_CONGESTION:
		return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
	case TCP_DEFER_ACCEPT:
		/* See tcp.c: TCP_DEFER_ACCEPT does not fail */
		mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
		return 0;
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname,
						      optval, optlen);
	}

	ret = mptcp_get_int_option(msk, optval, optlen, &val);
	if (ret)
		return ret;

	lock_sock(sk);
	switch (optname) {
	case TCP_INQ:
		if (val < 0 || val > 1)
			ret = -EINVAL;
		else
			msk->recvmsg_inq = !!val;
		break;
	case TCP_NOTSENT_LOWAT:
		WRITE_ONCE(msk->notsent_lowat, val);
		/* the threshold changed, writers may now make progress */
		mptcp_write_space(sk);
		break;
	case TCP_CORK:
		ret = __mptcp_setsockopt_sol_tcp_cork(msk, val);
		break;
	case TCP_NODELAY:
		ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val);
		break;
	case TCP_KEEPIDLE:
		ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE,
						 &tcp_sock_set_keepidle_locked,
						 &msk->keepalive_idle, val);
		break;
	case TCP_KEEPINTVL:
		ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL,
						 &tcp_sock_set_keepintvl,
						 &msk->keepalive_intvl, val);
		break;
	case TCP_KEEPCNT:
		ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT,
						 &tcp_sock_set_keepcnt,
						 &msk->keepalive_cnt,
						 val);
		break;
	default:
		ret = -ENOPROTOOPT;
	}

	release_sock(sk);
	return ret;
}
/* Entry point for setsockopt() on an MPTCP socket: filter unsupported
 * options, hand everything to the remaining subflow after a TCP
 * fallback, otherwise dispatch by level.
 */
int mptcp_setsockopt(struct sock *sk, int level, int optname,
		     sockptr_t optval, unsigned int optlen)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *ssk;

	pr_debug("msk=%p\n", msk);

	if (level == SOL_SOCKET)
		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);

	if (!mptcp_supported_sockopt(level, optname))
		return -ENOPROTOOPT;

	/* @@ the meaning of setsockopt() when the socket is connected and
	 * there are multiple subflows is not yet defined. It is up to the
	 * MPTCP-level socket to configure the subflows until the subflow
	 * is in TCP fallback, when TCP socket options are passed through
	 * to the one remaining subflow.
	 */
	lock_sock(sk);
	ssk = __mptcp_tcp_fallback(msk);
	release_sock(sk);
	if (ssk)
		return tcp_setsockopt(ssk, level, optname, optval, optlen);

	if (level == SOL_IP)
		return mptcp_setsockopt_v4(msk, optname, optval, optlen);

	if (level == SOL_IPV6)
		return mptcp_setsockopt_v6(msk, optname, optval, optlen);

	if (level == SOL_TCP)
		return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen);

	return -EOPNOTSUPP;
}
  776. static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
  777. char __user *optval, int __user *optlen)
  778. {
  779. struct sock *sk = (struct sock *)msk;
  780. struct sock *ssk;
  781. int ret;
  782. lock_sock(sk);
  783. ssk = msk->first;
  784. if (ssk) {
  785. ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
  786. goto out;
  787. }
  788. ssk = __mptcp_nmpc_sk(msk);
  789. if (IS_ERR(ssk)) {
  790. ret = PTR_ERR(ssk);
  791. goto out;
  792. }
  793. ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
  794. out:
  795. release_sock(sk);
  796. return ret;
  797. }
/* Fill @info with a snapshot of the MPTCP-level state of @msk.
 *
 * Fields are gathered under progressively stronger protection: lockless
 * READ_ONCE() for the PM counters, the fast socket lock for the tx/token
 * state and the msk data lock for the rx/ack related fields.
 */
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
	struct sock *sk = (struct sock *)msk;
	u32 flags = 0;
	bool slow;
	u32 now;

	memset(info, 0, sizeof(*info));

	info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
	info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);

	/* a listener socket carries no connection-level state: stop here */
	if (inet_sk_state_load(sk) == TCP_LISTEN)
		return;

	/* The following limits only make sense for the in-kernel PM */
	if (mptcp_pm_is_kernel(msk)) {
		info->mptcpi_subflows_max =
			mptcp_pm_get_subflows_max(msk);
		info->mptcpi_add_addr_signal_max =
			mptcp_pm_get_add_addr_signal_max(msk);
		info->mptcpi_add_addr_accepted_max =
			mptcp_pm_get_add_addr_accept_max(msk);
		info->mptcpi_local_addr_max =
			mptcp_pm_get_local_addr_max(msk);
	}

	if (__mptcp_check_fallback(msk))
		flags |= MPTCP_INFO_FLAG_FALLBACK;
	if (READ_ONCE(msk->can_ack))
		flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
	info->mptcpi_flags = flags;

	/* tx-path fields are consistent only under the (fast) socket lock */
	slow = lock_sock_fast(sk);
	info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
	info->mptcpi_token = msk->token;
	info->mptcpi_write_seq = msk->write_seq;
	info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
	info->mptcpi_bytes_sent = msk->bytes_sent;
	info->mptcpi_bytes_received = msk->bytes_received;
	info->mptcpi_bytes_retrans = msk->bytes_retrans;
	info->mptcpi_subflows_total = info->mptcpi_subflows +
		__mptcp_has_initial_subflow(msk);
	now = tcp_jiffies32;
	info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent);
	info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv);
	unlock_sock_fast(sk, slow);

	/* rx-path fields are updated under the msk data lock */
	mptcp_data_lock(sk);
	info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv);
	info->mptcpi_snd_una = msk->snd_una;
	info->mptcpi_rcv_nxt = msk->ack_seq;
	info->mptcpi_bytes_acked = msk->bytes_acked;
	mptcp_data_unlock(sk);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
  849. static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen)
  850. {
  851. struct mptcp_info m_info;
  852. int len;
  853. if (get_user(len, optlen))
  854. return -EFAULT;
  855. /* When used only to check if a fallback to TCP happened. */
  856. if (len == 0)
  857. return 0;
  858. len = min_t(unsigned int, len, sizeof(struct mptcp_info));
  859. mptcp_diag_fill_info(msk, &m_info);
  860. if (put_user(len, optlen))
  861. return -EFAULT;
  862. if (copy_to_user(optval, &m_info, len))
  863. return -EFAULT;
  864. return 0;
  865. }
  866. static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
  867. char __user *optval,
  868. u32 copied,
  869. int __user *optlen)
  870. {
  871. u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd));
  872. if (copied)
  873. copied += sfd->size_subflow_data;
  874. else
  875. copied = copylen;
  876. if (put_user(copied, optlen))
  877. return -EFAULT;
  878. if (copy_to_user(optval, sfd, copylen))
  879. return -EFAULT;
  880. return 0;
  881. }
/* Read and validate the mptcp_subflow_data header from user space.
 *
 * On success returns the number of bytes available for the per-subflow
 * payload (may be 0); on failure returns a negative errno.  The check
 * order determines which error user space observes, so keep it stable.
 */
static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
				  char __user *optval,
				  int __user *optlen)
{
	int len, copylen;

	if (get_user(len, optlen))
		return -EFAULT;

	/* if mptcp_subflow_data size is changed, need to adjust
	 * this function to deal with programs using old version.
	 */
	BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE);

	if (len < MIN_INFO_OPTLEN_SIZE)
		return -EINVAL;

	memset(sfd, 0, sizeof(*sfd));

	copylen = min_t(unsigned int, len, sizeof(*sfd));
	if (copy_from_user(sfd, optval, copylen))
		return -EFAULT;

	/* size_subflow_data is u32, but len is signed */
	if (sfd->size_subflow_data > INT_MAX ||
	    sfd->size_user > INT_MAX)
		return -EINVAL;

	/* the user-declared header must cover at least the known struct
	 * and must fit inside the overall buffer
	 */
	if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE ||
	    sfd->size_subflow_data > len)
		return -EINVAL;

	/* output-only fields must be zero on input */
	if (sfd->num_subflows || sfd->size_kernel)
		return -EINVAL;

	return len - sfd->size_subflow_data;
}
/* MPTCP_TCPINFO: dump a struct tcp_info for every subflow into the
 * user-supplied array that follows the mptcp_subflow_data header.
 * Subflows beyond the user buffer are still counted in num_subflows.
 */
static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
				    int __user *optlen)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	unsigned int sfcount = 0, copied = 0;
	struct mptcp_subflow_data sfd;
	char __user *infoptr;
	int len;

	len = mptcp_get_subflow_data(&sfd, optval, optlen);
	if (len < 0)
		return len;

	/* report the kernel element size, clamp the user-provided one */
	sfd.size_kernel = sizeof(struct tcp_info);
	sfd.size_user = min_t(unsigned int, sfd.size_user,
			      sizeof(struct tcp_info));

	/* the per-subflow array starts right after the user-sized header */
	infoptr = optval + sfd.size_subflow_data;

	lock_sock(sk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		++sfcount;

		/* copy only while a full element still fits; keep counting
		 * the remaining subflows regardless
		 */
		if (len && len >= sfd.size_user) {
			struct tcp_info info;

			tcp_get_info(ssk, &info);

			if (copy_to_user(infoptr, &info, sfd.size_user)) {
				release_sock(sk);
				return -EFAULT;
			}

			infoptr += sfd.size_user;
			copied += sfd.size_user;
			len -= sfd.size_user;
		}
	}

	release_sock(sk);

	sfd.num_subflows = sfcount;
	if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
		return -EFAULT;

	return 0;
}
/* Fill @a with the local/remote addresses of subflow socket @sk.
 * Which union member is written depends on the socket's address family;
 * unknown families leave @a all-zero.
 */
static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a)
{
	const struct inet_sock *inet = inet_sk(sk);

	memset(a, 0, sizeof(*a));

	if (sk->sk_family == AF_INET) {
		a->sin_local.sin_family = AF_INET;
		a->sin_local.sin_port = inet->inet_sport;
		/* fall back to the source address when the bound receive
		 * address is still 0.0.0.0
		 */
		a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr;
		if (!a->sin_local.sin_addr.s_addr)
			a->sin_local.sin_addr.s_addr = inet->inet_saddr;

		a->sin_remote.sin_family = AF_INET;
		a->sin_remote.sin_port = inet->inet_dport;
		a->sin_remote.sin_addr.s_addr = inet->inet_daddr;
#if IS_ENABLED(CONFIG_IPV6)
	} else if (sk->sk_family == AF_INET6) {
		const struct ipv6_pinfo *np = inet6_sk(sk);

		/* an AF_INET6 socket without ipv6 pinfo would be a bug */
		if (WARN_ON_ONCE(!np))
			return;

		a->sin6_local.sin6_family = AF_INET6;
		a->sin6_local.sin6_port = inet->inet_sport;

		/* same fallback as the v4 branch, for the any-address case */
		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
			a->sin6_local.sin6_addr = np->saddr;
		else
			a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr;

		a->sin6_remote.sin6_family = AF_INET6;
		a->sin6_remote.sin6_port = inet->inet_dport;
		a->sin6_remote.sin6_addr = sk->sk_v6_daddr;
#endif
	}
}
/* MPTCP_SUBFLOW_ADDRS: dump a struct mptcp_subflow_addrs for every
 * subflow into the user-supplied array following the mptcp_subflow_data
 * header; structurally mirrors mptcp_getsockopt_tcpinfo().
 */
static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval,
					  int __user *optlen)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	unsigned int sfcount = 0, copied = 0;
	struct mptcp_subflow_data sfd;
	char __user *addrptr;
	int len;

	len = mptcp_get_subflow_data(&sfd, optval, optlen);
	if (len < 0)
		return len;

	/* report the kernel element size, clamp the user-provided one */
	sfd.size_kernel = sizeof(struct mptcp_subflow_addrs);
	sfd.size_user = min_t(unsigned int, sfd.size_user,
			      sizeof(struct mptcp_subflow_addrs));

	addrptr = optval + sfd.size_subflow_data;

	lock_sock(sk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		++sfcount;

		/* copy only while a full element still fits; keep counting */
		if (len && len >= sfd.size_user) {
			struct mptcp_subflow_addrs a;

			mptcp_get_sub_addrs(ssk, &a);

			if (copy_to_user(addrptr, &a, sfd.size_user)) {
				release_sock(sk);
				return -EFAULT;
			}

			addrptr += sfd.size_user;
			copied += sfd.size_user;
			len -= sfd.size_user;
		}
	}

	release_sock(sk);

	sfd.num_subflows = sfcount;
	if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
		return -EFAULT;

	return 0;
}
/* Read and validate the fixed-size leading part of mptcp_full_info from
 * user space.  Returns the number of bytes left for the trailing
 * mptcp_info payload (may be 0) or a negative errno.
 */
static int mptcp_get_full_info(struct mptcp_full_info *mfi,
			       char __user *optval,
			       int __user *optlen)
{
	int len;

	/* the uapi layout places mptcp_info right after the fixed header */
	BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
		     MIN_FULL_INFO_OPTLEN_SIZE);

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < MIN_FULL_INFO_OPTLEN_SIZE)
		return -EINVAL;

	memset(mfi, 0, sizeof(*mfi));
	if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
		return -EFAULT;

	/* kernel-filled fields must be zero on input */
	if (mfi->size_tcpinfo_kernel ||
	    mfi->size_sfinfo_kernel ||
	    mfi->num_subflows)
		return -EINVAL;

	/* the per-element sizes are u32 but are later used in signed math */
	if (mfi->size_sfinfo_user > INT_MAX ||
	    mfi->size_tcpinfo_user > INT_MAX)
		return -EINVAL;

	return len - MIN_FULL_INFO_OPTLEN_SIZE;
}
  1039. static int mptcp_put_full_info(struct mptcp_full_info *mfi,
  1040. char __user *optval,
  1041. u32 copylen,
  1042. int __user *optlen)
  1043. {
  1044. copylen += MIN_FULL_INFO_OPTLEN_SIZE;
  1045. if (put_user(copylen, optlen))
  1046. return -EFAULT;
  1047. if (copy_to_user(optval, mfi, copylen))
  1048. return -EFAULT;
  1049. return 0;
  1050. }
/* MPTCP_FULL_INFO: single getsockopt() returning the msk-level
 * mptcp_info plus, for every subflow, an mptcp_subflow_info and a
 * tcp_info written to separate user-provided arrays.
 */
static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
				      int __user *optlen)
{
	unsigned int sfcount = 0, copylen = 0;
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	void __user *tcpinfoptr, *sfinfoptr;
	struct mptcp_full_info mfi;
	int len;

	len = mptcp_get_full_info(&mfi, optval, optlen);
	if (len < 0)
		return len;

	/* don't bother filling the mptcp info if there is not enough
	 * user-space-provided storage
	 */
	if (len > 0) {
		mptcp_diag_fill_info(msk, &mfi.mptcp_info);
		copylen += min_t(unsigned int, len, sizeof(struct mptcp_info));
	}

	/* report the kernel element sizes and clamp the user-provided ones */
	mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
	mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
				      sizeof(struct tcp_info));
	sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
	mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
	mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
				     sizeof(struct mptcp_subflow_info));
	tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);

	lock_sock(sk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		struct mptcp_subflow_info sfinfo;
		struct tcp_info tcp_info;

		/* count every subflow, but dump only as many entries as the
		 * user-provided arrays can hold
		 */
		if (sfcount++ >= mfi.size_arrays_user)
			continue;

		/* fetch addr/tcp_info only if the user space buffers
		 * are wide enough
		 */
		memset(&sfinfo, 0, sizeof(sfinfo));
		sfinfo.id = subflow->subflow_id;
		if (mfi.size_sfinfo_user >
		    offsetof(struct mptcp_subflow_info, addrs))
			mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
		if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
			goto fail_release;

		if (mfi.size_tcpinfo_user) {
			tcp_get_info(ssk, &tcp_info);
			if (copy_to_user(tcpinfoptr, &tcp_info,
					 mfi.size_tcpinfo_user))
				goto fail_release;
		}

		tcpinfoptr += mfi.size_tcpinfo_user;
		sfinfoptr += mfi.size_sfinfo_user;
	}
	release_sock(sk);

	mfi.num_subflows = sfcount;
	if (mptcp_put_full_info(&mfi, optval, copylen, optlen))
		return -EFAULT;

	return 0;

fail_release:
	release_sock(sk);
	return -EFAULT;
}
  1113. static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
  1114. int __user *optlen, int val)
  1115. {
  1116. int len;
  1117. if (get_user(len, optlen))
  1118. return -EFAULT;
  1119. if (len < 0)
  1120. return -EINVAL;
  1121. if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
  1122. unsigned char ucval = (unsigned char)val;
  1123. len = 1;
  1124. if (put_user(len, optlen))
  1125. return -EFAULT;
  1126. if (copy_to_user(optval, &ucval, 1))
  1127. return -EFAULT;
  1128. } else {
  1129. len = min_t(unsigned int, len, sizeof(int));
  1130. if (put_user(len, optlen))
  1131. return -EFAULT;
  1132. if (copy_to_user(optval, &val, len))
  1133. return -EFAULT;
  1134. }
  1135. return 0;
  1136. }
/* SOL_TCP getsockopt() on the MPTCP socket: options with msk-level
 * state are answered directly; the rest are forwarded to the first
 * subflow.
 */
static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
				    char __user *optval, int __user *optlen)
{
	struct sock *sk = (void *)msk;

	switch (optname) {
	case TCP_ULP:
	case TCP_CONGESTION:
	case TCP_INFO:
	case TCP_CC_INFO:
	case TCP_DEFER_ACCEPT:
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
						      optval, optlen);
	case TCP_INQ:
		return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq);
	case TCP_CORK:
		return mptcp_put_int_option(msk, optval, optlen, msk->cork);
	case TCP_NODELAY:
		return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
	/* keepalive values fall back to the per-netns sysctl defaults when
	 * not explicitly set on the msk (the "? :" idiom below)
	 */
	case TCP_KEEPIDLE:
		return mptcp_put_int_option(msk, optval, optlen,
					    msk->keepalive_idle ? :
					    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ);
	case TCP_KEEPINTVL:
		return mptcp_put_int_option(msk, optval, optlen,
					    msk->keepalive_intvl ? :
					    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ);
	case TCP_KEEPCNT:
		return mptcp_put_int_option(msk, optval, optlen,
					    msk->keepalive_cnt ? :
					    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes));
	case TCP_NOTSENT_LOWAT:
		return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
	case TCP_IS_MPTCP:
		/* always 1 here: this handler only runs on MPTCP sockets */
		return mptcp_put_int_option(msk, optval, optlen, 1);
	}
	return -EOPNOTSUPP;
}
/* SOL_IP getsockopt(): all supported options are backed by msk-level
 * inet state, which sync_socket_options() pushes to the subflows.
 */
static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
			       char __user *optval, int __user *optlen)
{
	struct sock *sk = (void *)msk;

	switch (optname) {
	case IP_TOS:
		return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
	case IP_FREEBIND:
		return mptcp_put_int_option(msk, optval, optlen,
					    inet_test_bit(FREEBIND, sk));
	case IP_TRANSPARENT:
		return mptcp_put_int_option(msk, optval, optlen,
					    inet_test_bit(TRANSPARENT, sk));
	case IP_BIND_ADDRESS_NO_PORT:
		return mptcp_put_int_option(msk, optval, optlen,
					    inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
	case IP_LOCAL_PORT_RANGE:
		return mptcp_put_int_option(msk, optval, optlen,
					    READ_ONCE(inet_sk(sk)->local_port_range));
	}

	return -EOPNOTSUPP;
}
  1200. static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname,
  1201. char __user *optval, int __user *optlen)
  1202. {
  1203. struct sock *sk = (void *)msk;
  1204. switch (optname) {
  1205. case IPV6_V6ONLY:
  1206. return mptcp_put_int_option(msk, optval, optlen,
  1207. sk->sk_ipv6only);
  1208. case IPV6_TRANSPARENT:
  1209. return mptcp_put_int_option(msk, optval, optlen,
  1210. inet_test_bit(TRANSPARENT, sk));
  1211. case IPV6_FREEBIND:
  1212. return mptcp_put_int_option(msk, optval, optlen,
  1213. inet_test_bit(FREEBIND, sk));
  1214. }
  1215. return -EOPNOTSUPP;
  1216. }
  1217. static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
  1218. char __user *optval, int __user *optlen)
  1219. {
  1220. switch (optname) {
  1221. case MPTCP_INFO:
  1222. return mptcp_getsockopt_info(msk, optval, optlen);
  1223. case MPTCP_FULL_INFO:
  1224. return mptcp_getsockopt_full_info(msk, optval, optlen);
  1225. case MPTCP_TCPINFO:
  1226. return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
  1227. case MPTCP_SUBFLOW_ADDRS:
  1228. return mptcp_getsockopt_subflow_addrs(msk, optval, optlen);
  1229. }
  1230. return -EOPNOTSUPP;
  1231. }
/* Entry point for getsockopt() on an MPTCP socket: after a fallback to
 * plain TCP the request is forwarded to the single remaining subflow,
 * otherwise it is dispatched per level to the msk-aware handlers.
 */
int mptcp_getsockopt(struct sock *sk, int level, int optname,
		     char __user *optval, int __user *option)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *ssk;

	pr_debug("msk=%p\n", msk);

	/* @@ the meaning of setsockopt() when the socket is connected and
	 * there are multiple subflows is not yet defined. It is up to the
	 * MPTCP-level socket to configure the subflows until the subflow
	 * is in TCP fallback, when socket options are passed through
	 * to the one remaining subflow.
	 */
	lock_sock(sk);
	ssk = __mptcp_tcp_fallback(msk);
	release_sock(sk);
	if (ssk)
		return tcp_getsockopt(ssk, level, optname, optval, option);

	if (level == SOL_IP)
		return mptcp_getsockopt_v4(msk, optname, optval, option);
	if (level == SOL_IPV6)
		return mptcp_getsockopt_v6(msk, optname, optval, option);
	if (level == SOL_TCP)
		return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
	if (level == SOL_MPTCP)
		return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option);
	return -EOPNOTSUPP;
}
/* Propagate the msk-level socket options to subflow @ssk.
 * NOTE(review): callers appear to hold the needed socket locks (see
 * mptcp_sockopt_sync_locked() and the *_locked helpers used below) —
 * confirm before adding new call sites.
 */
static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
{
	static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK;
	struct sock *sk = (struct sock *)msk;
	bool keep_open;

	keep_open = sock_flag(sk, SOCK_KEEPOPEN);
	if (ssk->sk_prot->keepalive)
		ssk->sk_prot->keepalive(ssk, keep_open);
	sock_valbool_flag(ssk, SOCK_KEEPOPEN, keep_open);

	ssk->sk_priority = sk->sk_priority;
	ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
	ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
	ssk->sk_ipv6only = sk->sk_ipv6only;
	__ip_sock_set_tos(ssk, inet_sk(sk)->tos);

	/* only propagate buffer sizes the user explicitly set on the msk */
	if (sk->sk_userlocks & tx_rx_locks) {
		ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
		if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) {
			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
			mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
		}
		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
	}

	if (sock_flag(sk, SOCK_LINGER)) {
		ssk->sk_lingertime = sk->sk_lingertime;
		sock_set_flag(ssk, SOCK_LINGER);
	} else {
		sock_reset_flag(ssk, SOCK_LINGER);
	}

	/* a changed mark invalidates the subflow's cached route */
	if (sk->sk_mark != ssk->sk_mark) {
		ssk->sk_mark = sk->sk_mark;
		sk_dst_reset(ssk);
	}

	sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));

	/* switch CC only when the subflow actually diverges from the msk */
	if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
		tcp_set_congestion_control(ssk, msk->ca_name, false, true);
	__tcp_sock_set_cork(ssk, !!msk->cork);
	__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
	tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle);
	tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl);
	tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt);

	inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
	inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
	inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
	WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
}
/* Bring subflow @ssk up to date with the msk-level socket options.
 * The msk must be owned by the caller (msk_owned_by_me()).
 */
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);

	msk_owned_by_me(msk);

	ssk->sk_rcvlowat = 0;

	/* subflows must ignore any latency-related settings: will not affect
	 * the user-space - only the msk is relevant - but will foul the
	 * mptcp scheduler
	 */
	tcp_sk(ssk)->notsent_lowat = UINT_MAX;

	/* resync only when the subflow's option generation lags behind the
	 * msk's (setsockopt_seq presumably advances on each option change —
	 * verify against the setsockopt path)
	 */
	if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
		sync_socket_options(msk, ssk);
		subflow->setsockopt_seq = msk->setsockopt_seq;
	}
}
/* unfortunately this is different enough from the tcp version so
 * that we can't factor it out
 */
int mptcp_set_rcvlowat(struct sock *sk, int val)
{
	struct mptcp_subflow_context *subflow;
	int space, cap;

	/* bpf can land here with a wrong sk type */
	if (sk->sk_protocol == IPPROTO_TCP)
		return -EINVAL;

	/* cap the requested watermark at half the receive buffer (either
	 * the locked per-socket one or the global tcp_rmem maximum)
	 */
	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
		cap = sk->sk_rcvbuf >> 1;
	else
		cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
	val = min(val, cap);
	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);

	/* Check if we need to signal EPOLLIN right now */
	if (mptcp_epollin_ready(sk))
		sk->sk_data_ready(sk);

	/* never grow a receive buffer the user locked explicitly */
	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
		return 0;

	space = mptcp_space_from_win(sk, val);
	if (space <= sk->sk_rcvbuf)
		return 0;

	/* propagate the rcvbuf changes to all the subflows */
	WRITE_ONCE(sk->sk_rcvbuf, space);
	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow;

		slow = lock_sock_fast(ssk);
		WRITE_ONCE(ssk->sk_rcvbuf, space);
		WRITE_ONCE(tcp_sk(ssk)->window_clamp, val);
		unlock_sock_fast(ssk, slow);
	}
	return 0;
}