ctrl.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. // SPDX-License-Identifier: GPL-2.0
  2. /* Multipath TCP
  3. *
  4. * Copyright (c) 2019, Tessares SA.
  5. */
  6. #ifdef CONFIG_SYSCTL
  7. #include <linux/sysctl.h>
  8. #endif
  9. #include <net/net_namespace.h>
  10. #include <net/netns/generic.h>
  11. #include "protocol.h"
  12. #include "mib.h"
  13. #define MPTCP_SYSCTL_PATH "net/mptcp"
  14. static int mptcp_pernet_id;
  15. #ifdef CONFIG_SYSCTL
  16. static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
  17. #endif
  18. struct mptcp_pernet {
  19. #ifdef CONFIG_SYSCTL
  20. struct ctl_table_header *ctl_table_hdr;
  21. #endif
  22. unsigned int add_addr_timeout;
  23. unsigned int blackhole_timeout;
  24. unsigned int close_timeout;
  25. unsigned int stale_loss_cnt;
  26. atomic_t active_disable_times;
  27. unsigned long active_disable_stamp;
  28. u8 mptcp_enabled;
  29. u8 checksum_enabled;
  30. u8 allow_join_initial_addr_port;
  31. u8 pm_type;
  32. char scheduler[MPTCP_SCHED_NAME_MAX];
  33. };
  34. static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
  35. {
  36. return net_generic(net, mptcp_pernet_id);
  37. }
  38. int mptcp_is_enabled(const struct net *net)
  39. {
  40. return mptcp_get_pernet(net)->mptcp_enabled;
  41. }
  42. unsigned int mptcp_get_add_addr_timeout(const struct net *net)
  43. {
  44. return mptcp_get_pernet(net)->add_addr_timeout;
  45. }
  46. int mptcp_is_checksum_enabled(const struct net *net)
  47. {
  48. return mptcp_get_pernet(net)->checksum_enabled;
  49. }
  50. int mptcp_allow_join_id0(const struct net *net)
  51. {
  52. return mptcp_get_pernet(net)->allow_join_initial_addr_port;
  53. }
  54. unsigned int mptcp_stale_loss_cnt(const struct net *net)
  55. {
  56. return mptcp_get_pernet(net)->stale_loss_cnt;
  57. }
  58. unsigned int mptcp_close_timeout(const struct sock *sk)
  59. {
  60. if (sock_flag(sk, SOCK_DEAD))
  61. return TCP_TIMEWAIT_LEN;
  62. return mptcp_get_pernet(sock_net(sk))->close_timeout;
  63. }
  64. int mptcp_get_pm_type(const struct net *net)
  65. {
  66. return mptcp_get_pernet(net)->pm_type;
  67. }
  68. const char *mptcp_get_scheduler(const struct net *net)
  69. {
  70. return mptcp_get_pernet(net)->scheduler;
  71. }
  72. static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
  73. {
  74. pernet->mptcp_enabled = 1;
  75. pernet->add_addr_timeout = TCP_RTO_MAX;
  76. pernet->blackhole_timeout = 3600;
  77. atomic_set(&pernet->active_disable_times, 0);
  78. pernet->close_timeout = TCP_TIMEWAIT_LEN;
  79. pernet->checksum_enabled = 0;
  80. pernet->allow_join_initial_addr_port = 1;
  81. pernet->stale_loss_cnt = 4;
  82. pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
  83. strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler));
  84. }
  85. #ifdef CONFIG_SYSCTL
  86. static int mptcp_set_scheduler(char *scheduler, const char *name)
  87. {
  88. struct mptcp_sched_ops *sched;
  89. int ret = 0;
  90. rcu_read_lock();
  91. sched = mptcp_sched_find(name);
  92. if (sched)
  93. strscpy(scheduler, name, MPTCP_SCHED_NAME_MAX);
  94. else
  95. ret = -ENOENT;
  96. rcu_read_unlock();
  97. return ret;
  98. }
  99. static int proc_scheduler(const struct ctl_table *ctl, int write,
  100. void *buffer, size_t *lenp, loff_t *ppos)
  101. {
  102. char (*scheduler)[MPTCP_SCHED_NAME_MAX] = ctl->data;
  103. char val[MPTCP_SCHED_NAME_MAX];
  104. struct ctl_table tbl = {
  105. .data = val,
  106. .maxlen = MPTCP_SCHED_NAME_MAX,
  107. };
  108. int ret;
  109. strscpy(val, *scheduler, MPTCP_SCHED_NAME_MAX);
  110. ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
  111. if (write && ret == 0)
  112. ret = mptcp_set_scheduler(*scheduler, val);
  113. return ret;
  114. }
  115. static int proc_available_schedulers(const struct ctl_table *ctl,
  116. int write, void *buffer,
  117. size_t *lenp, loff_t *ppos)
  118. {
  119. struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, };
  120. int ret;
  121. tbl.data = kmalloc(tbl.maxlen, GFP_USER);
  122. if (!tbl.data)
  123. return -ENOMEM;
  124. mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX);
  125. ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
  126. kfree(tbl.data);
  127. return ret;
  128. }
  129. static int proc_blackhole_detect_timeout(const struct ctl_table *table,
  130. int write, void *buffer, size_t *lenp,
  131. loff_t *ppos)
  132. {
  133. struct mptcp_pernet *pernet = container_of(table->data,
  134. struct mptcp_pernet,
  135. blackhole_timeout);
  136. int ret;
  137. ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  138. if (write && ret == 0)
  139. atomic_set(&pernet->active_disable_times, 0);
  140. return ret;
  141. }
  142. static struct ctl_table mptcp_sysctl_table[] = {
  143. {
  144. .procname = "enabled",
  145. .maxlen = sizeof(u8),
  146. .mode = 0644,
  147. /* users with CAP_NET_ADMIN or root (not and) can change this
  148. * value, same as other sysctl or the 'net' tree.
  149. */
  150. .proc_handler = proc_dou8vec_minmax,
  151. .extra1 = SYSCTL_ZERO,
  152. .extra2 = SYSCTL_ONE
  153. },
  154. {
  155. .procname = "add_addr_timeout",
  156. .maxlen = sizeof(unsigned int),
  157. .mode = 0644,
  158. .proc_handler = proc_dointvec_jiffies,
  159. },
  160. {
  161. .procname = "checksum_enabled",
  162. .maxlen = sizeof(u8),
  163. .mode = 0644,
  164. .proc_handler = proc_dou8vec_minmax,
  165. .extra1 = SYSCTL_ZERO,
  166. .extra2 = SYSCTL_ONE
  167. },
  168. {
  169. .procname = "allow_join_initial_addr_port",
  170. .maxlen = sizeof(u8),
  171. .mode = 0644,
  172. .proc_handler = proc_dou8vec_minmax,
  173. .extra1 = SYSCTL_ZERO,
  174. .extra2 = SYSCTL_ONE
  175. },
  176. {
  177. .procname = "stale_loss_cnt",
  178. .maxlen = sizeof(unsigned int),
  179. .mode = 0644,
  180. .proc_handler = proc_douintvec_minmax,
  181. },
  182. {
  183. .procname = "pm_type",
  184. .maxlen = sizeof(u8),
  185. .mode = 0644,
  186. .proc_handler = proc_dou8vec_minmax,
  187. .extra1 = SYSCTL_ZERO,
  188. .extra2 = &mptcp_pm_type_max
  189. },
  190. {
  191. .procname = "scheduler",
  192. .maxlen = MPTCP_SCHED_NAME_MAX,
  193. .mode = 0644,
  194. .proc_handler = proc_scheduler,
  195. },
  196. {
  197. .procname = "available_schedulers",
  198. .maxlen = MPTCP_SCHED_BUF_MAX,
  199. .mode = 0444,
  200. .proc_handler = proc_available_schedulers,
  201. },
  202. {
  203. .procname = "close_timeout",
  204. .maxlen = sizeof(unsigned int),
  205. .mode = 0644,
  206. .proc_handler = proc_dointvec_jiffies,
  207. },
  208. {
  209. .procname = "blackhole_timeout",
  210. .maxlen = sizeof(unsigned int),
  211. .mode = 0644,
  212. .proc_handler = proc_blackhole_detect_timeout,
  213. .extra1 = SYSCTL_ZERO,
  214. },
  215. };
  216. static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
  217. {
  218. struct ctl_table_header *hdr;
  219. struct ctl_table *table;
  220. table = mptcp_sysctl_table;
  221. if (!net_eq(net, &init_net)) {
  222. table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL);
  223. if (!table)
  224. goto err_alloc;
  225. }
  226. table[0].data = &pernet->mptcp_enabled;
  227. table[1].data = &pernet->add_addr_timeout;
  228. table[2].data = &pernet->checksum_enabled;
  229. table[3].data = &pernet->allow_join_initial_addr_port;
  230. table[4].data = &pernet->stale_loss_cnt;
  231. table[5].data = &pernet->pm_type;
  232. table[6].data = &pernet->scheduler;
  233. /* table[7] is for available_schedulers which is read-only info */
  234. table[8].data = &pernet->close_timeout;
  235. table[9].data = &pernet->blackhole_timeout;
  236. hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
  237. ARRAY_SIZE(mptcp_sysctl_table));
  238. if (!hdr)
  239. goto err_reg;
  240. pernet->ctl_table_hdr = hdr;
  241. return 0;
  242. err_reg:
  243. if (!net_eq(net, &init_net))
  244. kfree(table);
  245. err_alloc:
  246. return -ENOMEM;
  247. }
  248. static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
  249. {
  250. const struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;
  251. unregister_net_sysctl_table(pernet->ctl_table_hdr);
  252. kfree(table);
  253. }
  254. #else
  255. static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
  256. {
  257. return 0;
  258. }
  259. static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
  260. #endif /* CONFIG_SYSCTL */
/* The following code block deals with middlebox issues affecting MPTCP,
 * similar to what is done for TFO.
 * The proposed solution is to disable active MPTCP globally when SYN+MPC
 * packets are dropped while SYN packets without MPC are not. In this case,
 * active side MPTCP is disabled globally for 1h at first. Then, if it happens
 * again, it is disabled for 2h, then 4h, 8h, ...
 * The timeout is reset back to 1h when a successful active MPTCP connection
 * is fully established.
 */
  270. /* Disable active MPTCP and record current jiffies and active_disable_times */
  271. void mptcp_active_disable(struct sock *sk)
  272. {
  273. struct net *net = sock_net(sk);
  274. struct mptcp_pernet *pernet;
  275. pernet = mptcp_get_pernet(net);
  276. if (!READ_ONCE(pernet->blackhole_timeout))
  277. return;
  278. /* Paired with READ_ONCE() in mptcp_active_should_disable() */
  279. WRITE_ONCE(pernet->active_disable_stamp, jiffies);
  280. /* Paired with smp_rmb() in mptcp_active_should_disable().
  281. * We want pernet->active_disable_stamp to be updated first.
  282. */
  283. smp_mb__before_atomic();
  284. atomic_inc(&pernet->active_disable_times);
  285. MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
  286. }
  287. /* Calculate timeout for MPTCP active disable
  288. * Return true if we are still in the active MPTCP disable period
  289. * Return false if timeout already expired and we should use active MPTCP
  290. */
  291. bool mptcp_active_should_disable(struct sock *ssk)
  292. {
  293. struct net *net = sock_net(ssk);
  294. unsigned int blackhole_timeout;
  295. struct mptcp_pernet *pernet;
  296. unsigned long timeout;
  297. int disable_times;
  298. int multiplier;
  299. pernet = mptcp_get_pernet(net);
  300. blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
  301. if (!blackhole_timeout)
  302. return false;
  303. disable_times = atomic_read(&pernet->active_disable_times);
  304. if (!disable_times)
  305. return false;
  306. /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
  307. smp_rmb();
  308. /* Limit timeout to max: 2^6 * initial timeout */
  309. multiplier = 1 << min(disable_times - 1, 6);
  310. /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
  311. timeout = READ_ONCE(pernet->active_disable_stamp) +
  312. multiplier * blackhole_timeout * HZ;
  313. return time_before(jiffies, timeout);
  314. }
  315. /* Enable active MPTCP and reset active_disable_times if needed */
  316. void mptcp_active_enable(struct sock *sk)
  317. {
  318. struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
  319. if (atomic_read(&pernet->active_disable_times)) {
  320. struct net_device *dev;
  321. struct dst_entry *dst;
  322. rcu_read_lock();
  323. dst = __sk_dst_get(sk);
  324. dev = dst ? dst_dev_rcu(dst) : NULL;
  325. if (!(dev && (dev->flags & IFF_LOOPBACK)))
  326. atomic_set(&pernet->active_disable_times, 0);
  327. rcu_read_unlock();
  328. }
  329. }
  330. /* Check the number of retransmissions, and fallback to TCP if needed */
  331. void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
  332. {
  333. struct mptcp_subflow_context *subflow;
  334. u32 timeouts;
  335. if (!sk_is_mptcp(ssk))
  336. return;
  337. timeouts = inet_csk(ssk)->icsk_retransmits;
  338. subflow = mptcp_subflow_ctx(ssk);
  339. if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
  340. if (timeouts == 2 || (timeouts < 2 && expired)) {
  341. MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
  342. subflow->mpc_drop = 1;
  343. mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
  344. }
  345. } else if (ssk->sk_state == TCP_SYN_SENT) {
  346. subflow->mpc_drop = 0;
  347. }
  348. }
  349. static int __net_init mptcp_net_init(struct net *net)
  350. {
  351. struct mptcp_pernet *pernet = mptcp_get_pernet(net);
  352. mptcp_pernet_set_defaults(pernet);
  353. return mptcp_pernet_new_table(net, pernet);
  354. }
  355. /* Note: the callback will only be called per extra netns */
  356. static void __net_exit mptcp_net_exit(struct net *net)
  357. {
  358. struct mptcp_pernet *pernet = mptcp_get_pernet(net);
  359. mptcp_pernet_del_table(pernet);
  360. }
  361. static struct pernet_operations mptcp_pernet_ops = {
  362. .init = mptcp_net_init,
  363. .exit = mptcp_net_exit,
  364. .id = &mptcp_pernet_id,
  365. .size = sizeof(struct mptcp_pernet),
  366. };
  367. void __init mptcp_init(void)
  368. {
  369. mptcp_join_cookie_init();
  370. mptcp_proto_init();
  371. if (register_pernet_subsys(&mptcp_pernet_ops) < 0)
  372. panic("Failed to register MPTCP pernet subsystem.\n");
  373. }
  374. #if IS_ENABLED(CONFIG_MPTCP_IPV6)
  375. int __init mptcpv6_init(void)
  376. {
  377. int err;
  378. err = mptcp_proto_v6_init();
  379. return err;
  380. }
  381. #endif