| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461 |
- // SPDX-License-Identifier: GPL-2.0
- /* Multipath TCP
- *
- * Copyright (c) 2019, Tessares SA.
- */
- #ifdef CONFIG_SYSCTL
- #include <linux/sysctl.h>
- #endif
- #include <net/net_namespace.h>
- #include <net/netns/generic.h>
- #include "protocol.h"
- #include "mib.h"
/* Identifier of the MPTCP per-netns private area; it is filled in through
 * mptcp_pernet_ops.id and used by mptcp_get_pernet() as the lookup key.
 */
static int mptcp_pernet_id;

/* Path under which the MPTCP sysctl table is registered. */
#define MPTCP_SYSCTL_PATH "net/mptcp"

#ifdef CONFIG_SYSCTL
/* Upper bound handed to the "pm_type" sysctl entry via .extra2. */
static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
#endif
- struct mptcp_pernet {
- #ifdef CONFIG_SYSCTL
- struct ctl_table_header *ctl_table_hdr;
- #endif
- unsigned int add_addr_timeout;
- unsigned int blackhole_timeout;
- unsigned int close_timeout;
- unsigned int stale_loss_cnt;
- atomic_t active_disable_times;
- unsigned long active_disable_stamp;
- u8 mptcp_enabled;
- u8 checksum_enabled;
- u8 allow_join_initial_addr_port;
- u8 pm_type;
- char scheduler[MPTCP_SCHED_NAME_MAX];
- };
- static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
- {
- return net_generic(net, mptcp_pernet_id);
- }
- int mptcp_is_enabled(const struct net *net)
- {
- return mptcp_get_pernet(net)->mptcp_enabled;
- }
- unsigned int mptcp_get_add_addr_timeout(const struct net *net)
- {
- return mptcp_get_pernet(net)->add_addr_timeout;
- }
- int mptcp_is_checksum_enabled(const struct net *net)
- {
- return mptcp_get_pernet(net)->checksum_enabled;
- }
- int mptcp_allow_join_id0(const struct net *net)
- {
- return mptcp_get_pernet(net)->allow_join_initial_addr_port;
- }
- unsigned int mptcp_stale_loss_cnt(const struct net *net)
- {
- return mptcp_get_pernet(net)->stale_loss_cnt;
- }
- unsigned int mptcp_close_timeout(const struct sock *sk)
- {
- if (sock_flag(sk, SOCK_DEAD))
- return TCP_TIMEWAIT_LEN;
- return mptcp_get_pernet(sock_net(sk))->close_timeout;
- }
- int mptcp_get_pm_type(const struct net *net)
- {
- return mptcp_get_pernet(net)->pm_type;
- }
- const char *mptcp_get_scheduler(const struct net *net)
- {
- return mptcp_get_pernet(net)->scheduler;
- }
- static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
- {
- pernet->mptcp_enabled = 1;
- pernet->add_addr_timeout = TCP_RTO_MAX;
- pernet->blackhole_timeout = 3600;
- atomic_set(&pernet->active_disable_times, 0);
- pernet->close_timeout = TCP_TIMEWAIT_LEN;
- pernet->checksum_enabled = 0;
- pernet->allow_join_initial_addr_port = 1;
- pernet->stale_loss_cnt = 4;
- pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
- strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler));
- }
- #ifdef CONFIG_SYSCTL
- static int mptcp_set_scheduler(char *scheduler, const char *name)
- {
- struct mptcp_sched_ops *sched;
- int ret = 0;
- rcu_read_lock();
- sched = mptcp_sched_find(name);
- if (sched)
- strscpy(scheduler, name, MPTCP_SCHED_NAME_MAX);
- else
- ret = -ENOENT;
- rcu_read_unlock();
- return ret;
- }
- static int proc_scheduler(const struct ctl_table *ctl, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
- {
- char (*scheduler)[MPTCP_SCHED_NAME_MAX] = ctl->data;
- char val[MPTCP_SCHED_NAME_MAX];
- struct ctl_table tbl = {
- .data = val,
- .maxlen = MPTCP_SCHED_NAME_MAX,
- };
- int ret;
- strscpy(val, *scheduler, MPTCP_SCHED_NAME_MAX);
- ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
- if (write && ret == 0)
- ret = mptcp_set_scheduler(*scheduler, val);
- return ret;
- }
- static int proc_available_schedulers(const struct ctl_table *ctl,
- int write, void *buffer,
- size_t *lenp, loff_t *ppos)
- {
- struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, };
- int ret;
- tbl.data = kmalloc(tbl.maxlen, GFP_USER);
- if (!tbl.data)
- return -ENOMEM;
- mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX);
- ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
- kfree(tbl.data);
- return ret;
- }
- static int proc_blackhole_detect_timeout(const struct ctl_table *table,
- int write, void *buffer, size_t *lenp,
- loff_t *ppos)
- {
- struct mptcp_pernet *pernet = container_of(table->data,
- struct mptcp_pernet,
- blackhole_timeout);
- int ret;
- ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (write && ret == 0)
- atomic_set(&pernet->active_disable_times, 0);
- return ret;
- }
- static struct ctl_table mptcp_sysctl_table[] = {
- {
- .procname = "enabled",
- .maxlen = sizeof(u8),
- .mode = 0644,
- /* users with CAP_NET_ADMIN or root (not and) can change this
- * value, same as other sysctl or the 'net' tree.
- */
- .proc_handler = proc_dou8vec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE
- },
- {
- .procname = "add_addr_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "checksum_enabled",
- .maxlen = sizeof(u8),
- .mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE
- },
- {
- .procname = "allow_join_initial_addr_port",
- .maxlen = sizeof(u8),
- .mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE
- },
- {
- .procname = "stale_loss_cnt",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_douintvec_minmax,
- },
- {
- .procname = "pm_type",
- .maxlen = sizeof(u8),
- .mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &mptcp_pm_type_max
- },
- {
- .procname = "scheduler",
- .maxlen = MPTCP_SCHED_NAME_MAX,
- .mode = 0644,
- .proc_handler = proc_scheduler,
- },
- {
- .procname = "available_schedulers",
- .maxlen = MPTCP_SCHED_BUF_MAX,
- .mode = 0444,
- .proc_handler = proc_available_schedulers,
- },
- {
- .procname = "close_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "blackhole_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_blackhole_detect_timeout,
- .extra1 = SYSCTL_ZERO,
- },
- };
- static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
- {
- struct ctl_table_header *hdr;
- struct ctl_table *table;
- table = mptcp_sysctl_table;
- if (!net_eq(net, &init_net)) {
- table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL);
- if (!table)
- goto err_alloc;
- }
- table[0].data = &pernet->mptcp_enabled;
- table[1].data = &pernet->add_addr_timeout;
- table[2].data = &pernet->checksum_enabled;
- table[3].data = &pernet->allow_join_initial_addr_port;
- table[4].data = &pernet->stale_loss_cnt;
- table[5].data = &pernet->pm_type;
- table[6].data = &pernet->scheduler;
- /* table[7] is for available_schedulers which is read-only info */
- table[8].data = &pernet->close_timeout;
- table[9].data = &pernet->blackhole_timeout;
- hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
- ARRAY_SIZE(mptcp_sysctl_table));
- if (!hdr)
- goto err_reg;
- pernet->ctl_table_hdr = hdr;
- return 0;
- err_reg:
- if (!net_eq(net, &init_net))
- kfree(table);
- err_alloc:
- return -ENOMEM;
- }
- static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
- {
- const struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;
- unregister_net_sysctl_table(pernet->ctl_table_hdr);
- kfree(table);
- }
- #else
/* Without CONFIG_SYSCTL there is no table to register: always succeed. */
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
{
	const int success = 0;

	return success;
}
/* Without CONFIG_SYSCTL there is no table to tear down. */
static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
{
}
- #endif /* CONFIG_SYSCTL */
- /* The following code block is to deal with middle box issues with MPTCP,
- * similar to what is done with TFO.
- * The proposed solution is to disable active MPTCP globally when SYN+MPC are
- * dropped, while SYN without MPC aren't. In this case, active side MPTCP is
- * disabled globally for 1hr at first. Then if it happens again, it is disabled
- * for 2h, then 4h, 8h, ...
- * The timeout is reset back to 1hr when a successful active MPTCP connection is
- * fully established.
- */
- /* Disable active MPTCP and record current jiffies and active_disable_times */
- void mptcp_active_disable(struct sock *sk)
- {
- struct net *net = sock_net(sk);
- struct mptcp_pernet *pernet;
- pernet = mptcp_get_pernet(net);
- if (!READ_ONCE(pernet->blackhole_timeout))
- return;
- /* Paired with READ_ONCE() in mptcp_active_should_disable() */
- WRITE_ONCE(pernet->active_disable_stamp, jiffies);
- /* Paired with smp_rmb() in mptcp_active_should_disable().
- * We want pernet->active_disable_stamp to be updated first.
- */
- smp_mb__before_atomic();
- atomic_inc(&pernet->active_disable_times);
- MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
- }
- /* Calculate timeout for MPTCP active disable
- * Return true if we are still in the active MPTCP disable period
- * Return false if timeout already expired and we should use active MPTCP
- */
- bool mptcp_active_should_disable(struct sock *ssk)
- {
- struct net *net = sock_net(ssk);
- unsigned int blackhole_timeout;
- struct mptcp_pernet *pernet;
- unsigned long timeout;
- int disable_times;
- int multiplier;
- pernet = mptcp_get_pernet(net);
- blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
- if (!blackhole_timeout)
- return false;
- disable_times = atomic_read(&pernet->active_disable_times);
- if (!disable_times)
- return false;
- /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
- smp_rmb();
- /* Limit timeout to max: 2^6 * initial timeout */
- multiplier = 1 << min(disable_times - 1, 6);
- /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
- timeout = READ_ONCE(pernet->active_disable_stamp) +
- multiplier * blackhole_timeout * HZ;
- return time_before(jiffies, timeout);
- }
- /* Enable active MPTCP and reset active_disable_times if needed */
- void mptcp_active_enable(struct sock *sk)
- {
- struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
- if (atomic_read(&pernet->active_disable_times)) {
- struct net_device *dev;
- struct dst_entry *dst;
- rcu_read_lock();
- dst = __sk_dst_get(sk);
- dev = dst ? dst_dev_rcu(dst) : NULL;
- if (!(dev && (dev->flags & IFF_LOOPBACK)))
- atomic_set(&pernet->active_disable_times, 0);
- rcu_read_unlock();
- }
- }
- /* Check the number of retransmissions, and fallback to TCP if needed */
- void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
- {
- struct mptcp_subflow_context *subflow;
- u32 timeouts;
- if (!sk_is_mptcp(ssk))
- return;
- timeouts = inet_csk(ssk)->icsk_retransmits;
- subflow = mptcp_subflow_ctx(ssk);
- if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
- if (timeouts == 2 || (timeouts < 2 && expired)) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
- subflow->mpc_drop = 1;
- mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
- }
- } else if (ssk->sk_state == TCP_SYN_SENT) {
- subflow->mpc_drop = 0;
- }
- }
- static int __net_init mptcp_net_init(struct net *net)
- {
- struct mptcp_pernet *pernet = mptcp_get_pernet(net);
- mptcp_pernet_set_defaults(pernet);
- return mptcp_pernet_new_table(net, pernet);
- }
- /* Note: the callback will only be called per extra netns */
- static void __net_exit mptcp_net_exit(struct net *net)
- {
- struct mptcp_pernet *pernet = mptcp_get_pernet(net);
- mptcp_pernet_del_table(pernet);
- }
- static struct pernet_operations mptcp_pernet_ops = {
- .init = mptcp_net_init,
- .exit = mptcp_net_exit,
- .id = &mptcp_pernet_id,
- .size = sizeof(struct mptcp_pernet),
- };
- void __init mptcp_init(void)
- {
- mptcp_join_cookie_init();
- mptcp_proto_init();
- if (register_pernet_subsys(&mptcp_pernet_ops) < 0)
- panic("Failed to register MPTCP pernet subsystem.\n");
- }
- #if IS_ENABLED(CONFIG_MPTCP_IPV6)
- int __init mptcpv6_init(void)
- {
- int err;
- err = mptcp_proto_v6_init();
- return err;
- }
- #endif
|