mpls_iptunnel.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. // SPDX-License-Identifier: GPL-2.0-or-later
/*
 * mpls tunnels: an implementation of MPLS tunnels using the lightweight
 * tunnel (LWT) infrastructure
 *
 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
 */
  8. #include <linux/types.h>
  9. #include <linux/skbuff.h>
  10. #include <linux/net.h>
  11. #include <linux/module.h>
  12. #include <linux/mpls.h>
  13. #include <linux/vmalloc.h>
  14. #include <net/ip.h>
  15. #include <net/dst.h>
  16. #include <net/lwtunnel.h>
  17. #include <net/netevent.h>
  18. #include <net/netns/generic.h>
  19. #include <net/ip6_fib.h>
  20. #include <net/route.h>
  21. #include <net/mpls_iptunnel.h>
  22. #include <linux/mpls_iptunnel.h>
  23. #include "internal.h"
  24. static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
  25. [MPLS_IPTUNNEL_DST] = { .len = sizeof(u32) },
  26. [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 },
  27. };
  28. static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
  29. {
  30. /* The size of the layer 2.5 labels to be added for this route */
  31. return en->labels * sizeof(struct mpls_shim_hdr);
  32. }
  33. static int mpls_xmit(struct sk_buff *skb)
  34. {
  35. struct mpls_iptunnel_encap *tun_encap_info;
  36. struct mpls_shim_hdr *hdr;
  37. struct net_device *out_dev;
  38. unsigned int hh_len;
  39. unsigned int new_header_size;
  40. unsigned int mtu;
  41. struct dst_entry *dst = skb_dst(skb);
  42. struct rtable *rt = NULL;
  43. struct rt6_info *rt6 = NULL;
  44. struct mpls_dev *out_mdev;
  45. struct net *net;
  46. int err = 0;
  47. bool bos;
  48. int i;
  49. unsigned int ttl;
  50. /* Find the output device */
  51. out_dev = dst->dev;
  52. net = dev_net(out_dev);
  53. if (!mpls_output_possible(out_dev) ||
  54. !dst->lwtstate || skb_warn_if_lro(skb))
  55. goto drop;
  56. skb_forward_csum(skb);
  57. tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
  58. /* Obtain the ttl using the following set of rules.
  59. *
  60. * LWT ttl propagation setting:
  61. * - disabled => use default TTL value from LWT
  62. * - enabled => use TTL value from IPv4/IPv6 header
  63. * - default =>
  64. * Global ttl propagation setting:
  65. * - disabled => use default TTL value from global setting
  66. * - enabled => use TTL value from IPv4/IPv6 header
  67. */
  68. if (dst->ops->family == AF_INET) {
  69. if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
  70. ttl = tun_encap_info->default_ttl;
  71. else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
  72. !net->mpls.ip_ttl_propagate)
  73. ttl = net->mpls.default_ttl;
  74. else
  75. ttl = ip_hdr(skb)->ttl;
  76. rt = dst_rtable(dst);
  77. } else if (dst->ops->family == AF_INET6) {
  78. if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
  79. ttl = tun_encap_info->default_ttl;
  80. else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
  81. !net->mpls.ip_ttl_propagate)
  82. ttl = net->mpls.default_ttl;
  83. else
  84. ttl = ipv6_hdr(skb)->hop_limit;
  85. rt6 = dst_rt6_info(dst);
  86. } else {
  87. goto drop;
  88. }
  89. /* Verify the destination can hold the packet */
  90. new_header_size = mpls_encap_size(tun_encap_info);
  91. mtu = mpls_dev_mtu(out_dev);
  92. if (mpls_pkt_too_big(skb, mtu - new_header_size))
  93. goto drop;
  94. hh_len = LL_RESERVED_SPACE(out_dev);
  95. if (!out_dev->header_ops)
  96. hh_len = 0;
  97. /* Ensure there is enough space for the headers in the skb */
  98. if (skb_cow_head(skb, hh_len + new_header_size))
  99. goto drop;
  100. skb_set_inner_protocol(skb, skb->protocol);
  101. skb_reset_inner_network_header(skb);
  102. skb_push(skb, new_header_size);
  103. skb_reset_network_header(skb);
  104. skb->dev = out_dev;
  105. skb->protocol = htons(ETH_P_MPLS_UC);
  106. /* Push the new labels */
  107. hdr = mpls_hdr(skb);
  108. bos = true;
  109. for (i = tun_encap_info->labels - 1; i >= 0; i--) {
  110. hdr[i] = mpls_entry_encode(tun_encap_info->label[i],
  111. ttl, 0, bos);
  112. bos = false;
  113. }
  114. mpls_stats_inc_outucastpkts(out_dev, skb);
  115. if (rt) {
  116. if (rt->rt_gw_family == AF_INET6)
  117. err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt->rt_gw6,
  118. skb);
  119. else
  120. err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4,
  121. skb);
  122. } else if (rt6) {
  123. if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) {
  124. /* 6PE (RFC 4798) */
  125. err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3],
  126. skb);
  127. } else
  128. err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway,
  129. skb);
  130. }
  131. if (err)
  132. net_dbg_ratelimited("%s: packet transmission failed: %d\n",
  133. __func__, err);
  134. return LWTUNNEL_XMIT_DONE;
  135. drop:
  136. out_mdev = out_dev ? mpls_dev_get(out_dev) : NULL;
  137. if (out_mdev)
  138. MPLS_INC_STATS(out_mdev, tx_errors);
  139. kfree_skb(skb);
  140. return -EINVAL;
  141. }
  142. static int mpls_build_state(struct net *net, struct nlattr *nla,
  143. unsigned int family, const void *cfg,
  144. struct lwtunnel_state **ts,
  145. struct netlink_ext_ack *extack)
  146. {
  147. struct mpls_iptunnel_encap *tun_encap_info;
  148. struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
  149. struct lwtunnel_state *newts;
  150. u8 n_labels;
  151. int ret;
  152. ret = nla_parse_nested_deprecated(tb, MPLS_IPTUNNEL_MAX, nla,
  153. mpls_iptunnel_policy, extack);
  154. if (ret < 0)
  155. return ret;
  156. if (!tb[MPLS_IPTUNNEL_DST]) {
  157. NL_SET_ERR_MSG(extack, "MPLS_IPTUNNEL_DST attribute is missing");
  158. return -EINVAL;
  159. }
  160. /* determine number of labels */
  161. if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
  162. &n_labels, NULL, extack))
  163. return -EINVAL;
  164. newts = lwtunnel_state_alloc(struct_size(tun_encap_info, label,
  165. n_labels));
  166. if (!newts)
  167. return -ENOMEM;
  168. tun_encap_info = mpls_lwtunnel_encap(newts);
  169. ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels,
  170. &tun_encap_info->labels, tun_encap_info->label,
  171. extack);
  172. if (ret)
  173. goto errout;
  174. tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT;
  175. if (tb[MPLS_IPTUNNEL_TTL]) {
  176. tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]);
  177. /* TTL 0 implies propagate from IP header */
  178. tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ?
  179. MPLS_TTL_PROP_DISABLED :
  180. MPLS_TTL_PROP_ENABLED;
  181. }
  182. newts->type = LWTUNNEL_ENCAP_MPLS;
  183. newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
  184. newts->headroom = mpls_encap_size(tun_encap_info);
  185. *ts = newts;
  186. return 0;
  187. errout:
  188. kfree(newts);
  189. *ts = NULL;
  190. return ret;
  191. }
  192. static int mpls_fill_encap_info(struct sk_buff *skb,
  193. struct lwtunnel_state *lwtstate)
  194. {
  195. struct mpls_iptunnel_encap *tun_encap_info;
  196. tun_encap_info = mpls_lwtunnel_encap(lwtstate);
  197. if (nla_put_labels(skb, MPLS_IPTUNNEL_DST, tun_encap_info->labels,
  198. tun_encap_info->label))
  199. goto nla_put_failure;
  200. if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT &&
  201. nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl))
  202. goto nla_put_failure;
  203. return 0;
  204. nla_put_failure:
  205. return -EMSGSIZE;
  206. }
  207. static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
  208. {
  209. struct mpls_iptunnel_encap *tun_encap_info;
  210. int nlsize;
  211. tun_encap_info = mpls_lwtunnel_encap(lwtstate);
  212. nlsize = nla_total_size(tun_encap_info->labels * 4);
  213. if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT)
  214. nlsize += nla_total_size(1);
  215. return nlsize;
  216. }
  217. static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
  218. {
  219. struct mpls_iptunnel_encap *a_hdr = mpls_lwtunnel_encap(a);
  220. struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b);
  221. int l;
  222. if (a_hdr->labels != b_hdr->labels ||
  223. a_hdr->ttl_propagate != b_hdr->ttl_propagate ||
  224. a_hdr->default_ttl != b_hdr->default_ttl)
  225. return 1;
  226. for (l = 0; l < a_hdr->labels; l++)
  227. if (a_hdr->label[l] != b_hdr->label[l])
  228. return 1;
  229. return 0;
  230. }
  231. static const struct lwtunnel_encap_ops mpls_iptun_ops = {
  232. .build_state = mpls_build_state,
  233. .xmit = mpls_xmit,
  234. .fill_encap = mpls_fill_encap_info,
  235. .get_encap_size = mpls_encap_nlsize,
  236. .cmp_encap = mpls_encap_cmp,
  237. .owner = THIS_MODULE,
  238. };
  239. static int __init mpls_iptunnel_init(void)
  240. {
  241. return lwtunnel_encap_add_ops(&mpls_iptun_ops, LWTUNNEL_ENCAP_MPLS);
  242. }
  243. module_init(mpls_iptunnel_init);
  244. static void __exit mpls_iptunnel_exit(void)
  245. {
  246. lwtunnel_encap_del_ops(&mpls_iptun_ops, LWTUNNEL_ENCAP_MPLS);
  247. }
  248. module_exit(mpls_iptunnel_exit);
  249. MODULE_ALIAS_RTNL_LWT(MPLS);
  250. MODULE_SOFTDEP("post: mpls_gso");
  251. MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels");
  252. MODULE_LICENSE("GPL v2");