ipvlan_core.c

/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 */

#include "ipvlan.h"

static u32 ipvlan_jhash_secret __read_mostly;

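/* Seed the per-boot jhash secret on first use; all of the address
 * and MAC hashing below mixes in this value.
 */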
void ipvlan_init_secret(void)
{
	net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
}

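/* Update a slave's per-cpu RX stats: packet/byte (and multicast)
 * counters on success, the error counter otherwise.
 */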
void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
		     unsigned int len, bool success, bool mcast)
{
	if (likely(success)) {
		struct ipvl_pcpu_stats *pcptr;

		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
		u64_stats_update_begin(&pcptr->syncp);
		pcptr->rx_pkts++;
		pcptr->rx_bytes += len;
		if (mcast)
			pcptr->rx_mcast++;
		u64_stats_update_end(&pcptr->syncp);
	} else {
		this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
	}
}
EXPORT_SYMBOL_GPL(ipvlan_count_rx);

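/* Per-family helpers folding an IP address into an index into the
 * port's address hash table (hlhead[]).
 */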
#if IS_ENABLED(CONFIG_IPV6)
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
	const struct in6_addr *ip6_addr = iaddr;

	return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}
#else
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
	return 0;
}
#endif

static u8 ipvlan_get_v4_hash(const void *iaddr)
{
	const struct in_addr *ip4_addr = iaddr;

	return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}

static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
{
	if (!is_v6 && addr->atype == IPVL_IPV4) {
		struct in_addr *i4addr = (struct in_addr *)iaddr;

		return addr->ip4addr.s_addr == i4addr->s_addr;
#if IS_ENABLED(CONFIG_IPV6)
	} else if (is_v6 && addr->atype == IPVL_IPV6) {
		struct in6_addr *i6addr = (struct in6_addr *)iaddr;

		return ipv6_addr_equal(&addr->ip6addr, i6addr);
#endif
	}

	return false;
}

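/* Look up an address in the port-wide hash table. The chains are
 * RCU-protected, so callers must hold rcu_read_lock().
 */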
static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
					       const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;
	u8 hash;

	hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
	       ipvlan_get_v4_hash(iaddr);
	hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
		if (addr_equal(is_v6, addr, iaddr))
			return addr;
	return NULL;
}

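/* Hash-table insertion/removal for a slave's address; insertion is
 * skipped if the node is already hashed.
 */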
void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
{
	struct ipvl_port *port = ipvlan->port;
	u8 hash;

	hash = (addr->atype == IPVL_IPV6) ?
	       ipvlan_get_v6_hash(&addr->ip6addr) :
	       ipvlan_get_v4_hash(&addr->ip4addr);
	if (hlist_unhashed(&addr->hlnode))
		hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
}

void ipvlan_ht_addr_del(struct ipvl_addr *addr)
{
	hlist_del_init_rcu(&addr->hlnode);
}

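/* Search a single slave's address list for a match, taking the RCU
 * read lock around the walk.
 */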
struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
				   const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr, *ret = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
		if (addr_equal(is_v6, addr, iaddr)) {
			ret = addr;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}

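/* Return true if any slave on the port already owns the address;
 * used to reject duplicate address assignments.
 */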
bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
	struct ipvl_dev *ipvlan;
	bool ret = false;

	rcu_read_lock();
	list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
		if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}

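/* Validate the frame and return a pointer to its L3 header (ARP,
 * IPv4 or IPv6), setting *type to the matching IPVL_* constant.
 * IPv6 Neighbour Solicitations are reported as IPVL_ICMPV6 because
 * the address that matters for them is the target in the ICMPv6 body.
 */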
static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
{
	void *lyr3h = NULL;

	switch (skb->protocol) {
	case htons(ETH_P_ARP): {
		struct arphdr *arph;

		if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev))))
			return NULL;

		arph = arp_hdr(skb);
		*type = IPVL_ARP;
		lyr3h = arph;
		break;
	}
	case htons(ETH_P_IP): {
		u32 pktlen;
		struct iphdr *ip4h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
			return NULL;

		ip4h = ip_hdr(skb);
		pktlen = ntohs(ip4h->tot_len);
		if (ip4h->ihl < 5 || ip4h->version != 4)
			return NULL;
		if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
			return NULL;

		*type = IPVL_IPV4;
		lyr3h = ip4h;
		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *ip6h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
			return NULL;

		ip6h = ipv6_hdr(skb);
		if (ip6h->version != 6)
			return NULL;

		*type = IPVL_IPV6;
		lyr3h = ip6h;
		/* Only Neighbour Solicitation packets need different treatment. */
		if (ipv6_addr_any(&ip6h->saddr) &&
		    ip6h->nexthdr == NEXTHDR_ICMP) {
			struct icmp6hdr *icmph;

			if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph))))
				return NULL;

			ip6h = ipv6_hdr(skb);
			icmph = (struct icmp6hdr *)(ip6h + 1);

			if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
				/* Need to access the IPv6 target address in the NS body. */
				if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)
						+ sizeof(struct in6_addr))))
					return NULL;

				/* The pull may have reallocated the header data,
				 * so re-derive the pointers.
				 */
				ip6h = ipv6_hdr(skb);
				icmph = (struct icmp6hdr *)(ip6h + 1);
			}

			*type = IPVL_ICMPV6;
			lyr3h = icmph;
		}
		break;
	}
#endif
	default:
		return NULL;
	}

	return lyr3h;
}

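/* Hash the last four bytes of a MAC address into an index into the
 * per-slave multicast filter bitmap.
 */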
unsigned int ipvlan_mac_hash(const unsigned char *addr)
{
	u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
			       ipvlan_jhash_secret);

	return hash & IPVLAN_MAC_FILTER_MASK;
}

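/* Work-queue handler that drains the port's multicast backlog: each
 * deferred skb is cloned to every up slave whose MAC filter matches,
 * then the original is re-transmitted on the master (TX path) or
 * freed (RX path, where the master already saw the frame).
 */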
void ipvlan_process_multicast(struct work_struct *work)
{
	struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
	struct ethhdr *ethh;
	struct ipvl_dev *ipvlan;
	struct sk_buff *skb, *nskb;
	struct sk_buff_head list;
	unsigned int len;
	unsigned int mac_hash;
	int ret;
	u8 pkt_type;
	bool tx_pkt;

	__skb_queue_head_init(&list);

	spin_lock_bh(&port->backlog.lock);
	skb_queue_splice_tail_init(&port->backlog, &list);
	spin_unlock_bh(&port->backlog.lock);

	while ((skb = __skb_dequeue(&list)) != NULL) {
		struct net_device *dev = skb->dev;
		bool consumed = false;

		ethh = eth_hdr(skb);
		tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
		mac_hash = ipvlan_mac_hash(ethh->h_dest);

		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
			pkt_type = PACKET_BROADCAST;
		else
			pkt_type = PACKET_MULTICAST;

		rcu_read_lock();
		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
			if (tx_pkt && (ipvlan->dev == skb->dev))
				continue;
			if (!test_bit(mac_hash, ipvlan->mac_filters))
				continue;
			if (!(ipvlan->dev->flags & IFF_UP))
				continue;
			ret = NET_RX_DROP;
			len = skb->len + ETH_HLEN;
			nskb = skb_clone(skb, GFP_ATOMIC);
			local_bh_disable();
			if (nskb) {
				consumed = true;
				nskb->pkt_type = pkt_type;
				nskb->dev = ipvlan->dev;
				if (tx_pkt)
					ret = dev_forward_skb(ipvlan->dev, nskb);
				else
					ret = netif_rx(nskb);
			}
			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
			local_bh_enable();
		}
		rcu_read_unlock();

		if (tx_pkt) {
			/* If the packet originated here, send it out. */
			skb->dev = port->dev;
			skb->pkt_type = pkt_type;
			dev_queue_xmit(skb);
		} else {
			if (consumed)
				consume_skb(skb);
			else
				kfree_skb(skb);
		}
		if (dev)
			dev_put(dev);
		cond_resched();
	}
}

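/* Scrub the skb when it crosses a netns boundary and re-parent it to
 * the new device (when one is given).
 */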
static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
{
	bool xnet = true;

	if (dev)
		xnet = !net_eq(dev_net(skb->dev), dev_net(dev));

	skb_scrub_packet(skb, xnet);
	if (dev)
		skb->dev = dev;
}

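/* Deliver an skb to the slave owning the matched address. For
 * slave-to-slave traffic (local == true) the frame is forwarded with
 * dev_forward_skb(); otherwise the skb is re-tagged to the slave and
 * RX_HANDLER_ANOTHER makes the stack re-run RX delivery for it.
 */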
static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
			    bool local)
{
	struct ipvl_dev *ipvlan = addr->master;
	struct net_device *dev = ipvlan->dev;
	unsigned int len;
	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
	bool success = false;
	struct sk_buff *skb = *pskb;

	len = skb->len + ETH_HLEN;
	/* Only packets exchanged between two local slaves need to have
	 * device-up check as well as skb-share check.
	 */
	if (local) {
		if (unlikely(!(dev->flags & IFF_UP))) {
			kfree_skb(skb);
			goto out;
		}

		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			goto out;

		*pskb = skb;
	}

	if (local) {
		skb->pkt_type = PACKET_HOST;
		if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
			success = true;
	} else {
		skb->dev = dev;
		ret = RX_HANDLER_ANOTHER;
		success = true;
	}

out:
	ipvlan_count_rx(ipvlan, len, success, false);
	return ret;
}

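/* Pick the relevant address out of the parsed L3 header (destination
 * or source, per use_dest) and look it up in the port hash table.
 * For ARP the IP is read out of the variable-length body; for
 * Neighbour Solicitations the ICMPv6 target address is used.
 */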
static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
					    void *lyr3h, int addr_type,
					    bool use_dest)
{
	struct ipvl_addr *addr = NULL;

	switch (addr_type) {
#if IS_ENABLED(CONFIG_IPV6)
	case IPVL_IPV6: {
		struct ipv6hdr *ip6h;
		struct in6_addr *i6addr;

		ip6h = (struct ipv6hdr *)lyr3h;
		i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i6addr, true);
		break;
	}
	case IPVL_ICMPV6: {
		struct nd_msg *ndmh;
		struct in6_addr *i6addr;

		/* Make sure that the Neighbour Solicitation ICMPv6 packets
		 * are handled to avoid DAD issues.
		 */
		ndmh = (struct nd_msg *)lyr3h;
		if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
			i6addr = &ndmh->target;
			addr = ipvlan_ht_addr_lookup(port, i6addr, true);
		}
		break;
	}
#endif
	case IPVL_IPV4: {
		struct iphdr *ip4h;
		__be32 *i4addr;

		ip4h = (struct iphdr *)lyr3h;
		i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i4addr, false);
		break;
	}
	case IPVL_ARP: {
		struct arphdr *arph;
		unsigned char *arp_ptr;
		__be32 dip;

		arph = (struct arphdr *)lyr3h;
		arp_ptr = (unsigned char *)(arph + 1);
		if (use_dest)
			arp_ptr += (2 * port->dev->addr_len) + 4;
		else
			arp_ptr += port->dev->addr_len;

		memcpy(&dip, arp_ptr, 4);
		addr = ipvlan_ht_addr_lookup(port, &dip, false);
		break;
	}
	}

	return addr;
}

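/* L3/L3S TX for IPv4: route the packet in the master's namespace and
 * hand it to ip_local_out(); anything but a unicast or local route is
 * treated as an error.
 */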
static int ipvlan_process_v4_outbound(struct sk_buff *skb)
{
	const struct iphdr *ip4h = ip_hdr(skb);
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int err, ret = NET_XMIT_DROP;
	struct flowi4 fl4 = {
		.flowi4_oif = dev->ifindex,
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_flags = FLOWI_FLAG_ANYSRC,
		.flowi4_mark = skb->mark,
		.daddr = ip4h->daddr,
		.saddr = ip4h->saddr,
	};

	rt = ip_route_output_flow(net, &fl4, NULL);
	if (IS_ERR(rt))
		goto err;

	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
		ip_rt_put(rt);
		goto err;
	}
	skb_dst_set(skb, &rt->dst);
	err = ip_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}

#if IS_ENABLED(CONFIG_IPV6)
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct dst_entry *dst;
	int err, ret = NET_XMIT_DROP;
	struct flowi6 fl6 = {
		.flowi6_oif = dev->ifindex,
		.daddr = ip6h->daddr,
		.saddr = ip6h->saddr,
		.flowi6_flags = FLOWI_FLAG_ANYSRC,
		.flowlabel = ip6_flowinfo(ip6h),
		.flowi6_mark = skb->mark,
		.flowi6_proto = ip6h->nexthdr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		ret = dst->error;
		dst_release(dst);
		goto err;
	}
	skb_dst_set(skb, dst);
	err = ip6_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}
#else
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
	return NET_XMIT_DROP;
}
#endif

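/* Common TX tail for L3/L3S: strip the L2 header the stack built
 * (multicast/broadcast is not supported here) and dispatch to the
 * per-family output routine above.
 */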
static int ipvlan_process_outbound(struct sk_buff *skb)
{
	struct ethhdr *ethh = eth_hdr(skb);
	int ret = NET_XMIT_DROP;

	/* The ipvlan is a pseudo-L2 device, so the packets that we receive
	 * will have an L2 header, which needs to be discarded before the
	 * packet is processed further in the net-ns of the main device.
	 */
	if (skb_mac_header_was_set(skb)) {
		/* In this mode we don't care about
		 * multicast and broadcast traffic.
		 */
		if (is_multicast_ether_addr(ethh->h_dest)) {
			pr_debug_ratelimited(
				"Dropped {multi|broad}cast of type=[%x]\n",
				ntohs(skb->protocol));
			kfree_skb(skb);
			goto out;
		}

		skb_pull(skb, sizeof(*ethh));
		skb->mac_header = (typeof(skb->mac_header))~0U;
		skb_reset_network_header(skb);
	}

	if (skb->protocol == htons(ETH_P_IPV6))
		ret = ipvlan_process_v6_outbound(skb);
	else if (skb->protocol == htons(ETH_P_IP))
		ret = ipvlan_process_v4_outbound(skb);
	else {
		pr_warn_ratelimited("Dropped outbound packet type=%x\n",
				    ntohs(skb->protocol));
		kfree_skb(skb);
	}
out:
	return ret;
}

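/* Defer a multicast/broadcast frame to the per-port backlog and kick
 * the work queue. PAUSE frames are dropped outright, and the frame is
 * dropped (bumping rx_dropped) once the backlog reaches
 * IPVLAN_QBACKLOG_LIMIT.
 */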
static void ipvlan_multicast_enqueue(struct ipvl_port *port,
				     struct sk_buff *skb, bool tx_pkt)
{
	if (skb->protocol == htons(ETH_P_PAUSE)) {
		kfree_skb(skb);
		return;
	}

	/* Record whether the deferred packet is from the TX or the RX path.
	 * Deciding this later by looking at the MAC addresses on the packet
	 * would lead to erroneous decisions. (This would be the case with
	 * loopback mode on the master device or hair-pin mode on the switch.)
	 */
	IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;

	spin_lock(&port->backlog.lock);
	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
		if (skb->dev)
			dev_hold(skb->dev);
		__skb_queue_tail(&port->backlog, skb);
		spin_unlock(&port->backlog.lock);
		schedule_work(&port->wq);
	} else {
		spin_unlock(&port->backlog.lock);
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
	}
}

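/* L3/L3S TX: if the destination belongs to another slave on the same
 * port (and the port is neither VEPA nor private), loop the packet
 * back locally; otherwise send it out through the master device.
 */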
static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	void *lyr3h;
	struct ipvl_addr *addr;
	int addr_type;

	lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	if (!ipvlan_is_vepa(ipvlan->port)) {
		addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
		if (addr) {
			if (ipvlan_is_private(ipvlan->port)) {
				consume_skb(skb);
				return NET_XMIT_DROP;
			}
			return ipvlan_rcv_frame(addr, &skb, true);
		}
	}
out:
	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
	return ipvlan_process_outbound(skb);
}

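/* L2 TX: frames whose destination MAC equals the source (the shared
 * port MAC) are candidates for local slave-to-slave delivery,
 * multicast/broadcast is deferred to the backlog, and everything else
 * goes straight out of the master device.
 */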
static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (!ipvlan_is_vepa(ipvlan->port) &&
	    ether_addr_equal(eth->h_dest, eth->h_source)) {
		lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
		if (lyr3h) {
			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
			if (addr) {
				if (ipvlan_is_private(ipvlan->port)) {
					consume_skb(skb);
					return NET_XMIT_DROP;
				}
				return ipvlan_rcv_frame(addr, &skb, true);
			}
		}
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			return NET_XMIT_DROP;

		/* Packet definitely does not belong to any of the
		 * virtual devices, but the dest is local. So forward
		 * the skb for the main-dev. At the RX side we just return
		 * RX_PASS for it to be processed further on the stack.
		 */
		return dev_forward_skb(ipvlan->phy_dev, skb);

	} else if (is_multicast_ether_addr(eth->h_dest)) {
		ipvlan_skb_crossing_ns(skb, NULL);
		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
		return NET_XMIT_SUCCESS;
	}

	skb->dev = ipvlan->phy_dev;
	return dev_queue_xmit(skb);
}

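/* ndo_start_xmit entry point: validate that an Ethernet header is
 * present and dispatch to the mode-specific TX handler.
 */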
int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev);

	if (!port)
		goto out;

	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
		goto out;

	switch (port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_xmit_mode_l2(skb, dev);
	case IPVLAN_MODE_L3:
	case IPVLAN_MODE_L3S:
		return ipvlan_xmit_mode_l3(skb, dev);
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n",
		  port->mode);
out:
	kfree_skb(skb);
	return NET_XMIT_DROP;
}

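/* Return true unless the frame originated from one of the local
 * slaves, i.e. unless its source MAC is the port MAC and its source
 * address is owned by a slave.
 */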
static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
{
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
		lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
		if (!lyr3h)
			return true;

		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
		if (addr)
			return false;
	}

	return true;
}

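/* L3 RX: deliver to the slave owning the destination address, or
 * pass the frame up on the master when no slave matches.
 */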
static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	void *lyr3h;
	int addr_type;
	struct ipvl_addr *addr;
	struct sk_buff *skb = *pskb;
	rx_handler_result_t ret = RX_HANDLER_PASS;

	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
	if (addr)
		ret = ipvlan_rcv_frame(addr, pskb, false);

out:
	return ret;
}

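/* L2 RX: multicast from external sources is cloned onto the backlog
 * for per-slave distribution while the original passes on to the
 * master; unicast is handled exactly like L3 mode.
 */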
static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	struct sk_buff *skb = *pskb;
	struct ethhdr *eth = eth_hdr(skb);
	rx_handler_result_t ret = RX_HANDLER_PASS;

	if (is_multicast_ether_addr(eth->h_dest)) {
		if (ipvlan_external_frame(skb, port)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			/* External frames are queued for device local
			 * distribution, but a copy is given to master
			 * straight away to avoid sending duplicates later
			 * when work-queue processes this frame. This is
			 * achieved by returning RX_HANDLER_PASS.
			 */
			if (nskb) {
				ipvlan_skb_crossing_ns(nskb, NULL);
				ipvlan_multicast_enqueue(port, nskb, false);
			}
		}
	} else {
		/* Handle non-multicast packets as in L3 mode. */
		ret = ipvlan_handle_mode_l3(pskb, port);
	}

	return ret;
}

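/* rx_handler installed on the master device: dispatch to the
 * mode-specific RX handler. L3S delivery happens later, from the
 * netfilter input hook below, so it simply passes here.
 */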
rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev);

	if (!port)
		return RX_HANDLER_PASS;

	switch (port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_handle_mode_l2(pskb, port);
	case IPVLAN_MODE_L3:
		return ipvlan_handle_mode_l3(pskb, port);
	case IPVLAN_MODE_L3S:
		return RX_HANDLER_PASS;
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
		  port->mode);
	kfree_skb(skb);
	return RX_HANDLER_CONSUMED;
}

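/* For L3S ports only: map an skb arriving on the master to the slave
 * owning its destination address, or NULL.
 */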
static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
					    struct net_device *dev)
{
	struct ipvl_addr *addr = NULL;
	struct ipvl_port *port;
	void *lyr3h;
	int addr_type;

	if (!dev || !netif_is_ipvlan_port(dev))
		goto out;

	port = ipvlan_port_get_rcu(dev);
	if (!port || port->mode != IPVLAN_MODE_L3S)
		goto out;

	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
out:
	return addr;
}

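/* L3S input hook: redo the input route lookup with the owning slave
 * as the ingress device so the packet is routed in the slave's
 * context rather than the master's.
 */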
struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
			      u16 proto)
{
	struct ipvl_addr *addr;
	struct net_device *sdev;

	addr = ipvlan_skb_to_addr(skb, dev);
	if (!addr)
		goto out;

	sdev = addr->master->dev;
	switch (proto) {
	case AF_INET:
	{
		int err;
		struct iphdr *ip4h = ip_hdr(skb);

		err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
					   ip4h->tos, sdev);
		if (unlikely(err))
			goto out;
		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
	{
		struct dst_entry *dst;
		struct ipv6hdr *ip6h = ipv6_hdr(skb);
		int flags = RT6_LOOKUP_F_HAS_SADDR;
		struct flowi6 fl6 = {
			.flowi6_iif = sdev->ifindex,
			.daddr = ip6h->daddr,
			.saddr = ip6h->saddr,
			.flowlabel = ip6_flowinfo(ip6h),
			.flowi6_mark = skb->mark,
			.flowi6_proto = ip6h->nexthdr,
		};

		skb_dst_drop(skb);
		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
					     skb, flags);
		skb_dst_set(skb, dst);
		break;
	}
#endif
	default:
		break;
	}
out:
	return skb;
}

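/* L3S netfilter LOCAL_IN hook: re-point skb->dev at the owning slave
 * and account the packet as received on it.
 */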
unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
			     const struct nf_hook_state *state)
{
	struct ipvl_addr *addr;
	unsigned int len;

	addr = ipvlan_skb_to_addr(skb, skb->dev);
	if (!addr)
		goto out;

	skb->dev = addr->master->dev;
	len = skb->len + ETH_HLEN;
	ipvlan_count_rx(addr->master, len, true, false);
out:
	return NF_ACCEPT;
}