// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2017 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>

#include <net/dst.h>
#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>

#if IS_ENABLED(CONFIG_PSAMPLE)
#include <net/psample.h>
#endif

#include <net/sctp/checksum.h>

#include "datapath.h"
#include "drop.h"
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
#include "flow_netlink.h"
#include "openvswitch_trace.h"
struct deferred_action {
	struct sk_buff *skb;
	const struct nlattr *actions;
	int actions_len;

	/* Store pkt_key clone when creating deferred action. */
	struct sw_flow_key pkt_key;
};
#define MAX_L2_LEN	(VLAN_ETH_HLEN + 3 * MPLS_HLEN)
struct ovs_frag_data {
	unsigned long dst;
	struct vport *vport;
	struct ovs_skb_cb cb;
	__be16 inner_protocol;
	u16 network_offset;	/* valid only for MPLS */
	u16 vlan_tci;
	__be16 vlan_proto;
	unsigned int l2_len;
	u8 mac_proto;
	u8 l2_data[MAX_L2_LEN];
};
static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);

#define DEFERRED_ACTION_FIFO_SIZE 10
#define OVS_RECURSION_LIMIT 5
#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
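/* Actions may nest via sample/clone/recirc up to OVS_RECURSION_LIMIT levels.
 * The first OVS_DEFERRED_ACTION_THRESHOLD levels can clone the flow key from
 * the per-CPU 'flow_keys' scratch space and run inline; beyond that,
 * clone_execute() queues the work in the per-CPU 'action_fifos', which is
 * drained by process_deferred_actions() once the outermost level returns.
 */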
struct action_fifo {
	int head;
	int tail;
	/* Deferred action fifo queue storage. */
	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};

struct action_flow_keys {
	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
};

static struct action_fifo __percpu *action_fifos;
static struct action_flow_keys __percpu *flow_keys;
static DEFINE_PER_CPU(int, exec_actions_level);
/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
 * space. Return NULL if out of key spaces.
 */
static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
{
	struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
	int level = this_cpu_read(exec_actions_level);
	struct sw_flow_key *key = NULL;

	if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
		key = &keys->key[level - 1];
		*key = *key_;
	}

	return key;
}
static void action_fifo_init(struct action_fifo *fifo)
{
	fifo->head = 0;
	fifo->tail = 0;
}

static bool action_fifo_is_empty(const struct action_fifo *fifo)
{
	return (fifo->head == fifo->tail);
}

static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
{
	if (action_fifo_is_empty(fifo))
		return NULL;

	return &fifo->fifo[fifo->tail++];
}

static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
{
	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
		return NULL;

	return &fifo->fifo[fifo->head++];
}
/* Return the queued entry if the fifo is not full, otherwise NULL. */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
						    const struct sw_flow_key *key,
						    const struct nlattr *actions,
						    const int actions_len)
{
	struct action_fifo *fifo;
	struct deferred_action *da;

	fifo = this_cpu_ptr(action_fifos);
	da = action_fifo_put(fifo);
	if (da) {
		da->skb = skb;
		da->actions = actions;
		da->actions_len = actions_len;
		da->pkt_key = *key;
	}

	return da;
}
static void invalidate_flow_key(struct sw_flow_key *key)
{
	key->mac_proto |= SW_FLOW_KEY_INVALID;
}

static bool is_flow_key_valid(const struct sw_flow_key *key)
{
	return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}

static int clone_execute(struct datapath *dp, struct sk_buff *skb,
			 struct sw_flow_key *key,
			 u32 recirc_id,
			 const struct nlattr *actions, int len,
			 bool last, bool clone_flow_key);

static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len);
static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		     __be32 mpls_lse, __be16 mpls_ethertype, __u16 mac_len)
{
	int err;

	err = skb_mpls_push(skb, mpls_lse, mpls_ethertype, mac_len, !!mac_len);
	if (err)
		return err;

	if (!mac_len)
		key->mac_proto = MAC_PROTO_NONE;

	invalidate_flow_key(key);
	return 0;
}

static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		    const __be16 ethertype)
{
	int err;

	err = skb_mpls_pop(skb, ethertype, skb->mac_len,
			   ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
	if (err)
		return err;

	if (ethertype == htons(ETH_P_TEB))
		key->mac_proto = MAC_PROTO_ETHERNET;

	invalidate_flow_key(key);
	return 0;
}
static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const __be32 *mpls_lse, const __be32 *mask)
{
	struct mpls_shim_hdr *stack;
	__be32 lse;
	int err;

	if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
		return -ENOMEM;

	stack = mpls_hdr(skb);
	lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
	err = skb_mpls_update_lse(skb, lse);
	if (err)
		return err;

	flow_key->mpls.lse[0] = lse;
	return 0;
}
static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_vlan_pop(skb);
	if (skb_vlan_tag_present(skb)) {
		invalidate_flow_key(key);
	} else {
		key->eth.vlan.tci = 0;
		key->eth.vlan.tpid = 0;
	}
	return err;
}

static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_vlan *vlan)
{
	int err;

	if (skb_vlan_tag_present(skb)) {
		invalidate_flow_key(key);
	} else {
		key->eth.vlan.tci = vlan->vlan_tci;
		key->eth.vlan.tpid = vlan->vlan_tpid;
	}
	err = skb_vlan_push(skb, vlan->vlan_tpid,
			    ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
	skb_reset_mac_len(skb);
	return err;
}
/* 'src' is already properly masked. */
static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
{
	u16 *dst = (u16 *)dst_;
	const u16 *src = (const u16 *)src_;
	const u16 *mask = (const u16 *)mask_;

	OVS_SET_MASKED(dst[0], src[0], mask[0]);
	OVS_SET_MASKED(dst[1], src[1], mask[1]);
	OVS_SET_MASKED(dst[2], src[2], mask[2]);
}

static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
			const struct ovs_key_ethernet *key,
			const struct ovs_key_ethernet *mask)
{
	int err;

	err = skb_ensure_writable(skb, ETH_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
			       mask->eth_src);
	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
			       mask->eth_dst);

	skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
	return 0;
}
/* pop_eth does not support VLAN packets as this action is never called
 * for them.
 */
static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_eth_pop(skb);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}

static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
		    const struct ovs_action_push_eth *ethh)
{
	int err;

	err = skb_eth_push(skb, ethh->addresses.eth_dst,
			   ethh->addresses.eth_src);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_ETHERNET;
	invalidate_flow_key(key);
	return 0;
}
static noinline_for_stack int push_nsh(struct sk_buff *skb,
				       struct sw_flow_key *key,
				       const struct nlattr *a)
{
	u8 buffer[NSH_HDR_MAX_LEN];
	struct nshhdr *nh = (struct nshhdr *)buffer;
	int err;

	err = nsh_hdr_from_nlattr(a, nh, NSH_HDR_MAX_LEN);
	if (err)
		return err;

	err = nsh_push(skb, nh);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}

static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = nsh_pop(skb);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	if (skb->protocol == htons(ETH_P_TEB))
		key->mac_proto = MAC_PROTO_ETHERNET;
	else
		key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}
static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
				  __be32 addr, __be32 new_addr)
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (nh->frag_off & htons(IP_OFFSET))
		return;

	if (nh->protocol == IPPROTO_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
						 addr, new_addr, true);
	} else if (nh->protocol == IPPROTO_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace4(&uh->check, skb,
							 addr, new_addr, true);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	}
}

static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
			__be32 *addr, __be32 new_addr)
{
	update_ip_l4_checksum(skb, nh, *addr, new_addr);
	csum_replace4(&nh->check, *addr, new_addr);
	skb_clear_hash(skb);
	ovs_ct_clear(skb, NULL);
	*addr = new_addr;
}
static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
				 __be32 addr[4], const __be32 new_addr[4])
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (l4_proto == NEXTHDR_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
						  addr, new_addr, true);
	} else if (l4_proto == NEXTHDR_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace16(&uh->check, skb,
							  addr, new_addr, true);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	} else if (l4_proto == NEXTHDR_ICMP) {
		if (likely(transport_len >= sizeof(struct icmp6hdr)))
			inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
						  skb, addr, new_addr, true);
	}
}

static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
			   const __be32 mask[4], __be32 masked[4])
{
	masked[0] = OVS_MASKED(old[0], addr[0], mask[0]);
	masked[1] = OVS_MASKED(old[1], addr[1], mask[1]);
	masked[2] = OVS_MASKED(old[2], addr[2], mask[2]);
	masked[3] = OVS_MASKED(old[3], addr[3], mask[3]);
}
static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
			  __be32 addr[4], const __be32 new_addr[4],
			  bool recalculate_csum)
{
	if (recalculate_csum)
		update_ipv6_checksum(skb, l4_proto, addr, new_addr);

	skb_clear_hash(skb);
	ovs_ct_clear(skb, NULL);
	memcpy(addr, new_addr, sizeof(__be32[4]));
}

static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
{
	u8 old_ipv6_tclass = ipv6_get_dsfield(nh);

	ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12),
			     (__force __wsum)(ipv6_tclass << 12));

	ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
}

static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
{
	u32 ofl;

	ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2];
	fl = OVS_MASKED(ofl, fl, mask);

	/* Bits 21-24 are always unmasked, so this retains their values. */
	nh->flow_lbl[0] = (u8)(fl >> 16);
	nh->flow_lbl[1] = (u8)(fl >> 8);
	nh->flow_lbl[2] = (u8)fl;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl));
}

static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
{
	new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8),
			     (__force __wsum)(new_ttl << 8));
	nh->hop_limit = new_ttl;
}
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
		       u8 mask)
{
	new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask);

	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
	nh->ttl = new_ttl;
}

static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv4 *key,
		    const struct ovs_key_ipv4 *mask)
{
	struct iphdr *nh;
	__be32 new_addr;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct iphdr));
	if (unlikely(err))
		return err;

	nh = ip_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (mask->ipv4_src) {
		new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);

		if (unlikely(new_addr != nh->saddr)) {
			set_ip_addr(skb, nh, &nh->saddr, new_addr);
			flow_key->ipv4.addr.src = new_addr;
		}
	}
	if (mask->ipv4_dst) {
		new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);

		if (unlikely(new_addr != nh->daddr)) {
			set_ip_addr(skb, nh, &nh->daddr, new_addr);
			flow_key->ipv4.addr.dst = new_addr;
		}
	}
	if (mask->ipv4_tos) {
		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
		flow_key->ip.tos = nh->tos;
	}
	if (mask->ipv4_ttl) {
		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
		flow_key->ip.ttl = nh->ttl;
	}

	return 0;
}
static bool is_ipv6_mask_nonzero(const __be32 addr[4])
{
	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
}

static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv6 *key,
		    const struct ovs_key_ipv6 *mask)
{
	struct ipv6hdr *nh;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct ipv6hdr));
	if (unlikely(err))
		return err;

	nh = ipv6_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
		__be32 *saddr = (__be32 *)&nh->saddr;
		__be32 masked[4];

		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);

		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
			set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
				      true);
			memcpy(&flow_key->ipv6.addr.src, masked,
			       sizeof(flow_key->ipv6.addr.src));
		}
	}
	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
		unsigned int offset = 0;
		int flags = IP6_FH_F_SKIP_RH;
		bool recalc_csum = true;
		__be32 *daddr = (__be32 *)&nh->daddr;
		__be32 masked[4];

		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);

		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
			if (ipv6_ext_hdr(nh->nexthdr))
				recalc_csum = (ipv6_find_hdr(skb, &offset,
							     NEXTHDR_ROUTING,
							     NULL, &flags)
					       != NEXTHDR_ROUTING);

			set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
				      recalc_csum);
			memcpy(&flow_key->ipv6.addr.dst, masked,
			       sizeof(flow_key->ipv6.addr.dst));
		}
	}
	if (mask->ipv6_tclass) {
		set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
		flow_key->ip.tos = ipv6_get_dsfield(nh);
	}
	if (mask->ipv6_label) {
		set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
			    ntohl(mask->ipv6_label));
		flow_key->ipv6.label =
		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
	}
	if (mask->ipv6_hlimit) {
		set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
		flow_key->ip.ttl = nh->hop_limit;
	}
	return 0;
}
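/* Apply a masked NSH set action: rewrite flags, TTL, the path header and,
 * for MD type 1, the context headers, keeping the flow key in sync.
 */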
static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct nlattr *a)
{
	struct nshhdr *nh;
	size_t length;
	int err;
	u8 flags;
	u8 ttl;
	int i;

	struct ovs_key_nsh key;
	struct ovs_key_nsh mask;

	err = nsh_key_from_nlattr(a, &key, &mask);
	if (err)
		return err;

	/* Make sure the NSH base header is there */
	if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
		return -ENOMEM;

	nh = nsh_hdr(skb);
	length = nsh_hdr_len(nh);

	/* Make sure the whole NSH header is there */
	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				       length);
	if (unlikely(err))
		return err;

	nh = nsh_hdr(skb);
	skb_postpull_rcsum(skb, nh, length);
	flags = nsh_get_flags(nh);
	flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
	flow_key->nsh.base.flags = flags;
	ttl = nsh_get_ttl(nh);
	ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
	flow_key->nsh.base.ttl = ttl;
	nsh_set_flags_and_ttl(nh, flags, ttl);
	nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
				  mask.base.path_hdr);
	flow_key->nsh.base.path_hdr = nh->path_hdr;
	switch (nh->mdtype) {
	case NSH_M_TYPE1:
		for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
			nh->md1.context[i] =
			    OVS_MASKED(nh->md1.context[i], key.context[i],
				       mask.context[i]);
		}
		memcpy(flow_key->nsh.context, nh->md1.context,
		       sizeof(nh->md1.context));
		break;
	case NSH_M_TYPE2:
		memset(flow_key->nsh.context, 0,
		       sizeof(flow_key->nsh.context));
		break;
	default:
		return -EINVAL;
	}
	skb_postpush_rcsum(skb, nh, length);
	return 0;
}
/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
			__be16 new_port, __sum16 *check)
{
	ovs_ct_clear(skb, NULL);
	inet_proto_csum_replace2(check, skb, *port, new_port, false);
	*port = new_port;
}

static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_udp *key,
		   const struct ovs_key_udp *mask)
{
	struct udphdr *uh;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct udphdr));
	if (unlikely(err))
		return err;

	uh = udp_hdr(skb);
	/* Either of the masks is non-zero, so do not bother checking them. */
	src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src);
	dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst);

	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
		if (likely(src != uh->source)) {
			set_tp_port(skb, &uh->source, src, &uh->check);
			flow_key->tp.src = src;
		}
		if (likely(dst != uh->dest)) {
			set_tp_port(skb, &uh->dest, dst, &uh->check);
			flow_key->tp.dst = dst;
		}

		if (unlikely(!uh->check))
			uh->check = CSUM_MANGLED_0;
	} else {
		uh->source = src;
		uh->dest = dst;
		flow_key->tp.src = src;
		flow_key->tp.dst = dst;
		ovs_ct_clear(skb, NULL);
	}

	skb_clear_hash(skb);

	return 0;
}
static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_tcp *key,
		   const struct ovs_key_tcp *mask)
{
	struct tcphdr *th;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct tcphdr));
	if (unlikely(err))
		return err;

	th = tcp_hdr(skb);
	src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src);
	if (likely(src != th->source)) {
		set_tp_port(skb, &th->source, src, &th->check);
		flow_key->tp.src = src;
	}
	dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
	if (likely(dst != th->dest)) {
		set_tp_port(skb, &th->dest, dst, &th->check);
		flow_key->tp.dst = dst;
	}
	skb_clear_hash(skb);

	return 0;
}

static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_sctp *key,
		    const struct ovs_key_sctp *mask)
{
	unsigned int sctphoff = skb_transport_offset(skb);
	struct sctphdr *sh;
	__le32 old_correct_csum, new_csum, old_csum;
	int err;

	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
	if (unlikely(err))
		return err;

	sh = sctp_hdr(skb);
	old_csum = sh->checksum;
	old_correct_csum = sctp_compute_cksum(skb, sctphoff);

	sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src);
	sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);

	new_csum = sctp_compute_cksum(skb, sctphoff);

	/* Carry any checksum errors through. */
	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

	skb_clear_hash(skb);
	ovs_ct_clear(skb, NULL);

	flow_key->tp.src = sh->source;
	flow_key->tp.dst = sh->dest;

	return 0;
}
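/* Fragment output callback: invoked once per fragment by ip_do_fragment()
 * or the IPv6 fragmentation stub.  Restores the datapath state and L2 header
 * saved by prepare_frag() and hands the fragment to the egress vport.
 */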
static int ovs_vport_output(struct net *net, struct sock *sk,
			    struct sk_buff *skb)
{
	struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
	struct vport *vport = data->vport;

	if (skb_cow_head(skb, data->l2_len) < 0) {
		kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
		return -ENOMEM;
	}

	__skb_dst_copy(skb, data->dst);
	*OVS_CB(skb) = data->cb;
	skb->inner_protocol = data->inner_protocol;
	if (data->vlan_tci & VLAN_CFI_MASK)
		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci & ~VLAN_CFI_MASK);
	else
		__vlan_hwaccel_clear_tag(skb);

	/* Reconstruct the MAC header. */
	skb_push(skb, data->l2_len);
	memcpy(skb->data, &data->l2_data, data->l2_len);
	skb_postpush_rcsum(skb, skb->data, data->l2_len);
	skb_reset_mac_header(skb);

	if (eth_p_mpls(skb->protocol)) {
		skb->inner_network_header = skb->network_header;
		skb_set_network_header(skb, data->network_offset);
		skb_reset_mac_len(skb);
	}

	ovs_vport_send(vport, skb, data->mac_proto);
	return 0;
}

static unsigned int
ovs_dst_get_mtu(const struct dst_entry *dst)
{
	return dst->dev->mtu;
}

static struct dst_ops ovs_dst_ops = {
	.family = AF_UNSPEC,
	.mtu = ovs_dst_get_mtu,
};
/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
 * ovs_vport_output(), which is called once per fragmented packet.
 */
static void prepare_frag(struct vport *vport, struct sk_buff *skb,
			 u16 orig_network_offset, u8 mac_proto)
{
	unsigned int hlen = skb_network_offset(skb);
	struct ovs_frag_data *data;

	data = this_cpu_ptr(&ovs_frag_data_storage);
	data->dst = skb->_skb_refdst;
	data->vport = vport;
	data->cb = *OVS_CB(skb);
	data->inner_protocol = skb->inner_protocol;
	data->network_offset = orig_network_offset;
	if (skb_vlan_tag_present(skb))
		data->vlan_tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK;
	else
		data->vlan_tci = 0;
	data->vlan_proto = skb->vlan_proto;
	data->mac_proto = mac_proto;
	data->l2_len = hlen;
	memcpy(&data->l2_data, skb->data, hlen);

	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	skb_pull(skb, hlen);
}
static void ovs_fragment(struct net *net, struct vport *vport,
			 struct sk_buff *skb, u16 mru,
			 struct sw_flow_key *key)
{
	enum ovs_drop_reason reason;
	u16 orig_network_offset = 0;

	if (eth_p_mpls(skb->protocol)) {
		orig_network_offset = skb_network_offset(skb);
		skb->network_header = skb->inner_network_header;
	}

	if (skb_network_offset(skb) > MAX_L2_LEN) {
		OVS_NLERR(1, "L2 header too long to fragment");
		reason = OVS_DROP_FRAG_L2_TOO_LONG;
		goto err;
	}

	if (key->eth.type == htons(ETH_P_IP)) {
		struct rtable ovs_rt = { 0 };
		unsigned long orig_dst;

		prepare_frag(vport, skb, orig_network_offset,
			     ovs_key_mac_proto(key));
		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
		ovs_rt.dst.dev = vport->dev;

		orig_dst = skb->_skb_refdst;
		skb_dst_set_noref(skb, &ovs_rt.dst);
		IPCB(skb)->frag_max_size = mru;

		ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
		refdst_drop(orig_dst);
	} else if (key->eth.type == htons(ETH_P_IPV6)) {
		unsigned long orig_dst;
		struct rt6_info ovs_rt;

		prepare_frag(vport, skb, orig_network_offset,
			     ovs_key_mac_proto(key));
		memset(&ovs_rt, 0, sizeof(ovs_rt));
		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
		ovs_rt.dst.dev = vport->dev;

		orig_dst = skb->_skb_refdst;
		skb_dst_set_noref(skb, &ovs_rt.dst);
		IP6CB(skb)->frag_max_size = mru;

		ipv6_stub->ipv6_fragment(net, skb->sk, skb, ovs_vport_output);
		refdst_drop(orig_dst);
	} else {
		WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
			  ovs_vport_name(vport), ntohs(key->eth.type), mru,
			  vport->dev->mtu);
		reason = OVS_DROP_FRAG_INVALID_PROTO;
		goto err;
	}

	return;
err:
	ovs_kfree_skb_reason(skb, reason);
}
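/* Send 'skb' out of 'out_port': apply any pending truncation (cutlen), then
 * either transmit directly, fragment down to the stored MRU, or drop the
 * packet if it cannot fit the egress device.
 */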
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
		      struct sw_flow_key *key)
{
	struct vport *vport = ovs_vport_rcu(dp, out_port);

	if (likely(vport &&
		   netif_running(vport->dev) &&
		   netif_carrier_ok(vport->dev))) {
		u16 mru = OVS_CB(skb)->mru;
		u32 cutlen = OVS_CB(skb)->cutlen;

		if (unlikely(cutlen > 0)) {
			if (skb->len - cutlen > ovs_mac_header_len(key))
				pskb_trim(skb, skb->len - cutlen);
			else
				pskb_trim(skb, ovs_mac_header_len(key));
		}

		if (likely(!mru ||
			   (skb->len <= mru + vport->dev->hard_header_len))) {
			ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
		} else if (mru <= vport->dev->mtu) {
			struct net *net = read_pnet(&dp->net);

			ovs_fragment(net, vport, skb, mru, key);
		} else {
			kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
		}
	} else {
		kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY);
	}
}
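/* Build an OVS_PACKET_CMD_ACTION upcall from the OVS_ACTION_ATTR_USERSPACE
 * attribute and send the packet to userspace.
 */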
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
			    struct sw_flow_key *key, const struct nlattr *attr,
			    const struct nlattr *actions, int actions_len,
			    uint32_t cutlen)
{
	struct dp_upcall_info upcall;
	const struct nlattr *a;
	int rem;

	memset(&upcall, 0, sizeof(upcall));
	upcall.cmd = OVS_PACKET_CMD_ACTION;
	upcall.mru = OVS_CB(skb)->mru;

	nla_for_each_nested(a, attr, rem) {
		switch (nla_type(a)) {
		case OVS_USERSPACE_ATTR_USERDATA:
			upcall.userdata = a;
			break;

		case OVS_USERSPACE_ATTR_PID:
			if (dp->user_features &
			    OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
				upcall.portid =
				   ovs_dp_get_upcall_portid(dp,
							    smp_processor_id());
			else
				upcall.portid = nla_get_u32(a);
			break;

		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
			/* Get out tunnel info. */
			struct vport *vport;

			vport = ovs_vport_rcu(dp, nla_get_u32(a));
			if (vport) {
				int err;

				err = dev_fill_metadata_dst(vport->dev, skb);
				if (!err)
					upcall.egress_tun_info = skb_tunnel_info(skb);
			}
			break;
		}

		case OVS_USERSPACE_ATTR_ACTIONS: {
			/* Include actions. */
			upcall.actions = actions;
			upcall.actions_len = actions_len;
			break;
		}

		} /* End of switch. */
	}

	return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
}
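/* Run the nested OVS_DEC_TTL_ATTR_ACTION list once the TTL has expired, or
 * drop the packet if no exception actions were given.
 */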
static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
				     struct sw_flow_key *key,
				     const struct nlattr *attr)
{
	/* The first attribute is always 'OVS_DEC_TTL_ATTR_ACTION'. */
	struct nlattr *actions = nla_data(attr);

	if (nla_len(actions))
		return clone_execute(dp, skb, key, 0, nla_data(actions),
				     nla_len(actions), true, false);

	ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL);
	return 0;
}
/* When 'last' is true, sample() should always consume the 'skb'.
 * Otherwise, sample() should keep 'skb' intact regardless of what
 * actions are executed within sample().
 */
static int sample(struct datapath *dp, struct sk_buff *skb,
		  struct sw_flow_key *key, const struct nlattr *attr,
		  bool last)
{
	struct nlattr *actions;
	struct nlattr *sample_arg;
	int rem = nla_len(attr);
	const struct sample_arg *arg;
	u32 init_probability;
	bool clone_flow_key;
	int err;

	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
	sample_arg = nla_data(attr);
	arg = nla_data(sample_arg);
	actions = nla_next(sample_arg, &rem);
	init_probability = OVS_CB(skb)->probability;

	if ((arg->probability != U32_MAX) &&
	    (!arg->probability || get_random_u32() > arg->probability)) {
		if (last)
			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
		return 0;
	}

	OVS_CB(skb)->probability = arg->probability;

	clone_flow_key = !arg->exec;
	err = clone_execute(dp, skb, key, 0, actions, rem, last,
			    clone_flow_key);

	if (!last)
		OVS_CB(skb)->probability = init_probability;

	return err;
}
/* When 'last' is true, clone() should always consume the 'skb'.
 * Otherwise, clone() should keep 'skb' intact regardless of what
 * actions are executed within clone().
 */
static int clone(struct datapath *dp, struct sk_buff *skb,
		 struct sw_flow_key *key, const struct nlattr *attr,
		 bool last)
{
	struct nlattr *actions;
	struct nlattr *clone_arg;
	int rem = nla_len(attr);
	bool dont_clone_flow_key;

	/* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
	clone_arg = nla_data(attr);
	dont_clone_flow_key = nla_get_u32(clone_arg);
	actions = nla_next(clone_arg, &rem);

	return clone_execute(dp, skb, key, 0, actions, rem, last,
			     !dont_clone_flow_key);
}
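/* Compute a packet hash for OVS_ACTION_ATTR_HASH and store it in the flow
 * key; a zero result is remapped to 0x1 so "no hash" stays distinguishable.
 */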
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
			 const struct nlattr *attr)
{
	struct ovs_action_hash *hash_act = nla_data(attr);
	u32 hash = 0;

	if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
		/* OVS_HASH_ALG_L4 hashing type. */
		hash = skb_get_hash(skb);
	} else if (hash_act->hash_alg == OVS_HASH_ALG_SYM_L4) {
		/* OVS_HASH_ALG_SYM_L4 hashing type. NOTE: this doesn't
		 * extend past an encapsulated header.
		 */
		hash = __skb_get_hash_symmetric(skb);
	}

	hash = jhash_1word(hash, hash_act->hash_basis);
	if (!hash)
		hash = 0x1;

	key->ovs_flow_hash = hash;
}
static int execute_set_action(struct sk_buff *skb,
			      struct sw_flow_key *flow_key,
			      const struct nlattr *a)
{
	/* Only tunnel set execution is supported without a mask. */
	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
		struct ovs_tunnel_info *tun = nla_data(a);

		skb_dst_drop(skb);
		dst_hold((struct dst_entry *)tun->tun_dst);
		skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);
		return 0;
	}

	return -EINVAL;
}

/* Mask is at the midpoint of the data. */
#define get_mask(a, type) ((const type)nla_data(a) + 1)
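/* Dispatch a masked set action to the per-protocol helper.  Each attribute
 * carries the value followed by its mask (see get_mask() above).
 */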
static int execute_masked_set_action(struct sk_buff *skb,
				     struct sw_flow_key *flow_key,
				     const struct nlattr *a)
{
	int err = 0;

	switch (nla_type(a)) {
	case OVS_KEY_ATTR_PRIORITY:
		OVS_SET_MASKED(skb->priority, nla_get_u32(a),
			       *get_mask(a, u32 *));
		flow_key->phy.priority = skb->priority;
		break;

	case OVS_KEY_ATTR_SKB_MARK:
		OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
		flow_key->phy.skb_mark = skb->mark;
		break;

	case OVS_KEY_ATTR_TUNNEL_INFO:
		/* Masked data not supported for tunnel. */
		err = -EINVAL;
		break;

	case OVS_KEY_ATTR_ETHERNET:
		err = set_eth_addr(skb, flow_key, nla_data(a),
				   get_mask(a, struct ovs_key_ethernet *));
		break;

	case OVS_KEY_ATTR_NSH:
		err = set_nsh(skb, flow_key, a);
		break;

	case OVS_KEY_ATTR_IPV4:
		err = set_ipv4(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv4 *));
		break;

	case OVS_KEY_ATTR_IPV6:
		err = set_ipv6(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_ipv6 *));
		break;

	case OVS_KEY_ATTR_TCP:
		err = set_tcp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_tcp *));
		break;

	case OVS_KEY_ATTR_UDP:
		err = set_udp(skb, flow_key, nla_data(a),
			      get_mask(a, struct ovs_key_udp *));
		break;

	case OVS_KEY_ATTR_SCTP:
		err = set_sctp(skb, flow_key, nla_data(a),
			       get_mask(a, struct ovs_key_sctp *));
		break;

	case OVS_KEY_ATTR_MPLS:
		err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
								    __be32 *));
		break;

	case OVS_KEY_ATTR_CT_STATE:
	case OVS_KEY_ATTR_CT_ZONE:
	case OVS_KEY_ATTR_CT_MARK:
	case OVS_KEY_ATTR_CT_LABELS:
	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
		err = -EINVAL;
		break;
	}

	return err;
}
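/* OVS_ACTION_ATTR_RECIRC: revalidate the flow key if needed, then re-run the
 * packet through the datapath with the new recirculation id.
 */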
static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
			  struct sw_flow_key *key,
			  const struct nlattr *a, bool last)
{
	u32 recirc_id;

	if (!is_flow_key_valid(key)) {
		int err;

		err = ovs_flow_key_update(skb, key);
		if (err)
			return err;
	}
	BUG_ON(!is_flow_key_valid(key));

	recirc_id = nla_get_u32(a);
	return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
}
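/* OVS_ACTION_ATTR_CHECK_PKT_LEN: pick the "less than or equal" or "greater"
 * nested action list based on the packet length and execute it on a clone.
 */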
static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
				 struct sw_flow_key *key,
				 const struct nlattr *attr, bool last)
{
	struct ovs_skb_cb *ovs_cb = OVS_CB(skb);
	const struct nlattr *actions, *cpl_arg;
	int len, max_len, rem = nla_len(attr);
	const struct check_pkt_len_arg *arg;
	bool clone_flow_key;

	/* The first netlink attribute in 'attr' is always
	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
	 */
	cpl_arg = nla_data(attr);
	arg = nla_data(cpl_arg);

	len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len;
	max_len = arg->pkt_len;

	if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) ||
	    len <= max_len) {
		/* Second netlink attribute in 'attr' is always
		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
		 */
		actions = nla_next(cpl_arg, &rem);
		clone_flow_key = !arg->exec_for_lesser_equal;
	} else {
		/* Third netlink attribute in 'attr' is always
		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'.
		 */
		actions = nla_next(cpl_arg, &rem);
		actions = nla_next(actions, &rem);
		clone_flow_key = !arg->exec_for_greater;
	}

	return clone_execute(dp, skb, key, 0, nla_data(actions),
			     nla_len(actions), last, clone_flow_key);
}
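/* Decrement the IPv4 TTL or IPv6 hop limit.  Returns -EHOSTUNREACH when the
 * TTL would expire so the caller can run the dec_ttl exception actions.
 */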
static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	if (skb->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *nh;

		err = skb_ensure_writable(skb, skb_network_offset(skb) +
					  sizeof(*nh));
		if (unlikely(err))
			return err;

		nh = ipv6_hdr(skb);

		if (nh->hop_limit <= 1)
			return -EHOSTUNREACH;

		key->ip.ttl = --nh->hop_limit;
	} else if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *nh;
		u8 old_ttl;

		err = skb_ensure_writable(skb, skb_network_offset(skb) +
					  sizeof(*nh));
		if (unlikely(err))
			return err;

		nh = ip_hdr(skb);
		if (nh->ttl <= 1)
			return -EHOSTUNREACH;

		old_ttl = nh->ttl--;
		csum_replace2(&nh->check, htons(old_ttl << 8),
			      htons(nh->ttl << 8));
		key->ip.ttl = nh->ttl;
	}
	return 0;
}
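/* OVS_ACTION_ATTR_PSAMPLE: mirror the packet to psample, carrying the group,
 * optional user cookie and the sampling probability recorded on this skb.
 * Compiled out to a no-op when CONFIG_PSAMPLE is disabled.
 */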
#if IS_ENABLED(CONFIG_PSAMPLE)
static void execute_psample(struct datapath *dp, struct sk_buff *skb,
			    const struct nlattr *attr)
{
	struct psample_group psample_group = {};
	struct psample_metadata md = {};
	const struct nlattr *a;
	u32 rate;
	int rem;

	nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
		switch (nla_type(a)) {
		case OVS_PSAMPLE_ATTR_GROUP:
			psample_group.group_num = nla_get_u32(a);
			break;

		case OVS_PSAMPLE_ATTR_COOKIE:
			md.user_cookie = nla_data(a);
			md.user_cookie_len = nla_len(a);
			break;
		}
	}

	psample_group.net = ovs_dp_get_net(dp);
	md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex;
	md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
	md.rate_as_probability = 1;

	rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;

	psample_sample_packet(&psample_group, skb, rate, &md);
}
#else
static void execute_psample(struct datapath *dp, struct sk_buff *skb,
			    const struct nlattr *attr)
{}
#endif
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len)
{
	const struct nlattr *a;
	int rem;

	for (a = attr, rem = len; rem > 0;
	     a = nla_next(a, &rem)) {
		int err = 0;

		if (trace_ovs_do_execute_action_enabled())
			trace_ovs_do_execute_action(dp, skb, key, a, rem);

		/* Actions that rightfully have to consume the skb should do it
		 * and return directly.
		 */
		switch (nla_type(a)) {
		case OVS_ACTION_ATTR_OUTPUT: {
			int port = nla_get_u32(a);
			struct sk_buff *clone;

			/* Every output action needs a separate clone
			 * of 'skb'. In case the output action is the
			 * last action, cloning can be avoided.
			 */
			if (nla_is_last(a, rem)) {
				do_output(dp, skb, port, key);
				/* 'skb' has been used for output.
				 */
				return 0;
			}

			clone = skb_clone(skb, GFP_ATOMIC);
			if (clone)
				do_output(dp, clone, port, key);
			OVS_CB(skb)->cutlen = 0;
			break;
		}
		case OVS_ACTION_ATTR_TRUNC: {
			struct ovs_action_trunc *trunc = nla_data(a);

			if (skb->len > trunc->max_len)
				OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
			break;
		}

		case OVS_ACTION_ATTR_USERSPACE:
			output_userspace(dp, skb, key, a, attr,
					 len, OVS_CB(skb)->cutlen);
			OVS_CB(skb)->cutlen = 0;
			if (nla_is_last(a, rem)) {
				consume_skb(skb);
				return 0;
			}
			break;

		case OVS_ACTION_ATTR_HASH:
			execute_hash(skb, key, a);
			break;

		case OVS_ACTION_ATTR_PUSH_MPLS: {
			struct ovs_action_push_mpls *mpls = nla_data(a);

			err = push_mpls(skb, key, mpls->mpls_lse,
					mpls->mpls_ethertype, skb->mac_len);
			break;
		}
		case OVS_ACTION_ATTR_ADD_MPLS: {
			struct ovs_action_add_mpls *mpls = nla_data(a);
			__u16 mac_len = 0;

			if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
				mac_len = skb->mac_len;

			err = push_mpls(skb, key, mpls->mpls_lse,
					mpls->mpls_ethertype, mac_len);
			break;
		}
		case OVS_ACTION_ATTR_POP_MPLS:
			err = pop_mpls(skb, key, nla_get_be16(a));
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			err = push_vlan(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_VLAN:
			err = pop_vlan(skb, key);
			break;

		case OVS_ACTION_ATTR_RECIRC: {
			bool last = nla_is_last(a, rem);

			err = execute_recirc(dp, skb, key, a, last);
			if (last) {
				/* If this is the last action, the skb has
				 * been consumed or freed.
				 * Return immediately.
				 */
				return err;
			}
			break;
		}

		case OVS_ACTION_ATTR_SET:
			err = execute_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SET_MASKED:
		case OVS_ACTION_ATTR_SET_TO_MASKED:
			err = execute_masked_set_action(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_SAMPLE: {
			bool last = nla_is_last(a, rem);

			err = sample(dp, skb, key, a, last);
			if (last)
				return err;

			break;
		}
		case OVS_ACTION_ATTR_CT:
			if (!is_flow_key_valid(key)) {
				err = ovs_flow_key_update(skb, key);
				if (err)
					return err;
			}

			err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
					     nla_data(a));

			/* Hide stolen IP fragments from user space. */
			if (err)
				return err == -EINPROGRESS ? 0 : err;
			break;

		case OVS_ACTION_ATTR_CT_CLEAR:
			err = ovs_ct_clear(skb, key);
			break;

		case OVS_ACTION_ATTR_PUSH_ETH:
			err = push_eth(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_ETH:
			err = pop_eth(skb, key);
			break;

		case OVS_ACTION_ATTR_PUSH_NSH:
			err = push_nsh(skb, key, nla_data(a));
			break;

		case OVS_ACTION_ATTR_POP_NSH:
			err = pop_nsh(skb, key);
			break;

		case OVS_ACTION_ATTR_METER:
			if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
				ovs_kfree_skb_reason(skb, OVS_DROP_METER);
				return 0;
			}
			break;

		case OVS_ACTION_ATTR_CLONE: {
			bool last = nla_is_last(a, rem);

			err = clone(dp, skb, key, a, last);
			if (last)
				return err;

			break;
		}

		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
			bool last = nla_is_last(a, rem);

			err = execute_check_pkt_len(dp, skb, key, a, last);
			if (last)
				return err;

			break;
		}

		case OVS_ACTION_ATTR_DEC_TTL:
			err = execute_dec_ttl(skb, key);
			if (err == -EHOSTUNREACH)
				return dec_ttl_exception_handler(dp, skb,
								 key, a);
			break;

		case OVS_ACTION_ATTR_DROP: {
			enum ovs_drop_reason reason = nla_get_u32(a)
				? OVS_DROP_EXPLICIT_WITH_ERROR
				: OVS_DROP_EXPLICIT;

			ovs_kfree_skb_reason(skb, reason);
			return 0;
		}

		case OVS_ACTION_ATTR_PSAMPLE:
			execute_psample(dp, skb, a);
			OVS_CB(skb)->cutlen = 0;
			if (nla_is_last(a, rem)) {
				consume_skb(skb);
				return 0;
			}
			break;
		}

		if (unlikely(err)) {
			ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR);
			return err;
		}
	}

	ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
	return 0;
}
/* Execute the actions on the clone of the packet. The effect of the
 * execution does not affect the original 'skb' nor the original 'key'.
 *
 * The execution may be deferred in case the actions can not be executed
 * immediately.
 */
static int clone_execute(struct datapath *dp, struct sk_buff *skb,
			 struct sw_flow_key *key, u32 recirc_id,
			 const struct nlattr *actions, int len,
			 bool last, bool clone_flow_key)
{
	struct deferred_action *da;
	struct sw_flow_key *clone;

	skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
	if (!skb) {
		/* Out of memory, skip this action.
		 */
		return 0;
	}

	/* When clone_flow_key is false, the 'key' will not be changed
	 * by the actions, so the 'key' can be used directly.
	 * Otherwise, try to clone key from the next recursion level of
	 * 'flow_keys'. If clone is successful, execute the actions
	 * without deferring.
	 */
	clone = clone_flow_key ? clone_key(key) : key;
	if (clone) {
		int err = 0;

		if (actions) { /* Sample action */
			if (clone_flow_key)
				__this_cpu_inc(exec_actions_level);

			err = do_execute_actions(dp, skb, clone,
						 actions, len);

			if (clone_flow_key)
				__this_cpu_dec(exec_actions_level);
		} else { /* Recirc action */
			clone->recirc_id = recirc_id;
			ovs_dp_process_packet(skb, clone);
		}

		return err;
	}

	/* Out of 'flow_keys' space. Defer actions */
	da = add_deferred_actions(skb, key, actions, len);
	if (da) {
		if (!actions) { /* Recirc action */
			key = &da->pkt_key;
			key->recirc_id = recirc_id;
		}
	} else {
		/* Out of per CPU action FIFO space. Drop the 'skb' and
		 * log an error.
		 */
		ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT);

		if (net_ratelimit()) {
			if (actions) { /* Sample action */
				pr_warn("%s: deferred action limit reached, drop sample action\n",
					ovs_dp_name(dp));
			} else { /* Recirc action */
				pr_warn("%s: deferred action limit reached, drop recirc action (recirc_id=%#x)\n",
					ovs_dp_name(dp), recirc_id);
			}
		}
	}
	return 0;
}
static void process_deferred_actions(struct datapath *dp)
{
	struct action_fifo *fifo = this_cpu_ptr(action_fifos);

	/* Do not touch the FIFO in case there are no deferred actions. */
	if (action_fifo_is_empty(fifo))
		return;

	/* Finish executing all deferred actions. */
	do {
		struct deferred_action *da = action_fifo_get(fifo);
		struct sk_buff *skb = da->skb;
		struct sw_flow_key *key = &da->pkt_key;
		const struct nlattr *actions = da->actions;
		int actions_len = da->actions_len;

		if (actions)
			do_execute_actions(dp, skb, key, actions, actions_len);
		else
			ovs_dp_process_packet(skb, key);
	} while (!action_fifo_is_empty(fifo));

	/* Reset FIFO for the next packet. */
	action_fifo_init(fifo);
}
/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
			const struct sw_flow_actions *acts,
			struct sw_flow_key *key)
{
	int err, level;

	level = __this_cpu_inc_return(exec_actions_level);
	if (unlikely(level > OVS_RECURSION_LIMIT)) {
		net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
				     ovs_dp_name(dp));
		ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT);
		err = -ENETDOWN;
		goto out;
	}

	OVS_CB(skb)->acts_origlen = acts->orig_len;
	err = do_execute_actions(dp, skb, key,
				 acts->actions, acts->actions_len);

	if (level == 1)
		process_deferred_actions(dp);

out:
	__this_cpu_dec(exec_actions_level);
	return err;
}
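/* Allocate the per-CPU deferred-action FIFOs and flow-key scratch space used
 * by clone_execute() and process_deferred_actions().
 */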
int action_fifos_init(void)
{
	action_fifos = alloc_percpu(struct action_fifo);
	if (!action_fifos)
		return -ENOMEM;

	flow_keys = alloc_percpu(struct action_flow_keys);
	if (!flow_keys) {
		free_percpu(action_fifos);
		return -ENOMEM;
	}

	return 0;
}

void action_fifos_exit(void)
{
	free_percpu(action_fifos);
	free_percpu(flow_keys);
}