test_lwt_seg6local.c

#include <stddef.h>
#include <inttypes.h>
#include <errno.h>
#include <linux/seg6_local.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"

#define bpf_printk(fmt, ...)				\
({							\
	char ____fmt[] = fmt;				\
	bpf_trace_printk(____fmt, sizeof(____fmt),	\
			 ##__VA_ARGS__);		\
})

/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
	({ void *_tmp = _cursor; _cursor += _len; _tmp; })

#define SR6_FLAG_ALERT (1 << 4)

#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
			0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
			0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))

#define BPF_PACKET_HEADER __attribute__((packed))

struct ip6_t {
	unsigned int ver:4;
	unsigned int priority:8;
	unsigned int flow_label:20;
	unsigned short payload_len;
	unsigned char next_header;
	unsigned char hop_limit;
	unsigned long long src_hi;
	unsigned long long src_lo;
	unsigned long long dst_hi;
	unsigned long long dst_lo;
} BPF_PACKET_HEADER;

struct ip6_addr_t {
	unsigned long long hi;
	unsigned long long lo;
} BPF_PACKET_HEADER;

struct ip6_srh_t {
	unsigned char nexthdr;
	unsigned char hdrlen;
	unsigned char type;
	unsigned char segments_left;
	unsigned char first_segment;
	unsigned char flags;
	unsigned short tag;

	struct ip6_addr_t segments[0];
} BPF_PACKET_HEADER;

struct sr6_tlv_t {
	unsigned char type;
	unsigned char len;
	unsigned char value[0];
} BPF_PACKET_HEADER;
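
/* Return a pointer to the Segment Routing Header of the packet, or NULL if
 * the packet is not IPv6, the IPv6 next header is not a Routing Header (43)
 * of type 4 (SRH), or the headers cannot be read within the packet bounds.
 */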
static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb)
{
	void *cursor, *data_end;
	struct ip6_srh_t *srh;
	struct ip6_t *ip;
	uint8_t *ipver;

	data_end = (void *)(long)skb->data_end;
	cursor = (void *)(long)skb->data;
	ipver = (uint8_t *)cursor;

	if ((void *)ipver + sizeof(*ipver) > data_end)
		return NULL;

	if ((*ipver >> 4) != 6)
		return NULL;

	ip = cursor_advance(cursor, sizeof(*ip));
	if ((void *)ip + sizeof(*ip) > data_end)
		return NULL;

	if (ip->next_header != 43)
		return NULL;

	srh = cursor_advance(cursor, sizeof(*srh));
	if ((void *)srh + sizeof(*srh) > data_end)
		return NULL;

	if (srh->type != 4)
		return NULL;

	return srh;
}
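
/* Resize the padding area at pad_off from old_pad to new_pad bytes and, if
 * padding is still required, rewrite a Padding TLV of new_pad bytes there.
 * Callers choose new_pad so that the resulting SRH length is a multiple of 8.
 */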
static __always_inline
int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
		   uint32_t old_pad, uint32_t pad_off)
{
	int err;

	if (new_pad != old_pad) {
		err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
					      (int) new_pad - (int) old_pad);
		if (err)
			return err;
	}

	if (new_pad > 0) {
		char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
					0, 0, 0};
		struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;

		pad_tlv->type = SR6_TLV_PADDING;
		pad_tlv->len = new_pad - 2;

		err = bpf_lwt_seg6_store_bytes(skb, pad_off,
					       (void *)pad_tlv_buf, new_pad);
		if (err)
			return err;
	}

	return 0;
}
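
/* Walk the TLVs that follow the segment list to check that *tlv_off falls on
 * a TLV boundary. On success, *pad_off and *pad_size describe the existing
 * Padding TLV (if any), and a *tlv_off of -1 is replaced by the offset at
 * which a new TLV can be appended. Returns -EINVAL for an invalid offset.
 */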
static __always_inline
int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
			  uint32_t *tlv_off, uint32_t *pad_size,
			  uint32_t *pad_off)
{
	uint32_t srh_off, cur_off;
	int offset_valid = 0;
	int err;

	srh_off = (char *)srh - (char *)(long)skb->data;
	// cur_off = end of segments, start of possible TLVs
	cur_off = srh_off + sizeof(*srh) +
		sizeof(struct ip6_addr_t) * (srh->first_segment + 1);

	*pad_off = 0;

	// we can only go as far as ~10 TLVs due to the BPF max stack size
	#pragma clang loop unroll(full)
	for (int i = 0; i < 10; i++) {
		struct sr6_tlv_t tlv;

		if (cur_off == *tlv_off)
			offset_valid = 1;

		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
			break;

		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
		if (err)
			return err;

		if (tlv.type == SR6_TLV_PADDING) {
			*pad_size = tlv.len + sizeof(tlv);
			*pad_off = cur_off;

			if (*tlv_off == srh_off) {
				*tlv_off = cur_off;
				offset_valid = 1;
			}
			break;

		} else if (tlv.type == SR6_TLV_HMAC) {
			break;
		}

		cur_off += sizeof(tlv) + tlv.len;
	} // we reached the padding or HMAC TLVs, or the end of the SRH

	if (*pad_off == 0)
		*pad_off = cur_off;

	if (*tlv_off == -1)
		*tlv_off = cur_off;
	else if (!offset_valid)
		return -EINVAL;

	return 0;
}
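
/* Insert the TLV pointed to by itlv at offset tlv_off inside the SRH
 * (tlv_off is relative to the SRH; -1 appends it after the existing TLVs),
 * then recompute the Padding TLV so that the grown SRH stays 8-byte aligned.
 * Padding and HMAC TLVs cannot be inserted with this function.
 */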
static __always_inline
int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
{
	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
	uint8_t len_remaining, new_pad;
	uint32_t pad_off = 0;
	uint32_t pad_size = 0;
	uint32_t partial_srh_len;
	int err;

	if (tlv_off != -1)
		tlv_off += srh_off;

	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
		return -EINVAL;

	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
	if (err)
		return err;

	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
	if (err)
		return err;

	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
	if (err)
		return err;

	// the following can't be moved inside update_tlv_pad because the
	// bpf verifier has some issues with it
	pad_off += sizeof(*itlv) + itlv->len;
	partial_srh_len = pad_off - srh_off;
	len_remaining = partial_srh_len % 8;
	new_pad = 8 - len_remaining;

	if (new_pad == 1) // cannot pad for 1 byte only
		new_pad = 9;
	else if (new_pad == 8)
		new_pad = 0;

	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
}
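
/* Remove the TLV located at offset tlv_off (relative to the SRH) and
 * recompute the Padding TLV so that the shrunk SRH remains 8-byte aligned.
 */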
static __always_inline
int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
	       uint32_t tlv_off)
{
	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
	uint8_t len_remaining, new_pad;
	uint32_t partial_srh_len;
	uint32_t pad_off = 0;
	uint32_t pad_size = 0;
	struct sr6_tlv_t tlv;
	int err;

	tlv_off += srh_off;

	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
	if (err)
		return err;

	err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
	if (err)
		return err;

	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
	if (err)
		return err;

	pad_off -= sizeof(tlv) + tlv.len;
	partial_srh_len = pad_off - srh_off;
	len_remaining = partial_srh_len % 8;
	new_pad = 8 - len_remaining;
	if (new_pad == 1) // cannot pad for 1 byte only
		new_pad = 9;
	else if (new_pad == 8)
		new_pad = 0;

	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
}
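
/* Return 1 if the TLV located right after the segment list is an Egress TLV
 * (length 18) whose value is the IPv6 address fd00::4, 0 otherwise.
 */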
static __always_inline
int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
{
	int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
		((srh->first_segment + 1) << 4);
	struct sr6_tlv_t tlv;

	if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
		return 0;

	if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
		struct ip6_addr_t egr_addr;

		if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
			return 0;

		// check if egress TLV value is correct
		if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
		    ntohll(egr_addr.lo) == 0x4)
			return 1;
	}

	return 0;
}

// This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
// fd00::4
SEC("encap_srh")
int __encap_srh(struct __sk_buff *skb)
{
	unsigned long long hi = 0xfd00000000000000;
	struct ip6_addr_t *seg;
	struct ip6_srh_t *srh;
	char srh_buf[72]; // room for 4 segments
	int err;

	srh = (struct ip6_srh_t *)srh_buf;
	srh->nexthdr = 0;
	srh->hdrlen = 8;
	srh->type = 4;
	srh->segments_left = 3;
	srh->first_segment = 3;
	srh->flags = 0;
	srh->tag = 0;

	seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));

	#pragma clang loop unroll(full)
	for (unsigned long long lo = 0; lo < 4; lo++) {
		seg->lo = htonll(4 - lo);
		seg->hi = htonll(hi);
		seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
	}

	err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
	if (err)
		return BPF_DROP;

	return BPF_REDIRECT;
}

// Add an Egress TLV fd00::4, set the Alert flag,
// and apply an End.X action towards fc42::1
SEC("add_egr_x")
int __add_egr_x(struct __sk_buff *skb)
{
	unsigned long long hi = 0xfc42000000000000;
	unsigned long long lo = 0x1;
	struct ip6_srh_t *srh = get_srh(skb);
	uint8_t new_flags = SR6_FLAG_ALERT;
	struct ip6_addr_t addr;
	int err, offset;

	if (srh == NULL)
		return BPF_DROP;

	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};

	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
		      (struct sr6_tlv_t *)&tlv, 20);
	if (err)
		return BPF_DROP;

	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
	err = bpf_lwt_seg6_store_bytes(skb, offset,
				       (void *)&new_flags, sizeof(new_flags));
	if (err)
		return BPF_DROP;

	addr.lo = htonll(lo);
	addr.hi = htonll(hi);
	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
				  (void *)&addr, sizeof(addr));
	if (err)
		return BPF_DROP;

	return BPF_REDIRECT;
}

// Pop the Egress TLV, reset the flags, change the tag to 2442 and finally do a
// simple End action
SEC("pop_egr")
int __pop_egr(struct __sk_buff *skb)
{
	struct ip6_srh_t *srh = get_srh(skb);
	uint16_t new_tag = bpf_htons(2442);
	uint8_t new_flags = 0;
	int err, offset;

	if (srh == NULL)
		return BPF_DROP;

	if (srh->flags != SR6_FLAG_ALERT)
		return BPF_DROP;

	if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
		return BPF_DROP;

	if (!has_egr_tlv(skb, srh))
		return BPF_DROP;

	err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
	if (err)
		return BPF_DROP;

	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
				     sizeof(new_flags)))
		return BPF_DROP;

	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
				     sizeof(new_tag)))
		return BPF_DROP;

	return BPF_OK;
}

// Check that the Egress TLV and the Alert flag have been removed and that the
// tag is correct, then apply an End.T action to reach the last segment
SEC("inspect_t")
int __inspect_t(struct __sk_buff *skb)
{
	struct ip6_srh_t *srh = get_srh(skb);
	int table = 117;
	int err;

	if (srh == NULL)
		return BPF_DROP;

	if (srh->flags != 0)
		return BPF_DROP;

	if (srh->tag != bpf_htons(2442))
		return BPF_DROP;

	if (srh->hdrlen != 8) // 4 segments
		return BPF_DROP;

	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
				  (void *)&table, sizeof(table));
	if (err)
		return BPF_DROP;

	return BPF_REDIRECT;
}

char __license[] SEC("license") = "GPL";