sch_etf.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. // SPDX-License-Identifier: GPL-2.0
  2. /* net/sched/sch_etf.c Earliest TxTime First queueing discipline.
  3. *
  4. * Authors: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
  5. * Vinicius Costa Gomes <vinicius.gomes@intel.com>
  6. */
  7. #include <linux/module.h>
  8. #include <linux/types.h>
  9. #include <linux/kernel.h>
  10. #include <linux/string.h>
  11. #include <linux/errno.h>
  12. #include <linux/errqueue.h>
  13. #include <linux/rbtree.h>
  14. #include <linux/skbuff.h>
  15. #include <linux/posix-timers.h>
  16. #include <net/netlink.h>
  17. #include <net/sch_generic.h>
  18. #include <net/pkt_sched.h>
  19. #include <net/sock.h>
  20. #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
  21. #define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
  22. struct etf_sched_data {
  23. bool offload;
  24. bool deadline_mode;
  25. int clockid;
  26. int queue;
  27. s32 delta; /* in ns */
  28. ktime_t last; /* The txtime of the last skb sent to the netdevice. */
  29. struct rb_root head;
  30. struct qdisc_watchdog watchdog;
  31. ktime_t (*get_time)(void);
  32. };
  33. static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
  34. [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
  35. };
  36. static inline int validate_input_params(struct tc_etf_qopt *qopt,
  37. struct netlink_ext_ack *extack)
  38. {
  39. /* Check if params comply to the following rules:
  40. * * Clockid and delta must be valid.
  41. *
  42. * * Dynamic clockids are not supported.
  43. *
  44. * * Delta must be a positive integer.
  45. *
  46. * Also note that for the HW offload case, we must
  47. * expect that system clocks have been synchronized to PHC.
  48. */
  49. if (qopt->clockid < 0) {
  50. NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
  51. return -ENOTSUPP;
  52. }
  53. if (qopt->clockid != CLOCK_TAI) {
  54. NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
  55. return -EINVAL;
  56. }
  57. if (qopt->delta < 0) {
  58. NL_SET_ERR_MSG(extack, "Delta must be positive");
  59. return -EINVAL;
  60. }
  61. return 0;
  62. }
  63. static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
  64. {
  65. struct etf_sched_data *q = qdisc_priv(sch);
  66. ktime_t txtime = nskb->tstamp;
  67. struct sock *sk = nskb->sk;
  68. ktime_t now;
  69. if (!sk || !sk_fullsock(sk))
  70. return false;
  71. if (!sock_flag(sk, SOCK_TXTIME))
  72. return false;
  73. /* We don't perform crosstimestamping.
  74. * Drop if packet's clockid differs from qdisc's.
  75. */
  76. if (sk->sk_clockid != q->clockid)
  77. return false;
  78. if (sk->sk_txtime_deadline_mode != q->deadline_mode)
  79. return false;
  80. now = q->get_time();
  81. if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
  82. return false;
  83. return true;
  84. }
  85. static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
  86. {
  87. struct etf_sched_data *q = qdisc_priv(sch);
  88. struct rb_node *p;
  89. p = rb_first(&q->head);
  90. if (!p)
  91. return NULL;
  92. return rb_to_skb(p);
  93. }
  94. static void reset_watchdog(struct Qdisc *sch)
  95. {
  96. struct etf_sched_data *q = qdisc_priv(sch);
  97. struct sk_buff *skb = etf_peek_timesortedlist(sch);
  98. ktime_t next;
  99. if (!skb)
  100. return;
  101. next = ktime_sub_ns(skb->tstamp, q->delta);
  102. qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
  103. }
  104. static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
  105. {
  106. struct sock_exterr_skb *serr;
  107. struct sk_buff *clone;
  108. ktime_t txtime = skb->tstamp;
  109. struct sock *sk = skb->sk;
  110. if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
  111. return;
  112. clone = skb_clone(skb, GFP_ATOMIC);
  113. if (!clone)
  114. return;
  115. serr = SKB_EXT_ERR(clone);
  116. serr->ee.ee_errno = err;
  117. serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
  118. serr->ee.ee_type = 0;
  119. serr->ee.ee_code = code;
  120. serr->ee.ee_pad = 0;
  121. serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
  122. serr->ee.ee_info = txtime; /* low part of tstamp */
  123. if (sock_queue_err_skb(sk, clone))
  124. kfree_skb(clone);
  125. }
  126. static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
  127. struct sk_buff **to_free)
  128. {
  129. struct etf_sched_data *q = qdisc_priv(sch);
  130. struct rb_node **p = &q->head.rb_node, *parent = NULL;
  131. ktime_t txtime = nskb->tstamp;
  132. if (!is_packet_valid(sch, nskb)) {
  133. report_sock_error(nskb, EINVAL,
  134. SO_EE_CODE_TXTIME_INVALID_PARAM);
  135. return qdisc_drop(nskb, sch, to_free);
  136. }
  137. while (*p) {
  138. struct sk_buff *skb;
  139. parent = *p;
  140. skb = rb_to_skb(parent);
  141. if (ktime_after(txtime, skb->tstamp))
  142. p = &parent->rb_right;
  143. else
  144. p = &parent->rb_left;
  145. }
  146. rb_link_node(&nskb->rbnode, parent, p);
  147. rb_insert_color(&nskb->rbnode, &q->head);
  148. qdisc_qstats_backlog_inc(sch, nskb);
  149. sch->q.qlen++;
  150. /* Now we may need to re-arm the qdisc watchdog for the next packet. */
  151. reset_watchdog(sch);
  152. return NET_XMIT_SUCCESS;
  153. }
  154. static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
  155. bool drop)
  156. {
  157. struct etf_sched_data *q = qdisc_priv(sch);
  158. rb_erase(&skb->rbnode, &q->head);
  159. /* The rbnode field in the skb re-uses these fields, now that
  160. * we are done with the rbnode, reset them.
  161. */
  162. skb->next = NULL;
  163. skb->prev = NULL;
  164. skb->dev = qdisc_dev(sch);
  165. qdisc_qstats_backlog_dec(sch, skb);
  166. if (drop) {
  167. struct sk_buff *to_free = NULL;
  168. report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
  169. qdisc_drop(skb, sch, &to_free);
  170. kfree_skb_list(to_free);
  171. qdisc_qstats_overlimit(sch);
  172. } else {
  173. qdisc_bstats_update(sch, skb);
  174. q->last = skb->tstamp;
  175. }
  176. sch->q.qlen--;
  177. }
  178. static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
  179. {
  180. struct etf_sched_data *q = qdisc_priv(sch);
  181. struct sk_buff *skb;
  182. ktime_t now, next;
  183. skb = etf_peek_timesortedlist(sch);
  184. if (!skb)
  185. return NULL;
  186. now = q->get_time();
  187. /* Drop if packet has expired while in queue. */
  188. if (ktime_before(skb->tstamp, now)) {
  189. timesortedlist_erase(sch, skb, true);
  190. skb = NULL;
  191. goto out;
  192. }
  193. /* When in deadline mode, dequeue as soon as possible and change the
  194. * txtime from deadline to (now + delta).
  195. */
  196. if (q->deadline_mode) {
  197. timesortedlist_erase(sch, skb, false);
  198. skb->tstamp = now;
  199. goto out;
  200. }
  201. next = ktime_sub_ns(skb->tstamp, q->delta);
  202. /* Dequeue only if now is within the [txtime - delta, txtime] range. */
  203. if (ktime_after(now, next))
  204. timesortedlist_erase(sch, skb, false);
  205. else
  206. skb = NULL;
  207. out:
  208. /* Now we may need to re-arm the qdisc watchdog for the next packet. */
  209. reset_watchdog(sch);
  210. return skb;
  211. }
  212. static void etf_disable_offload(struct net_device *dev,
  213. struct etf_sched_data *q)
  214. {
  215. struct tc_etf_qopt_offload etf = { };
  216. const struct net_device_ops *ops;
  217. int err;
  218. if (!q->offload)
  219. return;
  220. ops = dev->netdev_ops;
  221. if (!ops->ndo_setup_tc)
  222. return;
  223. etf.queue = q->queue;
  224. etf.enable = 0;
  225. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
  226. if (err < 0)
  227. pr_warn("Couldn't disable ETF offload for queue %d\n",
  228. etf.queue);
  229. }
  230. static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
  231. struct netlink_ext_ack *extack)
  232. {
  233. const struct net_device_ops *ops = dev->netdev_ops;
  234. struct tc_etf_qopt_offload etf = { };
  235. int err;
  236. if (q->offload)
  237. return 0;
  238. if (!ops->ndo_setup_tc) {
  239. NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
  240. return -EOPNOTSUPP;
  241. }
  242. etf.queue = q->queue;
  243. etf.enable = 1;
  244. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
  245. if (err < 0) {
  246. NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
  247. return err;
  248. }
  249. return 0;
  250. }
  251. static int etf_init(struct Qdisc *sch, struct nlattr *opt,
  252. struct netlink_ext_ack *extack)
  253. {
  254. struct etf_sched_data *q = qdisc_priv(sch);
  255. struct net_device *dev = qdisc_dev(sch);
  256. struct nlattr *tb[TCA_ETF_MAX + 1];
  257. struct tc_etf_qopt *qopt;
  258. int err;
  259. if (!opt) {
  260. NL_SET_ERR_MSG(extack,
  261. "Missing ETF qdisc options which are mandatory");
  262. return -EINVAL;
  263. }
  264. err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack);
  265. if (err < 0)
  266. return err;
  267. if (!tb[TCA_ETF_PARMS]) {
  268. NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
  269. return -EINVAL;
  270. }
  271. qopt = nla_data(tb[TCA_ETF_PARMS]);
  272. pr_debug("delta %d clockid %d offload %s deadline %s\n",
  273. qopt->delta, qopt->clockid,
  274. OFFLOAD_IS_ON(qopt) ? "on" : "off",
  275. DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
  276. err = validate_input_params(qopt, extack);
  277. if (err < 0)
  278. return err;
  279. q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
  280. if (OFFLOAD_IS_ON(qopt)) {
  281. err = etf_enable_offload(dev, q, extack);
  282. if (err < 0)
  283. return err;
  284. }
  285. /* Everything went OK, save the parameters used. */
  286. q->delta = qopt->delta;
  287. q->clockid = qopt->clockid;
  288. q->offload = OFFLOAD_IS_ON(qopt);
  289. q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
  290. switch (q->clockid) {
  291. case CLOCK_REALTIME:
  292. q->get_time = ktime_get_real;
  293. break;
  294. case CLOCK_MONOTONIC:
  295. q->get_time = ktime_get;
  296. break;
  297. case CLOCK_BOOTTIME:
  298. q->get_time = ktime_get_boottime;
  299. break;
  300. case CLOCK_TAI:
  301. q->get_time = ktime_get_clocktai;
  302. break;
  303. default:
  304. NL_SET_ERR_MSG(extack, "Clockid is not supported");
  305. return -ENOTSUPP;
  306. }
  307. qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
  308. return 0;
  309. }
  310. static void timesortedlist_clear(struct Qdisc *sch)
  311. {
  312. struct etf_sched_data *q = qdisc_priv(sch);
  313. struct rb_node *p = rb_first(&q->head);
  314. while (p) {
  315. struct sk_buff *skb = rb_to_skb(p);
  316. p = rb_next(p);
  317. rb_erase(&skb->rbnode, &q->head);
  318. rtnl_kfree_skbs(skb, skb);
  319. sch->q.qlen--;
  320. }
  321. }
  322. static void etf_reset(struct Qdisc *sch)
  323. {
  324. struct etf_sched_data *q = qdisc_priv(sch);
  325. /* Only cancel watchdog if it's been initialized. */
  326. if (q->watchdog.qdisc == sch)
  327. qdisc_watchdog_cancel(&q->watchdog);
  328. /* No matter which mode we are on, it's safe to clear both lists. */
  329. timesortedlist_clear(sch);
  330. __qdisc_reset_queue(&sch->q);
  331. sch->qstats.backlog = 0;
  332. sch->q.qlen = 0;
  333. q->last = 0;
  334. }
  335. static void etf_destroy(struct Qdisc *sch)
  336. {
  337. struct etf_sched_data *q = qdisc_priv(sch);
  338. struct net_device *dev = qdisc_dev(sch);
  339. /* Only cancel watchdog if it's been initialized. */
  340. if (q->watchdog.qdisc == sch)
  341. qdisc_watchdog_cancel(&q->watchdog);
  342. etf_disable_offload(dev, q);
  343. }
  344. static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
  345. {
  346. struct etf_sched_data *q = qdisc_priv(sch);
  347. struct tc_etf_qopt opt = { };
  348. struct nlattr *nest;
  349. nest = nla_nest_start(skb, TCA_OPTIONS);
  350. if (!nest)
  351. goto nla_put_failure;
  352. opt.delta = q->delta;
  353. opt.clockid = q->clockid;
  354. if (q->offload)
  355. opt.flags |= TC_ETF_OFFLOAD_ON;
  356. if (q->deadline_mode)
  357. opt.flags |= TC_ETF_DEADLINE_MODE_ON;
  358. if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
  359. goto nla_put_failure;
  360. return nla_nest_end(skb, nest);
  361. nla_put_failure:
  362. nla_nest_cancel(skb, nest);
  363. return -1;
  364. }
  365. static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
  366. .id = "etf",
  367. .priv_size = sizeof(struct etf_sched_data),
  368. .enqueue = etf_enqueue_timesortedlist,
  369. .dequeue = etf_dequeue_timesortedlist,
  370. .peek = etf_peek_timesortedlist,
  371. .init = etf_init,
  372. .reset = etf_reset,
  373. .destroy = etf_destroy,
  374. .dump = etf_dump,
  375. .owner = THIS_MODULE,
  376. };
  377. static int __init etf_module_init(void)
  378. {
  379. return register_qdisc(&etf_qdisc_ops);
  380. }
  381. static void __exit etf_module_exit(void)
  382. {
  383. unregister_qdisc(&etf_qdisc_ops);
  384. }
  385. module_init(etf_module_init)
  386. module_exit(etf_module_exit)
  387. MODULE_LICENSE("GPL");