sch_mqprio.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * net/sched/sch_mqprio.c
  4. *
  5. * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
  6. */
  7. #include <linux/ethtool_netlink.h>
  8. #include <linux/types.h>
  9. #include <linux/slab.h>
  10. #include <linux/kernel.h>
  11. #include <linux/string.h>
  12. #include <linux/errno.h>
  13. #include <linux/skbuff.h>
  14. #include <linux/module.h>
  15. #include <net/netlink.h>
  16. #include <net/pkt_sched.h>
  17. #include <net/sch_generic.h>
  18. #include <net/pkt_cls.h>
  19. #include "sch_mqprio_lib.h"
  20. struct mqprio_sched {
  21. struct Qdisc **qdiscs;
  22. u16 mode;
  23. u16 shaper;
  24. int hw_offload;
  25. u32 flags;
  26. u64 min_rate[TC_QOPT_MAX_QUEUE];
  27. u64 max_rate[TC_QOPT_MAX_QUEUE];
  28. u32 fp[TC_QOPT_MAX_QUEUE];
  29. };
  30. static int mqprio_enable_offload(struct Qdisc *sch,
  31. const struct tc_mqprio_qopt *qopt,
  32. struct netlink_ext_ack *extack)
  33. {
  34. struct mqprio_sched *priv = qdisc_priv(sch);
  35. struct net_device *dev = qdisc_dev(sch);
  36. struct tc_mqprio_qopt_offload mqprio = {
  37. .qopt = *qopt,
  38. .extack = extack,
  39. };
  40. int err, i;
  41. switch (priv->mode) {
  42. case TC_MQPRIO_MODE_DCB:
  43. if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
  44. return -EINVAL;
  45. break;
  46. case TC_MQPRIO_MODE_CHANNEL:
  47. mqprio.flags = priv->flags;
  48. if (priv->flags & TC_MQPRIO_F_MODE)
  49. mqprio.mode = priv->mode;
  50. if (priv->flags & TC_MQPRIO_F_SHAPER)
  51. mqprio.shaper = priv->shaper;
  52. if (priv->flags & TC_MQPRIO_F_MIN_RATE)
  53. for (i = 0; i < mqprio.qopt.num_tc; i++)
  54. mqprio.min_rate[i] = priv->min_rate[i];
  55. if (priv->flags & TC_MQPRIO_F_MAX_RATE)
  56. for (i = 0; i < mqprio.qopt.num_tc; i++)
  57. mqprio.max_rate[i] = priv->max_rate[i];
  58. break;
  59. default:
  60. return -EINVAL;
  61. }
  62. mqprio_fp_to_offload(priv->fp, &mqprio);
  63. err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO,
  64. &mqprio);
  65. if (err)
  66. return err;
  67. priv->hw_offload = mqprio.qopt.hw;
  68. return 0;
  69. }
  70. static void mqprio_disable_offload(struct Qdisc *sch)
  71. {
  72. struct tc_mqprio_qopt_offload mqprio = { { 0 } };
  73. struct mqprio_sched *priv = qdisc_priv(sch);
  74. struct net_device *dev = qdisc_dev(sch);
  75. switch (priv->mode) {
  76. case TC_MQPRIO_MODE_DCB:
  77. case TC_MQPRIO_MODE_CHANNEL:
  78. dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO,
  79. &mqprio);
  80. break;
  81. }
  82. }
  83. static void mqprio_destroy(struct Qdisc *sch)
  84. {
  85. struct net_device *dev = qdisc_dev(sch);
  86. struct mqprio_sched *priv = qdisc_priv(sch);
  87. unsigned int ntx;
  88. if (priv->qdiscs) {
  89. for (ntx = 0;
  90. ntx < dev->num_tx_queues && priv->qdiscs[ntx];
  91. ntx++)
  92. qdisc_put(priv->qdiscs[ntx]);
  93. kfree(priv->qdiscs);
  94. }
  95. if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc)
  96. mqprio_disable_offload(sch);
  97. else
  98. netdev_set_num_tc(dev, 0);
  99. }
  100. static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
  101. const struct tc_mqprio_caps *caps,
  102. struct netlink_ext_ack *extack)
  103. {
  104. int err;
  105. /* Limit qopt->hw to maximum supported offload value. Drivers have
  106. * the option of overriding this later if they don't support the a
  107. * given offload type.
  108. */
  109. if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX)
  110. qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX;
  111. /* If hardware offload is requested, we will leave 3 options to the
  112. * device driver:
  113. * - populate the queue counts itself (and ignore what was requested)
  114. * - validate the provided queue counts by itself (and apply them)
  115. * - request queue count validation here (and apply them)
  116. */
  117. err = mqprio_validate_qopt(dev, qopt,
  118. !qopt->hw || caps->validate_queue_counts,
  119. false, extack);
  120. if (err)
  121. return err;
  122. /* If ndo_setup_tc is not present then hardware doesn't support offload
  123. * and we should return an error.
  124. */
  125. if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) {
  126. NL_SET_ERR_MSG(extack,
  127. "Device does not support hardware offload");
  128. return -EINVAL;
  129. }
  130. return 0;
  131. }
  132. static const struct
  133. nla_policy mqprio_tc_entry_policy[TCA_MQPRIO_TC_ENTRY_MAX + 1] = {
  134. [TCA_MQPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32,
  135. TC_QOPT_MAX_QUEUE),
  136. [TCA_MQPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
  137. TC_FP_EXPRESS,
  138. TC_FP_PREEMPTIBLE),
  139. };
  140. static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = {
  141. [TCA_MQPRIO_MODE] = { .len = sizeof(u16) },
  142. [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) },
  143. [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED },
  144. [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED },
  145. [TCA_MQPRIO_TC_ENTRY] = { .type = NLA_NESTED },
  146. };
  147. static int mqprio_parse_tc_entry(u32 fp[TC_QOPT_MAX_QUEUE],
  148. struct nlattr *opt,
  149. unsigned long *seen_tcs,
  150. struct netlink_ext_ack *extack)
  151. {
  152. struct nlattr *tb[TCA_MQPRIO_TC_ENTRY_MAX + 1];
  153. int err, tc;
  154. err = nla_parse_nested(tb, TCA_MQPRIO_TC_ENTRY_MAX, opt,
  155. mqprio_tc_entry_policy, extack);
  156. if (err < 0)
  157. return err;
  158. if (NL_REQ_ATTR_CHECK(extack, opt, tb, TCA_MQPRIO_TC_ENTRY_INDEX)) {
  159. NL_SET_ERR_MSG(extack, "TC entry index missing");
  160. return -EINVAL;
  161. }
  162. tc = nla_get_u32(tb[TCA_MQPRIO_TC_ENTRY_INDEX]);
  163. if (*seen_tcs & BIT(tc)) {
  164. NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_TC_ENTRY_INDEX],
  165. "Duplicate tc entry");
  166. return -EINVAL;
  167. }
  168. *seen_tcs |= BIT(tc);
  169. if (tb[TCA_MQPRIO_TC_ENTRY_FP])
  170. fp[tc] = nla_get_u32(tb[TCA_MQPRIO_TC_ENTRY_FP]);
  171. return 0;
  172. }
  173. static int mqprio_parse_tc_entries(struct Qdisc *sch, struct nlattr *nlattr_opt,
  174. int nlattr_opt_len,
  175. struct netlink_ext_ack *extack)
  176. {
  177. struct mqprio_sched *priv = qdisc_priv(sch);
  178. struct net_device *dev = qdisc_dev(sch);
  179. bool have_preemption = false;
  180. unsigned long seen_tcs = 0;
  181. u32 fp[TC_QOPT_MAX_QUEUE];
  182. struct nlattr *n;
  183. int tc, rem;
  184. int err = 0;
  185. for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
  186. fp[tc] = priv->fp[tc];
  187. nla_for_each_attr_type(n, TCA_MQPRIO_TC_ENTRY, nlattr_opt,
  188. nlattr_opt_len, rem) {
  189. err = mqprio_parse_tc_entry(fp, n, &seen_tcs, extack);
  190. if (err)
  191. goto out;
  192. }
  193. for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
  194. priv->fp[tc] = fp[tc];
  195. if (fp[tc] == TC_FP_PREEMPTIBLE)
  196. have_preemption = true;
  197. }
  198. if (have_preemption && !ethtool_dev_mm_supported(dev)) {
  199. NL_SET_ERR_MSG(extack, "Device does not support preemption");
  200. return -EOPNOTSUPP;
  201. }
  202. out:
  203. return err;
  204. }
  205. /* Parse the other netlink attributes that represent the payload of
  206. * TCA_OPTIONS, which are appended right after struct tc_mqprio_qopt.
  207. */
  208. static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
  209. struct nlattr *opt,
  210. struct netlink_ext_ack *extack)
  211. {
  212. struct nlattr *nlattr_opt = nla_data(opt) + NLA_ALIGN(sizeof(*qopt));
  213. int nlattr_opt_len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
  214. struct mqprio_sched *priv = qdisc_priv(sch);
  215. struct nlattr *tb[TCA_MQPRIO_MAX + 1] = {};
  216. struct nlattr *attr;
  217. int i, rem, err;
  218. if (nlattr_opt_len >= nla_attr_size(0)) {
  219. err = nla_parse_deprecated(tb, TCA_MQPRIO_MAX, nlattr_opt,
  220. nlattr_opt_len, mqprio_policy,
  221. NULL);
  222. if (err < 0)
  223. return err;
  224. }
  225. if (!qopt->hw) {
  226. NL_SET_ERR_MSG(extack,
  227. "mqprio TCA_OPTIONS can only contain netlink attributes in hardware mode");
  228. return -EINVAL;
  229. }
  230. if (tb[TCA_MQPRIO_MODE]) {
  231. priv->flags |= TC_MQPRIO_F_MODE;
  232. priv->mode = nla_get_u16(tb[TCA_MQPRIO_MODE]);
  233. }
  234. if (tb[TCA_MQPRIO_SHAPER]) {
  235. priv->flags |= TC_MQPRIO_F_SHAPER;
  236. priv->shaper = nla_get_u16(tb[TCA_MQPRIO_SHAPER]);
  237. }
  238. if (tb[TCA_MQPRIO_MIN_RATE64]) {
  239. if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
  240. NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MIN_RATE64],
  241. "min_rate accepted only when shaper is in bw_rlimit mode");
  242. return -EINVAL;
  243. }
  244. i = 0;
  245. nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
  246. rem) {
  247. if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) {
  248. NL_SET_ERR_MSG_ATTR(extack, attr,
  249. "Attribute type expected to be TCA_MQPRIO_MIN_RATE64");
  250. return -EINVAL;
  251. }
  252. if (nla_len(attr) != sizeof(u64)) {
  253. NL_SET_ERR_MSG_ATTR(extack, attr,
  254. "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length");
  255. return -EINVAL;
  256. }
  257. if (i >= qopt->num_tc)
  258. break;
  259. priv->min_rate[i] = nla_get_u64(attr);
  260. i++;
  261. }
  262. priv->flags |= TC_MQPRIO_F_MIN_RATE;
  263. }
  264. if (tb[TCA_MQPRIO_MAX_RATE64]) {
  265. if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
  266. NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MAX_RATE64],
  267. "max_rate accepted only when shaper is in bw_rlimit mode");
  268. return -EINVAL;
  269. }
  270. i = 0;
  271. nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
  272. rem) {
  273. if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) {
  274. NL_SET_ERR_MSG_ATTR(extack, attr,
  275. "Attribute type expected to be TCA_MQPRIO_MAX_RATE64");
  276. return -EINVAL;
  277. }
  278. if (nla_len(attr) != sizeof(u64)) {
  279. NL_SET_ERR_MSG_ATTR(extack, attr,
  280. "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length");
  281. return -EINVAL;
  282. }
  283. if (i >= qopt->num_tc)
  284. break;
  285. priv->max_rate[i] = nla_get_u64(attr);
  286. i++;
  287. }
  288. priv->flags |= TC_MQPRIO_F_MAX_RATE;
  289. }
  290. if (tb[TCA_MQPRIO_TC_ENTRY]) {
  291. err = mqprio_parse_tc_entries(sch, nlattr_opt, nlattr_opt_len,
  292. extack);
  293. if (err)
  294. return err;
  295. }
  296. return 0;
  297. }
  298. static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
  299. struct netlink_ext_ack *extack)
  300. {
  301. struct net_device *dev = qdisc_dev(sch);
  302. struct mqprio_sched *priv = qdisc_priv(sch);
  303. struct netdev_queue *dev_queue;
  304. struct Qdisc *qdisc;
  305. int i, err = -EOPNOTSUPP;
  306. struct tc_mqprio_qopt *qopt = NULL;
  307. struct tc_mqprio_caps caps;
  308. int len, tc;
  309. BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
  310. BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
  311. if (sch->parent != TC_H_ROOT)
  312. return -EOPNOTSUPP;
  313. if (!netif_is_multiqueue(dev))
  314. return -EOPNOTSUPP;
  315. /* make certain can allocate enough classids to handle queues */
  316. if (dev->num_tx_queues >= TC_H_MIN_PRIORITY)
  317. return -ENOMEM;
  318. if (!opt || nla_len(opt) < sizeof(*qopt))
  319. return -EINVAL;
  320. for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
  321. priv->fp[tc] = TC_FP_EXPRESS;
  322. qdisc_offload_query_caps(dev, TC_SETUP_QDISC_MQPRIO,
  323. &caps, sizeof(caps));
  324. qopt = nla_data(opt);
  325. if (mqprio_parse_opt(dev, qopt, &caps, extack))
  326. return -EINVAL;
  327. len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
  328. if (len > 0) {
  329. err = mqprio_parse_nlattr(sch, qopt, opt, extack);
  330. if (err)
  331. return err;
  332. }
  333. /* pre-allocate qdisc, attachment can't fail */
  334. priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
  335. GFP_KERNEL);
  336. if (!priv->qdiscs)
  337. return -ENOMEM;
  338. for (i = 0; i < dev->num_tx_queues; i++) {
  339. dev_queue = netdev_get_tx_queue(dev, i);
  340. qdisc = qdisc_create_dflt(dev_queue,
  341. get_default_qdisc_ops(dev, i),
  342. TC_H_MAKE(TC_H_MAJ(sch->handle),
  343. TC_H_MIN(i + 1)), extack);
  344. if (!qdisc)
  345. return -ENOMEM;
  346. priv->qdiscs[i] = qdisc;
  347. qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
  348. }
  349. /* If the mqprio options indicate that hardware should own
  350. * the queue mapping then run ndo_setup_tc otherwise use the
  351. * supplied and verified mapping
  352. */
  353. if (qopt->hw) {
  354. err = mqprio_enable_offload(sch, qopt, extack);
  355. if (err)
  356. return err;
  357. } else {
  358. netdev_set_num_tc(dev, qopt->num_tc);
  359. for (i = 0; i < qopt->num_tc; i++)
  360. netdev_set_tc_queue(dev, i,
  361. qopt->count[i], qopt->offset[i]);
  362. }
  363. /* Always use supplied priority mappings */
  364. for (i = 0; i < TC_BITMASK + 1; i++)
  365. netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
  366. sch->flags |= TCQ_F_MQROOT;
  367. return 0;
  368. }
  369. static void mqprio_attach(struct Qdisc *sch)
  370. {
  371. struct net_device *dev = qdisc_dev(sch);
  372. struct mqprio_sched *priv = qdisc_priv(sch);
  373. struct Qdisc *qdisc, *old;
  374. unsigned int ntx;
  375. /* Attach underlying qdisc */
  376. for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
  377. qdisc = priv->qdiscs[ntx];
  378. old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
  379. if (old)
  380. qdisc_put(old);
  381. if (ntx < dev->real_num_tx_queues)
  382. qdisc_hash_add(qdisc, false);
  383. }
  384. kfree(priv->qdiscs);
  385. priv->qdiscs = NULL;
  386. }
  387. static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
  388. unsigned long cl)
  389. {
  390. struct net_device *dev = qdisc_dev(sch);
  391. unsigned long ntx = cl - 1;
  392. if (ntx >= dev->num_tx_queues)
  393. return NULL;
  394. return netdev_get_tx_queue(dev, ntx);
  395. }
  396. static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
  397. struct Qdisc **old, struct netlink_ext_ack *extack)
  398. {
  399. struct net_device *dev = qdisc_dev(sch);
  400. struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
  401. if (!dev_queue)
  402. return -EINVAL;
  403. if (dev->flags & IFF_UP)
  404. dev_deactivate(dev);
  405. *old = dev_graft_qdisc(dev_queue, new);
  406. if (new)
  407. new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
  408. if (dev->flags & IFF_UP)
  409. dev_activate(dev);
  410. return 0;
  411. }
  412. static int dump_rates(struct mqprio_sched *priv,
  413. struct tc_mqprio_qopt *opt, struct sk_buff *skb)
  414. {
  415. struct nlattr *nest;
  416. int i;
  417. if (priv->flags & TC_MQPRIO_F_MIN_RATE) {
  418. nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MIN_RATE64);
  419. if (!nest)
  420. goto nla_put_failure;
  421. for (i = 0; i < opt->num_tc; i++) {
  422. if (nla_put(skb, TCA_MQPRIO_MIN_RATE64,
  423. sizeof(priv->min_rate[i]),
  424. &priv->min_rate[i]))
  425. goto nla_put_failure;
  426. }
  427. nla_nest_end(skb, nest);
  428. }
  429. if (priv->flags & TC_MQPRIO_F_MAX_RATE) {
  430. nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MAX_RATE64);
  431. if (!nest)
  432. goto nla_put_failure;
  433. for (i = 0; i < opt->num_tc; i++) {
  434. if (nla_put(skb, TCA_MQPRIO_MAX_RATE64,
  435. sizeof(priv->max_rate[i]),
  436. &priv->max_rate[i]))
  437. goto nla_put_failure;
  438. }
  439. nla_nest_end(skb, nest);
  440. }
  441. return 0;
  442. nla_put_failure:
  443. nla_nest_cancel(skb, nest);
  444. return -1;
  445. }
  446. static int mqprio_dump_tc_entries(struct mqprio_sched *priv,
  447. struct sk_buff *skb)
  448. {
  449. struct nlattr *n;
  450. int tc;
  451. for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
  452. n = nla_nest_start(skb, TCA_MQPRIO_TC_ENTRY);
  453. if (!n)
  454. return -EMSGSIZE;
  455. if (nla_put_u32(skb, TCA_MQPRIO_TC_ENTRY_INDEX, tc))
  456. goto nla_put_failure;
  457. if (nla_put_u32(skb, TCA_MQPRIO_TC_ENTRY_FP, priv->fp[tc]))
  458. goto nla_put_failure;
  459. nla_nest_end(skb, n);
  460. }
  461. return 0;
  462. nla_put_failure:
  463. nla_nest_cancel(skb, n);
  464. return -EMSGSIZE;
  465. }
  466. static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
  467. {
  468. struct net_device *dev = qdisc_dev(sch);
  469. struct mqprio_sched *priv = qdisc_priv(sch);
  470. struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
  471. struct tc_mqprio_qopt opt = { 0 };
  472. struct Qdisc *qdisc;
  473. unsigned int ntx;
  474. sch->q.qlen = 0;
  475. gnet_stats_basic_sync_init(&sch->bstats);
  476. memset(&sch->qstats, 0, sizeof(sch->qstats));
  477. /* MQ supports lockless qdiscs. However, statistics accounting needs
  478. * to account for all, none, or a mix of locked and unlocked child
  479. * qdiscs. Percpu stats are added to counters in-band and locking
  480. * qdisc totals are added at end.
  481. */
  482. for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
  483. qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
  484. spin_lock_bh(qdisc_lock(qdisc));
  485. gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
  486. &qdisc->bstats, false);
  487. gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
  488. &qdisc->qstats);
  489. sch->q.qlen += qdisc_qlen(qdisc);
  490. spin_unlock_bh(qdisc_lock(qdisc));
  491. }
  492. mqprio_qopt_reconstruct(dev, &opt);
  493. opt.hw = priv->hw_offload;
  494. if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
  495. goto nla_put_failure;
  496. if ((priv->flags & TC_MQPRIO_F_MODE) &&
  497. nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode))
  498. goto nla_put_failure;
  499. if ((priv->flags & TC_MQPRIO_F_SHAPER) &&
  500. nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper))
  501. goto nla_put_failure;
  502. if ((priv->flags & TC_MQPRIO_F_MIN_RATE ||
  503. priv->flags & TC_MQPRIO_F_MAX_RATE) &&
  504. (dump_rates(priv, &opt, skb) != 0))
  505. goto nla_put_failure;
  506. if (mqprio_dump_tc_entries(priv, skb))
  507. goto nla_put_failure;
  508. return nla_nest_end(skb, nla);
  509. nla_put_failure:
  510. nlmsg_trim(skb, nla);
  511. return -1;
  512. }
  513. static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
  514. {
  515. struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
  516. if (!dev_queue)
  517. return NULL;
  518. return rtnl_dereference(dev_queue->qdisc_sleeping);
  519. }
  520. static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
  521. {
  522. struct net_device *dev = qdisc_dev(sch);
  523. unsigned int ntx = TC_H_MIN(classid);
  524. /* There are essentially two regions here that have valid classid
  525. * values. The first region will have a classid value of 1 through
  526. * num_tx_queues. All of these are backed by actual Qdiscs.
  527. */
  528. if (ntx < TC_H_MIN_PRIORITY)
  529. return (ntx <= dev->num_tx_queues) ? ntx : 0;
  530. /* The second region represents the hardware traffic classes. These
  531. * are represented by classid values of TC_H_MIN_PRIORITY through
  532. * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1
  533. */
  534. return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0;
  535. }
  536. static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
  537. struct sk_buff *skb, struct tcmsg *tcm)
  538. {
  539. if (cl < TC_H_MIN_PRIORITY) {
  540. struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
  541. struct net_device *dev = qdisc_dev(sch);
  542. int tc = netdev_txq_to_tc(dev, cl - 1);
  543. tcm->tcm_parent = (tc < 0) ? 0 :
  544. TC_H_MAKE(TC_H_MAJ(sch->handle),
  545. TC_H_MIN(tc + TC_H_MIN_PRIORITY));
  546. tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
  547. } else {
  548. tcm->tcm_parent = TC_H_ROOT;
  549. tcm->tcm_info = 0;
  550. }
  551. tcm->tcm_handle |= TC_H_MIN(cl);
  552. return 0;
  553. }
  554. static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
  555. struct gnet_dump *d)
  556. __releases(d->lock)
  557. __acquires(d->lock)
  558. {
  559. if (cl >= TC_H_MIN_PRIORITY) {
  560. int i;
  561. __u32 qlen;
  562. struct gnet_stats_queue qstats = {0};
  563. struct gnet_stats_basic_sync bstats;
  564. struct net_device *dev = qdisc_dev(sch);
  565. struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
  566. gnet_stats_basic_sync_init(&bstats);
  567. /* Drop lock here it will be reclaimed before touching
  568. * statistics this is required because the d->lock we
  569. * hold here is the look on dev_queue->qdisc_sleeping
  570. * also acquired below.
  571. */
  572. if (d->lock)
  573. spin_unlock_bh(d->lock);
  574. for (i = tc.offset; i < tc.offset + tc.count; i++) {
  575. struct netdev_queue *q = netdev_get_tx_queue(dev, i);
  576. struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
  577. spin_lock_bh(qdisc_lock(qdisc));
  578. gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
  579. &qdisc->bstats, false);
  580. gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
  581. &qdisc->qstats);
  582. sch->q.qlen += qdisc_qlen(qdisc);
  583. spin_unlock_bh(qdisc_lock(qdisc));
  584. }
  585. qlen = qdisc_qlen(sch) + qstats.qlen;
  586. /* Reclaim root sleeping lock before completing stats */
  587. if (d->lock)
  588. spin_lock_bh(d->lock);
  589. if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
  590. gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
  591. return -1;
  592. } else {
  593. struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
  594. sch = rtnl_dereference(dev_queue->qdisc_sleeping);
  595. if (gnet_stats_copy_basic(d, sch->cpu_bstats,
  596. &sch->bstats, true) < 0 ||
  597. qdisc_qstats_copy(d, sch) < 0)
  598. return -1;
  599. }
  600. return 0;
  601. }
  602. static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
  603. {
  604. struct net_device *dev = qdisc_dev(sch);
  605. unsigned long ntx;
  606. if (arg->stop)
  607. return;
  608. /* Walk hierarchy with a virtual class per tc */
  609. arg->count = arg->skip;
  610. for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
  611. if (!tc_qdisc_stats_dump(sch, ntx + TC_H_MIN_PRIORITY, arg))
  612. return;
  613. }
  614. /* Pad the values and skip over unused traffic classes */
  615. if (ntx < TC_MAX_QUEUE) {
  616. arg->count = TC_MAX_QUEUE;
  617. ntx = TC_MAX_QUEUE;
  618. }
  619. /* Reset offset, sort out remaining per-queue qdiscs */
  620. for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) {
  621. if (arg->fn(sch, ntx + 1, arg) < 0) {
  622. arg->stop = 1;
  623. return;
  624. }
  625. arg->count++;
  626. }
  627. }
  628. static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch,
  629. struct tcmsg *tcm)
  630. {
  631. return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
  632. }
  633. static const struct Qdisc_class_ops mqprio_class_ops = {
  634. .graft = mqprio_graft,
  635. .leaf = mqprio_leaf,
  636. .find = mqprio_find,
  637. .walk = mqprio_walk,
  638. .dump = mqprio_dump_class,
  639. .dump_stats = mqprio_dump_class_stats,
  640. .select_queue = mqprio_select_queue,
  641. };
  642. static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
  643. .cl_ops = &mqprio_class_ops,
  644. .id = "mqprio",
  645. .priv_size = sizeof(struct mqprio_sched),
  646. .init = mqprio_init,
  647. .destroy = mqprio_destroy,
  648. .attach = mqprio_attach,
  649. .change_real_num_tx = mq_change_real_num_tx,
  650. .dump = mqprio_dump,
  651. .owner = THIS_MODULE,
  652. };
  653. MODULE_ALIAS_NET_SCH("mqprio");
  654. static int __init mqprio_module_init(void)
  655. {
  656. return register_qdisc(&mqprio_qdisc_ops);
  657. }
  658. static void __exit mqprio_module_exit(void)
  659. {
  660. unregister_qdisc(&mqprio_qdisc_ops);
  661. }
  662. module_init(mqprio_module_init);
  663. module_exit(mqprio_module_exit);
  664. MODULE_LICENSE("GPL");
  665. MODULE_DESCRIPTION("Classful multiqueue prio qdisc");