sch_taprio.c 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562
  1. // SPDX-License-Identifier: GPL-2.0
  2. /* net/sched/sch_taprio.c Time Aware Priority Scheduler
  3. *
  4. * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
  5. *
  6. */
  7. #include <linux/ethtool.h>
  8. #include <linux/ethtool_netlink.h>
  9. #include <linux/types.h>
  10. #include <linux/slab.h>
  11. #include <linux/kernel.h>
  12. #include <linux/string.h>
  13. #include <linux/list.h>
  14. #include <linux/errno.h>
  15. #include <linux/skbuff.h>
  16. #include <linux/math64.h>
  17. #include <linux/module.h>
  18. #include <linux/spinlock.h>
  19. #include <linux/rcupdate.h>
  20. #include <linux/time.h>
  21. #include <net/gso.h>
  22. #include <net/netlink.h>
  23. #include <net/pkt_sched.h>
  24. #include <net/pkt_cls.h>
  25. #include <net/sch_generic.h>
  26. #include <net/sock.h>
  27. #include <net/tcp.h>
  28. #define TAPRIO_STAT_NOT_SET (~0ULL)
  29. #include "sch_mqprio_lib.h"
  30. static LIST_HEAD(taprio_list);
  31. static struct static_key_false taprio_have_broken_mqprio;
  32. static struct static_key_false taprio_have_working_mqprio;
  33. #define TAPRIO_ALL_GATES_OPEN -1
  34. #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
  35. #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
  36. #define TAPRIO_SUPPORTED_FLAGS \
  37. (TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
  38. #define TAPRIO_FLAGS_INVALID U32_MAX
/* One entry of the Gate Control List (GCL) making up a schedule. */
struct sched_entry {
	/* Durations between this GCL entry and the GCL entry where the
	 * respective traffic class gate closes
	 */
	u64 gate_duration[TC_MAX_QUEUE];
	/* Remaining per-TC transmit budget (bytes) while this entry is
	 * active; set by taprio_set_budgets(), consumed on dequeue.
	 */
	atomic_t budget[TC_MAX_QUEUE];
	/* The qdisc makes some effort so that no packet leaves
	 * after this time
	 */
	ktime_t gate_close_time[TC_MAX_QUEUE];
	/* Linkage into sched_gate_list::entries */
	struct list_head list;
	/* Used to calculate when to advance the schedule */
	ktime_t end_time;
	/* Earliest next launch time through this entry (txtime-assist mode) */
	ktime_t next_txtime;
	/* Position of this entry within the schedule */
	int index;
	/* Bitmask of traffic classes whose gates are open in this entry */
	u32 gate_mask;
	/* Duration of this entry, in nanoseconds */
	u32 interval;
	/* Gate operation carried out by this entry */
	u8 command;
};
/* A complete gate control schedule (operational or administrative). */
struct sched_gate_list {
	/* Longest non-zero contiguous gate durations per traffic class,
	 * or 0 if a traffic class gate never opens during the schedule.
	 */
	u64 max_open_gate_duration[TC_MAX_QUEUE];
	u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
	u32 max_sdu[TC_MAX_QUEUE]; /* for dump */
	/* Deferred freeing through taprio_free_sched_cb() */
	struct rcu_head rcu;
	/* List of struct sched_entry forming the GCL */
	struct list_head entries;
	size_t num_entries;
	/* When the currently running cycle ends */
	ktime_t cycle_end_time;
	/* Total duration of one schedule cycle, in nanoseconds */
	s64 cycle_time;
	/* Extra time the last cycle may stretch to meet a new schedule */
	s64 cycle_time_extension;
	/* Absolute time (ns) at which this schedule takes effect */
	s64 base_time;
};
/* Per-qdisc private state of the taprio scheduler. */
struct taprio_sched {
	/* Per-TXQ child qdiscs, indexed by queue mapping */
	struct Qdisc **qdiscs;
	struct Qdisc *root;
	/* TCA_TAPRIO_ATTR_FLAG_* bits selected by the user */
	u32 flags;
	/* Clock domain used for ktime conversions */
	enum tk_offsets tk_offset;
	int clockid;
	bool offloaded;
	bool detected_mqprio;
	bool broken_mqprio;
	atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
				    * speeds it's sub-nanoseconds per byte
				    */
	/* Protects the update side of the RCU protected current_entry */
	spinlock_t current_entry_lock;
	struct sched_entry __rcu *current_entry;
	/* Currently active schedule */
	struct sched_gate_list __rcu *oper_sched;
	/* Pending schedule, takes over at its base_time */
	struct sched_gate_list __rcu *admin_sched;
	struct hrtimer advance_timer;
	/* Linkage into the file-scope taprio_list */
	struct list_head taprio_list;
	/* Next TXQ to dequeue from, per traffic class (round robin) */
	int cur_txq[TC_MAX_QUEUE];
	u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
	u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
	/* Fudge (ns) added to "now" when computing launch times */
	u32 txtime_delay;
};
/* Refcounted wrapper around the offload descriptor handed to drivers. */
struct __tc_taprio_qopt_offload {
	refcount_t users;
	struct tc_taprio_qopt_offload offload;
};
/* Populate each entry's gate_duration[] and the schedule-wide
 * max_open_gate_duration[] by walking the GCL circularly, starting at
 * every entry in turn.
 */
static void taprio_calculate_gate_durations(struct taprio_sched *q,
					    struct sched_gate_list *sched)
{
	struct net_device *dev = qdisc_dev(q->root);
	int num_tc = netdev_get_num_tc(dev);
	struct sched_entry *entry, *cur;
	int tc;

	list_for_each_entry(entry, &sched->entries, list) {
		u32 gates_still_open = entry->gate_mask;

		/* For each traffic class, calculate each open gate duration,
		 * starting at this schedule entry and ending at the schedule
		 * entry containing a gate close event for that TC.
		 */
		cur = entry;

		do {
			/* All tracked gates have closed; stop early */
			if (!gates_still_open)
				break;

			for (tc = 0; tc < num_tc; tc++) {
				if (!(gates_still_open & BIT(tc)))
					continue;

				if (cur->gate_mask & BIT(tc))
					entry->gate_duration[tc] += cur->interval;
				else
					gates_still_open &= ~BIT(tc);
			}

			cur = list_next_entry_circular(cur, &sched->entries, list);
		} while (cur != entry);

		/* Keep track of the maximum gate duration for each traffic
		 * class, taking care to not confuse a traffic class which is
		 * temporarily closed with one that is always closed.
		 */
		for (tc = 0; tc < num_tc; tc++)
			if (entry->gate_duration[tc] &&
			    sched->max_open_gate_duration[tc] < entry->gate_duration[tc])
				sched->max_open_gate_duration[tc] = entry->gate_duration[tc];
	}
}
  138. static bool taprio_entry_allows_tx(ktime_t skb_end_time,
  139. struct sched_entry *entry, int tc)
  140. {
  141. return ktime_before(skb_end_time, entry->gate_close_time[tc]);
  142. }
  143. static ktime_t sched_base_time(const struct sched_gate_list *sched)
  144. {
  145. if (!sched)
  146. return KTIME_MAX;
  147. return ns_to_ktime(sched->base_time);
  148. }
  149. static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
  150. {
  151. /* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
  152. enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
  153. switch (tk_offset) {
  154. case TK_OFFS_MAX:
  155. return mono;
  156. default:
  157. return ktime_mono_to_any(mono, tk_offset);
  158. }
  159. }
  160. static ktime_t taprio_get_time(const struct taprio_sched *q)
  161. {
  162. return taprio_mono_to_any(q, ktime_get());
  163. }
  164. static void taprio_free_sched_cb(struct rcu_head *head)
  165. {
  166. struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
  167. struct sched_entry *entry, *n;
  168. list_for_each_entry_safe(entry, n, &sched->entries, list) {
  169. list_del(&entry->list);
  170. kfree(entry);
  171. }
  172. kfree(sched);
  173. }
/* Promote the admin schedule to operational. The previous oper schedule
 * (if any) is freed after an RCU grace period. The caller's local
 * pointers are updated to match the new state: *oper becomes the old
 * admin, *admin becomes NULL.
 */
static void switch_schedules(struct taprio_sched *q,
			     struct sched_gate_list **admin,
			     struct sched_gate_list **oper)
{
	/* Publish the new oper schedule before clearing admin, so readers
	 * never observe both pointers empty.
	 */
	rcu_assign_pointer(q->oper_sched, *admin);
	rcu_assign_pointer(q->admin_sched, NULL);

	if (*oper)
		call_rcu(&(*oper)->rcu, taprio_free_sched_cb);

	*oper = *admin;
	*admin = NULL;
}
  185. /* Get how much time has been already elapsed in the current cycle. */
  186. static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
  187. {
  188. ktime_t time_since_sched_start;
  189. s32 time_elapsed;
  190. time_since_sched_start = ktime_sub(time, sched->base_time);
  191. div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed);
  192. return time_elapsed;
  193. }
/* Compute when the interval opened by @entry at @intv_start ends. The
 * interval is clamped to the end of the current cycle, except that a
 * pending admin schedule whose base_time falls inside the cycle time
 * extension window may stretch the interval up to that base_time.
 */
static ktime_t get_interval_end_time(struct sched_gate_list *sched,
				     struct sched_gate_list *admin,
				     struct sched_entry *entry,
				     ktime_t intv_start)
{
	s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start);
	ktime_t intv_end, cycle_ext_end, cycle_end;

	cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
	intv_end = ktime_add_ns(intv_start, entry->interval);
	cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);

	if (ktime_before(intv_end, cycle_end))
		return intv_end;
	else if (admin && admin != sched &&
		 ktime_after(admin->base_time, cycle_end) &&
		 ktime_before(admin->base_time, cycle_ext_end))
		return admin->base_time;
	else
		return cycle_end;
}
  213. static int length_to_duration(struct taprio_sched *q, int len)
  214. {
  215. return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC);
  216. }
  217. static int duration_to_length(struct taprio_sched *q, u64 duration)
  218. {
  219. return div_u64(duration * PSEC_PER_NSEC, atomic64_read(&q->picos_per_byte));
  220. }
/* Sets sched->max_sdu[] and sched->max_frm_len[] to the minimum between the
 * q->max_sdu[] requested by the user and the max_sdu dynamically determined by
 * the maximum open gate durations at the given link speed.
 */
static void taprio_update_queue_max_sdu(struct taprio_sched *q,
					struct sched_gate_list *sched,
					struct qdisc_size_table *stab)
{
	struct net_device *dev = qdisc_dev(q->root);
	int num_tc = netdev_get_num_tc(dev);
	u32 max_sdu_from_user;
	u32 max_sdu_dynamic;
	u32 max_sdu;
	int tc;

	for (tc = 0; tc < num_tc; tc++) {
		/* 0 from the user means "no limit" */
		max_sdu_from_user = q->max_sdu[tc] ?: U32_MAX;

		/* TC gate never closes => keep the queueMaxSDU
		 * selected by the user
		 */
		if (sched->max_open_gate_duration[tc] == sched->cycle_time) {
			max_sdu_dynamic = U32_MAX;
		} else {
			u32 max_frm_len;

			max_frm_len = duration_to_length(q, sched->max_open_gate_duration[tc]);
			/* Compensate for L1 overhead from size table,
			 * but don't let the frame size go negative
			 */
			if (stab) {
				max_frm_len -= stab->szopts.overhead;
				/* max_t(int, ...) so that a u32 underflow is
				 * treated as negative and clamped up
				 */
				max_frm_len = max_t(int, max_frm_len,
						    dev->hard_header_len + 1);
			}
			max_sdu_dynamic = max_frm_len - dev->hard_header_len;
			/* A dynamic limit above the MTU is no limit at all */
			if (max_sdu_dynamic > dev->max_mtu)
				max_sdu_dynamic = U32_MAX;
		}

		max_sdu = min(max_sdu_dynamic, max_sdu_from_user);

		if (max_sdu != U32_MAX) {
			sched->max_frm_len[tc] = max_sdu + dev->hard_header_len;
			sched->max_sdu[tc] = max_sdu;
		} else {
			sched->max_frm_len[tc] = U32_MAX; /* never oversized */
			sched->max_sdu[tc] = 0;
		}
	}
}
/* Returns the entry corresponding to next available interval. If
 * validate_interval is set, it only validates whether the timestamp occurs
 * when the gate corresponding to the skb's traffic class is open.
 */
static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
						  struct Qdisc *sch,
						  struct sched_gate_list *sched,
						  struct sched_gate_list *admin,
						  ktime_t time,
						  ktime_t *interval_start,
						  ktime_t *interval_end,
						  bool validate_interval)
{
	ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
	ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
	struct sched_entry *entry = NULL, *entry_found = NULL;
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	bool entry_available = false;
	s32 cycle_elapsed;
	int tc, n;

	tc = netdev_get_prio_tc_map(dev, skb->priority);
	packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb));

	*interval_start = 0;
	*interval_end = 0;

	if (!sched)
		return NULL;

	cycle = sched->cycle_time;
	cycle_elapsed = get_cycle_time_elapsed(sched, time);
	/* Rewind to the start of the cycle containing @time; the walk
	 * below covers at most one full cycle from there.
	 */
	curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
	cycle_end = ktime_add_ns(curr_intv_end, cycle);

	list_for_each_entry(entry, &sched->entries, list) {
		curr_intv_start = curr_intv_end;
		curr_intv_end = get_interval_end_time(sched, admin, entry,
						      curr_intv_start);

		if (ktime_after(curr_intv_start, cycle_end))
			break;

		/* Skip entries whose gate is closed for this TC, or whose
		 * interval is too short to fit the whole packet.
		 */
		if (!(entry->gate_mask & BIT(tc)) ||
		    packet_transmit_time > entry->interval)
			continue;

		txtime = entry->next_txtime;

		if (ktime_before(txtime, time) || validate_interval) {
			transmit_end_time = ktime_add_ns(time, packet_transmit_time);
			if ((ktime_before(curr_intv_start, time) &&
			     ktime_before(transmit_end_time, curr_intv_end)) ||
			    (ktime_after(curr_intv_start, time) && !validate_interval)) {
				entry_found = entry;
				*interval_start = curr_intv_start;
				*interval_end = curr_intv_end;
				break;
			} else if (!entry_available && !validate_interval) {
				/* Here, we are just trying to find out the
				 * first available interval in the next cycle.
				 */
				entry_available = true;
				entry_found = entry;
				*interval_start = ktime_add_ns(curr_intv_start, cycle);
				*interval_end = ktime_add_ns(curr_intv_end, cycle);
			}
		} else if (ktime_before(txtime, earliest_txtime) &&
			   !entry_available) {
			earliest_txtime = txtime;
			entry_found = entry;
			/* Project this entry's interval forward by the number
			 * of whole cycles between now and txtime.
			 */
			n = div_s64(ktime_sub(txtime, curr_intv_start), cycle);
			*interval_start = ktime_add(curr_intv_start, n * cycle);
			*interval_end = ktime_add(curr_intv_end, n * cycle);
		}
	}

	return entry_found;
}
  337. static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
  338. {
  339. struct taprio_sched *q = qdisc_priv(sch);
  340. struct sched_gate_list *sched, *admin;
  341. ktime_t interval_start, interval_end;
  342. struct sched_entry *entry;
  343. rcu_read_lock();
  344. sched = rcu_dereference(q->oper_sched);
  345. admin = rcu_dereference(q->admin_sched);
  346. entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp,
  347. &interval_start, &interval_end, true);
  348. rcu_read_unlock();
  349. return entry;
  350. }
/* This returns the tstamp value set by TCP in terms of the set clock.
 * Returns 0 for packets positively identified as non-TCP (IPv4 or IPv6
 * with a different L4 protocol), so the caller ignores the timestamp.
 */
static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
{
	unsigned int offset = skb_network_offset(skb);
	const struct ipv6hdr *ipv6h;
	const struct iphdr *iph;
	struct ipv6hdr _ipv6h;

	ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
	if (!ipv6h)
		return 0;

	/* The version nibble sits at the same position for v4 and v6 */
	if (ipv6h->version == 4) {
		iph = (struct iphdr *)ipv6h;
		offset += iph->ihl * 4;

		/* special-case 6in4 tunnelling, as that is a common way to get
		 * v6 connectivity in the home
		 */
		if (iph->protocol == IPPROTO_IPV6) {
			ipv6h = skb_header_pointer(skb, offset,
						   sizeof(_ipv6h), &_ipv6h);

			if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
				return 0;
		} else if (iph->protocol != IPPROTO_TCP) {
			return 0;
		}
	} else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) {
		return 0;
	}

	/* Convert the MONOTONIC timestamp to the qdisc clock domain */
	return taprio_mono_to_any(q, skb->skb_mstamp_ns);
}
/* There are a few scenarios where we will have to modify the txtime from
 * what is read from next_txtime in sched_entry. They are:
 * 1. If txtime is in the past,
 *    a. The gate for the traffic class is currently open and packet can be
 *       transmitted before it closes, schedule the packet right away.
 *    b. If the gate corresponding to the traffic class is going to open later
 *       in the cycle, set the txtime of packet to the interval start.
 * 2. If txtime is in the future, there are packets corresponding to the
 *    current traffic class waiting to be transmitted. So, the following
 *    possibilities exist:
 *    a. We can transmit the packet before the window containing the txtime
 *       closes.
 *    b. The window might close before the transmission can be completed
 *       successfully. So, schedule the packet in the next open window.
 */
static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
{
	ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
	struct taprio_sched *q = qdisc_priv(sch);
	struct sched_gate_list *sched, *admin;
	ktime_t minimum_time, now, txtime;
	int len, packet_transmit_time;
	struct sched_entry *entry;
	bool sched_changed;

	now = taprio_get_time(q);
	minimum_time = ktime_add_ns(now, q->txtime_delay);

	/* Never schedule earlier than the timestamp TCP placed on the skb */
	tcp_tstamp = get_tcp_tstamp(q, skb);
	minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);

	rcu_read_lock();
	admin = rcu_dereference(q->admin_sched);
	sched = rcu_dereference(q->oper_sched);
	if (admin && ktime_after(minimum_time, admin->base_time))
		switch_schedules(q, &admin, &sched);

	/* Until the schedule starts, all the queues are open */
	if (!sched || ktime_before(minimum_time, sched->base_time)) {
		txtime = minimum_time;
		goto done;
	}

	len = qdisc_pkt_len(skb);
	packet_transmit_time = length_to_duration(q, len);

	do {
		sched_changed = false;

		entry = find_entry_to_transmit(skb, sch, sched, admin,
					       minimum_time,
					       &interval_start, &interval_end,
					       false);
		if (!entry) {
			/* 0 signals the caller to drop the packet */
			txtime = 0;
			goto done;
		}

		txtime = entry->next_txtime;
		txtime = max_t(ktime_t, txtime, minimum_time);
		txtime = max_t(ktime_t, txtime, interval_start);

		/* The admin schedule takes over before txtime; redo the
		 * search against the admin schedule.
		 */
		if (admin && admin != sched &&
		    ktime_after(txtime, admin->base_time)) {
			sched = admin;
			sched_changed = true;
			continue;
		}

		transmit_end_time = ktime_add(txtime, packet_transmit_time);
		minimum_time = transmit_end_time;

		/* Update the txtime of current entry to the next time it's
		 * interval starts.
		 */
		if (ktime_after(transmit_end_time, interval_end))
			entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
	} while (sched_changed || ktime_after(transmit_end_time, interval_end));

	entry->next_txtime = transmit_end_time;

done:
	rcu_read_unlock();
	return txtime;
}
  452. /* Devices with full offload are expected to honor this in hardware */
  453. static bool taprio_skb_exceeds_queue_max_sdu(struct Qdisc *sch,
  454. struct sk_buff *skb)
  455. {
  456. struct taprio_sched *q = qdisc_priv(sch);
  457. struct net_device *dev = qdisc_dev(sch);
  458. struct sched_gate_list *sched;
  459. int prio = skb->priority;
  460. bool exceeds = false;
  461. u8 tc;
  462. tc = netdev_get_prio_tc_map(dev, prio);
  463. rcu_read_lock();
  464. sched = rcu_dereference(q->oper_sched);
  465. if (sched && skb->len > sched->max_frm_len[tc])
  466. exceeds = true;
  467. rcu_read_unlock();
  468. return exceeds;
  469. }
/* Enqueue a single skb to @child, after validating or assigning its
 * launch time. Backlog and qlen of @sch are accounted before handing
 * the skb down.
 */
static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
			      struct Qdisc *child, struct sk_buff **to_free)
{
	struct taprio_sched *q = qdisc_priv(sch);

	/* sk_flags are only safe to use on full sockets. */
	if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
		/* Socket supplied its own launch time: only accept the skb
		 * if that time falls inside an open interval for its TC.
		 */
		if (!is_valid_interval(skb, sch))
			return qdisc_drop(skb, sch, to_free);
	} else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
		/* Compute a launch time ourselves; 0 means no suitable
		 * interval exists, so drop.
		 */
		skb->tstamp = get_packet_txtime(skb, sch);
		if (!skb->tstamp)
			return qdisc_drop(skb, sch, to_free);
	}

	qdisc_qstats_backlog_inc(sch, skb);
	sch->q.qlen++;

	return qdisc_enqueue(skb, child, to_free);
}
/* GSO-segment @skb in software and enqueue each resulting segment on
 * its own, so per-segment sizes can be checked against queueMaxSDU.
 */
static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch,
				    struct Qdisc *child,
				    struct sk_buff **to_free)
{
	unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
	netdev_features_t features = netif_skb_features(skb);
	struct sk_buff *segs, *nskb;
	int ret;

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs))
		return qdisc_drop(skb, sch, to_free);

	skb_list_walk_safe(segs, segs, nskb) {
		skb_mark_not_on_list(segs);
		qdisc_skb_cb(segs)->pkt_len = segs->len;
		slen += segs->len;

		/* FIXME: we should be segmenting to a smaller size
		 * rather than dropping these
		 */
		if (taprio_skb_exceeds_queue_max_sdu(sch, segs))
			ret = qdisc_drop(segs, sch, to_free);
		else
			ret = taprio_enqueue_one(segs, sch, child, to_free);

		if (ret != NET_XMIT_SUCCESS) {
			if (net_xmit_drop_count(ret))
				qdisc_qstats_drop(sch);
		} else {
			numsegs++;
		}
	}

	/* The original skb was accounted as one packet of @len bytes;
	 * correct the parent's stats for the segments actually enqueued.
	 */
	if (numsegs > 1)
		qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);

	consume_skb(skb);

	return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
/* Will not be called in the full offload case, since the TX queues are
 * attached to the Qdisc created using qdisc_create_dflt()
 */
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			  struct sk_buff **to_free)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct Qdisc *child;
	int queue;

	queue = skb_get_queue_mapping(skb);

	child = q->qdiscs[queue];
	if (unlikely(!child))
		return qdisc_drop(skb, sch, to_free);

	if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) {
		/* Large packets might not be transmitted when the transmission
		 * duration exceeds any configured interval. Therefore, segment
		 * the skb into smaller chunks. Drivers with full offload are
		 * expected to handle this in hardware.
		 */
		if (skb_is_gso(skb))
			return taprio_enqueue_segmented(skb, sch, child,
							to_free);

		/* Oversized and not GSO: nothing to split, drop it */
		return qdisc_drop(skb, sch, to_free);
	}

	return taprio_enqueue_one(skb, sch, child, to_free);
}
/* taprio must be the root qdisc, so peeking through it is unsupported. */
static struct sk_buff *taprio_peek(struct Qdisc *sch)
{
	WARN_ONCE(1, "taprio only supports operating as root qdisc, peek() not implemented");
	return NULL;
}
/* Initialize each traffic class's byte budget for @entry based on its
 * open gate duration and the current link speed.
 */
static void taprio_set_budgets(struct taprio_sched *q,
			       struct sched_gate_list *sched,
			       struct sched_entry *entry)
{
	struct net_device *dev = qdisc_dev(q->root);
	int num_tc = netdev_get_num_tc(dev);
	int tc, budget;

	for (tc = 0; tc < num_tc; tc++) {
		/* Traffic classes which never close have infinite budget */
		if (entry->gate_duration[tc] == sched->cycle_time)
			budget = INT_MAX;
		else
			/* open-gate picoseconds / picoseconds-per-byte */
			budget = div64_u64((u64)entry->gate_duration[tc] * PSEC_PER_NSEC,
					   atomic64_read(&q->picos_per_byte));

		atomic_set(&entry->budget[tc], budget);
	}
}
/* When an skb is sent, it consumes from the budget of all traffic classes.
 * Returns the remaining budget (bytes) of @tc_consumed after subtraction;
 * a negative value means the budget was exceeded.
 */
static int taprio_update_budgets(struct sched_entry *entry, size_t len,
				 int tc_consumed, int num_tc)
{
	int tc, budget, new_budget = 0;

	for (tc = 0; tc < num_tc; tc++) {
		budget = atomic_read(&entry->budget[tc]);
		/* Don't consume from infinite budget */
		if (budget == INT_MAX) {
			if (tc == tc_consumed)
				new_budget = budget;
			continue;
		}

		if (tc == tc_consumed)
			/* Capture the post-subtraction value for the caller */
			new_budget = atomic_sub_return(len, &entry->budget[tc]);
		else
			atomic_sub(len, &entry->budget[tc]);
	}

	return new_budget;
}
/* Try to dequeue one skb from the child qdisc attached to TXQ @txq, enforcing
 * the gate mask, the guard band, and the per-TC budget of the current
 * schedule entry @entry.
 *
 * Returns the dequeued skb with this qdisc's stats updated, or NULL when the
 * child is empty, the skb's traffic class gate is closed, the remaining gate
 * time cannot fit the frame, or the budget is exhausted.
 */
static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq,
					       struct sched_entry *entry,
					       u32 gate_mask)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct Qdisc *child = q->qdiscs[txq];
	int num_tc = netdev_get_num_tc(dev);
	struct sk_buff *skb;
	ktime_t guard;
	int prio;
	int len;
	u8 tc;

	if (unlikely(!child))
		return NULL;

	/* In txtime-assist mode the gate checks below do not apply; the
	 * per-packet txtime already encodes the schedule.
	 */
	if (TXTIME_ASSIST_IS_ENABLED(q->flags))
		goto skip_peek_checks;

	skb = child->ops->peek(child);
	if (!skb)
		return NULL;

	prio = skb->priority;
	tc = netdev_get_prio_tc_map(dev, prio);

	if (!(gate_mask & BIT(tc)))
		return NULL;

	len = qdisc_pkt_len(skb);
	/* Earliest time the full frame could finish transmitting */
	guard = ktime_add_ns(taprio_get_time(q), length_to_duration(q, len));

	/* In the case that there's no gate entry, there's no
	 * guard band ...
	 */
	if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
	    !taprio_entry_allows_tx(guard, entry, tc))
		return NULL;

	/* ... and no budget. */
	if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
	    taprio_update_budgets(entry, len, tc, num_tc) < 0)
		return NULL;

skip_peek_checks:
	skb = child->ops->dequeue(child);
	if (unlikely(!skb))
		return NULL;

	qdisc_bstats_update(sch, skb);
	qdisc_qstats_backlog_dec(sch, skb);
	sch->q.qlen--;

	return skb;
}
  634. static void taprio_next_tc_txq(struct net_device *dev, int tc, int *txq)
  635. {
  636. int offset = dev->tc_to_txq[tc].offset;
  637. int count = dev->tc_to_txq[tc].count;
  638. (*txq)++;
  639. if (*txq == offset + count)
  640. *txq = offset;
  641. }
/* Prioritize higher traffic classes, and select among TXQs belonging to the
 * same TC using round robin
 */
static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch,
						  struct sched_entry *entry,
						  u32 gate_mask)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int num_tc = netdev_get_num_tc(dev);
	struct sk_buff *skb;
	int tc;

	/* Walk TCs from highest to lowest priority */
	for (tc = num_tc - 1; tc >= 0; tc--) {
		int first_txq = q->cur_txq[tc];

		if (!(gate_mask & BIT(tc)))
			continue;

		/* Round-robin over the TXQs of this TC, starting from where
		 * the previous dequeue left off, until we either find a
		 * packet or come back to the starting TXQ.
		 */
		do {
			skb = taprio_dequeue_from_txq(sch, q->cur_txq[tc],
						      entry, gate_mask);

			/* Advance the cursor even on failure so the next
			 * attempt starts at a different TXQ.
			 */
			taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]);

			/* Clamp a cursor that ran past the device's TXQ
			 * range (e.g. after a reconfiguration shrank it).
			 */
			if (q->cur_txq[tc] >= dev->num_tx_queues)
				q->cur_txq[tc] = first_txq;

			if (skb)
				return skb;
		} while (q->cur_txq[tc] != first_txq);
	}

	return NULL;
}
  670. /* Broken way of prioritizing smaller TXQ indices and ignoring the traffic
  671. * class other than to determine whether the gate is open or not
  672. */
  673. static struct sk_buff *taprio_dequeue_txq_priority(struct Qdisc *sch,
  674. struct sched_entry *entry,
  675. u32 gate_mask)
  676. {
  677. struct net_device *dev = qdisc_dev(sch);
  678. struct sk_buff *skb;
  679. int i;
  680. for (i = 0; i < dev->num_tx_queues; i++) {
  681. skb = taprio_dequeue_from_txq(sch, i, entry, gate_mask);
  682. if (skb)
  683. return skb;
  684. }
  685. return NULL;
  686. }
/* Will not be called in the full offload case, since the TX queues are
 * attached to the Qdisc created using qdisc_create_dflt()
 */
static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct sk_buff *skb = NULL;
	struct sched_entry *entry;
	u32 gate_mask;

	rcu_read_lock();
	entry = rcu_dereference(q->current_entry);
	/* if there's no entry, it means that the schedule didn't
	 * start yet, so force all gates to be open, this is in
	 * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5
	 * "AdminGateStates"
	 */
	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
	if (!gate_mask)
		goto done;

	/* The static keys count how many taprio instances sit on devices with
	 * broken vs working mqprio offload; when only one kind exists system
	 * wide, the branch below is resolved with no per-packet test.
	 */
	if (static_branch_unlikely(&taprio_have_broken_mqprio) &&
	    !static_branch_likely(&taprio_have_working_mqprio)) {
		/* Single NIC kind which is broken */
		skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
	} else if (static_branch_likely(&taprio_have_working_mqprio) &&
		   !static_branch_unlikely(&taprio_have_broken_mqprio)) {
		/* Single NIC kind which prioritizes properly */
		skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
	} else {
		/* Mixed NIC kinds present in system, need dynamic testing */
		if (q->broken_mqprio)
			skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
		else
			skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
	}

done:
	rcu_read_unlock();

	return skb;
}
  725. static bool should_restart_cycle(const struct sched_gate_list *oper,
  726. const struct sched_entry *entry)
  727. {
  728. if (list_is_last(&entry->list, &oper->entries))
  729. return true;
  730. if (ktime_compare(entry->end_time, oper->cycle_end_time) == 0)
  731. return true;
  732. return false;
  733. }
  734. static bool should_change_schedules(const struct sched_gate_list *admin,
  735. const struct sched_gate_list *oper,
  736. ktime_t end_time)
  737. {
  738. ktime_t next_base_time, extension_time;
  739. if (!admin)
  740. return false;
  741. next_base_time = sched_base_time(admin);
  742. /* This is the simple case, the end_time would fall after
  743. * the next schedule base_time.
  744. */
  745. if (ktime_compare(next_base_time, end_time) <= 0)
  746. return true;
  747. /* This is the cycle_time_extension case, if the end_time
  748. * plus the amount that can be extended would fall after the
  749. * next schedule base_time, we can extend the current schedule
  750. * for that amount.
  751. */
  752. extension_time = ktime_add_ns(end_time, oper->cycle_time_extension);
  753. /* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
  754. * how precisely the extension should be made. So after
  755. * conformance testing, this logic may change.
  756. */
  757. if (ktime_compare(next_base_time, extension_time) <= 0)
  758. return true;
  759. return false;
  760. }
/* hrtimer callback that advances the software schedule to its next entry,
 * computes that entry's end time and per-TC gate close times, switches from
 * the operational to the admin schedule when due, and re-arms itself.
 * Runs in hrtimer (softirq) context; q->current_entry_lock serializes it
 * against configuration changes.
 */
static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
	struct taprio_sched *q = container_of(timer, struct taprio_sched,
					      advance_timer);
	struct net_device *dev = qdisc_dev(q->root);
	struct sched_gate_list *oper, *admin;
	int num_tc = netdev_get_num_tc(dev);
	struct sched_entry *entry, *next;
	struct Qdisc *sch = q->root;
	ktime_t end_time;
	int tc;

	spin_lock(&q->current_entry_lock);
	entry = rcu_dereference_protected(q->current_entry,
					  lockdep_is_held(&q->current_entry_lock));
	oper = rcu_dereference_protected(q->oper_sched,
					 lockdep_is_held(&q->current_entry_lock));
	admin = rcu_dereference_protected(q->admin_sched,
					  lockdep_is_held(&q->current_entry_lock));

	/* No operational schedule yet: promote the admin one */
	if (!oper)
		switch_schedules(q, &admin, &oper);

	/* This can happen in two cases: 1. this is the very first run
	 * of this function (i.e. we weren't running any schedule
	 * previously); 2. The previous schedule just ended. The first
	 * entry of all schedules are pre-calculated during the
	 * schedule initialization.
	 */
	if (unlikely(!entry || entry->end_time == oper->base_time)) {
		next = list_first_entry(&oper->entries, struct sched_entry,
					list);
		end_time = next->end_time;
		goto first_run;
	}

	if (should_restart_cycle(oper, entry)) {
		next = list_first_entry(&oper->entries, struct sched_entry,
					list);
		oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time,
						    oper->cycle_time);
	} else {
		next = list_next_entry(entry, list);
	}

	/* The next entry ends after its interval, but never past the end of
	 * the current cycle.
	 */
	end_time = ktime_add_ns(entry->end_time, next->interval);
	end_time = min_t(ktime_t, end_time, oper->cycle_end_time);

	for (tc = 0; tc < num_tc; tc++) {
		/* Gates open for the whole cycle conceptually never close */
		if (next->gate_duration[tc] == oper->cycle_time)
			next->gate_close_time[tc] = KTIME_MAX;
		else
			next->gate_close_time[tc] = ktime_add_ns(entry->end_time,
								 next->gate_duration[tc]);
	}

	if (should_change_schedules(admin, oper, end_time)) {
		/* Set things so the next time this runs, the new
		 * schedule runs.
		 */
		end_time = sched_base_time(admin);
		switch_schedules(q, &admin, &oper);
	}

	next->end_time = end_time;
	taprio_set_budgets(q, oper, next);

first_run:
	rcu_assign_pointer(q->current_entry, next);
	spin_unlock(&q->current_entry_lock);

	hrtimer_set_expires(&q->advance_timer, end_time);

	/* Kick the qdisc so packets held back by closed gates get another
	 * chance to be dequeued under the new entry.
	 */
	rcu_read_lock();
	__netif_schedule(sch);
	rcu_read_unlock();

	return HRTIMER_RESTART;
}
/* Netlink validation policy for a single schedule entry (index, command,
 * gate mask, interval).
 */
static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
	[TCA_TAPRIO_SCHED_ENTRY_INDEX]	   = { .type = NLA_U32 },
	[TCA_TAPRIO_SCHED_ENTRY_CMD]	   = { .type = NLA_U8 },
	[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
	[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
};
/* Netlink validation policy for per-traffic-class attributes (index,
 * queueMaxSDU, express/preemptible frame preemption selection).
 */
static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
	[TCA_TAPRIO_TC_ENTRY_INDEX]	= NLA_POLICY_MAX(NLA_U32,
							 TC_QOPT_MAX_QUEUE),
	[TCA_TAPRIO_TC_ENTRY_MAX_SDU]	= { .type = NLA_U32 },
	[TCA_TAPRIO_TC_ENTRY_FP]	= NLA_POLICY_RANGE(NLA_U32,
							   TC_FP_EXPRESS,
							   TC_FP_PREEMPTIBLE),
};
/* cycle_time must be non-negative and fit in an int (nanoseconds) */
static const struct netlink_range_validation_signed taprio_cycle_time_range = {
	.min = 0,
	.max = INT_MAX,
};
/* Top-level netlink validation policy for taprio qdisc options */
static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
	[TCA_TAPRIO_ATTR_PRIOMAP]	       = {
		.len = sizeof(struct tc_mqprio_qopt)
	},
	[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]           = { .type = NLA_NESTED },
	[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]            = { .type = NLA_S64 },
	[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]         = { .type = NLA_NESTED },
	[TCA_TAPRIO_ATTR_SCHED_CLOCKID]              = { .type = NLA_S32 },
	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           =
		NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
	[TCA_TAPRIO_ATTR_FLAGS]                      =
		NLA_POLICY_MASK(NLA_U32, TAPRIO_SUPPORTED_FLAGS),
	[TCA_TAPRIO_ATTR_TXTIME_DELAY]               = { .type = NLA_U32 },
	[TCA_TAPRIO_ATTR_TC_ENTRY]                   = { .type = NLA_NESTED },
};
  862. static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
  863. struct sched_entry *entry,
  864. struct netlink_ext_ack *extack)
  865. {
  866. int min_duration = length_to_duration(q, ETH_ZLEN);
  867. u32 interval = 0;
  868. if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
  869. entry->command = nla_get_u8(
  870. tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
  871. if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
  872. entry->gate_mask = nla_get_u32(
  873. tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
  874. if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
  875. interval = nla_get_u32(
  876. tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
  877. /* The interval should allow at least the minimum ethernet
  878. * frame to go out.
  879. */
  880. if (interval < min_duration) {
  881. NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
  882. return -EINVAL;
  883. }
  884. entry->interval = interval;
  885. return 0;
  886. }
  887. static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n,
  888. struct sched_entry *entry, int index,
  889. struct netlink_ext_ack *extack)
  890. {
  891. struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
  892. int err;
  893. err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
  894. entry_policy, NULL);
  895. if (err < 0) {
  896. NL_SET_ERR_MSG(extack, "Could not parse nested entry");
  897. return -EINVAL;
  898. }
  899. entry->index = index;
  900. return fill_sched_entry(q, tb, entry, extack);
  901. }
  902. static int parse_sched_list(struct taprio_sched *q, struct nlattr *list,
  903. struct sched_gate_list *sched,
  904. struct netlink_ext_ack *extack)
  905. {
  906. struct nlattr *n;
  907. int err, rem;
  908. int i = 0;
  909. if (!list)
  910. return -EINVAL;
  911. nla_for_each_nested(n, list, rem) {
  912. struct sched_entry *entry;
  913. if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
  914. NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
  915. continue;
  916. }
  917. entry = kzalloc(sizeof(*entry), GFP_KERNEL);
  918. if (!entry) {
  919. NL_SET_ERR_MSG(extack, "Not enough memory for entry");
  920. return -ENOMEM;
  921. }
  922. err = parse_sched_entry(q, n, entry, i, extack);
  923. if (err < 0) {
  924. kfree(entry);
  925. return err;
  926. }
  927. list_add_tail(&entry->list, &sched->entries);
  928. i++;
  929. }
  930. sched->num_entries = i;
  931. return i;
  932. }
/* Build the schedule @new from netlink attributes: base time, cycle time and
 * extension, and the entry list. When no cycle_time is given, it is derived
 * as the sum of all entry intervals. Returns 0 or a negative errno.
 */
static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
				 struct sched_gate_list *new,
				 struct netlink_ext_ack *extack)
{
	int err = 0;

	if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
		NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
		return -ENOTSUPP;
	}

	if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
		new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);

	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
		new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);

	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
		new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);

	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
		err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST],
				       new, extack);
	if (err < 0)
		return err;

	if (!new->cycle_time) {
		struct sched_entry *entry;
		ktime_t cycle = 0;

		/* Implicit cycle time: sum of all entry intervals */
		list_for_each_entry(entry, &new->entries, list)
			cycle = ktime_add_ns(cycle, entry->interval);

		/* Same bound the netlink policy enforces for an explicit
		 * cycle_time.
		 */
		if (cycle < 0 || cycle > INT_MAX) {
			NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
			return -EINVAL;
		}

		new->cycle_time = cycle;
	}

	/* Each entry must fit at least a minimum-sized frame */
	if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) {
		NL_SET_ERR_MSG(extack, "'cycle_time' is too small");
		return -EINVAL;
	}

	taprio_calculate_gate_durations(q, new);

	return 0;
}
/* Validate the embedded mqprio configuration (priomap) against the device.
 * Without an explicit priomap, the device must already have traffic classes
 * configured. Returns 0 or a negative errno.
 */
static int taprio_parse_mqprio_opt(struct net_device *dev,
				   struct tc_mqprio_qopt *qopt,
				   struct netlink_ext_ack *extack,
				   u32 taprio_flags)
{
	bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);

	if (!qopt) {
		if (!dev->num_tc) {
			NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
			return -EINVAL;
		}
		return 0;
	}

	/* taprio imposes that traffic classes map 1:n to tx queues */
	if (qopt->num_tc > dev->num_tx_queues) {
		NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
		return -EINVAL;
	}

	/* For some reason, in txtime-assist mode, we allow TXQ ranges for
	 * different TCs to overlap, and just validate the TXQ ranges.
	 */
	return mqprio_validate_qopt(dev, qopt, true, allow_overlapping_txqs,
				    extack);
}
/* Compute when schedule @sched should (re)start: its base_time when that is
 * still in the future, otherwise the beginning of the next full cycle after
 * now. Returns 0 on success, -EFAULT on an inconsistent zero cycle time.
 */
static int taprio_get_start_time(struct Qdisc *sch,
				 struct sched_gate_list *sched,
				 ktime_t *start)
{
	struct taprio_sched *q = qdisc_priv(sch);
	ktime_t now, base, cycle;
	s64 n;

	base = sched_base_time(sched);
	now = taprio_get_time(q);

	if (ktime_after(base, now)) {
		*start = base;
		return 0;
	}

	cycle = sched->cycle_time;

	/* The qdisc is expected to have at least one sched_entry. Moreover,
	 * any entry must have 'interval' > 0. Thus if the cycle time is zero,
	 * something went really wrong. In that case, we should warn about this
	 * inconsistent state and return error.
	 */
	if (WARN_ON(!cycle))
		return -EFAULT;

	/* Schedule the start time for the beginning of the next
	 * cycle.
	 */
	n = div64_s64(ktime_sub_ns(now, base), cycle);
	*start = ktime_add_ns(base, (n + 1) * cycle);
	return 0;
}
/* Pre-compute end time, budgets and per-TC gate close times of the first
 * schedule entry, anchored at start time @base. Also clears current_entry so
 * advance_sched() takes its "first run" path.
 */
static void setup_first_end_time(struct taprio_sched *q,
				 struct sched_gate_list *sched, ktime_t base)
{
	struct net_device *dev = qdisc_dev(q->root);
	int num_tc = netdev_get_num_tc(dev);
	struct sched_entry *first;
	ktime_t cycle;
	int tc;

	first = list_first_entry(&sched->entries,
				 struct sched_entry, list);

	cycle = sched->cycle_time;

	/* FIXME: find a better place to do this */
	sched->cycle_end_time = ktime_add_ns(base, cycle);

	first->end_time = ktime_add_ns(base, first->interval);
	taprio_set_budgets(q, sched, first);

	for (tc = 0; tc < num_tc; tc++) {
		/* A gate open for the whole cycle conceptually never closes */
		if (first->gate_duration[tc] == sched->cycle_time)
			first->gate_close_time[tc] = KTIME_MAX;
		else
			first->gate_close_time[tc] = ktime_add_ns(base, first->gate_duration[tc]);
	}

	rcu_assign_pointer(q->current_entry, NULL);
}
  1046. static void taprio_start_sched(struct Qdisc *sch,
  1047. ktime_t start, struct sched_gate_list *new)
  1048. {
  1049. struct taprio_sched *q = qdisc_priv(sch);
  1050. ktime_t expires;
  1051. if (FULL_OFFLOAD_IS_ENABLED(q->flags))
  1052. return;
  1053. expires = hrtimer_get_expires(&q->advance_timer);
  1054. if (expires == 0)
  1055. expires = KTIME_MAX;
  1056. /* If the new schedule starts before the next expiration, we
  1057. * reprogram it to the earliest one, so we change the admin
  1058. * schedule to the operational one at the right time.
  1059. */
  1060. start = min_t(ktime_t, start, expires);
  1061. hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
  1062. }
  1063. static void taprio_set_picos_per_byte(struct net_device *dev,
  1064. struct taprio_sched *q)
  1065. {
  1066. struct ethtool_link_ksettings ecmd;
  1067. int speed = SPEED_10;
  1068. int picos_per_byte;
  1069. int err;
  1070. err = __ethtool_get_link_ksettings(dev, &ecmd);
  1071. if (err < 0)
  1072. goto skip;
  1073. if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
  1074. speed = ecmd.base.speed;
  1075. skip:
  1076. picos_per_byte = (USEC_PER_SEC * 8) / speed;
  1077. atomic64_set(&q->picos_per_byte, picos_per_byte);
  1078. netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
  1079. dev->name, (long long)atomic64_read(&q->picos_per_byte),
  1080. ecmd.base.speed);
  1081. }
/* netdev notifier: when a device comes up or its link changes, refresh the
 * link-speed-derived picos_per_byte and the queueMaxSDU limits of every
 * taprio instance attached to that device. Runs under RTNL.
 */
static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
			       void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct sched_gate_list *oper, *admin;
	struct qdisc_size_table *stab;
	struct taprio_sched *q;

	ASSERT_RTNL();

	if (event != NETDEV_UP && event != NETDEV_CHANGE)
		return NOTIFY_DONE;

	list_for_each_entry(q, &taprio_list, taprio_list) {
		if (dev != qdisc_dev(q->root))
			continue;

		taprio_set_picos_per_byte(dev, q);

		stab = rtnl_dereference(q->root->stab);

		oper = rtnl_dereference(q->oper_sched);
		if (oper)
			taprio_update_queue_max_sdu(q, oper, stab);

		admin = rtnl_dereference(q->admin_sched);
		if (admin)
			taprio_update_queue_max_sdu(q, admin, stab);

		/* At most one taprio instance per device */
		break;
	}

	return NOTIFY_DONE;
}
  1107. static void setup_txtime(struct taprio_sched *q,
  1108. struct sched_gate_list *sched, ktime_t base)
  1109. {
  1110. struct sched_entry *entry;
  1111. u64 interval = 0;
  1112. list_for_each_entry(entry, &sched->entries, list) {
  1113. entry->next_txtime = ktime_add_ns(base, interval);
  1114. interval += entry->interval;
  1115. }
  1116. }
  1117. static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
  1118. {
  1119. struct __tc_taprio_qopt_offload *__offload;
  1120. __offload = kzalloc(struct_size(__offload, offload.entries, num_entries),
  1121. GFP_KERNEL);
  1122. if (!__offload)
  1123. return NULL;
  1124. refcount_set(&__offload->users, 1);
  1125. return &__offload->offload;
  1126. }
  1127. struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
  1128. *offload)
  1129. {
  1130. struct __tc_taprio_qopt_offload *__offload;
  1131. __offload = container_of(offload, struct __tc_taprio_qopt_offload,
  1132. offload);
  1133. refcount_inc(&__offload->users);
  1134. return offload;
  1135. }
  1136. EXPORT_SYMBOL_GPL(taprio_offload_get);
  1137. void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
  1138. {
  1139. struct __tc_taprio_qopt_offload *__offload;
  1140. __offload = container_of(offload, struct __tc_taprio_qopt_offload,
  1141. offload);
  1142. if (!refcount_dec_and_test(&__offload->users))
  1143. return;
  1144. kfree(__offload);
  1145. }
  1146. EXPORT_SYMBOL_GPL(taprio_offload_free);
/* The function will only serve to keep the pointers to the "oper" and "admin"
 * schedules valid in relation to their base times, so when calling dump() the
 * users looks at the right schedules.
 * When using full offload, the admin configuration is promoted to oper at the
 * base_time in the PHC time domain. But because the system time is not
 * necessarily in sync with that, we can't just trigger a hrtimer to call
 * switch_schedules at the right hardware time.
 * At the moment we call this by hand right away from taprio, but in the future
 * it will be useful to create a mechanism for drivers to notify taprio of the
 * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
 * This is left as TODO.
 */
static void taprio_offload_config_changed(struct taprio_sched *q)
{
	struct sched_gate_list *oper, *admin;

	oper = rtnl_dereference(q->oper_sched);
	admin = rtnl_dereference(q->admin_sched);

	/* Promote admin to oper immediately (see comment above) */
	switch_schedules(q, &admin, &oper);
}
  1166. static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
  1167. {
  1168. u32 i, queue_mask = 0;
  1169. for (i = 0; i < dev->num_tc; i++) {
  1170. u32 offset, count;
  1171. if (!(tc_mask & BIT(i)))
  1172. continue;
  1173. offset = dev->tc_to_txq[i].offset;
  1174. count = dev->tc_to_txq[i].count;
  1175. queue_mask |= GENMASK(offset + count - 1, offset);
  1176. }
  1177. return queue_mask;
  1178. }
  1179. static void taprio_sched_to_offload(struct net_device *dev,
  1180. struct sched_gate_list *sched,
  1181. struct tc_taprio_qopt_offload *offload,
  1182. const struct tc_taprio_caps *caps)
  1183. {
  1184. struct sched_entry *entry;
  1185. int i = 0;
  1186. offload->base_time = sched->base_time;
  1187. offload->cycle_time = sched->cycle_time;
  1188. offload->cycle_time_extension = sched->cycle_time_extension;
  1189. list_for_each_entry(entry, &sched->entries, list) {
  1190. struct tc_taprio_sched_entry *e = &offload->entries[i];
  1191. e->command = entry->command;
  1192. e->interval = entry->interval;
  1193. if (caps->gate_mask_per_txq)
  1194. e->gate_mask = tc_map_to_queue_mask(dev,
  1195. entry->gate_mask);
  1196. else
  1197. e->gate_mask = entry->gate_mask;
  1198. i++;
  1199. }
  1200. offload->num_entries = i;
  1201. }
/* Query the driver's taprio capabilities and record whether its mqprio
 * implementation is broken, bumping the matching global static key so the
 * dequeue path can be chosen without per-packet tests.
 * Balanced by taprio_cleanup_broken_mqprio().
 */
static void taprio_detect_broken_mqprio(struct taprio_sched *q)
{
	struct net_device *dev = qdisc_dev(q->root);
	struct tc_taprio_caps caps;

	qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
				 &caps, sizeof(caps));

	q->broken_mqprio = caps.broken_mqprio;
	if (q->broken_mqprio)
		static_branch_inc(&taprio_have_broken_mqprio);
	else
		static_branch_inc(&taprio_have_working_mqprio);

	/* Remember that a static key was taken, for cleanup */
	q->detected_mqprio = true;
}
/* Undo the static key increment from taprio_detect_broken_mqprio(); no-op if
 * detection never ran for this instance.
 */
static void taprio_cleanup_broken_mqprio(struct taprio_sched *q)
{
	if (!q->detected_mqprio)
		return;

	if (q->broken_mqprio)
		static_branch_dec(&taprio_have_broken_mqprio);
	else
		static_branch_dec(&taprio_have_working_mqprio);
}
/* Hand the schedule @sched to the device driver for full hardware offload via
 * ndo_setup_tc(TC_SETUP_QDISC_TAPRIO, TAPRIO_CMD_REPLACE).
 * Returns 0 on success or a negative errno (unsupported device/capability,
 * OOM, or a driver setup failure).
 */
static int taprio_enable_offload(struct net_device *dev,
				 struct taprio_sched *q,
				 struct sched_gate_list *sched,
				 struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_taprio_qopt_offload *offload;
	struct tc_taprio_caps caps;
	int tc, err = 0;

	if (!ops->ndo_setup_tc) {
		NL_SET_ERR_MSG(extack,
			       "Device does not support taprio offload");
		return -EOPNOTSUPP;
	}

	qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
				 &caps, sizeof(caps));

	/* Reject a queueMaxSDU configuration the hardware cannot enforce */
	if (!caps.supports_queue_max_sdu) {
		for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
			if (q->max_sdu[tc]) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Device does not handle queueMaxSDU");
				return -EOPNOTSUPP;
			}
		}
	}

	offload = taprio_offload_alloc(sched->num_entries);
	if (!offload) {
		NL_SET_ERR_MSG(extack,
			       "Not enough memory for enabling offload mode");
		return -ENOMEM;
	}
	offload->cmd = TAPRIO_CMD_REPLACE;
	offload->extack = extack;
	mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
	offload->mqprio.extack = extack;
	taprio_sched_to_offload(dev, sched, offload, &caps);
	mqprio_fp_to_offload(q->fp, &offload->mqprio);

	for (tc = 0; tc < TC_MAX_QUEUE; tc++)
		offload->max_sdu[tc] = q->max_sdu[tc];

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
	if (err < 0) {
		NL_SET_ERR_MSG_WEAK(extack,
				    "Device failed to setup taprio offload");
		goto done;
	}

	q->offloaded = true;

done:
	/* The offload structure may linger around via a reference taken by the
	 * device driver, so clear up the netlink extack pointer so that the
	 * driver isn't tempted to dereference data which stopped being valid
	 */
	offload->extack = NULL;
	offload->mqprio.extack = NULL;
	taprio_offload_free(offload);

	return err;
}
  1280. static int taprio_disable_offload(struct net_device *dev,
  1281. struct taprio_sched *q,
  1282. struct netlink_ext_ack *extack)
  1283. {
  1284. const struct net_device_ops *ops = dev->netdev_ops;
  1285. struct tc_taprio_qopt_offload *offload;
  1286. int err;
  1287. if (!q->offloaded)
  1288. return 0;
  1289. offload = taprio_offload_alloc(0);
  1290. if (!offload) {
  1291. NL_SET_ERR_MSG(extack,
  1292. "Not enough memory to disable offload mode");
  1293. return -ENOMEM;
  1294. }
  1295. offload->cmd = TAPRIO_CMD_DESTROY;
  1296. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
  1297. if (err < 0) {
  1298. NL_SET_ERR_MSG(extack,
  1299. "Device failed to disable offload");
  1300. goto out;
  1301. }
  1302. q->offloaded = false;
  1303. out:
  1304. taprio_offload_free(offload);
  1305. return err;
  1306. }
/* If full offload is enabled, the only possible clockid is the net device's
 * PHC. For that reason, specifying a clockid through netlink is incorrect.
 * For txtime-assist, it is implicitly assumed that the device's PHC is kept
 * in sync with the specified clockid via a user space daemon such as phc2sys.
 * For both software taprio and txtime-assist, the clockid is used for the
 * hrtimer that advances the schedule and hence mandatory.
 */
static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int err = -EINVAL;

	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
		const struct ethtool_ops *ops = dev->ethtool_ops;
		struct kernel_ethtool_ts_info info = {
			.cmd = ETHTOOL_GET_TS_INFO,
			.phc_index = -1,
		};

		if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
			NL_SET_ERR_MSG(extack,
				       "The 'clockid' cannot be specified for full offload");
			goto out;
		}

		if (ops && ops->get_ts_info)
			err = ops->get_ts_info(dev, &info);

		/* Full offload requires a device PHC to run the schedule */
		if (err || info.phc_index < 0) {
			NL_SET_ERR_MSG(extack,
				       "Device does not have a PTP clock");
			err = -ENOTSUPP;
			goto out;
		}
	} else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
		int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
		enum tk_offsets tk_offset;

		/* We only support static clockids and we don't allow
		 * for it to be modified after the first init.
		 */
		if (clockid < 0 ||
		    (q->clockid != -1 && q->clockid != clockid)) {
			NL_SET_ERR_MSG(extack,
				       "Changing the 'clockid' of a running schedule is not supported");
			err = -ENOTSUPP;
			goto out;
		}

		switch (clockid) {
		case CLOCK_REALTIME:
			tk_offset = TK_OFFS_REAL;
			break;
		case CLOCK_MONOTONIC:
			/* TK_OFFS_MAX is used as the "no offset" marker for
			 * the monotonic clock - see taprio_mono_to_any
			 */
			tk_offset = TK_OFFS_MAX;
			break;
		case CLOCK_BOOTTIME:
			tk_offset = TK_OFFS_BOOT;
			break;
		case CLOCK_TAI:
			tk_offset = TK_OFFS_TAI;
			break;
		default:
			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
			err = -EINVAL;
			goto out;
		}
		/* This pairs with READ_ONCE() in taprio_mono_to_any */
		WRITE_ONCE(q->tk_offset, tk_offset);

		q->clockid = clockid;
	} else {
		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
		goto out;
	}

	/* Everything went ok, return success. */
	err = 0;

out:
	return err;
}
  1382. static int taprio_parse_tc_entry(struct Qdisc *sch,
  1383. struct nlattr *opt,
  1384. u32 max_sdu[TC_QOPT_MAX_QUEUE],
  1385. u32 fp[TC_QOPT_MAX_QUEUE],
  1386. unsigned long *seen_tcs,
  1387. struct netlink_ext_ack *extack)
  1388. {
  1389. struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
  1390. struct net_device *dev = qdisc_dev(sch);
  1391. int err, tc;
  1392. u32 val;
  1393. err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
  1394. taprio_tc_policy, extack);
  1395. if (err < 0)
  1396. return err;
  1397. if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) {
  1398. NL_SET_ERR_MSG_MOD(extack, "TC entry index missing");
  1399. return -EINVAL;
  1400. }
  1401. tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]);
  1402. if (tc >= TC_QOPT_MAX_QUEUE) {
  1403. NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range");
  1404. return -ERANGE;
  1405. }
  1406. if (*seen_tcs & BIT(tc)) {
  1407. NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry");
  1408. return -EINVAL;
  1409. }
  1410. *seen_tcs |= BIT(tc);
  1411. if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) {
  1412. val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
  1413. if (val > dev->max_mtu) {
  1414. NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
  1415. return -ERANGE;
  1416. }
  1417. max_sdu[tc] = val;
  1418. }
  1419. if (tb[TCA_TAPRIO_TC_ENTRY_FP])
  1420. fp[tc] = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_FP]);
  1421. return 0;
  1422. }
/* Parse all nested TC entry attributes from @opt. Parsing happens into local
 * copies of the max_sdu/fp arrays so a mid-list error leaves q unmodified;
 * the copies are committed to q only after every entry parsed successfully.
 * Also validates that frame preemption is only requested when full offload is
 * enabled and the device supports it. Returns 0 or a negative errno.
 */
static int taprio_parse_tc_entries(struct Qdisc *sch,
				   struct nlattr *opt,
				   struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	u32 max_sdu[TC_QOPT_MAX_QUEUE];
	bool have_preemption = false;
	unsigned long seen_tcs = 0;
	u32 fp[TC_QOPT_MAX_QUEUE];
	struct nlattr *n;
	int tc, rem;
	int err = 0;

	/* Start from the currently configured values; entries not mentioned
	 * in this message keep their old setting.
	 */
	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
		max_sdu[tc] = q->max_sdu[tc];
		fp[tc] = q->fp[tc];
	}

	nla_for_each_nested_type(n, TCA_TAPRIO_ATTR_TC_ENTRY, opt, rem) {
		err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs,
					    extack);
		if (err)
			return err;
	}

	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
		q->max_sdu[tc] = max_sdu[tc];
		q->fp[tc] = fp[tc];
		if (fp[tc] != TC_FP_EXPRESS)
			have_preemption = true;
	}

	if (have_preemption) {
		if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) {
			NL_SET_ERR_MSG(extack,
				       "Preemption only supported with full offload");
			return -EOPNOTSUPP;
		}

		if (!ethtool_dev_mm_supported(dev)) {
			NL_SET_ERR_MSG(extack,
				       "Device does not support preemption");
			return -EOPNOTSUPP;
		}
	}

	return err;
}
  1466. static int taprio_mqprio_cmp(const struct net_device *dev,
  1467. const struct tc_mqprio_qopt *mqprio)
  1468. {
  1469. int i;
  1470. if (!mqprio || mqprio->num_tc != dev->num_tc)
  1471. return -1;
  1472. for (i = 0; i < mqprio->num_tc; i++)
  1473. if (dev->tc_to_txq[i].count != mqprio->count[i] ||
  1474. dev->tc_to_txq[i].offset != mqprio->offset[i])
  1475. return -1;
  1476. for (i = 0; i <= TC_BITMASK; i++)
  1477. if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
  1478. return -1;
  1479. return 0;
  1480. }
/* Install a new taprio configuration (called for both qdisc creation and
 * later changes, under RTNL).
 *
 * Validates flags and the optional mqprio mapping, builds a new gate
 * schedule, programs offload when requested, and finally publishes the
 * schedule either as the operational one (first txtime-assist install)
 * or as the admin (pending) one, retiring any previous admin schedule
 * via RCU.  Returns 0 or a negative errno with extack set.
 */
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
			 struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *stab = rtnl_dereference(sch->stab);
	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
	struct sched_gate_list *oper, *admin, *new_admin;
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_mqprio_qopt *mqprio = NULL;
	unsigned long flags;
	u32 taprio_flags;
	ktime_t start;
	int i, err;

	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
					  taprio_policy, extack);
	if (err < 0)
		return err;

	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);

	/* The semantics of the 'flags' argument in relation to 'change()'
	 * requests, are interpreted following two rules (which are applied in
	 * this order): (1) an omitted 'flags' argument is interpreted as
	 * zero; (2) the 'flags' of a "running" taprio instance cannot be
	 * changed.
	 */
	taprio_flags = tb[TCA_TAPRIO_ATTR_FLAGS] ? nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]) : 0;

	/* txtime-assist and full offload are mutually exclusive */
	if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
	    (taprio_flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) {
		NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_ATTR_FLAGS],
				    "TXTIME_ASSIST and FULL_OFFLOAD are mutually exclusive");
		return -EINVAL;
	}

	if (q->flags != TAPRIO_FLAGS_INVALID && q->flags != taprio_flags) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Changing 'flags' of a running schedule is not supported");
		return -EOPNOTSUPP;
	}
	q->flags = taprio_flags;

	/* Needed for length_to_duration() during netlink attribute parsing */
	taprio_set_picos_per_byte(dev, q);

	err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
	if (err < 0)
		return err;

	err = taprio_parse_tc_entries(sch, opt, extack);
	if (err)
		return err;

	new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
	if (!new_admin) {
		NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
		return -ENOMEM;
	}
	INIT_LIST_HEAD(&new_admin->entries);

	oper = rtnl_dereference(q->oper_sched);
	admin = rtnl_dereference(q->admin_sched);

	/* no changes - no new mqprio settings */
	if (!taprio_mqprio_cmp(dev, mqprio))
		mqprio = NULL;

	/* A different TC mapping can only be applied while no schedule is
	 * installed yet.
	 */
	if (mqprio && (oper || admin)) {
		NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
		err = -ENOTSUPP;
		goto free_sched;
	}

	if (mqprio) {
		err = netdev_set_num_tc(dev, mqprio->num_tc);
		if (err)
			goto free_sched;
		for (i = 0; i < mqprio->num_tc; i++) {
			netdev_set_tc_queue(dev, i,
					    mqprio->count[i],
					    mqprio->offset[i]);
			q->cur_txq[i] = mqprio->offset[i];
		}

		/* Always use supplied priority mappings */
		for (i = 0; i <= TC_BITMASK; i++)
			netdev_set_prio_tc_map(dev, i,
					       mqprio->prio_tc_map[i]);
	}

	err = parse_taprio_schedule(q, tb, new_admin, extack);
	if (err < 0)
		goto free_sched;

	if (new_admin->num_entries == 0) {
		NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
		err = -EINVAL;
		goto free_sched;
	}

	err = taprio_parse_clockid(sch, tb, extack);
	if (err < 0)
		goto free_sched;

	taprio_update_queue_max_sdu(q, new_admin, stab);

	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
		err = taprio_enable_offload(dev, q, new_admin, extack);
	else
		err = taprio_disable_offload(dev, q, extack);
	if (err)
		goto free_sched;

	/* Protects against enqueue()/dequeue() */
	spin_lock_bh(qdisc_lock(sch));

	if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
		if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
			NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
			err = -EINVAL;
			goto unlock;
		}

		q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
	}

	/* Software mode drives the schedule with the advance timer; (re)arm
	 * it on the clock selected by taprio_parse_clockid() if needed.
	 */
	if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
	    !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
	    !hrtimer_active(&q->advance_timer)) {
		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
		q->advance_timer.function = advance_sched;
	}

	err = taprio_get_start_time(sch, new_admin, &start);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
		goto unlock;
	}

	setup_txtime(q, new_admin, start);

	if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
		/* First install goes straight to oper; later installs become
		 * the pending admin schedule.
		 */
		if (!oper) {
			rcu_assign_pointer(q->oper_sched, new_admin);
			err = 0;
			new_admin = NULL;
			goto unlock;
		}

		/* Not going to race against advance_sched(), but still */
		admin = rcu_replace_pointer(q->admin_sched, new_admin,
					    lockdep_rtnl_is_held());
		if (admin)
			call_rcu(&admin->rcu, taprio_free_sched_cb);
	} else {
		setup_first_end_time(q, new_admin, start);

		/* Protects against advance_sched() */
		spin_lock_irqsave(&q->current_entry_lock, flags);

		taprio_start_sched(sch, start, new_admin);

		admin = rcu_replace_pointer(q->admin_sched, new_admin,
					    lockdep_rtnl_is_held());
		if (admin)
			call_rcu(&admin->rcu, taprio_free_sched_cb);

		spin_unlock_irqrestore(&q->current_entry_lock, flags);

		if (FULL_OFFLOAD_IS_ENABLED(q->flags))
			taprio_offload_config_changed(q);
	}

	/* Ownership of new_admin was transferred above; don't free it. */
	new_admin = NULL;
	err = 0;

	if (!stab)
		NL_SET_ERR_MSG_MOD(extack,
				   "Size table not specified, frame length estimations may be inaccurate");

unlock:
	spin_unlock_bh(qdisc_lock(sch));

free_sched:
	if (new_admin)
		call_rcu(&new_admin->rcu, taprio_free_sched_cb);

	return err;
}
  1636. static void taprio_reset(struct Qdisc *sch)
  1637. {
  1638. struct taprio_sched *q = qdisc_priv(sch);
  1639. struct net_device *dev = qdisc_dev(sch);
  1640. int i;
  1641. hrtimer_cancel(&q->advance_timer);
  1642. if (q->qdiscs) {
  1643. for (i = 0; i < dev->num_tx_queues; i++)
  1644. if (q->qdiscs[i])
  1645. qdisc_reset(q->qdiscs[i]);
  1646. }
  1647. }
/* Qdisc_ops.destroy: tear down all taprio state.
 *
 * Ordering matters: the advance timer is cancelled and the qdisc
 * synchronized before offload is disabled and the children released,
 * and the gate schedules are freed last via RCU.
 */
static void taprio_destroy(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_gate_list *oper, *admin;
	unsigned int i;

	list_del(&q->taprio_list);

	/* Note that taprio_reset() might not be called if an error
	 * happens in qdisc_create(), after taprio_init() has been called.
	 */
	hrtimer_cancel(&q->advance_timer);
	qdisc_synchronize(sch);

	taprio_disable_offload(dev, q, NULL);

	/* q->qdiscs may be NULL if taprio_init() failed before allocating
	 * it; qdisc_put(NULL) entries are harmless.
	 */
	if (q->qdiscs) {
		for (i = 0; i < dev->num_tx_queues; i++)
			qdisc_put(q->qdiscs[i]);

		kfree(q->qdiscs);
	}
	q->qdiscs = NULL;

	netdev_reset_tc(dev);

	oper = rtnl_dereference(q->oper_sched);
	admin = rtnl_dereference(q->admin_sched);

	if (oper)
		call_rcu(&oper->rcu, taprio_free_sched_cb);

	if (admin)
		call_rcu(&admin->rcu, taprio_free_sched_cb);

	taprio_cleanup_broken_mqprio(q);
}
/* Qdisc_ops.init: allocate per-TXQ child qdiscs, set defaults, then hand
 * the netlink options to taprio_change() for the actual configuration.
 *
 * On error, cleanup of partially allocated state (q->qdiscs and the
 * taprio_list entry) is left to taprio_destroy(), which the qdisc core
 * calls when init fails.
 */
static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int i, tc;

	spin_lock_init(&q->current_entry_lock);

	/* CLOCK_TAI here is a placeholder; taprio_change() re-inits the
	 * timer with the clockid chosen by the user.
	 */
	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
	q->advance_timer.function = advance_sched;

	q->root = sch;

	/* We only support static clockids. Use an invalid value as default
	 * and get the valid one on taprio_change().
	 */
	q->clockid = -1;
	q->flags = TAPRIO_FLAGS_INVALID;

	list_add(&q->taprio_list, &taprio_list);

	if (sch->parent != TC_H_ROOT) {
		NL_SET_ERR_MSG_MOD(extack, "Can only be attached as root qdisc");
		return -EOPNOTSUPP;
	}

	if (!netif_is_multiqueue(dev)) {
		NL_SET_ERR_MSG_MOD(extack, "Multi-queue device is required");
		return -EOPNOTSUPP;
	}

	q->qdiscs = kcalloc(dev->num_tx_queues, sizeof(q->qdiscs[0]),
			    GFP_KERNEL);
	if (!q->qdiscs)
		return -ENOMEM;

	if (!opt)
		return -EINVAL;

	/* One default pfifo child per TX queue; minor IDs are 1-based. */
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		struct Qdisc *qdisc;

		dev_queue = netdev_get_tx_queue(dev, i);
		qdisc = qdisc_create_dflt(dev_queue,
					  &pfifo_qdisc_ops,
					  TC_H_MAKE(TC_H_MAJ(sch->handle),
						    TC_H_MIN(i + 1)),
					  extack);
		if (!qdisc)
			return -ENOMEM;

		/* Only hash qdiscs for queues that are currently in use. */
		if (i < dev->real_num_tx_queues)
			qdisc_hash_add(qdisc, false);

		q->qdiscs[i] = qdisc;
	}

	/* All traffic classes default to express (no preemption). */
	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
		q->fp[tc] = TC_FP_EXPRESS;

	taprio_detect_broken_mqprio(q);

	return taprio_change(sch, opt, extack);
}
/* Qdisc_ops.attach: graft qdiscs onto the netdev TX queues once the
 * device is ready.
 *
 * In full offload mode the children are attached directly (the root
 * taprio qdisc is bypassed); in software mode the root itself is grafted
 * onto every queue so all traffic funnels through taprio_enqueue().
 */
static void taprio_attach(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	unsigned int ntx;

	/* Attach underlying qdisc */
	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *old, *dev_queue_qdisc;

		if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
			struct Qdisc *qdisc = q->qdiscs[ntx];

			/* In offload mode, the root taprio qdisc is bypassed
			 * and the netdev TX queues see the children directly
			 */
			qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
			dev_queue_qdisc = qdisc;
		} else {
			/* In software mode, attach the root taprio qdisc
			 * to all netdev TX queues, so that dev_qdisc_enqueue()
			 * goes through taprio_enqueue().
			 */
			dev_queue_qdisc = sch;
		}
		old = dev_graft_qdisc(dev_queue, dev_queue_qdisc);
		/* The qdisc's refcount requires to be elevated once
		 * for each netdev TX queue it is grafted onto
		 */
		qdisc_refcount_inc(dev_queue_qdisc);
		if (old)
			qdisc_put(old);
	}
}
  1758. static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
  1759. unsigned long cl)
  1760. {
  1761. struct net_device *dev = qdisc_dev(sch);
  1762. unsigned long ntx = cl - 1;
  1763. if (ntx >= dev->num_tx_queues)
  1764. return NULL;
  1765. return netdev_get_tx_queue(dev, ntx);
  1766. }
/* Qdisc_class_ops.graft: replace the child qdisc of class @cl with @new,
 * returning the previous child through @old.
 *
 * The device is deactivated around the swap to quiesce the TX path.
 * In offload mode the child sits directly on the netdev TX queue, so the
 * refcounts must be adjusted here to balance qdisc_graft()'s later
 * qdisc_put().
 */
static int taprio_graft(struct Qdisc *sch, unsigned long cl,
			struct Qdisc *new, struct Qdisc **old,
			struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	if (!dev_queue)
		return -EINVAL;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	/* In offload mode, the child Qdisc is directly attached to the netdev
	 * TX queue, and thus, we need to keep its refcount elevated in order
	 * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue.
	 * However, save the reference to the new qdisc in the private array in
	 * both software and offload cases, to have an up-to-date reference to
	 * our children.
	 */
	*old = q->qdiscs[cl - 1];
	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
		WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old);
		if (new)
			qdisc_refcount_inc(new);
		if (*old)
			qdisc_put(*old);
	}

	q->qdiscs[cl - 1] = new;
	if (new)
		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return 0;
}
  1800. static int dump_entry(struct sk_buff *msg,
  1801. const struct sched_entry *entry)
  1802. {
  1803. struct nlattr *item;
  1804. item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY);
  1805. if (!item)
  1806. return -ENOSPC;
  1807. if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
  1808. goto nla_put_failure;
  1809. if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
  1810. goto nla_put_failure;
  1811. if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
  1812. entry->gate_mask))
  1813. goto nla_put_failure;
  1814. if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
  1815. entry->interval))
  1816. goto nla_put_failure;
  1817. return nla_nest_end(msg, item);
  1818. nla_put_failure:
  1819. nla_nest_cancel(msg, item);
  1820. return -1;
  1821. }
/* Dump one gate schedule: its base time, cycle time, cycle time
 * extension and the nested list of entries.  Returns 0 on success,
 * -1 on message-space failure.
 */
static int dump_schedule(struct sk_buff *msg,
			 const struct sched_gate_list *root)
{
	struct nlattr *entry_list;
	struct sched_entry *entry;

	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
			root->base_time, TCA_TAPRIO_PAD))
		return -1;

	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
			root->cycle_time, TCA_TAPRIO_PAD))
		return -1;

	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
			root->cycle_time_extension, TCA_TAPRIO_PAD))
		return -1;

	entry_list = nla_nest_start_noflag(msg,
					   TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
	if (!entry_list)
		goto error_nest;

	list_for_each_entry(entry, &root->entries, list) {
		if (dump_entry(msg, entry) < 0)
			goto error_nest;
	}

	nla_nest_end(msg, entry_list);
	return 0;

error_nest:
	/* nla_nest_cancel(msg, NULL) is safe if the nest never opened. */
	nla_nest_cancel(msg, entry_list);
	return -1;
}
/* Dump one TCA_TAPRIO_ATTR_TC_ENTRY nest per traffic class, carrying the
 * TC index, the schedule's max SDU and the frame-preemption status.
 * Returns 0 on success or -EMSGSIZE when the message runs out of room.
 */
static int taprio_dump_tc_entries(struct sk_buff *skb,
				  struct taprio_sched *q,
				  struct sched_gate_list *sched)
{
	struct nlattr *n;
	int tc;

	for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
		n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY);
		if (!n)
			return -EMSGSIZE;

		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc))
			goto nla_put_failure;

		/* max_sdu comes from the schedule being dumped, fp from the
		 * qdisc-wide configuration.
		 */
		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU,
				sched->max_sdu[tc]))
			goto nla_put_failure;

		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_FP, q->fp[tc]))
			goto nla_put_failure;

		nla_nest_end(skb, n);
	}

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, n);
	return -EMSGSIZE;
}
  1874. static int taprio_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
  1875. {
  1876. if (val == TAPRIO_STAT_NOT_SET)
  1877. return 0;
  1878. if (nla_put_u64_64bit(skb, attrtype, val, TCA_TAPRIO_OFFLOAD_STATS_PAD))
  1879. return -EMSGSIZE;
  1880. return 0;
  1881. }
  1882. static int taprio_dump_xstats(struct Qdisc *sch, struct gnet_dump *d,
  1883. struct tc_taprio_qopt_offload *offload,
  1884. struct tc_taprio_qopt_stats *stats)
  1885. {
  1886. struct net_device *dev = qdisc_dev(sch);
  1887. const struct net_device_ops *ops;
  1888. struct sk_buff *skb = d->skb;
  1889. struct nlattr *xstats;
  1890. int err;
  1891. ops = qdisc_dev(sch)->netdev_ops;
  1892. /* FIXME I could use qdisc_offload_dump_helper(), but that messes
  1893. * with sch->flags depending on whether the device reports taprio
  1894. * stats, and I'm not sure whether that's a good idea, considering
  1895. * that stats are optional to the offload itself
  1896. */
  1897. if (!ops->ndo_setup_tc)
  1898. return 0;
  1899. memset(stats, 0xff, sizeof(*stats));
  1900. err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
  1901. if (err == -EOPNOTSUPP)
  1902. return 0;
  1903. if (err)
  1904. return err;
  1905. xstats = nla_nest_start(skb, TCA_STATS_APP);
  1906. if (!xstats)
  1907. goto err;
  1908. if (taprio_put_stat(skb, stats->window_drops,
  1909. TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS) ||
  1910. taprio_put_stat(skb, stats->tx_overruns,
  1911. TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS))
  1912. goto err_cancel;
  1913. nla_nest_end(skb, xstats);
  1914. return 0;
  1915. err_cancel:
  1916. nla_nest_cancel(skb, xstats);
  1917. err:
  1918. return -EMSGSIZE;
  1919. }
  1920. static int taprio_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
  1921. {
  1922. struct tc_taprio_qopt_offload offload = {
  1923. .cmd = TAPRIO_CMD_STATS,
  1924. };
  1925. return taprio_dump_xstats(sch, d, &offload, &offload.stats);
  1926. }
/* Qdisc_ops.dump: serialize the taprio configuration into a netlink
 * message — priority map, clockid, flags, txtime delay, the per-TC
 * entries and the operational schedule, plus the admin (pending)
 * schedule in its own nest when one exists.
 *
 * Returns the nest-end offset on success, -ENOSPC on any failure (with
 * all opened nests cancelled).
 */
static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_gate_list *oper, *admin;
	struct tc_mqprio_qopt opt = { 0 };
	struct nlattr *nest, *sched_nest;

	mqprio_qopt_reconstruct(dev, &opt);

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!nest)
		goto start_error;

	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
		goto options_error;

	/* The clockid is meaningless in full offload mode, so omit it. */
	if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
	    nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
		goto options_error;

	if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
		goto options_error;

	if (q->txtime_delay &&
	    nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
		goto options_error;

	rcu_read_lock();

	oper = rtnl_dereference(q->oper_sched);
	admin = rtnl_dereference(q->admin_sched);

	if (oper && taprio_dump_tc_entries(skb, q, oper))
		goto options_error_rcu;

	if (oper && dump_schedule(skb, oper))
		goto options_error_rcu;

	if (!admin)
		goto done;

	sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
	if (!sched_nest)
		goto options_error_rcu;

	if (dump_schedule(skb, admin))
		goto admin_error;

	nla_nest_end(skb, sched_nest);

done:
	rcu_read_unlock();
	return nla_nest_end(skb, nest);

admin_error:
	nla_nest_cancel(skb, sched_nest);

options_error_rcu:
	rcu_read_unlock();

options_error:
	nla_nest_cancel(skb, nest);

start_error:
	return -ENOSPC;
}
  1975. static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
  1976. {
  1977. struct taprio_sched *q = qdisc_priv(sch);
  1978. struct net_device *dev = qdisc_dev(sch);
  1979. unsigned int ntx = cl - 1;
  1980. if (ntx >= dev->num_tx_queues)
  1981. return NULL;
  1982. return q->qdiscs[ntx];
  1983. }
  1984. static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
  1985. {
  1986. unsigned int ntx = TC_H_MIN(classid);
  1987. if (!taprio_queue_get(sch, ntx))
  1988. return 0;
  1989. return ntx;
  1990. }
/* Qdisc_class_ops.dump: fill in the tcmsg for class @cl, pointing at the
 * child qdisc's handle.
 *
 * NOTE(review): child is dereferenced without a NULL check — presumably
 * the qdisc core only calls this with a @cl validated by taprio_find();
 * confirm against the caller.
 */
static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
			     struct sk_buff *skb, struct tcmsg *tcm)
{
	struct Qdisc *child = taprio_leaf(sch, cl);

	tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle |= TC_H_MIN(cl);
	tcm->tcm_info = child->handle;

	return 0;
}
/* Qdisc_class_ops.dump_stats: dump the child's basic/queue stats plus any
 * per-queue offload statistics the driver reports.
 *
 * The __releases/__acquires annotations document that the gnet dump lock
 * may be dropped and retaken by the stats helpers (sparse checking).
 */
static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				   struct gnet_dump *d)
	__releases(d->lock)
	__acquires(d->lock)
{
	struct Qdisc *child = taprio_leaf(sch, cl);
	struct tc_taprio_qopt_offload offload = {
		.cmd = TAPRIO_CMD_QUEUE_STATS,
		.queue_stats = {
			/* Class IDs are 1-based; queue indices are 0-based. */
			.queue = cl - 1,
		},
	};

	if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
	    qdisc_qstats_copy(d, child) < 0)
		return -1;

	return taprio_dump_xstats(sch, d, &offload, &offload.queue_stats.stats);
}
  2017. static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
  2018. {
  2019. struct net_device *dev = qdisc_dev(sch);
  2020. unsigned long ntx;
  2021. if (arg->stop)
  2022. return;
  2023. arg->count = arg->skip;
  2024. for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
  2025. if (!tc_qdisc_stats_dump(sch, ntx + 1, arg))
  2026. break;
  2027. }
  2028. }
  2029. static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
  2030. struct tcmsg *tcm)
  2031. {
  2032. return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
  2033. }
/* Class operations: each netdev TX queue is exposed as one class whose
 * leaf is the corresponding child qdisc.
 */
static const struct Qdisc_class_ops taprio_class_ops = {
	.graft		= taprio_graft,
	.leaf		= taprio_leaf,
	.find		= taprio_find,
	.walk		= taprio_walk,
	.dump		= taprio_dump_class,
	.dump_stats	= taprio_dump_class_stats,
	.select_queue	= taprio_select_queue,
};
/* Top-level qdisc operations for the "taprio" scheduler. */
static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
	.cl_ops		= &taprio_class_ops,
	.id		= "taprio",
	.priv_size	= sizeof(struct taprio_sched),
	.init		= taprio_init,
	.change		= taprio_change,
	.destroy	= taprio_destroy,
	.reset		= taprio_reset,
	.attach		= taprio_attach,
	.peek		= taprio_peek,
	.dequeue	= taprio_dequeue,
	.enqueue	= taprio_enqueue,
	.dump		= taprio_dump,
	.dump_stats	= taprio_dump_stats,
	.owner		= THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("taprio");

/* Netdevice event notifier; handled by taprio_dev_notifier (defined
 * earlier in this file — presumably tracks link changes; verify there).
 */
static struct notifier_block taprio_device_notifier = {
	.notifier_call = taprio_dev_notifier,
};
  2063. static int __init taprio_module_init(void)
  2064. {
  2065. int err = register_netdevice_notifier(&taprio_device_notifier);
  2066. if (err)
  2067. return err;
  2068. return register_qdisc(&taprio_qdisc_ops);
  2069. }
/* Module exit: unregister in the reverse order of registration. */
static void __exit taprio_module_exit(void)
{
	unregister_qdisc(&taprio_qdisc_ops);
	unregister_netdevice_notifier(&taprio_device_notifier);
}
/* Standard module boilerplate. */
module_init(taprio_module_init);
module_exit(taprio_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Time Aware Priority qdisc");