sch_taprio.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /* net/sched/sch_taprio.c Time Aware Priority Scheduler
  3. *
  4. * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
  5. *
  6. */
  7. #include <linux/ethtool.h>
  8. #include <linux/ethtool_netlink.h>
  9. #include <linux/types.h>
  10. #include <linux/slab.h>
  11. #include <linux/kernel.h>
  12. #include <linux/string.h>
  13. #include <linux/list.h>
  14. #include <linux/errno.h>
  15. #include <linux/skbuff.h>
  16. #include <linux/math64.h>
  17. #include <linux/module.h>
  18. #include <linux/spinlock.h>
  19. #include <linux/rcupdate.h>
  20. #include <linux/time.h>
  21. #include <net/gso.h>
  22. #include <net/netlink.h>
  23. #include <net/pkt_sched.h>
  24. #include <net/pkt_cls.h>
  25. #include <net/sch_generic.h>
  26. #include <net/sock.h>
  27. #include <net/tcp.h>
  28. #define TAPRIO_STAT_NOT_SET (~0ULL)
  29. #include "sch_mqprio_lib.h"
  30. static LIST_HEAD(taprio_list);
  31. static struct static_key_false taprio_have_broken_mqprio;
  32. static struct static_key_false taprio_have_working_mqprio;
  33. #define TAPRIO_ALL_GATES_OPEN -1
  34. #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
  35. #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
  36. #define TAPRIO_SUPPORTED_FLAGS \
  37. (TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
  38. #define TAPRIO_FLAGS_INVALID U32_MAX
  39. /* Minimum value for picos_per_byte to ensure non-zero duration
  40. * for minimum-sized Ethernet frames (ETH_ZLEN = 60).
  41. * 60 * 17 > PSEC_PER_NSEC (1000)
  42. */
  43. #define TAPRIO_PICOS_PER_BYTE_MIN 17
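/* Worked example (illustrative numbers, not from this file): taprio_set_picos_per_byte()
 * below computes (USEC_PER_SEC * 8) / speed with the link speed in Mbps, so a 1 Gbps
 * link yields 8000 ps/byte and a 100 Gbps link 80 ps/byte. Clamping to 17 ps/byte keeps
 * length_to_duration(q, ETH_ZLEN) non-zero: 60 * 17 / PSEC_PER_NSEC = 1020 / 1000 = 1 ns
 * after the integer division, instead of rounding down to 0.
 */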
  44. struct sched_entry {
  45. /* Durations between this GCL entry and the GCL entry where the
  46. * respective traffic class gate closes
  47. */
  48. u64 gate_duration[TC_MAX_QUEUE];
  49. atomic_t budget[TC_MAX_QUEUE];
  50. /* The qdisc makes some effort so that no packet leaves
  51. * after this time
  52. */
  53. ktime_t gate_close_time[TC_MAX_QUEUE];
  54. struct list_head list;
  55. /* Used to calculate when to advance the schedule */
  56. ktime_t end_time;
  57. ktime_t next_txtime;
  58. int index;
  59. u32 gate_mask;
  60. u32 interval;
  61. u8 command;
  62. };
  63. struct sched_gate_list {
  64. /* Longest non-zero contiguous gate durations per traffic class,
  65. * or 0 if a traffic class gate never opens during the schedule.
  66. */
  67. u64 max_open_gate_duration[TC_MAX_QUEUE];
  68. u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
  69. u32 max_sdu[TC_MAX_QUEUE]; /* for dump */
  70. struct rcu_head rcu;
  71. struct list_head entries;
  72. size_t num_entries;
  73. ktime_t cycle_end_time;
  74. s64 cycle_time;
  75. s64 cycle_time_extension;
  76. s64 base_time;
  77. };
  78. struct taprio_sched {
  79. struct Qdisc **qdiscs;
  80. struct Qdisc *root;
  81. u32 flags;
  82. enum tk_offsets tk_offset;
  83. int clockid;
  84. bool offloaded;
  85. bool detected_mqprio;
  86. bool broken_mqprio;
  87. atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
  88. * speeds it's sub-nanoseconds per byte
  89. */
  90. /* Protects the update side of the RCU protected current_entry */
  91. spinlock_t current_entry_lock;
  92. struct sched_entry __rcu *current_entry;
  93. struct sched_gate_list __rcu *oper_sched;
  94. struct sched_gate_list __rcu *admin_sched;
  95. struct hrtimer advance_timer;
  96. struct list_head taprio_list;
  97. int cur_txq[TC_MAX_QUEUE];
  98. u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
  99. u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
  100. u32 txtime_delay;
  101. };
  102. struct __tc_taprio_qopt_offload {
  103. refcount_t users;
  104. struct tc_taprio_qopt_offload offload;
  105. };
  106. static void taprio_calculate_gate_durations(struct taprio_sched *q,
  107. struct sched_gate_list *sched)
  108. {
  109. struct net_device *dev = qdisc_dev(q->root);
  110. int num_tc = netdev_get_num_tc(dev);
  111. struct sched_entry *entry, *cur;
  112. int tc;
  113. list_for_each_entry(entry, &sched->entries, list) {
  114. u32 gates_still_open = entry->gate_mask;
  115. /* For each traffic class, calculate each open gate duration,
  116. * starting at this schedule entry and ending at the schedule
  117. * entry containing a gate close event for that TC.
  118. */
  119. cur = entry;
  120. do {
  121. if (!gates_still_open)
  122. break;
  123. for (tc = 0; tc < num_tc; tc++) {
  124. if (!(gates_still_open & BIT(tc)))
  125. continue;
  126. if (cur->gate_mask & BIT(tc))
  127. entry->gate_duration[tc] += cur->interval;
  128. else
  129. gates_still_open &= ~BIT(tc);
  130. }
  131. cur = list_next_entry_circular(cur, &sched->entries, list);
  132. } while (cur != entry);
  133. /* Keep track of the maximum gate duration for each traffic
  134. * class, taking care to not confuse a traffic class which is
  135. * temporarily closed with one that is always closed.
  136. */
  137. for (tc = 0; tc < num_tc; tc++)
  138. if (entry->gate_duration[tc] &&
  139. sched->max_open_gate_duration[tc] < entry->gate_duration[tc])
  140. sched->max_open_gate_duration[tc] = entry->gate_duration[tc];
  141. }
  142. }
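/* Illustration with a hypothetical 3-entry schedule: entry 0 keeps the TC0 gate open for
 * 300 us, entry 1 keeps it open for 200 us, entry 2 closes it for 500 us. Walking the
 * list circularly, entry 0 accumulates gate_duration[0] = 300 + 200 = 500 us, entry 1
 * gets 200 us, entry 2 gets 0, and max_open_gate_duration[0] ends up as 500 us. A TC
 * whose gate is open in every entry accumulates the full cycle_time instead.
 */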
  143. static bool taprio_entry_allows_tx(ktime_t skb_end_time,
  144. struct sched_entry *entry, int tc)
  145. {
  146. return ktime_before(skb_end_time, entry->gate_close_time[tc]);
  147. }
  148. static ktime_t sched_base_time(const struct sched_gate_list *sched)
  149. {
  150. if (!sched)
  151. return KTIME_MAX;
  152. return ns_to_ktime(sched->base_time);
  153. }
  154. static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
  155. {
  156. /* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
  157. enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
  158. switch (tk_offset) {
  159. case TK_OFFS_MAX:
  160. return mono;
  161. default:
  162. return ktime_mono_to_any(mono, tk_offset);
  163. }
  164. }
  165. static ktime_t taprio_get_time(const struct taprio_sched *q)
  166. {
  167. return taprio_mono_to_any(q, ktime_get());
  168. }
  169. static void taprio_free_sched_cb(struct rcu_head *head)
  170. {
  171. struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
  172. struct sched_entry *entry, *n;
  173. list_for_each_entry_safe(entry, n, &sched->entries, list) {
  174. list_del(&entry->list);
  175. kfree(entry);
  176. }
  177. kfree(sched);
  178. }
  179. static void switch_schedules(struct taprio_sched *q,
  180. struct sched_gate_list **admin,
  181. struct sched_gate_list **oper)
  182. {
  183. rcu_assign_pointer(q->oper_sched, *admin);
  184. rcu_assign_pointer(q->admin_sched, NULL);
  185. if (*oper)
  186. call_rcu(&(*oper)->rcu, taprio_free_sched_cb);
  187. *oper = *admin;
  188. *admin = NULL;
  189. }
  190. /* Get how much time has been already elapsed in the current cycle. */
  191. static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
  192. {
  193. ktime_t time_since_sched_start;
  194. s32 time_elapsed;
  195. time_since_sched_start = ktime_sub(time, sched->base_time);
  196. div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed);
  197. return time_elapsed;
  198. }
  199. static ktime_t get_interval_end_time(struct sched_gate_list *sched,
  200. struct sched_gate_list *admin,
  201. struct sched_entry *entry,
  202. ktime_t intv_start)
  203. {
  204. s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start);
  205. ktime_t intv_end, cycle_ext_end, cycle_end;
  206. cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
  207. intv_end = ktime_add_ns(intv_start, entry->interval);
  208. cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);
  209. if (ktime_before(intv_end, cycle_end))
  210. return intv_end;
  211. else if (admin && admin != sched &&
  212. ktime_after(admin->base_time, cycle_end) &&
  213. ktime_before(admin->base_time, cycle_ext_end))
  214. return admin->base_time;
  215. else
  216. return cycle_end;
  217. }
  218. static int length_to_duration(struct taprio_sched *q, int len)
  219. {
  220. return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC);
  221. }
  222. static int duration_to_length(struct taprio_sched *q, u64 duration)
  223. {
  224. return div_u64(duration * PSEC_PER_NSEC, atomic64_read(&q->picos_per_byte));
  225. }
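/* Sketch of the conversion, assuming a 1 Gbps link (picos_per_byte = 8000):
 * length_to_duration() maps a 1500 byte frame to 1500 * 8000 / 1000 = 12000 ns (12 us),
 * and duration_to_length() maps those 12000 ns back to 12000 * 1000 / 8000 = 1500 bytes.
 * Both helpers round down because of the integer division.
 */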
  226. /* Sets sched->max_sdu[] and sched->max_frm_len[] to the minimum between the
  227. * q->max_sdu[] requested by the user and the max_sdu dynamically determined by
  228. * the maximum open gate durations at the given link speed.
  229. */
  230. static void taprio_update_queue_max_sdu(struct taprio_sched *q,
  231. struct sched_gate_list *sched,
  232. struct qdisc_size_table *stab)
  233. {
  234. struct net_device *dev = qdisc_dev(q->root);
  235. int num_tc = netdev_get_num_tc(dev);
  236. u32 max_sdu_from_user;
  237. u32 max_sdu_dynamic;
  238. u32 max_sdu;
  239. int tc;
  240. for (tc = 0; tc < num_tc; tc++) {
  241. max_sdu_from_user = q->max_sdu[tc] ?: U32_MAX;
  242. /* TC gate never closes => keep the queueMaxSDU
  243. * selected by the user
  244. */
  245. if (sched->max_open_gate_duration[tc] == sched->cycle_time) {
  246. max_sdu_dynamic = U32_MAX;
  247. } else {
  248. u32 max_frm_len;
  249. max_frm_len = duration_to_length(q, sched->max_open_gate_duration[tc]);
  250. /* Compensate for L1 overhead from size table,
  251. * but don't let the frame size go negative
  252. */
  253. if (stab) {
  254. max_frm_len -= stab->szopts.overhead;
  255. max_frm_len = max_t(int, max_frm_len,
  256. dev->hard_header_len + 1);
  257. }
  258. max_sdu_dynamic = max_frm_len - dev->hard_header_len;
  259. if (max_sdu_dynamic > dev->max_mtu)
  260. max_sdu_dynamic = U32_MAX;
  261. }
  262. max_sdu = min(max_sdu_dynamic, max_sdu_from_user);
  263. if (max_sdu != U32_MAX) {
  264. sched->max_frm_len[tc] = max_sdu + dev->hard_header_len;
  265. sched->max_sdu[tc] = max_sdu;
  266. } else {
  267. sched->max_frm_len[tc] = U32_MAX; /* never oversized */
  268. sched->max_sdu[tc] = 0;
  269. }
  270. }
  271. }
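/* Hypothetical example: with a longest open window of 12 us for some TC on a 1 Gbps
 * link, duration_to_length() yields a 1500 byte frame budget. Assuming a standard
 * Ethernet header (hard_header_len = 14), the dynamic limit becomes max_sdu = 1486,
 * which is then reduced further if the user configured a smaller queueMaxSDU for that
 * TC. A window as long as the cycle time leaves the user's value (or "unlimited")
 * untouched.
 */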
272. /* Returns the entry corresponding to the next available interval. If
  273. * validate_interval is set, it only validates whether the timestamp occurs
  274. * when the gate corresponding to the skb's traffic class is open.
  275. */
  276. static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
  277. struct Qdisc *sch,
  278. struct sched_gate_list *sched,
  279. struct sched_gate_list *admin,
  280. ktime_t time,
  281. ktime_t *interval_start,
  282. ktime_t *interval_end,
  283. bool validate_interval)
  284. {
  285. ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
  286. ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
  287. struct sched_entry *entry = NULL, *entry_found = NULL;
  288. struct taprio_sched *q = qdisc_priv(sch);
  289. struct net_device *dev = qdisc_dev(sch);
  290. bool entry_available = false;
  291. s32 cycle_elapsed;
  292. int tc, n;
  293. tc = netdev_get_prio_tc_map(dev, skb->priority);
  294. packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb));
  295. *interval_start = 0;
  296. *interval_end = 0;
  297. if (!sched)
  298. return NULL;
  299. cycle = sched->cycle_time;
  300. cycle_elapsed = get_cycle_time_elapsed(sched, time);
  301. curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
  302. cycle_end = ktime_add_ns(curr_intv_end, cycle);
  303. list_for_each_entry(entry, &sched->entries, list) {
  304. curr_intv_start = curr_intv_end;
  305. curr_intv_end = get_interval_end_time(sched, admin, entry,
  306. curr_intv_start);
  307. if (ktime_after(curr_intv_start, cycle_end))
  308. break;
  309. if (!(entry->gate_mask & BIT(tc)) ||
  310. packet_transmit_time > entry->interval)
  311. continue;
  312. txtime = entry->next_txtime;
  313. if (ktime_before(txtime, time) || validate_interval) {
  314. transmit_end_time = ktime_add_ns(time, packet_transmit_time);
  315. if ((ktime_before(curr_intv_start, time) &&
  316. ktime_before(transmit_end_time, curr_intv_end)) ||
  317. (ktime_after(curr_intv_start, time) && !validate_interval)) {
  318. entry_found = entry;
  319. *interval_start = curr_intv_start;
  320. *interval_end = curr_intv_end;
  321. break;
  322. } else if (!entry_available && !validate_interval) {
  323. /* Here, we are just trying to find out the
  324. * first available interval in the next cycle.
  325. */
  326. entry_available = true;
  327. entry_found = entry;
  328. *interval_start = ktime_add_ns(curr_intv_start, cycle);
  329. *interval_end = ktime_add_ns(curr_intv_end, cycle);
  330. }
  331. } else if (ktime_before(txtime, earliest_txtime) &&
  332. !entry_available) {
  333. earliest_txtime = txtime;
  334. entry_found = entry;
  335. n = div_s64(ktime_sub(txtime, curr_intv_start), cycle);
  336. *interval_start = ktime_add(curr_intv_start, n * cycle);
  337. *interval_end = ktime_add(curr_intv_end, n * cycle);
  338. }
  339. }
  340. return entry_found;
  341. }
  342. static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
  343. {
  344. struct taprio_sched *q = qdisc_priv(sch);
  345. struct sched_gate_list *sched, *admin;
  346. ktime_t interval_start, interval_end;
  347. struct sched_entry *entry;
  348. rcu_read_lock();
  349. sched = rcu_dereference(q->oper_sched);
  350. admin = rcu_dereference(q->admin_sched);
  351. entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp,
  352. &interval_start, &interval_end, true);
  353. rcu_read_unlock();
  354. return entry;
  355. }
  356. /* This returns the tstamp value set by TCP in terms of the set clock. */
  357. static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
  358. {
  359. unsigned int offset = skb_network_offset(skb);
  360. const struct ipv6hdr *ipv6h;
  361. const struct iphdr *iph;
  362. struct ipv6hdr _ipv6h;
  363. ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
  364. if (!ipv6h)
  365. return 0;
  366. if (ipv6h->version == 4) {
  367. iph = (struct iphdr *)ipv6h;
  368. offset += iph->ihl * 4;
  369. /* special-case 6in4 tunnelling, as that is a common way to get
  370. * v6 connectivity in the home
  371. */
  372. if (iph->protocol == IPPROTO_IPV6) {
  373. ipv6h = skb_header_pointer(skb, offset,
  374. sizeof(_ipv6h), &_ipv6h);
  375. if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
  376. return 0;
  377. } else if (iph->protocol != IPPROTO_TCP) {
  378. return 0;
  379. }
  380. } else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) {
  381. return 0;
  382. }
  383. return taprio_mono_to_any(q, skb->skb_mstamp_ns);
  384. }
  385. /* There are a few scenarios where we will have to modify the txtime from
  386. * what is read from next_txtime in sched_entry. They are:
  387. * 1. If txtime is in the past,
  388. * a. The gate for the traffic class is currently open and packet can be
  389. * transmitted before it closes, schedule the packet right away.
  390. * b. If the gate corresponding to the traffic class is going to open later
  391. * in the cycle, set the txtime of packet to the interval start.
  392. * 2. If txtime is in the future, there are packets corresponding to the
  393. * current traffic class waiting to be transmitted. So, the following
  394. * possibilities exist:
  395. * a. We can transmit the packet before the window containing the txtime
  396. * closes.
  397. * b. The window might close before the transmission can be completed
  398. * successfully. So, schedule the packet in the next open window.
  399. */
  400. static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
  401. {
  402. ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
  403. struct taprio_sched *q = qdisc_priv(sch);
  404. struct sched_gate_list *sched, *admin;
  405. ktime_t minimum_time, now, txtime;
  406. int len, packet_transmit_time;
  407. struct sched_entry *entry;
  408. bool sched_changed;
  409. now = taprio_get_time(q);
  410. minimum_time = ktime_add_ns(now, q->txtime_delay);
  411. tcp_tstamp = get_tcp_tstamp(q, skb);
  412. minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);
  413. rcu_read_lock();
  414. admin = rcu_dereference(q->admin_sched);
  415. sched = rcu_dereference(q->oper_sched);
  416. if (admin && ktime_after(minimum_time, admin->base_time))
  417. switch_schedules(q, &admin, &sched);
  418. /* Until the schedule starts, all the queues are open */
  419. if (!sched || ktime_before(minimum_time, sched->base_time)) {
  420. txtime = minimum_time;
  421. goto done;
  422. }
  423. len = qdisc_pkt_len(skb);
  424. packet_transmit_time = length_to_duration(q, len);
  425. do {
  426. sched_changed = false;
  427. entry = find_entry_to_transmit(skb, sch, sched, admin,
  428. minimum_time,
  429. &interval_start, &interval_end,
  430. false);
  431. if (!entry) {
  432. txtime = 0;
  433. goto done;
  434. }
  435. txtime = entry->next_txtime;
  436. txtime = max_t(ktime_t, txtime, minimum_time);
  437. txtime = max_t(ktime_t, txtime, interval_start);
  438. if (admin && admin != sched &&
  439. ktime_after(txtime, admin->base_time)) {
  440. sched = admin;
  441. sched_changed = true;
  442. continue;
  443. }
  444. transmit_end_time = ktime_add(txtime, packet_transmit_time);
  445. minimum_time = transmit_end_time;
446. /* Update the txtime of the current entry to the next time its
447. * interval starts.
  448. */
  449. if (ktime_after(transmit_end_time, interval_end))
  450. entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
  451. } while (sched_changed || ktime_after(transmit_end_time, interval_end));
  452. entry->next_txtime = transmit_end_time;
  453. done:
  454. rcu_read_unlock();
  455. return txtime;
  456. }
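/* Hypothetical walk-through of the loop above: with a 1 ms cycle in which the skb's
 * gate is open from 100 us to 300 us, a minimum_time of 350 us lands past the window,
 * so find_entry_to_transmit() reports the next occurrence of that interval (starting at
 * 1100 us) and the returned txtime is deferred to it. Only when the projected end of
 * transmission fits before interval_end does the loop exit, after which
 * entry->next_txtime is advanced to that end time for subsequent packets.
 */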
  457. /* Devices with full offload are expected to honor this in hardware */
  458. static bool taprio_skb_exceeds_queue_max_sdu(struct Qdisc *sch,
  459. struct sk_buff *skb)
  460. {
  461. struct taprio_sched *q = qdisc_priv(sch);
  462. struct net_device *dev = qdisc_dev(sch);
  463. struct sched_gate_list *sched;
  464. int prio = skb->priority;
  465. bool exceeds = false;
  466. u8 tc;
  467. tc = netdev_get_prio_tc_map(dev, prio);
  468. rcu_read_lock();
  469. sched = rcu_dereference(q->oper_sched);
  470. if (sched && skb->len > sched->max_frm_len[tc])
  471. exceeds = true;
  472. rcu_read_unlock();
  473. return exceeds;
  474. }
  475. static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
  476. struct Qdisc *child, struct sk_buff **to_free)
  477. {
  478. struct taprio_sched *q = qdisc_priv(sch);
  479. /* sk_flags are only safe to use on full sockets. */
  480. if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
  481. if (!is_valid_interval(skb, sch))
  482. return qdisc_drop(skb, sch, to_free);
  483. } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
  484. skb->tstamp = get_packet_txtime(skb, sch);
  485. if (!skb->tstamp)
  486. return qdisc_drop(skb, sch, to_free);
  487. }
  488. qdisc_qstats_backlog_inc(sch, skb);
  489. sch->q.qlen++;
  490. return qdisc_enqueue(skb, child, to_free);
  491. }
  492. static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch,
  493. struct Qdisc *child,
  494. struct sk_buff **to_free)
  495. {
  496. unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
  497. netdev_features_t features = netif_skb_features(skb);
  498. struct sk_buff *segs, *nskb;
  499. int ret;
  500. segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
  501. if (IS_ERR_OR_NULL(segs))
  502. return qdisc_drop(skb, sch, to_free);
  503. skb_list_walk_safe(segs, segs, nskb) {
  504. skb_mark_not_on_list(segs);
  505. qdisc_skb_cb(segs)->pkt_len = segs->len;
  506. slen += segs->len;
  507. /* FIXME: we should be segmenting to a smaller size
  508. * rather than dropping these
  509. */
  510. if (taprio_skb_exceeds_queue_max_sdu(sch, segs))
  511. ret = qdisc_drop(segs, sch, to_free);
  512. else
  513. ret = taprio_enqueue_one(segs, sch, child, to_free);
  514. if (ret != NET_XMIT_SUCCESS) {
  515. if (net_xmit_drop_count(ret))
  516. qdisc_qstats_drop(sch);
  517. } else {
  518. numsegs++;
  519. }
  520. }
  521. if (numsegs > 1)
  522. qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
  523. consume_skb(skb);
  524. return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
  525. }
  526. /* Will not be called in the full offload case, since the TX queues are
  527. * attached to the Qdisc created using qdisc_create_dflt()
  528. */
  529. static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
  530. struct sk_buff **to_free)
  531. {
  532. struct taprio_sched *q = qdisc_priv(sch);
  533. struct Qdisc *child;
  534. int queue;
  535. queue = skb_get_queue_mapping(skb);
  536. child = q->qdiscs[queue];
  537. if (unlikely(!child))
  538. return qdisc_drop(skb, sch, to_free);
  539. if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) {
  540. /* Large packets might not be transmitted when the transmission
  541. * duration exceeds any configured interval. Therefore, segment
  542. * the skb into smaller chunks. Drivers with full offload are
  543. * expected to handle this in hardware.
  544. */
  545. if (skb_is_gso(skb))
  546. return taprio_enqueue_segmented(skb, sch, child,
  547. to_free);
  548. return qdisc_drop(skb, sch, to_free);
  549. }
  550. return taprio_enqueue_one(skb, sch, child, to_free);
  551. }
  552. static struct sk_buff *taprio_peek(struct Qdisc *sch)
  553. {
  554. WARN_ONCE(1, "taprio only supports operating as root qdisc, peek() not implemented");
  555. return NULL;
  556. }
  557. static void taprio_set_budgets(struct taprio_sched *q,
  558. struct sched_gate_list *sched,
  559. struct sched_entry *entry)
  560. {
  561. struct net_device *dev = qdisc_dev(q->root);
  562. int num_tc = netdev_get_num_tc(dev);
  563. int tc, budget;
  564. for (tc = 0; tc < num_tc; tc++) {
  565. /* Traffic classes which never close have infinite budget */
  566. if (entry->gate_duration[tc] == sched->cycle_time)
  567. budget = INT_MAX;
  568. else
  569. budget = div64_u64((u64)entry->gate_duration[tc] * PSEC_PER_NSEC,
  570. atomic64_read(&q->picos_per_byte));
  571. atomic_set(&entry->budget[tc], budget);
  572. }
  573. }
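/* Budget arithmetic, assuming 1 Gbps (8000 ps/byte): a gate_duration of 100 us for a TC
 * gives 100000 ns * 1000 / 8000 = 12500 bytes of budget for that entry, i.e. the amount
 * of data that physically fits in the open window. A TC that never closes gets INT_MAX
 * instead, so dequeue is never throttled for it.
 */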
  574. /* When an skb is sent, it consumes from the budget of all traffic classes */
  575. static int taprio_update_budgets(struct sched_entry *entry, size_t len,
  576. int tc_consumed, int num_tc)
  577. {
  578. int tc, budget, new_budget = 0;
  579. for (tc = 0; tc < num_tc; tc++) {
  580. budget = atomic_read(&entry->budget[tc]);
  581. /* Don't consume from infinite budget */
  582. if (budget == INT_MAX) {
  583. if (tc == tc_consumed)
  584. new_budget = budget;
  585. continue;
  586. }
  587. if (tc == tc_consumed)
  588. new_budget = atomic_sub_return(len, &entry->budget[tc]);
  589. else
  590. atomic_sub(len, &entry->budget[tc]);
  591. }
  592. return new_budget;
  593. }
  594. static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq,
  595. struct sched_entry *entry,
  596. u32 gate_mask)
  597. {
  598. struct taprio_sched *q = qdisc_priv(sch);
  599. struct net_device *dev = qdisc_dev(sch);
  600. struct Qdisc *child = q->qdiscs[txq];
  601. int num_tc = netdev_get_num_tc(dev);
  602. struct sk_buff *skb;
  603. ktime_t guard;
  604. int prio;
  605. int len;
  606. u8 tc;
  607. if (unlikely(!child))
  608. return NULL;
  609. if (TXTIME_ASSIST_IS_ENABLED(q->flags))
  610. goto skip_peek_checks;
  611. skb = child->ops->peek(child);
  612. if (!skb)
  613. return NULL;
  614. prio = skb->priority;
  615. tc = netdev_get_prio_tc_map(dev, prio);
  616. if (!(gate_mask & BIT(tc)))
  617. return NULL;
  618. len = qdisc_pkt_len(skb);
  619. guard = ktime_add_ns(taprio_get_time(q), length_to_duration(q, len));
  620. /* In the case that there's no gate entry, there's no
  621. * guard band ...
  622. */
  623. if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
  624. !taprio_entry_allows_tx(guard, entry, tc))
  625. return NULL;
  626. /* ... and no budget. */
  627. if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
  628. taprio_update_budgets(entry, len, tc, num_tc) < 0)
  629. return NULL;
  630. skip_peek_checks:
  631. skb = child->ops->dequeue(child);
  632. if (unlikely(!skb))
  633. return NULL;
  634. qdisc_bstats_update(sch, skb);
  635. qdisc_qstats_backlog_dec(sch, skb);
  636. sch->q.qlen--;
  637. return skb;
  638. }
  639. static void taprio_next_tc_txq(struct net_device *dev, int tc, int *txq)
  640. {
  641. int offset = dev->tc_to_txq[tc].offset;
  642. int count = dev->tc_to_txq[tc].count;
  643. (*txq)++;
  644. if (*txq == offset + count)
  645. *txq = offset;
  646. }
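/* Example: for tc_to_txq[tc] = { .offset = 2, .count = 3 } the TXQ index advances
 * 2 -> 3 -> 4 -> 2, wrapping back to the offset once it walks past the last queue of
 * the traffic class. This provides the round-robin selection used by
 * taprio_dequeue_tc_priority().
 */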
  647. /* Prioritize higher traffic classes, and select among TXQs belonging to the
  648. * same TC using round robin
  649. */
  650. static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch,
  651. struct sched_entry *entry,
  652. u32 gate_mask)
  653. {
  654. struct taprio_sched *q = qdisc_priv(sch);
  655. struct net_device *dev = qdisc_dev(sch);
  656. int num_tc = netdev_get_num_tc(dev);
  657. struct sk_buff *skb;
  658. int tc;
  659. for (tc = num_tc - 1; tc >= 0; tc--) {
  660. int first_txq = q->cur_txq[tc];
  661. if (!(gate_mask & BIT(tc)))
  662. continue;
  663. do {
  664. skb = taprio_dequeue_from_txq(sch, q->cur_txq[tc],
  665. entry, gate_mask);
  666. taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]);
  667. if (q->cur_txq[tc] >= dev->num_tx_queues)
  668. q->cur_txq[tc] = first_txq;
  669. if (skb)
  670. return skb;
  671. } while (q->cur_txq[tc] != first_txq);
  672. }
  673. return NULL;
  674. }
  675. /* Broken way of prioritizing smaller TXQ indices and ignoring the traffic
  676. * class other than to determine whether the gate is open or not
  677. */
  678. static struct sk_buff *taprio_dequeue_txq_priority(struct Qdisc *sch,
  679. struct sched_entry *entry,
  680. u32 gate_mask)
  681. {
  682. struct net_device *dev = qdisc_dev(sch);
  683. struct sk_buff *skb;
  684. int i;
  685. for (i = 0; i < dev->num_tx_queues; i++) {
  686. skb = taprio_dequeue_from_txq(sch, i, entry, gate_mask);
  687. if (skb)
  688. return skb;
  689. }
  690. return NULL;
  691. }
  692. /* Will not be called in the full offload case, since the TX queues are
  693. * attached to the Qdisc created using qdisc_create_dflt()
  694. */
  695. static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
  696. {
  697. struct taprio_sched *q = qdisc_priv(sch);
  698. struct sk_buff *skb = NULL;
  699. struct sched_entry *entry;
  700. u32 gate_mask;
  701. rcu_read_lock();
  702. entry = rcu_dereference(q->current_entry);
  703. /* if there's no entry, it means that the schedule didn't
  704. * start yet, so force all gates to be open, this is in
  705. * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5
  706. * "AdminGateStates"
  707. */
  708. gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
  709. if (!gate_mask)
  710. goto done;
  711. if (static_branch_unlikely(&taprio_have_broken_mqprio) &&
  712. !static_branch_likely(&taprio_have_working_mqprio)) {
  713. /* Single NIC kind which is broken */
  714. skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
  715. } else if (static_branch_likely(&taprio_have_working_mqprio) &&
  716. !static_branch_unlikely(&taprio_have_broken_mqprio)) {
  717. /* Single NIC kind which prioritizes properly */
  718. skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
  719. } else {
  720. /* Mixed NIC kinds present in system, need dynamic testing */
  721. if (q->broken_mqprio)
  722. skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
  723. else
  724. skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
  725. }
  726. done:
  727. rcu_read_unlock();
  728. return skb;
  729. }
  730. static bool should_restart_cycle(const struct sched_gate_list *oper,
  731. const struct sched_entry *entry)
  732. {
  733. if (list_is_last(&entry->list, &oper->entries))
  734. return true;
  735. if (ktime_compare(entry->end_time, oper->cycle_end_time) == 0)
  736. return true;
  737. return false;
  738. }
  739. static bool should_change_schedules(const struct sched_gate_list *admin,
  740. const struct sched_gate_list *oper,
  741. ktime_t end_time)
  742. {
  743. ktime_t next_base_time, extension_time;
  744. if (!admin)
  745. return false;
  746. next_base_time = sched_base_time(admin);
  747. /* This is the simple case, the end_time would fall after
  748. * the next schedule base_time.
  749. */
  750. if (ktime_compare(next_base_time, end_time) <= 0)
  751. return true;
  752. /* This is the cycle_time_extension case, if the end_time
  753. * plus the amount that can be extended would fall after the
  754. * next schedule base_time, we can extend the current schedule
  755. * for that amount.
  756. */
  757. extension_time = ktime_add_ns(end_time, oper->cycle_time_extension);
  758. /* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
  759. * how precisely the extension should be made. So after
  760. * conformance testing, this logic may change.
  761. */
  762. if (ktime_compare(next_base_time, extension_time) <= 0)
  763. return true;
  764. return false;
  765. }
  766. static enum hrtimer_restart advance_sched(struct hrtimer *timer)
  767. {
  768. struct taprio_sched *q = container_of(timer, struct taprio_sched,
  769. advance_timer);
  770. struct net_device *dev = qdisc_dev(q->root);
  771. struct sched_gate_list *oper, *admin;
  772. int num_tc = netdev_get_num_tc(dev);
  773. struct sched_entry *entry, *next;
  774. struct Qdisc *sch = q->root;
  775. ktime_t end_time;
  776. int tc;
  777. spin_lock(&q->current_entry_lock);
  778. entry = rcu_dereference_protected(q->current_entry,
  779. lockdep_is_held(&q->current_entry_lock));
  780. oper = rcu_dereference_protected(q->oper_sched,
  781. lockdep_is_held(&q->current_entry_lock));
  782. admin = rcu_dereference_protected(q->admin_sched,
  783. lockdep_is_held(&q->current_entry_lock));
  784. if (!oper)
  785. switch_schedules(q, &admin, &oper);
  786. /* This can happen in two cases: 1. this is the very first run
  787. * of this function (i.e. we weren't running any schedule
  788. * previously); 2. The previous schedule just ended. The first
  789. * entry of all schedules are pre-calculated during the
  790. * schedule initialization.
  791. */
  792. if (unlikely(!entry || entry->end_time == oper->base_time)) {
  793. next = list_first_entry(&oper->entries, struct sched_entry,
  794. list);
  795. end_time = next->end_time;
  796. goto first_run;
  797. }
  798. if (should_restart_cycle(oper, entry)) {
  799. next = list_first_entry(&oper->entries, struct sched_entry,
  800. list);
  801. oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time,
  802. oper->cycle_time);
  803. } else {
  804. next = list_next_entry(entry, list);
  805. }
  806. end_time = ktime_add_ns(entry->end_time, next->interval);
  807. end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
  808. for (tc = 0; tc < num_tc; tc++) {
  809. if (next->gate_duration[tc] == oper->cycle_time)
  810. next->gate_close_time[tc] = KTIME_MAX;
  811. else
  812. next->gate_close_time[tc] = ktime_add_ns(entry->end_time,
  813. next->gate_duration[tc]);
  814. }
  815. if (should_change_schedules(admin, oper, end_time)) {
  816. /* Set things so the next time this runs, the new
  817. * schedule runs.
  818. */
  819. end_time = sched_base_time(admin);
  820. switch_schedules(q, &admin, &oper);
  821. }
  822. next->end_time = end_time;
  823. taprio_set_budgets(q, oper, next);
  824. first_run:
  825. rcu_assign_pointer(q->current_entry, next);
  826. spin_unlock(&q->current_entry_lock);
  827. hrtimer_set_expires(&q->advance_timer, end_time);
  828. rcu_read_lock();
  829. __netif_schedule(sch);
  830. rcu_read_unlock();
  831. return HRTIMER_RESTART;
  832. }
  833. static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
  834. [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 },
  835. [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 },
  836. [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
  837. [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
  838. };
  839. static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
  840. [TCA_TAPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32,
  841. TC_QOPT_MAX_QUEUE),
  842. [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 },
  843. [TCA_TAPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
  844. TC_FP_EXPRESS,
  845. TC_FP_PREEMPTIBLE),
  846. };
  847. static const struct netlink_range_validation_signed taprio_cycle_time_range = {
  848. .min = 0,
  849. .max = INT_MAX,
  850. };
  851. static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
  852. [TCA_TAPRIO_ATTR_PRIOMAP] = {
  853. .len = sizeof(struct tc_mqprio_qopt)
  854. },
  855. [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED },
  856. [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
  857. [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
  858. [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
  859. [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
  860. NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
  861. [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
  862. [TCA_TAPRIO_ATTR_FLAGS] =
  863. NLA_POLICY_MASK(NLA_U32, TAPRIO_SUPPORTED_FLAGS),
  864. [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
  865. [TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED },
  866. };
  867. static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
  868. struct sched_entry *entry,
  869. struct netlink_ext_ack *extack)
  870. {
  871. int min_duration = length_to_duration(q, ETH_ZLEN);
  872. u32 interval = 0;
  873. if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
  874. entry->command = nla_get_u8(
  875. tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
  876. if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
  877. entry->gate_mask = nla_get_u32(
  878. tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
  879. if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
  880. interval = nla_get_u32(
  881. tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
  882. /* The interval should allow at least the minimum ethernet
  883. * frame to go out.
  884. */
  885. if (interval < min_duration) {
  886. NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
  887. return -EINVAL;
  888. }
  889. entry->interval = interval;
  890. return 0;
  891. }
  892. static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n,
  893. struct sched_entry *entry, int index,
  894. struct netlink_ext_ack *extack)
  895. {
  896. struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
  897. int err;
  898. err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
  899. entry_policy, NULL);
  900. if (err < 0) {
  901. NL_SET_ERR_MSG(extack, "Could not parse nested entry");
  902. return -EINVAL;
  903. }
  904. entry->index = index;
  905. return fill_sched_entry(q, tb, entry, extack);
  906. }
  907. static int parse_sched_list(struct taprio_sched *q, struct nlattr *list,
  908. struct sched_gate_list *sched,
  909. struct netlink_ext_ack *extack)
  910. {
  911. struct nlattr *n;
  912. int err, rem;
  913. int i = 0;
  914. if (!list)
  915. return -EINVAL;
  916. nla_for_each_nested(n, list, rem) {
  917. struct sched_entry *entry;
  918. if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
  919. NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
  920. continue;
  921. }
  922. entry = kzalloc(sizeof(*entry), GFP_KERNEL);
  923. if (!entry) {
  924. NL_SET_ERR_MSG(extack, "Not enough memory for entry");
  925. return -ENOMEM;
  926. }
  927. err = parse_sched_entry(q, n, entry, i, extack);
  928. if (err < 0) {
  929. kfree(entry);
  930. return err;
  931. }
  932. list_add_tail(&entry->list, &sched->entries);
  933. i++;
  934. }
  935. sched->num_entries = i;
  936. return i;
  937. }
  938. static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
  939. struct sched_gate_list *new,
  940. struct netlink_ext_ack *extack)
  941. {
  942. int err = 0;
  943. if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
  944. NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
  945. return -ENOTSUPP;
  946. }
  947. if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
  948. new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
  949. if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
  950. new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);
  951. if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
  952. new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
  953. if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
  954. err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST],
  955. new, extack);
  956. if (err < 0)
  957. return err;
  958. if (!new->cycle_time) {
  959. struct sched_entry *entry;
  960. ktime_t cycle = 0;
  961. list_for_each_entry(entry, &new->entries, list)
  962. cycle = ktime_add_ns(cycle, entry->interval);
  963. if (cycle < 0 || cycle > INT_MAX) {
  964. NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
  965. return -EINVAL;
  966. }
  967. new->cycle_time = cycle;
  968. }
  969. if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) {
  970. NL_SET_ERR_MSG(extack, "'cycle_time' is too small");
  971. return -EINVAL;
  972. }
  973. taprio_calculate_gate_durations(q, new);
  974. return 0;
  975. }
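/* Example of the implicit cycle time computed above: a schedule with three entries of
 * 300 us, 300 us and 400 us and no explicit TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME ends up
 * with cycle_time = 1 ms, the sum of the intervals. An explicit cycle_time shorter than
 * num_entries times the minimum Ethernet frame duration is rejected as too small.
 */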
  976. static int taprio_parse_mqprio_opt(struct net_device *dev,
  977. struct tc_mqprio_qopt *qopt,
  978. struct netlink_ext_ack *extack,
  979. u32 taprio_flags)
  980. {
  981. bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
  982. if (!qopt) {
  983. if (!dev->num_tc) {
  984. NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
  985. return -EINVAL;
  986. }
  987. return 0;
  988. }
  989. /* taprio imposes that traffic classes map 1:n to tx queues */
  990. if (qopt->num_tc > dev->num_tx_queues) {
  991. NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
  992. return -EINVAL;
  993. }
  994. /* For some reason, in txtime-assist mode, we allow TXQ ranges for
  995. * different TCs to overlap, and just validate the TXQ ranges.
  996. */
  997. return mqprio_validate_qopt(dev, qopt, true, allow_overlapping_txqs,
  998. extack);
  999. }
  1000. static int taprio_get_start_time(struct Qdisc *sch,
  1001. struct sched_gate_list *sched,
  1002. ktime_t *start)
  1003. {
  1004. struct taprio_sched *q = qdisc_priv(sch);
  1005. ktime_t now, base, cycle;
  1006. s64 n;
  1007. base = sched_base_time(sched);
  1008. now = taprio_get_time(q);
  1009. if (ktime_after(base, now)) {
  1010. *start = base;
  1011. return 0;
  1012. }
  1013. cycle = sched->cycle_time;
  1014. /* The qdisc is expected to have at least one sched_entry. Moreover,
  1015. * any entry must have 'interval' > 0. Thus if the cycle time is zero,
  1016. * something went really wrong. In that case, we should warn about this
  1017. * inconsistent state and return error.
  1018. */
  1019. if (WARN_ON(!cycle))
  1020. return -EFAULT;
  1021. /* Schedule the start time for the beginning of the next
  1022. * cycle.
  1023. */
  1024. n = div64_s64(ktime_sub_ns(now, base), cycle);
  1025. *start = ktime_add_ns(base, (n + 1) * cycle);
  1026. return 0;
  1027. }
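/* Example of the alignment above, with hypothetical numbers: base = 0, cycle_time = 1 ms
 * and now = 2.3 ms give n = 2, so the schedule is (re)started at 3 ms, the beginning of
 * the next full cycle, rather than somewhere in the middle of one.
 */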
  1028. static void setup_first_end_time(struct taprio_sched *q,
  1029. struct sched_gate_list *sched, ktime_t base)
  1030. {
  1031. struct net_device *dev = qdisc_dev(q->root);
  1032. int num_tc = netdev_get_num_tc(dev);
  1033. struct sched_entry *first;
  1034. ktime_t cycle;
  1035. int tc;
  1036. first = list_first_entry(&sched->entries,
  1037. struct sched_entry, list);
  1038. cycle = sched->cycle_time;
  1039. /* FIXME: find a better place to do this */
  1040. sched->cycle_end_time = ktime_add_ns(base, cycle);
  1041. first->end_time = ktime_add_ns(base, first->interval);
  1042. taprio_set_budgets(q, sched, first);
  1043. for (tc = 0; tc < num_tc; tc++) {
  1044. if (first->gate_duration[tc] == sched->cycle_time)
  1045. first->gate_close_time[tc] = KTIME_MAX;
  1046. else
  1047. first->gate_close_time[tc] = ktime_add_ns(base, first->gate_duration[tc]);
  1048. }
  1049. rcu_assign_pointer(q->current_entry, NULL);
  1050. }
  1051. static void taprio_start_sched(struct Qdisc *sch,
  1052. ktime_t start, struct sched_gate_list *new)
  1053. {
  1054. struct taprio_sched *q = qdisc_priv(sch);
  1055. ktime_t expires;
  1056. if (FULL_OFFLOAD_IS_ENABLED(q->flags))
  1057. return;
  1058. expires = hrtimer_get_expires(&q->advance_timer);
  1059. if (expires == 0)
  1060. expires = KTIME_MAX;
  1061. /* If the new schedule starts before the next expiration, we
  1062. * reprogram it to the earliest one, so we change the admin
  1063. * schedule to the operational one at the right time.
  1064. */
  1065. start = min_t(ktime_t, start, expires);
  1066. hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
  1067. }
  1068. static void taprio_set_picos_per_byte(struct net_device *dev,
  1069. struct taprio_sched *q,
  1070. struct netlink_ext_ack *extack)
  1071. {
  1072. struct ethtool_link_ksettings ecmd;
  1073. int speed = SPEED_10;
  1074. int picos_per_byte;
  1075. int err;
  1076. err = __ethtool_get_link_ksettings(dev, &ecmd);
  1077. if (err < 0)
  1078. goto skip;
  1079. if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
  1080. speed = ecmd.base.speed;
  1081. skip:
  1082. picos_per_byte = (USEC_PER_SEC * 8) / speed;
  1083. if (picos_per_byte < TAPRIO_PICOS_PER_BYTE_MIN) {
  1084. if (!extack)
  1085. pr_warn("Link speed %d is too high. Schedule may be inaccurate.\n",
  1086. speed);
  1087. NL_SET_ERR_MSG_FMT_MOD(extack,
  1088. "Link speed %d is too high. Schedule may be inaccurate.",
  1089. speed);
  1090. picos_per_byte = TAPRIO_PICOS_PER_BYTE_MIN;
  1091. }
  1092. atomic64_set(&q->picos_per_byte, picos_per_byte);
  1093. netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
  1094. dev->name, (long long)atomic64_read(&q->picos_per_byte),
  1095. ecmd.base.speed);
  1096. }
  1097. static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
  1098. void *ptr)
  1099. {
  1100. struct net_device *dev = netdev_notifier_info_to_dev(ptr);
  1101. struct sched_gate_list *oper, *admin;
  1102. struct qdisc_size_table *stab;
  1103. struct taprio_sched *q;
  1104. ASSERT_RTNL();
  1105. if (event != NETDEV_UP && event != NETDEV_CHANGE)
  1106. return NOTIFY_DONE;
  1107. list_for_each_entry(q, &taprio_list, taprio_list) {
  1108. if (dev != qdisc_dev(q->root))
  1109. continue;
  1110. taprio_set_picos_per_byte(dev, q, NULL);
  1111. stab = rtnl_dereference(q->root->stab);
  1112. rcu_read_lock();
  1113. oper = rcu_dereference(q->oper_sched);
  1114. if (oper)
  1115. taprio_update_queue_max_sdu(q, oper, stab);
  1116. admin = rcu_dereference(q->admin_sched);
  1117. if (admin)
  1118. taprio_update_queue_max_sdu(q, admin, stab);
  1119. rcu_read_unlock();
  1120. break;
  1121. }
  1122. return NOTIFY_DONE;
  1123. }
  1124. static void setup_txtime(struct taprio_sched *q,
  1125. struct sched_gate_list *sched, ktime_t base)
  1126. {
  1127. struct sched_entry *entry;
  1128. u64 interval = 0;
  1129. list_for_each_entry(entry, &sched->entries, list) {
  1130. entry->next_txtime = ktime_add_ns(base, interval);
  1131. interval += entry->interval;
  1132. }
  1133. }
  1134. static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
  1135. {
  1136. struct __tc_taprio_qopt_offload *__offload;
  1137. __offload = kzalloc(struct_size(__offload, offload.entries, num_entries),
  1138. GFP_KERNEL);
  1139. if (!__offload)
  1140. return NULL;
  1141. refcount_set(&__offload->users, 1);
  1142. return &__offload->offload;
  1143. }
  1144. struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
  1145. *offload)
  1146. {
  1147. struct __tc_taprio_qopt_offload *__offload;
  1148. __offload = container_of(offload, struct __tc_taprio_qopt_offload,
  1149. offload);
  1150. refcount_inc(&__offload->users);
  1151. return offload;
  1152. }
  1153. EXPORT_SYMBOL_GPL(taprio_offload_get);
  1154. void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
  1155. {
  1156. struct __tc_taprio_qopt_offload *__offload;
  1157. __offload = container_of(offload, struct __tc_taprio_qopt_offload,
  1158. offload);
  1159. if (!refcount_dec_and_test(&__offload->users))
  1160. return;
  1161. kfree(__offload);
  1162. }
  1163. EXPORT_SYMBOL_GPL(taprio_offload_free);
1164. /* This function only serves to keep the pointers to the "oper" and "admin"
1165. * schedules valid in relation to their base times, so that when calling
1166. * dump() the user looks at the right schedules.
  1167. * When using full offload, the admin configuration is promoted to oper at the
  1168. * base_time in the PHC time domain. But because the system time is not
  1169. * necessarily in sync with that, we can't just trigger a hrtimer to call
  1170. * switch_schedules at the right hardware time.
  1171. * At the moment we call this by hand right away from taprio, but in the future
  1172. * it will be useful to create a mechanism for drivers to notify taprio of the
  1173. * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
  1174. * This is left as TODO.
  1175. */
  1176. static void taprio_offload_config_changed(struct taprio_sched *q)
  1177. {
  1178. struct sched_gate_list *oper, *admin;
  1179. oper = rtnl_dereference(q->oper_sched);
  1180. admin = rtnl_dereference(q->admin_sched);
  1181. switch_schedules(q, &admin, &oper);
  1182. }
  1183. static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
  1184. {
  1185. u32 i, queue_mask = 0;
  1186. for (i = 0; i < dev->num_tc; i++) {
  1187. u32 offset, count;
  1188. if (!(tc_mask & BIT(i)))
  1189. continue;
  1190. offset = dev->tc_to_txq[i].offset;
  1191. count = dev->tc_to_txq[i].count;
  1192. queue_mask |= GENMASK(offset + count - 1, offset);
  1193. }
  1194. return queue_mask;
  1195. }
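/* Example: with TC0 mapped to TXQs {0, 1} and TC1 mapped to TXQs {2, 3}, a tc_mask of
 * BIT(1) becomes GENMASK(3, 2) = 0xc. This per-TXQ form of the gate mask is what
 * taprio_sched_to_offload() hands to drivers that advertise gate_mask_per_txq in their
 * capabilities; others receive the per-TC mask unchanged.
 */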
  1196. static void taprio_sched_to_offload(struct net_device *dev,
  1197. struct sched_gate_list *sched,
  1198. struct tc_taprio_qopt_offload *offload,
  1199. const struct tc_taprio_caps *caps)
  1200. {
  1201. struct sched_entry *entry;
  1202. int i = 0;
  1203. offload->base_time = sched->base_time;
  1204. offload->cycle_time = sched->cycle_time;
  1205. offload->cycle_time_extension = sched->cycle_time_extension;
  1206. list_for_each_entry(entry, &sched->entries, list) {
  1207. struct tc_taprio_sched_entry *e = &offload->entries[i];
  1208. e->command = entry->command;
  1209. e->interval = entry->interval;
  1210. if (caps->gate_mask_per_txq)
  1211. e->gate_mask = tc_map_to_queue_mask(dev,
  1212. entry->gate_mask);
  1213. else
  1214. e->gate_mask = entry->gate_mask;
  1215. i++;
  1216. }
  1217. offload->num_entries = i;
  1218. }
static void taprio_detect_broken_mqprio(struct taprio_sched *q)
{
	struct net_device *dev = qdisc_dev(q->root);
	struct tc_taprio_caps caps;

	qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
				 &caps, sizeof(caps));

	q->broken_mqprio = caps.broken_mqprio;
	if (q->broken_mqprio)
		static_branch_inc(&taprio_have_broken_mqprio);
	else
		static_branch_inc(&taprio_have_working_mqprio);

	q->detected_mqprio = true;
}

static void taprio_cleanup_broken_mqprio(struct taprio_sched *q)
{
	if (!q->detected_mqprio)
		return;

	if (q->broken_mqprio)
		static_branch_dec(&taprio_have_broken_mqprio);
	else
		static_branch_dec(&taprio_have_working_mqprio);
}
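
/* Push the new admin schedule to the device through ndo_setup_tc(), after
 * validating it against the capabilities the driver reports (per-TC max SDU,
 * gate mask granularity). The offload structure is refcounted, so the driver
 * may keep a reference past this call.
 */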
static int taprio_enable_offload(struct net_device *dev,
				 struct taprio_sched *q,
				 struct sched_gate_list *sched,
				 struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_taprio_qopt_offload *offload;
	struct tc_taprio_caps caps;
	int tc, err = 0;

	if (!ops->ndo_setup_tc) {
		NL_SET_ERR_MSG(extack,
			       "Device does not support taprio offload");
		return -EOPNOTSUPP;
	}

	qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
				 &caps, sizeof(caps));

	if (!caps.supports_queue_max_sdu) {
		for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
			if (q->max_sdu[tc]) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Device does not handle queueMaxSDU");
				return -EOPNOTSUPP;
			}
		}
	}

	offload = taprio_offload_alloc(sched->num_entries);
	if (!offload) {
		NL_SET_ERR_MSG(extack,
			       "Not enough memory for enabling offload mode");
		return -ENOMEM;
	}
	offload->cmd = TAPRIO_CMD_REPLACE;
	offload->extack = extack;
	mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
	offload->mqprio.extack = extack;
	taprio_sched_to_offload(dev, sched, offload, &caps);
	mqprio_fp_to_offload(q->fp, &offload->mqprio);

	for (tc = 0; tc < TC_MAX_QUEUE; tc++)
		offload->max_sdu[tc] = q->max_sdu[tc];

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
	if (err < 0) {
		NL_SET_ERR_MSG_WEAK(extack,
				    "Device failed to setup taprio offload");
		goto done;
	}

	q->offloaded = true;

done:
	/* The offload structure may linger around via a reference taken by the
	 * device driver, so clear up the netlink extack pointer so that the
	 * driver isn't tempted to dereference data which stopped being valid
	 */
	offload->extack = NULL;
	offload->mqprio.extack = NULL;
	taprio_offload_free(offload);

	return err;
}
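
/* Tear down a previously installed hardware schedule by issuing
 * TAPRIO_CMD_DESTROY to the driver. A no-op if offload was never enabled.
 */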
static int taprio_disable_offload(struct net_device *dev,
				  struct taprio_sched *q,
				  struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_taprio_qopt_offload *offload;
	int err;

	if (!q->offloaded)
		return 0;

	offload = taprio_offload_alloc(0);
	if (!offload) {
		NL_SET_ERR_MSG(extack,
			       "Not enough memory to disable offload mode");
		return -ENOMEM;
	}
	offload->cmd = TAPRIO_CMD_DESTROY;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
	if (err < 0) {
		NL_SET_ERR_MSG(extack,
			       "Device failed to disable offload");
		goto out;
	}

	q->offloaded = false;

out:
	taprio_offload_free(offload);

	return err;
}

/* If full offload is enabled, the only possible clockid is the net device's
 * PHC. For that reason, specifying a clockid through netlink is incorrect.
 * For txtime-assist, it is implicitly assumed that the device's PHC is kept
 * in sync with the specified clockid via a user space daemon such as phc2sys.
 * For both software taprio and txtime-assist, the clockid is used for the
 * hrtimer that advances the schedule and hence is mandatory.
 */
static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int err = -EINVAL;

	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
		const struct ethtool_ops *ops = dev->ethtool_ops;
		struct kernel_ethtool_ts_info info = {
			.cmd = ETHTOOL_GET_TS_INFO,
			.phc_index = -1,
		};

		if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
			NL_SET_ERR_MSG(extack,
				       "The 'clockid' cannot be specified for full offload");
			goto out;
		}

		if (ops && ops->get_ts_info)
			err = ops->get_ts_info(dev, &info);

		if (err || info.phc_index < 0) {
			NL_SET_ERR_MSG(extack,
				       "Device does not have a PTP clock");
			err = -ENOTSUPP;
			goto out;
		}
	} else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
		int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
		enum tk_offsets tk_offset;

		/* We only support static clockids and we don't allow
		 * for it to be modified after the first init.
		 */
		if (clockid < 0 ||
		    (q->clockid != -1 && q->clockid != clockid)) {
			NL_SET_ERR_MSG(extack,
				       "Changing the 'clockid' of a running schedule is not supported");
			err = -ENOTSUPP;
			goto out;
		}

		switch (clockid) {
		case CLOCK_REALTIME:
			tk_offset = TK_OFFS_REAL;
			break;
		case CLOCK_MONOTONIC:
			tk_offset = TK_OFFS_MAX;
			break;
		case CLOCK_BOOTTIME:
			tk_offset = TK_OFFS_BOOT;
			break;
		case CLOCK_TAI:
			tk_offset = TK_OFFS_TAI;
			break;
		default:
			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
			err = -EINVAL;
			goto out;
		}
		/* This pairs with READ_ONCE() in taprio_mono_to_any */
		WRITE_ONCE(q->tk_offset, tk_offset);

		q->clockid = clockid;
	} else {
		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
		goto out;
	}

	/* Everything went ok, return success. */
	err = 0;

out:
	return err;
}
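
/* Parse a single TCA_TAPRIO_ATTR_TC_ENTRY nest: the traffic class index plus
 * its optional max SDU and frame preemption (express/preemptible) settings.
 */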
static int taprio_parse_tc_entry(struct Qdisc *sch,
				 struct nlattr *opt,
				 u32 max_sdu[TC_QOPT_MAX_QUEUE],
				 u32 fp[TC_QOPT_MAX_QUEUE],
				 unsigned long *seen_tcs,
				 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
	struct net_device *dev = qdisc_dev(sch);
	int err, tc;
	u32 val;

	err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
			       taprio_tc_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) {
		NL_SET_ERR_MSG_MOD(extack, "TC entry index missing");
		return -EINVAL;
	}

	tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]);
	if (tc >= TC_QOPT_MAX_QUEUE) {
		NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range");
		return -ERANGE;
	}

	if (*seen_tcs & BIT(tc)) {
		NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry");
		return -EINVAL;
	}

	*seen_tcs |= BIT(tc);

	if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) {
		val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
		if (val > dev->max_mtu) {
			NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
			return -ERANGE;
		}

		max_sdu[tc] = val;
	}

	if (tb[TCA_TAPRIO_TC_ENTRY_FP])
		fp[tc] = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_FP]);

	return 0;
}

static int taprio_parse_tc_entries(struct Qdisc *sch,
				   struct nlattr *opt,
				   struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	u32 max_sdu[TC_QOPT_MAX_QUEUE];
	bool have_preemption = false;
	unsigned long seen_tcs = 0;
	u32 fp[TC_QOPT_MAX_QUEUE];
	struct nlattr *n;
	int tc, rem;
	int err = 0;

	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
		max_sdu[tc] = q->max_sdu[tc];
		fp[tc] = q->fp[tc];
	}

	nla_for_each_nested_type(n, TCA_TAPRIO_ATTR_TC_ENTRY, opt, rem) {
		err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs,
					    extack);
		if (err)
			return err;
	}

	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
		q->max_sdu[tc] = max_sdu[tc];
		q->fp[tc] = fp[tc];
		if (fp[tc] != TC_FP_EXPRESS)
			have_preemption = true;
	}

	if (have_preemption) {
		if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) {
			NL_SET_ERR_MSG(extack,
				       "Preemption only supported with full offload");
			return -EOPNOTSUPP;
		}

		if (!ethtool_dev_mm_supported(dev)) {
			NL_SET_ERR_MSG(extack,
				       "Device does not support preemption");
			return -EOPNOTSUPP;
		}
	}

	return err;
}
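
/* Return 0 if the given mqprio configuration is identical to what is already
 * programmed on the device (same number of TCs, queue ranges and prio->tc
 * map), or -1 otherwise.
 */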
static int taprio_mqprio_cmp(const struct net_device *dev,
			     const struct tc_mqprio_qopt *mqprio)
{
	int i;

	if (!mqprio || mqprio->num_tc != dev->num_tc)
		return -1;

	for (i = 0; i < mqprio->num_tc; i++)
		if (dev->tc_to_txq[i].count != mqprio->count[i] ||
		    dev->tc_to_txq[i].offset != mqprio->offset[i])
			return -1;

	for (i = 0; i <= TC_BITMASK; i++)
		if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
			return -1;

	return 0;
}
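
/* Main configuration path, used both for the initial install and for later
 * "tc qdisc change" requests. Parses the netlink attributes, builds the new
 * admin schedule and either programs it into hardware (full offload), hands
 * it to the txtime-assist logic, or arms the software advance hrtimer.
 *
 * An illustrative (software mode) invocation looks roughly like:
 *
 *   tc qdisc replace dev eth0 parent root handle 100 taprio \
 *     num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \
 *     queues 1@0 1@1 2@2 \
 *     base-time 1528743495910289987 \
 *     sched-entry S 01 300000 \
 *     sched-entry S 02 300000 \
 *     sched-entry S 04 400000 \
 *     clockid CLOCK_TAI
 */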
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
			 struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *stab = rtnl_dereference(sch->stab);
	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
	struct sched_gate_list *oper, *admin, *new_admin;
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_mqprio_qopt *mqprio = NULL;
	unsigned long flags;
	u32 taprio_flags;
	ktime_t start;
	int i, err;

	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
					  taprio_policy, extack);
	if (err < 0)
		return err;

	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);

	/* The semantics of the 'flags' argument in relation to 'change()'
	 * requests are interpreted following two rules (which are applied in
	 * this order): (1) an omitted 'flags' argument is interpreted as
	 * zero; (2) the 'flags' of a "running" taprio instance cannot be
	 * changed.
	 */
	taprio_flags = tb[TCA_TAPRIO_ATTR_FLAGS] ? nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]) : 0;

	/* txtime-assist and full offload are mutually exclusive */
	if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
	    (taprio_flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) {
		NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_ATTR_FLAGS],
				    "TXTIME_ASSIST and FULL_OFFLOAD are mutually exclusive");
		return -EINVAL;
	}

	if (q->flags != TAPRIO_FLAGS_INVALID && q->flags != taprio_flags) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Changing 'flags' of a running schedule is not supported");
		return -EOPNOTSUPP;
	}
	q->flags = taprio_flags;

	/* Needed for length_to_duration() during netlink attribute parsing */
	taprio_set_picos_per_byte(dev, q, extack);

	err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
	if (err < 0)
		return err;

	err = taprio_parse_tc_entries(sch, opt, extack);
	if (err)
		return err;

	new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
	if (!new_admin) {
		NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
		return -ENOMEM;
	}
	INIT_LIST_HEAD(&new_admin->entries);

	oper = rtnl_dereference(q->oper_sched);
	admin = rtnl_dereference(q->admin_sched);

	/* no changes - no new mqprio settings */
	if (!taprio_mqprio_cmp(dev, mqprio))
		mqprio = NULL;

	if (mqprio && (oper || admin)) {
		NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
		err = -ENOTSUPP;
		goto free_sched;
	}

	if (mqprio) {
		err = netdev_set_num_tc(dev, mqprio->num_tc);
		if (err)
			goto free_sched;
		for (i = 0; i < mqprio->num_tc; i++) {
			netdev_set_tc_queue(dev, i,
					    mqprio->count[i],
					    mqprio->offset[i]);
			q->cur_txq[i] = mqprio->offset[i];
		}

		/* Always use supplied priority mappings */
		for (i = 0; i <= TC_BITMASK; i++)
			netdev_set_prio_tc_map(dev, i,
					       mqprio->prio_tc_map[i]);
	}

	err = parse_taprio_schedule(q, tb, new_admin, extack);
	if (err < 0)
		goto free_sched;

	if (new_admin->num_entries == 0) {
		NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
		err = -EINVAL;
		goto free_sched;
	}

	err = taprio_parse_clockid(sch, tb, extack);
	if (err < 0)
		goto free_sched;

	taprio_update_queue_max_sdu(q, new_admin, stab);

	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
		err = taprio_enable_offload(dev, q, new_admin, extack);
	else
		err = taprio_disable_offload(dev, q, extack);
	if (err)
		goto free_sched;

	/* Protects against enqueue()/dequeue() */
	spin_lock_bh(qdisc_lock(sch));

	if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
		if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
			NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
			err = -EINVAL;
			goto unlock;
		}

		q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
	}

	if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
	    !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
	    !hrtimer_active(&q->advance_timer)) {
		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
		q->advance_timer.function = advance_sched;
	}

	err = taprio_get_start_time(sch, new_admin, &start);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
		goto unlock;
	}

	setup_txtime(q, new_admin, start);

	if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
		if (!oper) {
			rcu_assign_pointer(q->oper_sched, new_admin);
			err = 0;
			new_admin = NULL;
			goto unlock;
		}

		/* Not going to race against advance_sched(), but still */
		admin = rcu_replace_pointer(q->admin_sched, new_admin,
					    lockdep_rtnl_is_held());
		if (admin)
			call_rcu(&admin->rcu, taprio_free_sched_cb);
	} else {
		setup_first_end_time(q, new_admin, start);

		/* Protects against advance_sched() */
		spin_lock_irqsave(&q->current_entry_lock, flags);

		taprio_start_sched(sch, start, new_admin);

		admin = rcu_replace_pointer(q->admin_sched, new_admin,
					    lockdep_rtnl_is_held());
		if (admin)
			call_rcu(&admin->rcu, taprio_free_sched_cb);

		spin_unlock_irqrestore(&q->current_entry_lock, flags);

		if (FULL_OFFLOAD_IS_ENABLED(q->flags))
			taprio_offload_config_changed(q);
	}

	new_admin = NULL;
	err = 0;

	if (!stab)
		NL_SET_ERR_MSG_MOD(extack,
				   "Size table not specified, frame length estimations may be inaccurate");

unlock:
	spin_unlock_bh(qdisc_lock(sch));

free_sched:
	if (new_admin)
		call_rcu(&new_admin->rcu, taprio_free_sched_cb);

	return err;
}
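
/* Qdisc ->reset(): cancel the schedule advance timer and reset all per-queue
 * child qdiscs.
 */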
static void taprio_reset(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int i;

	hrtimer_cancel(&q->advance_timer);

	if (q->qdiscs) {
		for (i = 0; i < dev->num_tx_queues; i++)
			if (q->qdiscs[i])
				qdisc_reset(q->qdiscs[i]);
	}
}

static void taprio_destroy(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_gate_list *oper, *admin;
	unsigned int i;

	list_del(&q->taprio_list);

	/* Note that taprio_reset() might not be called if an error
	 * happens in qdisc_create(), after taprio_init() has been called.
	 */
	hrtimer_cancel(&q->advance_timer);
	qdisc_synchronize(sch);

	taprio_disable_offload(dev, q, NULL);

	if (q->qdiscs) {
		for (i = 0; i < dev->num_tx_queues; i++)
			qdisc_put(q->qdiscs[i]);

		kfree(q->qdiscs);
	}
	q->qdiscs = NULL;

	netdev_reset_tc(dev);

	oper = rtnl_dereference(q->oper_sched);
	admin = rtnl_dereference(q->admin_sched);

	if (oper)
		call_rcu(&oper->rcu, taprio_free_sched_cb);

	if (admin)
		call_rcu(&admin->rcu, taprio_free_sched_cb);

	taprio_cleanup_broken_mqprio(q);
}
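
/* Qdisc ->init(): allocate one pfifo child per TX queue, set safe defaults
 * (invalid clockid and flags, all traffic classes express) and then defer the
 * actual configuration to taprio_change().
 */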
static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int i, tc;

	spin_lock_init(&q->current_entry_lock);

	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
	q->advance_timer.function = advance_sched;

	q->root = sch;

	/* We only support static clockids. Use an invalid value as default
	 * and get the valid one on taprio_change().
	 */
	q->clockid = -1;
	q->flags = TAPRIO_FLAGS_INVALID;

	list_add(&q->taprio_list, &taprio_list);

	if (sch->parent != TC_H_ROOT) {
		NL_SET_ERR_MSG_MOD(extack, "Can only be attached as root qdisc");
		return -EOPNOTSUPP;
	}

	if (!netif_is_multiqueue(dev)) {
		NL_SET_ERR_MSG_MOD(extack, "Multi-queue device is required");
		return -EOPNOTSUPP;
	}

	q->qdiscs = kcalloc(dev->num_tx_queues, sizeof(q->qdiscs[0]),
			    GFP_KERNEL);
	if (!q->qdiscs)
		return -ENOMEM;

	if (!opt)
		return -EINVAL;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		struct Qdisc *qdisc;

		dev_queue = netdev_get_tx_queue(dev, i);
		qdisc = qdisc_create_dflt(dev_queue,
					  &pfifo_qdisc_ops,
					  TC_H_MAKE(TC_H_MAJ(sch->handle),
						    TC_H_MIN(i + 1)),
					  extack);
		if (!qdisc)
			return -ENOMEM;

		if (i < dev->real_num_tx_queues)
			qdisc_hash_add(qdisc, false);

		q->qdiscs[i] = qdisc;
	}

	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
		q->fp[tc] = TC_FP_EXPRESS;

	taprio_detect_broken_mqprio(q);

	return taprio_change(sch, opt, extack);
}

static void taprio_attach(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	unsigned int ntx;

	/* Attach underlying qdisc */
	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *old, *dev_queue_qdisc;

		if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
			struct Qdisc *qdisc = q->qdiscs[ntx];

			/* In offload mode, the root taprio qdisc is bypassed
			 * and the netdev TX queues see the children directly
			 */
			qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
			dev_queue_qdisc = qdisc;
		} else {
			/* In software mode, attach the root taprio qdisc
			 * to all netdev TX queues, so that dev_qdisc_enqueue()
			 * goes through taprio_enqueue().
			 */
			dev_queue_qdisc = sch;
		}
		old = dev_graft_qdisc(dev_queue, dev_queue_qdisc);
		/* The qdisc's refcount needs to be elevated once
		 * for each netdev TX queue it is grafted onto
		 */
		qdisc_refcount_inc(dev_queue_qdisc);
		if (old)
			qdisc_put(old);
	}
}

static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
					     unsigned long cl)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned long ntx = cl - 1;

	if (ntx >= dev->num_tx_queues)
		return NULL;

	return netdev_get_tx_queue(dev, ntx);
}

static int taprio_graft(struct Qdisc *sch, unsigned long cl,
			struct Qdisc *new, struct Qdisc **old,
			struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	if (!dev_queue)
		return -EINVAL;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	/* In offload mode, the child Qdisc is directly attached to the netdev
	 * TX queue, and thus, we need to keep its refcount elevated in order
	 * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue.
	 * However, save the reference to the new qdisc in the private array in
	 * both software and offload cases, to have an up-to-date reference to
	 * our children.
	 */
	*old = q->qdiscs[cl - 1];
	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
		WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old);
		if (new)
			qdisc_refcount_inc(new);
		if (*old)
			qdisc_put(*old);
	}

	q->qdiscs[cl - 1] = new;
	if (new)
		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return 0;
}
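
/* Serialize one gate control list entry as a nested TCA_TAPRIO_SCHED_ENTRY
 * attribute (index, command, gate mask and interval).
 */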
static int dump_entry(struct sk_buff *msg,
		      const struct sched_entry *entry)
{
	struct nlattr *item;

	item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY);
	if (!item)
		return -ENOSPC;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
		goto nla_put_failure;

	if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
		goto nla_put_failure;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
			entry->gate_mask))
		goto nla_put_failure;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
			entry->interval))
		goto nla_put_failure;

	return nla_nest_end(msg, item);

nla_put_failure:
	nla_nest_cancel(msg, item);
	return -1;
}

static int dump_schedule(struct sk_buff *msg,
			 const struct sched_gate_list *root)
{
	struct nlattr *entry_list;
	struct sched_entry *entry;

	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
			root->base_time, TCA_TAPRIO_PAD))
		return -1;

	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
			root->cycle_time, TCA_TAPRIO_PAD))
		return -1;

	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
			root->cycle_time_extension, TCA_TAPRIO_PAD))
		return -1;

	entry_list = nla_nest_start_noflag(msg,
					   TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
	if (!entry_list)
		goto error_nest;

	list_for_each_entry(entry, &root->entries, list) {
		if (dump_entry(msg, entry) < 0)
			goto error_nest;
	}

	nla_nest_end(msg, entry_list);
	return 0;

error_nest:
	nla_nest_cancel(msg, entry_list);
	return -1;
}

static int taprio_dump_tc_entries(struct sk_buff *skb,
				  struct taprio_sched *q,
				  struct sched_gate_list *sched)
{
	struct nlattr *n;
	int tc;

	for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
		n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY);
		if (!n)
			return -EMSGSIZE;

		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc))
			goto nla_put_failure;

		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU,
				sched->max_sdu[tc]))
			goto nla_put_failure;

		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_FP, q->fp[tc]))
			goto nla_put_failure;

		nla_nest_end(skb, n);
	}

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, n);
	return -EMSGSIZE;
}
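
/* Only emit an offload counter if the driver actually filled it in; the stats
 * buffer is pre-initialized to all-ones (TAPRIO_STAT_NOT_SET) before the
 * ndo_setup_tc() call.
 */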
static int taprio_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
{
	if (val == TAPRIO_STAT_NOT_SET)
		return 0;
	if (nla_put_u64_64bit(skb, attrtype, val, TCA_TAPRIO_OFFLOAD_STATS_PAD))
		return -EMSGSIZE;
	return 0;
}

static int taprio_dump_xstats(struct Qdisc *sch, struct gnet_dump *d,
			      struct tc_taprio_qopt_offload *offload,
			      struct tc_taprio_qopt_stats *stats)
{
	struct net_device *dev = qdisc_dev(sch);
	const struct net_device_ops *ops;
	struct sk_buff *skb = d->skb;
	struct nlattr *xstats;
	int err;

	ops = qdisc_dev(sch)->netdev_ops;

	/* FIXME I could use qdisc_offload_dump_helper(), but that messes
	 * with sch->flags depending on whether the device reports taprio
	 * stats, and I'm not sure whether that's a good idea, considering
	 * that stats are optional to the offload itself
	 */
	if (!ops->ndo_setup_tc)
		return 0;

	memset(stats, 0xff, sizeof(*stats));

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
	if (err == -EOPNOTSUPP)
		return 0;
	if (err)
		return err;

	xstats = nla_nest_start(skb, TCA_STATS_APP);
	if (!xstats)
		goto err;

	if (taprio_put_stat(skb, stats->window_drops,
			    TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS) ||
	    taprio_put_stat(skb, stats->tx_overruns,
			    TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS))
		goto err_cancel;

	nla_nest_end(skb, xstats);

	return 0;

err_cancel:
	nla_nest_cancel(skb, xstats);
err:
	return -EMSGSIZE;
}

static int taprio_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct tc_taprio_qopt_offload offload = {
		.cmd = TAPRIO_CMD_STATS,
	};

	return taprio_dump_xstats(sch, d, &offload, &offload.stats);
}
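
/* Qdisc ->dump(): report the priority map, flags, clockid, per-TC entries and
 * both the operational and (if any) admin schedules back to user space.
 */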
static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_gate_list *oper, *admin;
	struct tc_mqprio_qopt opt = { 0 };
	struct nlattr *nest, *sched_nest;

	mqprio_qopt_reconstruct(dev, &opt);

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!nest)
		goto start_error;

	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
		goto options_error;

	if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
	    nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
		goto options_error;

	if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
		goto options_error;

	if (q->txtime_delay &&
	    nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
		goto options_error;

	rcu_read_lock();

	oper = rtnl_dereference(q->oper_sched);
	admin = rtnl_dereference(q->admin_sched);

	if (oper && taprio_dump_tc_entries(skb, q, oper))
		goto options_error_rcu;

	if (oper && dump_schedule(skb, oper))
		goto options_error_rcu;

	if (!admin)
		goto done;

	sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
	if (!sched_nest)
		goto options_error_rcu;

	if (dump_schedule(skb, admin))
		goto admin_error;

	nla_nest_end(skb, sched_nest);

done:
	rcu_read_unlock();
	return nla_nest_end(skb, nest);

admin_error:
	nla_nest_cancel(skb, sched_nest);

options_error_rcu:
	rcu_read_unlock();

options_error:
	nla_nest_cancel(skb, nest);

start_error:
	return -ENOSPC;
}

static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	unsigned int ntx = cl - 1;

	if (ntx >= dev->num_tx_queues)
		return NULL;

	return q->qdiscs[ntx];
}

static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
{
	unsigned int ntx = TC_H_MIN(classid);

	if (!taprio_queue_get(sch, ntx))
		return 0;
	return ntx;
}

static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
			     struct sk_buff *skb, struct tcmsg *tcm)
{
	struct Qdisc *child = taprio_leaf(sch, cl);

	tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle |= TC_H_MIN(cl);
	tcm->tcm_info = child->handle;

	return 0;
}

static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				   struct gnet_dump *d)
	__releases(d->lock)
	__acquires(d->lock)
{
	struct Qdisc *child = taprio_leaf(sch, cl);
	struct tc_taprio_qopt_offload offload = {
		.cmd = TAPRIO_CMD_QUEUE_STATS,
		.queue_stats = {
			.queue = cl - 1,
		},
	};

	if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
	    qdisc_qstats_copy(d, child) < 0)
		return -1;

	return taprio_dump_xstats(sch, d, &offload, &offload.queue_stats.stats);
}

static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned long ntx;

	if (arg->stop)
		return;

	arg->count = arg->skip;
	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
		if (!tc_qdisc_stats_dump(sch, ntx + 1, arg))
			break;
	}
}

static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
						struct tcmsg *tcm)
{
	return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
}

static const struct Qdisc_class_ops taprio_class_ops = {
	.graft		= taprio_graft,
	.leaf		= taprio_leaf,
	.find		= taprio_find,
	.walk		= taprio_walk,
	.dump		= taprio_dump_class,
	.dump_stats	= taprio_dump_class_stats,
	.select_queue	= taprio_select_queue,
};

static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
	.cl_ops		= &taprio_class_ops,
	.id		= "taprio",
	.priv_size	= sizeof(struct taprio_sched),
	.init		= taprio_init,
	.change		= taprio_change,
	.destroy	= taprio_destroy,
	.reset		= taprio_reset,
	.attach		= taprio_attach,
	.peek		= taprio_peek,
	.dequeue	= taprio_dequeue,
	.enqueue	= taprio_enqueue,
	.dump		= taprio_dump,
	.dump_stats	= taprio_dump_stats,
	.owner		= THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("taprio");

static struct notifier_block taprio_device_notifier = {
	.notifier_call = taprio_dev_notifier,
};

static int __init taprio_module_init(void)
{
	int err = register_netdevice_notifier(&taprio_device_notifier);

	if (err)
		return err;

	return register_qdisc(&taprio_qdisc_ops);
}

static void __exit taprio_module_exit(void)
{
	unregister_qdisc(&taprio_qdisc_ops);
	unregister_netdevice_notifier(&taprio_device_notifier);
}

module_init(taprio_module_init);
module_exit(taprio_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Time Aware Priority qdisc");