rt.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
  4. * policies)
  5. */
  6. #include "sched.h"
  7. #include "pelt.h"
  8. int sched_rr_timeslice = RR_TIMESLICE;
  9. int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
  10. static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
  11. struct rt_bandwidth def_rt_bandwidth;
  12. static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
  13. {
  14. struct rt_bandwidth *rt_b =
  15. container_of(timer, struct rt_bandwidth, rt_period_timer);
  16. int idle = 0;
  17. int overrun;
  18. raw_spin_lock(&rt_b->rt_runtime_lock);
  19. for (;;) {
  20. overrun = hrtimer_forward_now(timer, rt_b->rt_period);
  21. if (!overrun)
  22. break;
  23. raw_spin_unlock(&rt_b->rt_runtime_lock);
  24. idle = do_sched_rt_period_timer(rt_b, overrun);
  25. raw_spin_lock(&rt_b->rt_runtime_lock);
  26. }
  27. if (idle)
  28. rt_b->rt_period_active = 0;
  29. raw_spin_unlock(&rt_b->rt_runtime_lock);
  30. return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
  31. }
  32. void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
  33. {
  34. rt_b->rt_period = ns_to_ktime(period);
  35. rt_b->rt_runtime = runtime;
  36. raw_spin_lock_init(&rt_b->rt_runtime_lock);
  37. hrtimer_init(&rt_b->rt_period_timer,
  38. CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  39. rt_b->rt_period_timer.function = sched_rt_period_timer;
  40. }
  41. static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
  42. {
  43. if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
  44. return;
  45. raw_spin_lock(&rt_b->rt_runtime_lock);
  46. if (!rt_b->rt_period_active) {
  47. rt_b->rt_period_active = 1;
  48. /*
  49. * SCHED_DEADLINE updates the bandwidth, as a runaway
  50. * RT task with a DL task could hog a CPU. But DL does
  51. * not reset the period. If a deadline task was running
  52. * without an RT task running, it can cause RT tasks to
  53. * throttle when they start up. Kick the timer right away
  54. * to update the period.
  55. */
  56. hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
  57. hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
  58. }
  59. raw_spin_unlock(&rt_b->rt_runtime_lock);
  60. }
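The global rt_bandwidth armed by start_rt_bandwidth() is what userspace tunes through the sched_rt_period_us/sched_rt_runtime_us sysctls. A minimal userspace sketch (not part of rt.c; it assumes the standard procfs paths, and a runtime of -1 corresponds to RUNTIME_INF, i.e. no throttling) that reads both knobs and reports the fraction of each period RT tasks may consume:

#include <stdio.h>

static long read_us(const char *path)
{
	FILE *f = fopen(path, "r");
	long val = -1;

	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	long period  = read_us("/proc/sys/kernel/sched_rt_period_us");
	long runtime = read_us("/proc/sys/kernel/sched_rt_runtime_us");

	if (runtime < 0 || period <= 0)
		printf("RT throttling disabled (runtime=%ld, period=%ld)\n",
		       runtime, period);
	else
		printf("RT tasks may use %.1f%% of every %ld us period\n",
		       100.0 * runtime / period, period);
	return 0;
}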
  61. void init_rt_rq(struct rt_rq *rt_rq)
  62. {
  63. struct rt_prio_array *array;
  64. int i;
  65. array = &rt_rq->active;
  66. for (i = 0; i < MAX_RT_PRIO; i++) {
  67. INIT_LIST_HEAD(array->queue + i);
  68. __clear_bit(i, array->bitmap);
  69. }
  70. /* delimiter for bitsearch: */
  71. __set_bit(MAX_RT_PRIO, array->bitmap);
  72. #if defined CONFIG_SMP
  73. rt_rq->highest_prio.curr = MAX_RT_PRIO;
  74. rt_rq->highest_prio.next = MAX_RT_PRIO;
  75. rt_rq->rt_nr_migratory = 0;
  76. rt_rq->overloaded = 0;
  77. plist_head_init(&rt_rq->pushable_tasks);
  78. #endif /* CONFIG_SMP */
  79. /* We start in dequeued state, because no RT tasks are queued */
  80. rt_rq->rt_queued = 0;
  81. rt_rq->rt_time = 0;
  82. rt_rq->rt_throttled = 0;
  83. rt_rq->rt_runtime = 0;
  84. raw_spin_lock_init(&rt_rq->rt_runtime_lock);
  85. }
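The MAX_RT_PRIO bit set above is only a sentinel: it guarantees that a find-first-bit scan over the priority bitmap always terminates, yielding MAX_RT_PRIO when no queue is populated. A self-contained sketch of the idea (illustrative constants and helper, not the kernel's sched_find_first_bit()):

#include <stdio.h>
#include <string.h>

#define DEMO_MAX_RT_PRIO 100	/* illustrative; mirrors MAX_RT_PRIO */

static unsigned long demo_bitmap[(DEMO_MAX_RT_PRIO + 1 + 63) / 64];

static void demo_set(int bit)
{
	demo_bitmap[bit / 64] |= 1UL << (bit % 64);
}

static int demo_find_first(void)
{
	for (int bit = 0; bit <= DEMO_MAX_RT_PRIO; bit++)
		if (demo_bitmap[bit / 64] & (1UL << (bit % 64)))
			return bit;
	return DEMO_MAX_RT_PRIO;	/* never reached: the sentinel bit is always set */
}

int main(void)
{
	memset(demo_bitmap, 0, sizeof(demo_bitmap));
	demo_set(DEMO_MAX_RT_PRIO);			/* the delimiter from init_rt_rq() */
	printf("empty:  %d\n", demo_find_first());	/* 100 == "nothing queued" */

	demo_set(10);					/* enqueue something at prio 10 */
	printf("queued: %d\n", demo_find_first());	/* 10 */
	return 0;
}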
  86. #ifdef CONFIG_RT_GROUP_SCHED
  87. static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
  88. {
  89. hrtimer_cancel(&rt_b->rt_period_timer);
  90. }
  91. #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
  92. static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
  93. {
  94. #ifdef CONFIG_SCHED_DEBUG
  95. WARN_ON_ONCE(!rt_entity_is_task(rt_se));
  96. #endif
  97. return container_of(rt_se, struct task_struct, rt);
  98. }
  99. static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
  100. {
  101. return rt_rq->rq;
  102. }
  103. static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
  104. {
  105. return rt_se->rt_rq;
  106. }
  107. static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
  108. {
  109. struct rt_rq *rt_rq = rt_se->rt_rq;
  110. return rt_rq->rq;
  111. }
  112. void free_rt_sched_group(struct task_group *tg)
  113. {
  114. int i;
  115. if (tg->rt_se)
  116. destroy_rt_bandwidth(&tg->rt_bandwidth);
  117. for_each_possible_cpu(i) {
  118. if (tg->rt_rq)
  119. kfree(tg->rt_rq[i]);
  120. if (tg->rt_se)
  121. kfree(tg->rt_se[i]);
  122. }
  123. kfree(tg->rt_rq);
  124. kfree(tg->rt_se);
  125. }
  126. void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
  127. struct sched_rt_entity *rt_se, int cpu,
  128. struct sched_rt_entity *parent)
  129. {
  130. struct rq *rq = cpu_rq(cpu);
  131. rt_rq->highest_prio.curr = MAX_RT_PRIO;
  132. rt_rq->rt_nr_boosted = 0;
  133. rt_rq->rq = rq;
  134. rt_rq->tg = tg;
  135. tg->rt_rq[cpu] = rt_rq;
  136. tg->rt_se[cpu] = rt_se;
  137. if (!rt_se)
  138. return;
  139. if (!parent)
  140. rt_se->rt_rq = &rq->rt;
  141. else
  142. rt_se->rt_rq = parent->my_q;
  143. rt_se->my_q = rt_rq;
  144. rt_se->parent = parent;
  145. INIT_LIST_HEAD(&rt_se->run_list);
  146. }
  147. int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
  148. {
  149. struct rt_rq *rt_rq;
  150. struct sched_rt_entity *rt_se;
  151. int i;
  152. tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
  153. if (!tg->rt_rq)
  154. goto err;
  155. tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
  156. if (!tg->rt_se)
  157. goto err;
  158. init_rt_bandwidth(&tg->rt_bandwidth,
  159. ktime_to_ns(def_rt_bandwidth.rt_period), 0);
  160. for_each_possible_cpu(i) {
  161. rt_rq = kzalloc_node(sizeof(struct rt_rq),
  162. GFP_KERNEL, cpu_to_node(i));
  163. if (!rt_rq)
  164. goto err;
  165. rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
  166. GFP_KERNEL, cpu_to_node(i));
  167. if (!rt_se)
  168. goto err_free_rq;
  169. init_rt_rq(rt_rq);
  170. rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
  171. init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
  172. }
  173. return 1;
  174. err_free_rq:
  175. kfree(rt_rq);
  176. err:
  177. return 0;
  178. }
  179. #else /* CONFIG_RT_GROUP_SCHED */
  180. #define rt_entity_is_task(rt_se) (1)
  181. static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
  182. {
  183. return container_of(rt_se, struct task_struct, rt);
  184. }
  185. static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
  186. {
  187. return container_of(rt_rq, struct rq, rt);
  188. }
  189. static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
  190. {
  191. struct task_struct *p = rt_task_of(rt_se);
  192. return task_rq(p);
  193. }
  194. static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
  195. {
  196. struct rq *rq = rq_of_rt_se(rt_se);
  197. return &rq->rt;
  198. }
  199. void free_rt_sched_group(struct task_group *tg) { }
  200. int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
  201. {
  202. return 1;
  203. }
  204. #endif /* CONFIG_RT_GROUP_SCHED */
  205. #ifdef CONFIG_SMP
  206. static void pull_rt_task(struct rq *this_rq);
  207. static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
  208. {
  209. /* Try to pull RT tasks here if we lower this rq's prio */
  210. return rq->rt.highest_prio.curr > prev->prio;
  211. }
  212. static inline int rt_overloaded(struct rq *rq)
  213. {
  214. return atomic_read(&rq->rd->rto_count);
  215. }
  216. static inline void rt_set_overload(struct rq *rq)
  217. {
  218. if (!rq->online)
  219. return;
  220. cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
  221. /*
  222. * Make sure the mask is visible before we set
  223. * the overload count. That is checked to determine
  224. * if we should look at the mask. It would be a shame
  225. * if we looked at the mask, but the mask was not
  226. * updated yet.
  227. *
  228. * Matched by the barrier in pull_rt_task().
  229. */
  230. smp_wmb();
  231. atomic_inc(&rq->rd->rto_count);
  232. }
  233. static inline void rt_clear_overload(struct rq *rq)
  234. {
  235. if (!rq->online)
  236. return;
  237. /* the order here really doesn't matter */
  238. atomic_dec(&rq->rd->rto_count);
  239. cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
  240. }
  241. static void update_rt_migration(struct rt_rq *rt_rq)
  242. {
  243. if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
  244. if (!rt_rq->overloaded) {
  245. rt_set_overload(rq_of_rt_rq(rt_rq));
  246. rt_rq->overloaded = 1;
  247. }
  248. } else if (rt_rq->overloaded) {
  249. rt_clear_overload(rq_of_rt_rq(rt_rq));
  250. rt_rq->overloaded = 0;
  251. }
  252. }
  253. static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  254. {
  255. struct task_struct *p;
  256. if (!rt_entity_is_task(rt_se))
  257. return;
  258. p = rt_task_of(rt_se);
  259. rt_rq = &rq_of_rt_rq(rt_rq)->rt;
  260. rt_rq->rt_nr_total++;
  261. if (p->nr_cpus_allowed > 1)
  262. rt_rq->rt_nr_migratory++;
  263. update_rt_migration(rt_rq);
  264. }
  265. static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  266. {
  267. struct task_struct *p;
  268. if (!rt_entity_is_task(rt_se))
  269. return;
  270. p = rt_task_of(rt_se);
  271. rt_rq = &rq_of_rt_rq(rt_rq)->rt;
  272. rt_rq->rt_nr_total--;
  273. if (p->nr_cpus_allowed > 1)
  274. rt_rq->rt_nr_migratory--;
  275. update_rt_migration(rt_rq);
  276. }
  277. static inline int has_pushable_tasks(struct rq *rq)
  278. {
  279. return !plist_head_empty(&rq->rt.pushable_tasks);
  280. }
  281. static DEFINE_PER_CPU(struct callback_head, rt_push_head);
  282. static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
  283. static void push_rt_tasks(struct rq *);
  284. static void pull_rt_task(struct rq *);
  285. static inline void rt_queue_push_tasks(struct rq *rq)
  286. {
  287. if (!has_pushable_tasks(rq))
  288. return;
  289. queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
  290. }
  291. static inline void rt_queue_pull_task(struct rq *rq)
  292. {
  293. queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
  294. }
  295. static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
  296. {
  297. plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
  298. plist_node_init(&p->pushable_tasks, p->prio);
  299. plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
  300. /* Update the highest prio pushable task */
  301. if (p->prio < rq->rt.highest_prio.next)
  302. rq->rt.highest_prio.next = p->prio;
  303. }
  304. static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
  305. {
  306. plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
  307. /* Update the new highest prio pushable task */
  308. if (has_pushable_tasks(rq)) {
  309. p = plist_first_entry(&rq->rt.pushable_tasks,
  310. struct task_struct, pushable_tasks);
  311. rq->rt.highest_prio.next = p->prio;
  312. } else
  313. rq->rt.highest_prio.next = MAX_RT_PRIO;
  314. }
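pushable_tasks is a priority-sorted list, so its first entry is always the best candidate to push, and highest_prio.next can be refreshed from the head alone on dequeue. A rough userspace analogue using a sorted insert (hypothetical types; the kernel uses plist, not this hand-rolled list):

#include <stdio.h>

struct demo_task {
	int prio;			/* lower value == higher priority */
	struct demo_task *next;
};

/* Insert keeping the list sorted by prio, like plist_add() does. */
static void demo_push(struct demo_task **head, struct demo_task *p)
{
	while (*head && (*head)->prio <= p->prio)
		head = &(*head)->next;
	p->next = *head;
	*head = p;
}

int main(void)
{
	struct demo_task a = { .prio = 40 }, b = { .prio = 10 }, c = { .prio = 25 };
	struct demo_task *head = NULL;

	demo_push(&head, &a);
	demo_push(&head, &b);
	demo_push(&head, &c);

	/* The head mirrors rq->rt.highest_prio.next after enqueue_pushable_task(). */
	printf("next pushable prio: %d\n", head->prio);	/* 10 */
	return 0;
}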
  315. #else
  316. static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
  317. {
  318. }
  319. static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
  320. {
  321. }
  322. static inline
  323. void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  324. {
  325. }
  326. static inline
  327. void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  328. {
  329. }
  330. static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
  331. {
  332. return false;
  333. }
  334. static inline void pull_rt_task(struct rq *this_rq)
  335. {
  336. }
  337. static inline void rt_queue_push_tasks(struct rq *rq)
  338. {
  339. }
  340. #endif /* CONFIG_SMP */
  341. static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
  342. static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
  343. static inline int on_rt_rq(struct sched_rt_entity *rt_se)
  344. {
  345. return rt_se->on_rq;
  346. }
  347. #ifdef CONFIG_RT_GROUP_SCHED
  348. static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
  349. {
  350. if (!rt_rq->tg)
  351. return RUNTIME_INF;
  352. return rt_rq->rt_runtime;
  353. }
  354. static inline u64 sched_rt_period(struct rt_rq *rt_rq)
  355. {
  356. return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
  357. }
  358. typedef struct task_group *rt_rq_iter_t;
  359. static inline struct task_group *next_task_group(struct task_group *tg)
  360. {
  361. do {
  362. tg = list_entry_rcu(tg->list.next,
  363. typeof(struct task_group), list);
  364. } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
  365. if (&tg->list == &task_groups)
  366. tg = NULL;
  367. return tg;
  368. }
  369. #define for_each_rt_rq(rt_rq, iter, rq) \
  370. for (iter = container_of(&task_groups, typeof(*iter), list); \
  371. (iter = next_task_group(iter)) && \
  372. (rt_rq = iter->rt_rq[cpu_of(rq)]);)
  373. #define for_each_sched_rt_entity(rt_se) \
  374. for (; rt_se; rt_se = rt_se->parent)
  375. static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
  376. {
  377. return rt_se->my_q;
  378. }
  379. static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
  380. static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
  381. static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
  382. {
  383. struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
  384. struct rq *rq = rq_of_rt_rq(rt_rq);
  385. struct sched_rt_entity *rt_se;
  386. int cpu = cpu_of(rq);
  387. rt_se = rt_rq->tg->rt_se[cpu];
  388. if (rt_rq->rt_nr_running) {
  389. if (!rt_se)
  390. enqueue_top_rt_rq(rt_rq);
  391. else if (!on_rt_rq(rt_se))
  392. enqueue_rt_entity(rt_se, 0);
  393. if (rt_rq->highest_prio.curr < curr->prio)
  394. resched_curr(rq);
  395. }
  396. }
  397. static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
  398. {
  399. struct sched_rt_entity *rt_se;
  400. int cpu = cpu_of(rq_of_rt_rq(rt_rq));
  401. rt_se = rt_rq->tg->rt_se[cpu];
  402. if (!rt_se) {
  403. dequeue_top_rt_rq(rt_rq);
  404. /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
  405. cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
  406. }
  407. else if (on_rt_rq(rt_se))
  408. dequeue_rt_entity(rt_se, 0);
  409. }
  410. static inline int rt_rq_throttled(struct rt_rq *rt_rq)
  411. {
  412. return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
  413. }
  414. static int rt_se_boosted(struct sched_rt_entity *rt_se)
  415. {
  416. struct rt_rq *rt_rq = group_rt_rq(rt_se);
  417. struct task_struct *p;
  418. if (rt_rq)
  419. return !!rt_rq->rt_nr_boosted;
  420. p = rt_task_of(rt_se);
  421. return p->prio != p->normal_prio;
  422. }
  423. #ifdef CONFIG_SMP
  424. static inline const struct cpumask *sched_rt_period_mask(void)
  425. {
  426. return this_rq()->rd->span;
  427. }
  428. #else
  429. static inline const struct cpumask *sched_rt_period_mask(void)
  430. {
  431. return cpu_online_mask;
  432. }
  433. #endif
  434. static inline
  435. struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
  436. {
  437. return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
  438. }
  439. static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
  440. {
  441. return &rt_rq->tg->rt_bandwidth;
  442. }
  443. #else /* !CONFIG_RT_GROUP_SCHED */
  444. static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
  445. {
  446. return rt_rq->rt_runtime;
  447. }
  448. static inline u64 sched_rt_period(struct rt_rq *rt_rq)
  449. {
  450. return ktime_to_ns(def_rt_bandwidth.rt_period);
  451. }
  452. typedef struct rt_rq *rt_rq_iter_t;
  453. #define for_each_rt_rq(rt_rq, iter, rq) \
  454. for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
  455. #define for_each_sched_rt_entity(rt_se) \
  456. for (; rt_se; rt_se = NULL)
  457. static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
  458. {
  459. return NULL;
  460. }
  461. static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
  462. {
  463. struct rq *rq = rq_of_rt_rq(rt_rq);
  464. if (!rt_rq->rt_nr_running)
  465. return;
  466. enqueue_top_rt_rq(rt_rq);
  467. resched_curr(rq);
  468. }
  469. static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
  470. {
  471. dequeue_top_rt_rq(rt_rq);
  472. }
  473. static inline int rt_rq_throttled(struct rt_rq *rt_rq)
  474. {
  475. return rt_rq->rt_throttled;
  476. }
  477. static inline const struct cpumask *sched_rt_period_mask(void)
  478. {
  479. return cpu_online_mask;
  480. }
  481. static inline
  482. struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
  483. {
  484. return &cpu_rq(cpu)->rt;
  485. }
  486. static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
  487. {
  488. return &def_rt_bandwidth;
  489. }
  490. #endif /* CONFIG_RT_GROUP_SCHED */
  491. bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
  492. {
  493. struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  494. return (hrtimer_active(&rt_b->rt_period_timer) ||
  495. rt_rq->rt_time < rt_b->rt_runtime);
  496. }
  497. #ifdef CONFIG_SMP
  498. /*
  499. * We ran out of runtime, see if we can borrow some from our neighbours.
  500. */
  501. static void do_balance_runtime(struct rt_rq *rt_rq)
  502. {
  503. struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  504. struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
  505. int i, weight;
  506. u64 rt_period;
  507. weight = cpumask_weight(rd->span);
  508. raw_spin_lock(&rt_b->rt_runtime_lock);
  509. rt_period = ktime_to_ns(rt_b->rt_period);
  510. for_each_cpu(i, rd->span) {
  511. struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
  512. s64 diff;
  513. if (iter == rt_rq)
  514. continue;
  515. raw_spin_lock(&iter->rt_runtime_lock);
  516. /*
  517. * Either all rqs have inf runtime and there's nothing to steal
  518. * or __disable_runtime() below sets a specific rq to inf to
  519. * indicate it's been disabled and disallow stealing.
  520. */
  521. if (iter->rt_runtime == RUNTIME_INF)
  522. goto next;
  523. /*
  524. * From runqueues with spare time, take 1/n part of their
  525. * spare time, but no more than our period.
  526. */
  527. diff = iter->rt_runtime - iter->rt_time;
  528. if (diff > 0) {
  529. diff = div_u64((u64)diff, weight);
  530. if (rt_rq->rt_runtime + diff > rt_period)
  531. diff = rt_period - rt_rq->rt_runtime;
  532. iter->rt_runtime -= diff;
  533. rt_rq->rt_runtime += diff;
  534. if (rt_rq->rt_runtime == rt_period) {
  535. raw_spin_unlock(&iter->rt_runtime_lock);
  536. break;
  537. }
  538. }
  539. next:
  540. raw_spin_unlock(&iter->rt_runtime_lock);
  541. }
  542. raw_spin_unlock(&rt_b->rt_runtime_lock);
  543. }
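A worked example of the 1/n borrowing rule above, with invented numbers: each neighbour contributes at most its spare time divided by the root-domain weight, and the borrower's runtime is capped at one full period.

#include <stdio.h>

int main(void)
{
	long long period = 1000000;	/* us, invented for illustration */
	long long mine   = 950000;	/* our rt_runtime */
	long long spare  = 400000;	/* a neighbour's rt_runtime - rt_time */
	int weight       = 4;		/* CPUs in the root domain span */

	long long diff = spare / weight;	/* take 1/n of the spare time */
	if (mine + diff > period)		/* but never exceed the period */
		diff = period - mine;

	printf("borrowed %lld us -> runtime %lld us\n", diff, mine + diff);
	return 0;
}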
  544. /*
  545. * Ensure this RQ takes back all the runtime it lent to its neighbours.
  546. */
  547. static void __disable_runtime(struct rq *rq)
  548. {
  549. struct root_domain *rd = rq->rd;
  550. rt_rq_iter_t iter;
  551. struct rt_rq *rt_rq;
  552. if (unlikely(!scheduler_running))
  553. return;
  554. for_each_rt_rq(rt_rq, iter, rq) {
  555. struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  556. s64 want;
  557. int i;
  558. raw_spin_lock(&rt_b->rt_runtime_lock);
  559. raw_spin_lock(&rt_rq->rt_runtime_lock);
  560. /*
  561. * Either we're all inf and nobody needs to borrow, or we're
  562. * already disabled and thus have nothing to do, or we have
  563. * exactly the right amount of runtime to take out.
  564. */
  565. if (rt_rq->rt_runtime == RUNTIME_INF ||
  566. rt_rq->rt_runtime == rt_b->rt_runtime)
  567. goto balanced;
  568. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  569. /*
  570. * Calculate the difference between what we started out with
  571. * and what we currently have; that's the amount of runtime
  572. * we lent out and now have to reclaim.
  573. */
  574. want = rt_b->rt_runtime - rt_rq->rt_runtime;
  575. /*
  576. * Greedy reclaim, take back as much as we can.
  577. */
  578. for_each_cpu(i, rd->span) {
  579. struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
  580. s64 diff;
  581. /*
  582. * Can't reclaim from ourselves or disabled runqueues.
  583. */
  584. if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
  585. continue;
  586. raw_spin_lock(&iter->rt_runtime_lock);
  587. if (want > 0) {
  588. diff = min_t(s64, iter->rt_runtime, want);
  589. iter->rt_runtime -= diff;
  590. want -= diff;
  591. } else {
  592. iter->rt_runtime -= want;
  593. want -= want;
  594. }
  595. raw_spin_unlock(&iter->rt_runtime_lock);
  596. if (!want)
  597. break;
  598. }
  599. raw_spin_lock(&rt_rq->rt_runtime_lock);
  600. /*
  601. * We cannot be left wanting - that would mean some runtime
  602. * leaked out of the system.
  603. */
  604. BUG_ON(want);
  605. balanced:
  606. /*
  607. * Disable all the borrow logic by pretending we have inf
  608. * runtime - in which case borrowing doesn't make sense.
  609. */
  610. rt_rq->rt_runtime = RUNTIME_INF;
  611. rt_rq->rt_throttled = 0;
  612. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  613. raw_spin_unlock(&rt_b->rt_runtime_lock);
  614. /* Make rt_rq available for pick_next_task() */
  615. sched_rt_rq_enqueue(rt_rq);
  616. }
  617. }
  618. static void __enable_runtime(struct rq *rq)
  619. {
  620. rt_rq_iter_t iter;
  621. struct rt_rq *rt_rq;
  622. if (unlikely(!scheduler_running))
  623. return;
  624. /*
  625. * Reset each runqueue's bandwidth settings
  626. */
  627. for_each_rt_rq(rt_rq, iter, rq) {
  628. struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  629. raw_spin_lock(&rt_b->rt_runtime_lock);
  630. raw_spin_lock(&rt_rq->rt_runtime_lock);
  631. rt_rq->rt_runtime = rt_b->rt_runtime;
  632. rt_rq->rt_time = 0;
  633. rt_rq->rt_throttled = 0;
  634. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  635. raw_spin_unlock(&rt_b->rt_runtime_lock);
  636. }
  637. }
  638. static void balance_runtime(struct rt_rq *rt_rq)
  639. {
  640. if (!sched_feat(RT_RUNTIME_SHARE))
  641. return;
  642. if (rt_rq->rt_time > rt_rq->rt_runtime) {
  643. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  644. do_balance_runtime(rt_rq);
  645. raw_spin_lock(&rt_rq->rt_runtime_lock);
  646. }
  647. }
  648. #else /* !CONFIG_SMP */
  649. static inline void balance_runtime(struct rt_rq *rt_rq) {}
  650. #endif /* CONFIG_SMP */
  651. static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
  652. {
  653. int i, idle = 1, throttled = 0;
  654. const struct cpumask *span;
  655. span = sched_rt_period_mask();
  656. #ifdef CONFIG_RT_GROUP_SCHED
  657. /*
  658. * FIXME: isolated CPUs should really leave the root task group,
  659. * whether they are isolcpus or were isolated via cpusets, lest
  660. * the timer run on a CPU which does not service all runqueues,
  661. * potentially leaving other CPUs indefinitely throttled. If
  662. * isolation is really required, the user will turn the throttle
  663. * off to kill the perturbations it causes anyway. Meanwhile,
  664. * this maintains functionality for boot and/or troubleshooting.
  665. */
  666. if (rt_b == &root_task_group.rt_bandwidth)
  667. span = cpu_online_mask;
  668. #endif
  669. for_each_cpu(i, span) {
  670. int enqueue = 0;
  671. struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
  672. struct rq *rq = rq_of_rt_rq(rt_rq);
  673. int skip;
  674. /*
  675. * When span == cpu_online_mask, taking each rq->lock
  676. * can be time-consuming. Try to avoid it when possible.
  677. */
  678. raw_spin_lock(&rt_rq->rt_runtime_lock);
  679. if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
  680. rt_rq->rt_runtime = rt_b->rt_runtime;
  681. skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
  682. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  683. if (skip)
  684. continue;
  685. raw_spin_lock(&rq->lock);
  686. update_rq_clock(rq);
  687. if (rt_rq->rt_time) {
  688. u64 runtime;
  689. raw_spin_lock(&rt_rq->rt_runtime_lock);
  690. if (rt_rq->rt_throttled)
  691. balance_runtime(rt_rq);
  692. runtime = rt_rq->rt_runtime;
  693. rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
  694. if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
  695. rt_rq->rt_throttled = 0;
  696. enqueue = 1;
  697. /*
  698. * When we're idle and a woken (rt) task is
  699. * throttled, check_preempt_curr() will set
  700. * skip_update and the time between the wakeup
  701. * and this unthrottle will get accounted as
  702. * 'runtime'.
  703. */
  704. if (rt_rq->rt_nr_running && rq->curr == rq->idle)
  705. rq_clock_cancel_skipupdate(rq);
  706. }
  707. if (rt_rq->rt_time || rt_rq->rt_nr_running)
  708. idle = 0;
  709. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  710. } else if (rt_rq->rt_nr_running) {
  711. idle = 0;
  712. if (!rt_rq_throttled(rt_rq))
  713. enqueue = 1;
  714. }
  715. if (rt_rq->rt_throttled)
  716. throttled = 1;
  717. if (enqueue)
  718. sched_rt_rq_enqueue(rt_rq);
  719. raw_spin_unlock(&rq->lock);
  720. }
  721. if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
  722. return 1;
  723. return idle;
  724. }
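The replenishment step above is simple arithmetic: every period each rt_rq forgets up to overrun*runtime worth of consumed time, and the queue is unthrottled once rt_time drops below runtime again. A toy trace with invented numbers:

#include <stdio.h>

int main(void)
{
	unsigned long long runtime = 950000;	/* us per period, invented */
	unsigned long long rt_time = 2200000;	/* usage accumulated while throttled */
	int throttled = 1;

	for (int tick = 1; throttled; tick++) {
		unsigned long long decay = runtime;	/* overrun == 1 in this trace */

		rt_time -= (rt_time < decay) ? rt_time : decay;
		if (rt_time < runtime)
			throttled = 0;
		printf("period %d: rt_time=%llu throttled=%d\n",
		       tick, rt_time, throttled);
	}
	return 0;
}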
  725. static inline int rt_se_prio(struct sched_rt_entity *rt_se)
  726. {
  727. #ifdef CONFIG_RT_GROUP_SCHED
  728. struct rt_rq *rt_rq = group_rt_rq(rt_se);
  729. if (rt_rq)
  730. return rt_rq->highest_prio.curr;
  731. #endif
  732. return rt_task_of(rt_se)->prio;
  733. }
  734. static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
  735. {
  736. u64 runtime = sched_rt_runtime(rt_rq);
  737. if (rt_rq->rt_throttled)
  738. return rt_rq_throttled(rt_rq);
  739. if (runtime >= sched_rt_period(rt_rq))
  740. return 0;
  741. balance_runtime(rt_rq);
  742. runtime = sched_rt_runtime(rt_rq);
  743. if (runtime == RUNTIME_INF)
  744. return 0;
  745. if (rt_rq->rt_time > runtime) {
  746. struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  747. /*
  748. * Don't actually throttle groups that have no runtime assigned
  749. * but accrue some time due to boosting.
  750. */
  751. if (likely(rt_b->rt_runtime)) {
  752. rt_rq->rt_throttled = 1;
  753. printk_deferred_once("sched: RT throttling activated\n");
  754. } else {
  755. /*
  756. * In case we did anyway, make it go away;
  757. * replenishment is a joke, since it will replenish us
  758. * with exactly 0 ns.
  759. */
  760. rt_rq->rt_time = 0;
  761. }
  762. if (rt_rq_throttled(rt_rq)) {
  763. sched_rt_rq_dequeue(rt_rq);
  764. return 1;
  765. }
  766. }
  767. return 0;
  768. }
  769. /*
  770. * Update the current task's runtime statistics. Skip current tasks that
  771. * are not in our scheduling class.
  772. */
  773. static void update_curr_rt(struct rq *rq)
  774. {
  775. struct task_struct *curr = rq->curr;
  776. struct sched_rt_entity *rt_se = &curr->rt;
  777. u64 delta_exec;
  778. u64 now;
  779. if (curr->sched_class != &rt_sched_class)
  780. return;
  781. now = rq_clock_task(rq);
  782. delta_exec = now - curr->se.exec_start;
  783. if (unlikely((s64)delta_exec <= 0))
  784. return;
  785. schedstat_set(curr->se.statistics.exec_max,
  786. max(curr->se.statistics.exec_max, delta_exec));
  787. curr->se.sum_exec_runtime += delta_exec;
  788. account_group_exec_runtime(curr, delta_exec);
  789. curr->se.exec_start = now;
  790. cgroup_account_cputime(curr, delta_exec);
  791. if (!rt_bandwidth_enabled())
  792. return;
  793. for_each_sched_rt_entity(rt_se) {
  794. struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
  795. if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
  796. raw_spin_lock(&rt_rq->rt_runtime_lock);
  797. rt_rq->rt_time += delta_exec;
  798. if (sched_rt_runtime_exceeded(rt_rq))
  799. resched_curr(rq);
  800. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  801. }
  802. }
  803. }
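update_curr_rt() charges wall-clock execution time (now - exec_start) to the task and, when bandwidth is enabled, to every rt_rq in its hierarchy. The same delta pattern in plain userspace C, using CLOCK_MONOTONIC as a stand-in for rq_clock_task():

#include <stdio.h>
#include <time.h>

static unsigned long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

int main(void)
{
	unsigned long long exec_start = now_ns();
	unsigned long long sum_exec_runtime = 0;

	for (volatile long i = 0; i < 10000000; i++)	/* stand-in for "running" */
		;

	unsigned long long delta_exec = now_ns() - exec_start;

	sum_exec_runtime += delta_exec;	/* per-task total, like curr->se.sum_exec_runtime */
	exec_start = now_ns();		/* restart the window, as update_curr_rt() does */

	printf("charged %llu ns this window\n", delta_exec);
	return 0;
}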
  804. static void
  805. dequeue_top_rt_rq(struct rt_rq *rt_rq)
  806. {
  807. struct rq *rq = rq_of_rt_rq(rt_rq);
  808. BUG_ON(&rq->rt != rt_rq);
  809. if (!rt_rq->rt_queued)
  810. return;
  811. BUG_ON(!rq->nr_running);
  812. sub_nr_running(rq, rt_rq->rt_nr_running);
  813. rt_rq->rt_queued = 0;
  814. }
  815. static void
  816. enqueue_top_rt_rq(struct rt_rq *rt_rq)
  817. {
  818. struct rq *rq = rq_of_rt_rq(rt_rq);
  819. BUG_ON(&rq->rt != rt_rq);
  820. if (rt_rq->rt_queued)
  821. return;
  822. if (rt_rq_throttled(rt_rq))
  823. return;
  824. if (rt_rq->rt_nr_running) {
  825. add_nr_running(rq, rt_rq->rt_nr_running);
  826. rt_rq->rt_queued = 1;
  827. }
  828. /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
  829. cpufreq_update_util(rq, 0);
  830. }
  831. #if defined CONFIG_SMP
  832. static void
  833. inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
  834. {
  835. struct rq *rq = rq_of_rt_rq(rt_rq);
  836. #ifdef CONFIG_RT_GROUP_SCHED
  837. /*
  838. * Change rq's cpupri only if rt_rq is the top queue.
  839. */
  840. if (&rq->rt != rt_rq)
  841. return;
  842. #endif
  843. if (rq->online && prio < prev_prio)
  844. cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
  845. }
  846. static void
  847. dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
  848. {
  849. struct rq *rq = rq_of_rt_rq(rt_rq);
  850. #ifdef CONFIG_RT_GROUP_SCHED
  851. /*
  852. * Change rq's cpupri only if rt_rq is the top queue.
  853. */
  854. if (&rq->rt != rt_rq)
  855. return;
  856. #endif
  857. if (rq->online && rt_rq->highest_prio.curr != prev_prio)
  858. cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
  859. }
  860. #else /* CONFIG_SMP */
  861. static inline
  862. void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
  863. static inline
  864. void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
  865. #endif /* CONFIG_SMP */
  866. #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
  867. static void
  868. inc_rt_prio(struct rt_rq *rt_rq, int prio)
  869. {
  870. int prev_prio = rt_rq->highest_prio.curr;
  871. if (prio < prev_prio)
  872. rt_rq->highest_prio.curr = prio;
  873. inc_rt_prio_smp(rt_rq, prio, prev_prio);
  874. }
  875. static void
  876. dec_rt_prio(struct rt_rq *rt_rq, int prio)
  877. {
  878. int prev_prio = rt_rq->highest_prio.curr;
  879. if (rt_rq->rt_nr_running) {
  880. WARN_ON(prio < prev_prio);
  881. /*
  882. * This may have been our highest task, and therefore
  883. * we may have some recomputation to do
  884. */
  885. if (prio == prev_prio) {
  886. struct rt_prio_array *array = &rt_rq->active;
  887. rt_rq->highest_prio.curr =
  888. sched_find_first_bit(array->bitmap);
  889. }
  890. } else
  891. rt_rq->highest_prio.curr = MAX_RT_PRIO;
  892. dec_rt_prio_smp(rt_rq, prio, prev_prio);
  893. }
  894. #else
  895. static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
  896. static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
  897. #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
  898. #ifdef CONFIG_RT_GROUP_SCHED
  899. static void
  900. inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  901. {
  902. if (rt_se_boosted(rt_se))
  903. rt_rq->rt_nr_boosted++;
  904. if (rt_rq->tg)
  905. start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
  906. }
  907. static void
  908. dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  909. {
  910. if (rt_se_boosted(rt_se))
  911. rt_rq->rt_nr_boosted--;
  912. WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
  913. }
  914. #else /* CONFIG_RT_GROUP_SCHED */
  915. static void
  916. inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  917. {
  918. start_rt_bandwidth(&def_rt_bandwidth);
  919. }
  920. static inline
  921. void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
  922. #endif /* CONFIG_RT_GROUP_SCHED */
  923. static inline
  924. unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
  925. {
  926. struct rt_rq *group_rq = group_rt_rq(rt_se);
  927. if (group_rq)
  928. return group_rq->rt_nr_running;
  929. else
  930. return 1;
  931. }
  932. static inline
  933. unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
  934. {
  935. struct rt_rq *group_rq = group_rt_rq(rt_se);
  936. struct task_struct *tsk;
  937. if (group_rq)
  938. return group_rq->rr_nr_running;
  939. tsk = rt_task_of(rt_se);
  940. return (tsk->policy == SCHED_RR) ? 1 : 0;
  941. }
  942. static inline
  943. void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  944. {
  945. int prio = rt_se_prio(rt_se);
  946. WARN_ON(!rt_prio(prio));
  947. rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
  948. rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
  949. inc_rt_prio(rt_rq, prio);
  950. inc_rt_migration(rt_se, rt_rq);
  951. inc_rt_group(rt_se, rt_rq);
  952. }
  953. static inline
  954. void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
  955. {
  956. WARN_ON(!rt_prio(rt_se_prio(rt_se)));
  957. WARN_ON(!rt_rq->rt_nr_running);
  958. rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
  959. rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
  960. dec_rt_prio(rt_rq, rt_se_prio(rt_se));
  961. dec_rt_migration(rt_se, rt_rq);
  962. dec_rt_group(rt_se, rt_rq);
  963. }
  964. /*
  965. * Change rt_se->run_list location unless SAVE && !MOVE
  966. *
  967. * assumes ENQUEUE/DEQUEUE flags match
  968. */
  969. static inline bool move_entity(unsigned int flags)
  970. {
  971. if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
  972. return false;
  973. return true;
  974. }
  975. static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
  976. {
  977. list_del_init(&rt_se->run_list);
  978. if (list_empty(array->queue + rt_se_prio(rt_se)))
  979. __clear_bit(rt_se_prio(rt_se), array->bitmap);
  980. rt_se->on_list = 0;
  981. }
  982. static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
  983. {
  984. struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
  985. struct rt_prio_array *array = &rt_rq->active;
  986. struct rt_rq *group_rq = group_rt_rq(rt_se);
  987. struct list_head *queue = array->queue + rt_se_prio(rt_se);
  988. /*
  989. * Don't enqueue the group if it is throttled, or when empty.
  990. * The latter is a consequence of the former when a child group
  991. * gets throttled and the current group doesn't have any other
  992. * active members.
  993. */
  994. if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
  995. if (rt_se->on_list)
  996. __delist_rt_entity(rt_se, array);
  997. return;
  998. }
  999. if (move_entity(flags)) {
  1000. WARN_ON_ONCE(rt_se->on_list);
  1001. if (flags & ENQUEUE_HEAD)
  1002. list_add(&rt_se->run_list, queue);
  1003. else
  1004. list_add_tail(&rt_se->run_list, queue);
  1005. __set_bit(rt_se_prio(rt_se), array->bitmap);
  1006. rt_se->on_list = 1;
  1007. }
  1008. rt_se->on_rq = 1;
  1009. inc_rt_tasks(rt_se, rt_rq);
  1010. }
  1011. static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
  1012. {
  1013. struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
  1014. struct rt_prio_array *array = &rt_rq->active;
  1015. if (move_entity(flags)) {
  1016. WARN_ON_ONCE(!rt_se->on_list);
  1017. __delist_rt_entity(rt_se, array);
  1018. }
  1019. rt_se->on_rq = 0;
  1020. dec_rt_tasks(rt_se, rt_rq);
  1021. }
  1022. /*
  1023. * Because the prio of an upper entry depends on the lower
  1024. * entries, we must remove entries top-down.
  1025. */
  1026. static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
  1027. {
  1028. struct sched_rt_entity *back = NULL;
  1029. for_each_sched_rt_entity(rt_se) {
  1030. rt_se->back = back;
  1031. back = rt_se;
  1032. }
  1033. dequeue_top_rt_rq(rt_rq_of_se(back));
  1034. for (rt_se = back; rt_se; rt_se = rt_se->back) {
  1035. if (on_rt_rq(rt_se))
  1036. __dequeue_rt_entity(rt_se, flags);
  1037. }
  1038. }
  1039. static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
  1040. {
  1041. struct rq *rq = rq_of_rt_se(rt_se);
  1042. dequeue_rt_stack(rt_se, flags);
  1043. for_each_sched_rt_entity(rt_se)
  1044. __enqueue_rt_entity(rt_se, flags);
  1045. enqueue_top_rt_rq(&rq->rt);
  1046. }
  1047. static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
  1048. {
  1049. struct rq *rq = rq_of_rt_se(rt_se);
  1050. dequeue_rt_stack(rt_se, flags);
  1051. for_each_sched_rt_entity(rt_se) {
  1052. struct rt_rq *rt_rq = group_rt_rq(rt_se);
  1053. if (rt_rq && rt_rq->rt_nr_running)
  1054. __enqueue_rt_entity(rt_se, flags);
  1055. }
  1056. enqueue_top_rt_rq(&rq->rt);
  1057. }
  1058. /*
  1059. * Adding/removing a task to/from a priority array:
  1060. */
  1061. static void
  1062. enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
  1063. {
  1064. struct sched_rt_entity *rt_se = &p->rt;
  1065. if (flags & ENQUEUE_WAKEUP)
  1066. rt_se->timeout = 0;
  1067. enqueue_rt_entity(rt_se, flags);
  1068. if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
  1069. enqueue_pushable_task(rq, p);
  1070. }
  1071. static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
  1072. {
  1073. struct sched_rt_entity *rt_se = &p->rt;
  1074. update_curr_rt(rq);
  1075. dequeue_rt_entity(rt_se, flags);
  1076. dequeue_pushable_task(rq, p);
  1077. }
  1078. /*
  1079. * Put a task at the head or the end of the run list without the overhead of
  1080. * dequeue followed by enqueue.
  1081. */
  1082. static void
  1083. requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
  1084. {
  1085. if (on_rt_rq(rt_se)) {
  1086. struct rt_prio_array *array = &rt_rq->active;
  1087. struct list_head *queue = array->queue + rt_se_prio(rt_se);
  1088. if (head)
  1089. list_move(&rt_se->run_list, queue);
  1090. else
  1091. list_move_tail(&rt_se->run_list, queue);
  1092. }
  1093. }
  1094. static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
  1095. {
  1096. struct sched_rt_entity *rt_se = &p->rt;
  1097. struct rt_rq *rt_rq;
  1098. for_each_sched_rt_entity(rt_se) {
  1099. rt_rq = rt_rq_of_se(rt_se);
  1100. requeue_rt_entity(rt_rq, rt_se, head);
  1101. }
  1102. }
  1103. static void yield_task_rt(struct rq *rq)
  1104. {
  1105. requeue_task_rt(rq, rq->curr, 0);
  1106. }
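yield_task_rt() is what an RT task reaches when it calls sched_yield(): it is requeued at the tail of its own priority level, so same-priority peers run before it does again. A minimal userspace sketch (needs suitable privileges, e.g. CAP_SYS_NICE, to switch to SCHED_RR; the priority value is illustrative):

#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };

	if (sched_setscheduler(0, SCHED_RR, &sp) == -1) {
		perror("sched_setscheduler");
		return 1;
	}

	/* Give the CPU to any other runnable SCHED_RR task at priority 10. */
	sched_yield();
	puts("back after yielding within our priority level");
	return 0;
}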
  1107. #ifdef CONFIG_SMP
  1108. static int find_lowest_rq(struct task_struct *task);
  1109. static int
  1110. select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
  1111. {
  1112. struct task_struct *curr;
  1113. struct rq *rq;
  1114. /* For anything but wake ups, just return the task_cpu */
  1115. if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
  1116. goto out;
  1117. rq = cpu_rq(cpu);
  1118. rcu_read_lock();
  1119. curr = READ_ONCE(rq->curr); /* unlocked access */
  1120. /*
  1121. * If the current task on @p's runqueue is an RT task, then
  1122. * try to see if we can wake this RT task up on another
  1123. * runqueue. Otherwise simply start this RT task
  1124. * on its current runqueue.
  1125. *
  1126. * We want to avoid overloading runqueues. If the woken
  1127. * task is a higher priority, then it will stay on this CPU
  1128. * and the lower prio task should be moved to another CPU.
  1129. * Even though this will probably make the lower prio task
  1130. * lose its cache, we do not want to bounce a higher task
  1131. * around just because it gave up its CPU, perhaps for a
  1132. * lock?
  1133. *
  1134. * For equal prio tasks, we just let the scheduler sort it out.
  1135. *
  1136. * Otherwise, just let it ride on the affined RQ and the
  1137. * post-schedule router will push the preempted task away
  1138. *
  1139. * This test is optimistic, if we get it wrong the load-balancer
  1140. * will have to sort it out.
  1141. */
  1142. if (curr && unlikely(rt_task(curr)) &&
  1143. (curr->nr_cpus_allowed < 2 ||
  1144. curr->prio <= p->prio)) {
  1145. int target = find_lowest_rq(p);
  1146. /*
  1147. * Don't bother moving it if the destination CPU is
  1148. * not running a lower priority task.
  1149. */
  1150. if (target != -1 &&
  1151. p->prio < cpu_rq(target)->rt.highest_prio.curr)
  1152. cpu = target;
  1153. }
  1154. rcu_read_unlock();
  1155. out:
  1156. return cpu;
  1157. }
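The wake-up placement policy above boils down to one predicate: only go hunting for another CPU when the currently running task is RT and either cannot move itself or outranks the waking task. A small sketch of just that predicate (hypothetical helper and types; kernel prio semantics, lower value wins):

#include <stdbool.h>
#include <stdio.h>

struct demo_task {
	bool rt;
	int nr_cpus_allowed;
	int prio;		/* kernel convention: lower == more important */
};

/* Should select_task_rq_rt() look for a lower-priority CPU for @p? */
static bool demo_try_other_cpu(const struct demo_task *curr,
			       const struct demo_task *p)
{
	return curr->rt &&
	       (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
}

int main(void)
{
	struct demo_task curr = { .rt = true, .nr_cpus_allowed = 1, .prio = 40 };
	struct demo_task p    = { .rt = true, .nr_cpus_allowed = 8, .prio = 20 };

	printf("search for another CPU: %s\n",
	       demo_try_other_cpu(&curr, &p) ? "yes" : "no");
	return 0;
}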
  1158. static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
  1159. {
  1160. /*
  1161. * Current can't be migrated, useless to reschedule,
  1162. * let's hope p can move out.
  1163. */
  1164. if (rq->curr->nr_cpus_allowed == 1 ||
  1165. !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
  1166. return;
  1167. /*
  1168. * p is migratable, so let's not schedule it and
  1169. * see if it is pushed or pulled somewhere else.
  1170. */
  1171. if (p->nr_cpus_allowed != 1
  1172. && cpupri_find(&rq->rd->cpupri, p, NULL))
  1173. return;
  1174. /*
  1175. * There appear to be other CPUs that can accept
  1176. * the current task but none can run 'p', so let's reschedule
  1177. * to try and push the current task away:
  1178. */
  1179. requeue_task_rt(rq, p, 1);
  1180. resched_curr(rq);
  1181. }
  1182. #endif /* CONFIG_SMP */
  1183. /*
  1184. * Preempt the current task with a newly woken task if needed:
  1185. */
  1186. static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
  1187. {
  1188. if (p->prio < rq->curr->prio) {
  1189. resched_curr(rq);
  1190. return;
  1191. }
  1192. #ifdef CONFIG_SMP
  1193. /*
  1194. * If:
  1195. *
  1196. * - the newly woken task is of equal priority to the current task
  1197. * - the newly woken task is non-migratable while current is migratable
  1198. * - current will be preempted on the next reschedule
  1199. *
  1200. * we should check to see if current can readily move to a different
  1201. * cpu. If so, we will reschedule to allow the push logic to try
  1202. * to move current somewhere else, making room for our non-migratable
  1203. * task.
  1204. */
  1205. if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
  1206. check_preempt_equal_prio(rq, p);
  1207. #endif
  1208. }
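The comparison p->prio < rq->curr->prio relies on the kernel-internal convention that a numerically lower prio value means a more important task, while the userspace sched_priority runs the other way (higher is more important). A tiny sketch of that mapping, assuming the usual MAX_RT_PRIO of 100 (illustrative; check your kernel's headers):

#include <stdio.h>

#define DEMO_MAX_RT_PRIO 100	/* illustrative; mirrors MAX_RT_PRIO */

/* Kernel-internal prio for an RT task: lower value == more important. */
static int kernel_prio(int sched_priority)
{
	return DEMO_MAX_RT_PRIO - 1 - sched_priority;
}

int main(void)
{
	int waker = kernel_prio(50);	/* userspace sched_priority 50 -> prio 49 */
	int curr  = kernel_prio(10);	/* userspace sched_priority 10 -> prio 89 */

	if (waker < curr)
		puts("woken task preempts current (check_preempt_curr_rt path)");
	return 0;
}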
  1209. static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
  1210. struct rt_rq *rt_rq)
  1211. {
  1212. struct rt_prio_array *array = &rt_rq->active;
  1213. struct sched_rt_entity *next = NULL;
  1214. struct list_head *queue;
  1215. int idx;
  1216. idx = sched_find_first_bit(array->bitmap);
  1217. BUG_ON(idx >= MAX_RT_PRIO);
  1218. queue = array->queue + idx;
  1219. next = list_entry(queue->next, struct sched_rt_entity, run_list);
  1220. return next;
  1221. }
  1222. static struct task_struct *_pick_next_task_rt(struct rq *rq)
  1223. {
  1224. struct sched_rt_entity *rt_se;
  1225. struct task_struct *p;
  1226. struct rt_rq *rt_rq = &rq->rt;
  1227. do {
  1228. rt_se = pick_next_rt_entity(rq, rt_rq);
  1229. BUG_ON(!rt_se);
  1230. rt_rq = group_rt_rq(rt_se);
  1231. } while (rt_rq);
  1232. p = rt_task_of(rt_se);
  1233. p->se.exec_start = rq_clock_task(rq);
  1234. return p;
  1235. }
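_pick_next_task_rt() walks down the group hierarchy: each pick either lands on a task (my_q == NULL) or on a child rt_rq that must be searched again. A compact sketch of that descent with hypothetical node types:

#include <stdio.h>
#include <stddef.h>

struct demo_entity {
	const char *name;
	struct demo_entity *best_child;	/* NULL for a task, set for a group */
};

/* Follow the "highest priority" pointer until a leaf (a task) is reached. */
static const struct demo_entity *demo_pick(const struct demo_entity *top)
{
	while (top->best_child)
		top = top->best_child;
	return top;
}

int main(void)
{
	struct demo_entity task  = { "rt task", NULL };
	struct demo_entity group = { "child group", &task };
	struct demo_entity root  = { "root rt_rq", &group };

	printf("picked: %s\n", demo_pick(&root)->name);
	return 0;
}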
  1236. static struct task_struct *
  1237. pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  1238. {
  1239. struct task_struct *p;
  1240. struct rt_rq *rt_rq = &rq->rt;
  1241. if (need_pull_rt_task(rq, prev)) {
  1242. /*
  1243. * This is OK, because current is on_cpu, which avoids it being
  1244. * picked for load-balance and preemption/IRQs are still
  1245. * disabled avoiding further scheduler activity on it and we're
  1246. * being very careful to re-start the picking loop.
  1247. */
  1248. rq_unpin_lock(rq, rf);
  1249. pull_rt_task(rq);
  1250. rq_repin_lock(rq, rf);
  1251. /*
  1252. * pull_rt_task() can drop (and re-acquire) rq->lock; this
  1253. * means a dl or stop task can slip in, in which case we need
  1254. * to re-start task selection.
  1255. */
  1256. if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
  1257. rq->dl.dl_nr_running))
  1258. return RETRY_TASK;
  1259. }
  1260. /*
  1261. * We may dequeue prev's rt_rq in put_prev_task().
  1262. * So, we update the time before the rt_nr_running check.
  1263. */
  1264. if (prev->sched_class == &rt_sched_class)
  1265. update_curr_rt(rq);
  1266. if (!rt_rq->rt_queued)
  1267. return NULL;
  1268. put_prev_task(rq, prev);
  1269. p = _pick_next_task_rt(rq);
  1270. /* The running task is never eligible for pushing */
  1271. dequeue_pushable_task(rq, p);
  1272. rt_queue_push_tasks(rq);
  1273. /*
  1274. * If prev task was rt, put_prev_task() has already updated the
  1275. * utilization. We only care about the case where we start to schedule an
  1276. * RT task.
  1277. */
  1278. if (rq->curr->sched_class != &rt_sched_class)
  1279. update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
  1280. return p;
  1281. }
  1282. static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
  1283. {
  1284. update_curr_rt(rq);
  1285. update_rt_rq_load_avg(rq_clock_task(rq), rq, 1);
  1286. /*
  1287. * The previous task needs to be made eligible for pushing
  1288. * if it is still active
  1289. */
  1290. if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
  1291. enqueue_pushable_task(rq, p);
  1292. }
  1293. #ifdef CONFIG_SMP
  1294. /* Only try algorithms three times */
  1295. #define RT_MAX_TRIES 3
  1296. static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
  1297. {
  1298. if (!task_running(rq, p) &&
  1299. cpumask_test_cpu(cpu, &p->cpus_allowed))
  1300. return 1;
  1301. return 0;
  1302. }
  1303. /*
  1304. * Return the highest pushable rq's task, which is suitable to be executed
  1305. * on the CPU, NULL otherwise
  1306. */
  1307. static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
  1308. {
  1309. struct plist_head *head = &rq->rt.pushable_tasks;
  1310. struct task_struct *p;
  1311. if (!has_pushable_tasks(rq))
  1312. return NULL;
  1313. plist_for_each_entry(p, head, pushable_tasks) {
  1314. if (pick_rt_task(rq, p, cpu))
  1315. return p;
  1316. }
  1317. return NULL;
  1318. }
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);

	/* Make sure the mask is initialized first */
	if (unlikely(!lowest_mask))
		return -1;

	if (task->nr_cpus_allowed == 1)
		return -1; /* No other targets possible */

	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
		return -1; /* No targets found */

	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system. Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last CPU that the task executed on since
	 * it is most likely cache-hot in that location.
	 */
	if (cpumask_test_cpu(cpu, lowest_mask))
		return cpu;

	/*
	 * Otherwise, we consult the sched_domains span maps to figure
	 * out which CPU is logically closest to our hot cache data.
	 */
	if (!cpumask_test_cpu(this_cpu, lowest_mask))
		this_cpu = -1; /* Skip this_cpu opt if not among lowest */

	rcu_read_lock();
	for_each_domain(cpu, sd) {
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * "this_cpu" is cheaper to preempt than a
			 * remote processor.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				rcu_read_unlock();
				return this_cpu;
			}

			best_cpu = cpumask_first_and(lowest_mask,
						     sched_domain_span(sd));
			if (best_cpu < nr_cpu_ids) {
				rcu_read_unlock();
				return best_cpu;
			}
		}
	}
	rcu_read_unlock();

	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any(lowest_mask);
	if (cpu < nr_cpu_ids)
		return cpu;

	return -1;
}
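
/*
 * Illustrative standalone sketch (not part of this file's build): the
 * selection above cascades from "last CPU the task ran on", to "this CPU if
 * it is also among the lowest-priority CPUs and shares a wake-affine
 * domain", to "any compatible CPU". The model below reproduces the first and
 * last steps of that cascade on a plain 64-bit CPU mask; the name
 * example_pick_lowest_cpu is hypothetical, invented for the example only.
 */
#include <stdint.h>
#include <stdio.h>

static int example_first_cpu(uint64_t mask)
{
	for (int cpu = 0; cpu < 64; cpu++)
		if (mask & (1ULL << cpu))
			return cpu;
	return -1;
}

/*
 * @lowest_mask: CPUs currently running the lowest-priority work.
 * @task_cpu:    CPU the task last ran on (preferred for cache warmth).
 * @this_cpu:    CPU doing the selection (cheaper to preempt than a remote one).
 */
static int example_pick_lowest_cpu(uint64_t lowest_mask, int task_cpu,
				   int this_cpu)
{
	if (!lowest_mask)
		return -1;				/* no targets */
	if (lowest_mask & (1ULL << task_cpu))
		return task_cpu;			/* cache-hot choice */
	if (lowest_mask & (1ULL << this_cpu))
		return this_cpu;			/* cheap local preemption */
	return example_first_cpu(lowest_mask);		/* anything compatible */
}

int main(void)
{
	/* CPUs 2 and 5 run low-priority work; the task last ran on CPU 1. */
	printf("picked CPU %d\n", example_pick_lowest_cpu(0x24, 1, 5));
	return 0;
}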
/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *lowest_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
		cpu = find_lowest_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		lowest_rq = cpu_rq(cpu);

		if (lowest_rq->rt.highest_prio.curr <= task->prio) {
			/*
			 * Target rq has tasks of equal or higher priority,
			 * retrying does not release any lock and is unlikely
			 * to yield a different result.
			 */
			lowest_rq = NULL;
			break;
		}

		/* if the prio of this runqueue changed, try again */
		if (double_lock_balance(rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In the meantime,
			 * the task could have migrated already or had its
			 * affinity changed. Also make sure that it wasn't
			 * scheduled on its rq.
			 */
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) ||
				     task_running(rq, task) ||
				     !rt_task(task) ||
				     !task_on_rq_queued(task))) {

				double_unlock_balance(rq, lowest_rq);
				lowest_rq = NULL;
				break;
			}
		}

		/* If this rq is still suitable use it. */
		if (lowest_rq->rt.highest_prio.curr > task->prio)
			break;

		/* try again */
		double_unlock_balance(rq, lowest_rq);
		lowest_rq = NULL;
	}

	return lowest_rq;
}
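
/*
 * Illustrative standalone sketch (not part of this file's build): the retry
 * loop above follows a common pattern -- when taking a second lock may force
 * us to drop the one we hold, every decision made under the first lock must
 * be rechecked once both are held, otherwise it may be based on stale state.
 * The pthread model below shows the shape of that pattern only; the names
 * fake_rq, lock_both_ordered and try_push are hypothetical, invented for the
 * example.
 */
#include <pthread.h>
#include <stdbool.h>

struct fake_rq {
	pthread_mutex_t lock;
	int highest_prio;	/* protected by ->lock; lower number == higher prio */
};

/*
 * Lock two runqueues in a fixed (address) order to avoid deadlock.
 * Returns true if @held had to be dropped, i.e. its state may be stale.
 */
static bool lock_both_ordered(struct fake_rq *held, struct fake_rq *other)
{
	if (pthread_mutex_trylock(&other->lock) == 0)
		return false;		/* got it without dropping @held */

	pthread_mutex_unlock(&held->lock);
	if (held < other) {
		pthread_mutex_lock(&held->lock);
		pthread_mutex_lock(&other->lock);
	} else {
		pthread_mutex_lock(&other->lock);
		pthread_mutex_lock(&held->lock);
	}
	return true;			/* caller must revalidate */
}

/* Push a task of priority @prio from @src to @dst only if still worthwhile. */
static bool try_push(struct fake_rq *src, struct fake_rq *dst, int prio)
{
	bool pushed = false;

	pthread_mutex_lock(&src->lock);

	/*
	 * Taking the second lock may drop the first; either way the priority
	 * check is only made once both locks are held, so any change that
	 * happened while @src was unlocked is seen here.
	 */
	lock_both_ordered(src, dst);

	if (dst->highest_prio > prio) {		/* @dst still runs lower-prio work */
		dst->highest_prio = prio;	/* stand-in for the real migration */
		pushed = true;
	}

	pthread_mutex_unlock(&dst->lock);
	pthread_mutex_unlock(&src->lock);
	return pushed;
}

int main(void)
{
	struct fake_rq a = { .lock = PTHREAD_MUTEX_INITIALIZER, .highest_prio = 120 };
	struct fake_rq b = { .lock = PTHREAD_MUTEX_INITIALIZER, .highest_prio = 120 };

	return !try_push(&a, &b, 50);	/* exits 0: the push succeeds */
}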
static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	p = plist_first_entry(&rq->rt.pushable_tasks,
			      struct task_struct, pushable_tasks);

	BUG_ON(rq->cpu != task_cpu(p));
	BUG_ON(task_current(rq, p));
	BUG_ON(p->nr_cpus_allowed <= 1);

	BUG_ON(!task_on_rq_queued(p));
	BUG_ON(!rt_task(p));

	return p;
}
/*
 * If the current CPU has more than one RT task, see if the non-running
 * task can migrate over to a CPU that is running a task of lesser priority.
 */
static int push_rt_task(struct rq *rq)
{
	struct task_struct *next_task;
	struct rq *lowest_rq;
	int ret = 0;

	if (!rq->rt.overloaded)
		return 0;

	next_task = pick_next_pushable_task(rq);
	if (!next_task)
		return 0;

retry:
	if (unlikely(next_task == rq->curr)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * It's possible that the next_task slipped in with a higher
	 * priority than current. If that's the case just reschedule
	 * current.
	 */
	if (unlikely(next_task->prio < rq->curr->prio)) {
		resched_curr(rq);
		return 0;
	}

	/* We might release rq lock */
	get_task_struct(next_task);

	/* find_lock_lowest_rq locks the rq if found */
	lowest_rq = find_lock_lowest_rq(next_task, rq);
	if (!lowest_rq) {
		struct task_struct *task;
		/*
		 * find_lock_lowest_rq releases rq->lock,
		 * so it is possible that next_task has migrated.
		 *
		 * We need to make sure that the task is still on the same
		 * run-queue and is also still the next task eligible for
		 * pushing.
		 */
		task = pick_next_pushable_task(rq);
		if (task == next_task) {
			/*
			 * The task hasn't migrated, and is still the next
			 * eligible task, but we failed to find a run-queue
			 * to push it to. Do not retry in this case, since
			 * other CPUs will pull from us when ready.
			 */
			goto out;
		}

		if (!task)
			/* No more tasks, just exit */
			goto out;

		/*
		 * Something has shifted, try again.
		 */
		put_task_struct(next_task);
		next_task = task;
		goto retry;
	}

	deactivate_task(rq, next_task, 0);
	set_task_cpu(next_task, lowest_rq->cpu);
	activate_task(lowest_rq, next_task, 0);
	ret = 1;

	resched_curr(lowest_rq);

	double_unlock_balance(rq, lowest_rq);

out:
	put_task_struct(next_task);

	return ret;
}

static void push_rt_tasks(struct rq *rq)
{
	/* push_rt_task() will return true if it moved an RT task */
	while (push_rt_task(rq))
		;
}
#ifdef HAVE_RT_PUSH_IPI

/*
 * When a high priority task schedules out from a CPU and a lower priority
 * task is scheduled in, a check is made to see if there are any RT tasks
 * on other CPUs that are waiting to run because a higher priority RT task
 * is currently running on the CPU they are queued on. In this case, a CPU
 * with multiple RT tasks queued on it (overloaded) needs to be notified that
 * a CPU has opened up that may be able to run one of its non-running queued
 * RT tasks.
 *
 * All CPUs with overloaded RT tasks need to be notified as there is currently
 * no way to know which of these CPUs has the highest priority task waiting
 * to run. Instead of trying to take a spinlock on each of these CPUs,
 * which has been shown to cause large latency when done on machines with
 * many CPUs, an IPI is sent to the CPUs to have them push off their
 * overloaded RT tasks waiting to run.
 *
 * Just sending an IPI to each of the CPUs is also an issue, as on large
 * CPU-count machines this can cause an IPI storm on a CPU, especially
 * if it's the only CPU with multiple RT tasks queued and a large number
 * of CPUs schedule a lower priority task at the same time.
 *
 * Each root domain has its own irq work function that can iterate over
 * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
 * tasks must be checked whether one or many CPUs are lowering their
 * priority, there's a single irq work iterator that will try to push off
 * the RT tasks that are waiting to run.
 *
 * When a CPU schedules a lower priority task, it will kick off the
 * irq work iterator that will jump to each CPU with overloaded RT tasks.
 * As it only takes the first CPU that schedules a lower priority task
 * to start the process, the rto_loop_start variable is used as a try-lock,
 * and only the CPU that takes it (sees it go from zero to one) goes on to
 * take the rto_lock. This prevents high contention on the lock while the
 * iterator handles all CPUs scheduling lower priority tasks.
 *
 * All CPUs that are scheduling a lower priority task will increment the
 * rto_loop_next variable. This makes sure that the irq work iterator
 * checks all RT overloaded CPUs whenever a CPU schedules a new lower
 * priority task, even if the iterator is in the middle of a scan.
 * Incrementing rto_loop_next causes the iterator to perform another scan.
 */
static int rto_next_cpu(struct root_domain *rd)
{
	int next;
	int cpu;

	/*
	 * When starting the IPI RT pushing, the rto_cpu is set to -1,
	 * and rto_next_cpu() will simply return the first CPU found in
	 * the rto_mask.
	 *
	 * If rto_next_cpu() is called with rto_cpu set to a valid CPU, it
	 * will return the next CPU found in the rto_mask.
	 *
	 * If there are no more CPUs left in the rto_mask, then a check is made
	 * against rto_loop and rto_loop_next. rto_loop is only updated with
	 * the rto_lock held, but any CPU may increment the rto_loop_next
	 * without any locking.
	 */
	for (;;) {

		/* When rto_cpu is -1 this acts like cpumask_first() */
		cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);

		rd->rto_cpu = cpu;

		if (cpu < nr_cpu_ids)
			return cpu;

		rd->rto_cpu = -1;

		/*
		 * ACQUIRE ensures we see the @rto_mask changes
		 * made prior to the @next value observed.
		 *
		 * Matches WMB in rt_set_overload().
		 */
		next = atomic_read_acquire(&rd->rto_loop_next);

		if (rd->rto_loop == next)
			break;

		rd->rto_loop = next;
	}

	return -1;
}
static inline bool rto_start_trylock(atomic_t *v)
{
	return !atomic_cmpxchg_acquire(v, 0, 1);
}

static inline void rto_start_unlock(atomic_t *v)
{
	atomic_set_release(v, 0);
}
static void tell_cpu_to_push(struct rq *rq)
{
	int cpu = -1;

	/* Keep the loop going if the IPI is currently active */
	atomic_inc(&rq->rd->rto_loop_next);

	/* Only one CPU can initiate a loop at a time */
	if (!rto_start_trylock(&rq->rd->rto_loop_start))
		return;

	raw_spin_lock(&rq->rd->rto_lock);

	/*
	 * The rto_cpu is updated under the lock; if it holds a valid CPU
	 * then the IPI is still running and will continue due to the
	 * update to loop_next, and nothing needs to be done here.
	 * Otherwise it is finishing up and an IPI needs to be sent.
	 */
	if (rq->rd->rto_cpu < 0)
		cpu = rto_next_cpu(rq->rd);

	raw_spin_unlock(&rq->rd->rto_lock);

	rto_start_unlock(&rq->rd->rto_loop_start);

	if (cpu >= 0) {
		/* Make sure the rd does not get freed while pushing */
		sched_get_rd(rq->rd);
		irq_work_queue_on(&rq->rd->rto_push_work, cpu);
	}
}
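
/*
 * Illustrative standalone sketch (not part of this file's build): the core
 * of the mechanism above is a pair of atomics -- loop_start acts as a
 * try-lock so only the first notifier kicks off the iterator, and loop_next
 * is bumped by every notifier so an in-flight iteration re-scans before it
 * stops. The C11 model below shows only that protocol; it deliberately omits
 * the rto_lock serialization and the per-CPU irq-work hand-off the kernel
 * uses, and every name here is hypothetical.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int loop_next;	/* bumped by every notifier */
static atomic_int loop_start;	/* 0/1 try-lock: who runs the iterator */
static int loop_seen;		/* last loop_next value fully processed */

static bool start_trylock(void)
{
	int expected = 0;
	return atomic_compare_exchange_strong(&loop_start, &expected, 1);
}

static void start_unlock(void)
{
	atomic_store_explicit(&loop_start, 0, memory_order_release);
}

static void scan_once(void)
{
	printf("scanning overloaded CPUs\n");
}

/* The iterator: keep scanning until no new notification arrived meanwhile. */
static void run_iterator(void)
{
	for (;;) {
		int next = atomic_load_explicit(&loop_next, memory_order_acquire);

		if (loop_seen == next)
			break;
		loop_seen = next;
		scan_once();
	}
}

/* Every "CPU that just lowered its priority" calls this. */
static void notify(void)
{
	atomic_fetch_add(&loop_next, 1);

	/*
	 * Only the first notifier starts the iterator; later notifiers rely
	 * on their loop_next increment to force an extra pass.
	 */
	if (!start_trylock())
		return;
	run_iterator();
	start_unlock();
}

int main(void)
{
	notify();
	notify();	/* nothing is in flight, so this triggers another scan */
	return 0;
}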
/* Called from hardirq context */
void rto_push_irq_work_func(struct irq_work *work)
{
	struct root_domain *rd =
		container_of(work, struct root_domain, rto_push_work);
	struct rq *rq;
	int cpu;

	rq = this_rq();

	/*
	 * We do not need to grab the lock to check for has_pushable_tasks.
	 * When it gets updated, a check is made if a push is possible.
	 */
	if (has_pushable_tasks(rq)) {
		raw_spin_lock(&rq->lock);
		push_rt_tasks(rq);
		raw_spin_unlock(&rq->lock);
	}

	raw_spin_lock(&rd->rto_lock);

	/* Pass the IPI to the next rt overloaded queue */
	cpu = rto_next_cpu(rd);

	raw_spin_unlock(&rd->rto_lock);

	if (cpu < 0) {
		sched_put_rd(rd);
		return;
	}

	/* Try the next RT overloaded CPU */
	irq_work_queue_on(&rd->rto_push_work, cpu);
}
#endif /* HAVE_RT_PUSH_IPI */
static void pull_rt_task(struct rq *this_rq)
{
	int this_cpu = this_rq->cpu, cpu;
	bool resched = false;
	struct task_struct *p;
	struct rq *src_rq;
	int rt_overload_count = rt_overloaded(this_rq);

	if (likely(!rt_overload_count))
		return;

	/*
	 * Match the barrier from rt_set_overload(); this guarantees that if we
	 * see overloaded we must also see the rto_mask bit.
	 */
	smp_rmb();

	/* If we are the only overloaded CPU do nothing */
	if (rt_overload_count == 1 &&
	    cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
		return;

#ifdef HAVE_RT_PUSH_IPI
	if (sched_feat(RT_PUSH_IPI)) {
		tell_cpu_to_push(this_rq);
		return;
	}
#endif

	for_each_cpu(cpu, this_rq->rd->rto_mask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);

		/*
		 * Don't bother taking the src_rq->lock if the next highest
		 * task is known to be lower-priority than our current task.
		 * This may look racy, but if this value is about to go
		 * logically higher, the src_rq will push this task away.
		 * And if it's going logically lower, we do not care.
		 */
		if (src_rq->rt.highest_prio.next >=
		    this_rq->rt.highest_prio.curr)
			continue;

		/*
		 * We can potentially drop this_rq's lock in
		 * double_lock_balance, and another CPU could
		 * alter this_rq.
		 */
		double_lock_balance(this_rq, src_rq);

		/*
		 * We can only pull a task that is pushable on its rq,
		 * and no others.
		 */
		p = pick_highest_pushable_task(src_rq, this_cpu);

		/*
		 * Do we have an RT task that preempts
		 * the to-be-scheduled task?
		 */
		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!task_on_rq_queued(p));

			/*
			 * There's a chance that p is higher in priority
			 * than what's currently running on its CPU.
			 * This is just because p is waking up and hasn't
			 * had a chance to schedule yet. We only pull
			 * p if it is lower in priority than the
			 * current task on its run queue.
			 */
			if (p->prio < src_rq->curr->prio)
				goto skip;

			resched = true;

			deactivate_task(src_rq, p, 0);
			set_task_cpu(p, this_cpu);
			activate_task(this_rq, p, 0);
			/*
			 * We continue with the search, just in
			 * case there's an even higher prio task
			 * in another runqueue. (low likelihood
			 * but possible)
			 */
		}
skip:
		double_unlock_balance(this_rq, src_rq);
	}

	if (resched)
		resched_curr(this_rq);
}
/*
 * If we are not running and we are not going to reschedule soon, we should
 * try to push tasks away now
 */
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
	if (!task_running(rq, p) &&
	    !test_tsk_need_resched(rq->curr) &&
	    p->nr_cpus_allowed > 1 &&
	    (dl_task(rq->curr) || rt_task(rq->curr)) &&
	    (rq->curr->nr_cpus_allowed < 2 ||
	     rq->curr->prio <= p->prio))
		push_rt_tasks(rq);
}

/* Assumes rq->lock is held */
static void rq_online_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_set_overload(rq);

	__enable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
}

/* Assumes rq->lock is held */
static void rq_offline_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_clear_overload(rq);

	__disable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
}
/*
 * When switching away from the RT class, we bring ourselves to a position
 * where we might want to pull RT tasks from other runqueues.
 */
static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If there are other RT tasks then we will reschedule
	 * and the scheduling of the other RT tasks will handle
	 * the balancing. But if we are the last RT task
	 * we may need to handle the pulling of RT tasks
	 * now.
	 */
	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
		return;

	rt_queue_pull_task(rq);
}

void __init init_sched_rt_class(void)
{
	unsigned int i;

	for_each_possible_cpu(i) {
		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
					GFP_KERNEL, cpu_to_node(i));
	}
}
#endif /* CONFIG_SMP */
/*
 * When switching a task to RT, we may overload the runqueue
 * with RT tasks. In this case we try to push them off to
 * other runqueues.
 */
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If we are already running, then there's nothing
	 * that needs to be done. But if we are not running
	 * we may need to preempt the current running task.
	 * If that current running task is also an RT task
	 * then see if we can move to another run queue.
	 */
	if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
			rt_queue_push_tasks(rq);
#endif /* CONFIG_SMP */
		if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
			resched_curr(rq);
	}
}
/*
 * Priority of the task has changed. This may cause
 * us to initiate a push or pull.
 */
static void
prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
	if (!task_on_rq_queued(p))
		return;

	if (rq->curr == p) {
#ifdef CONFIG_SMP
		/*
		 * If our priority decreases while running, we
		 * may need to pull tasks to this runqueue.
		 */
		if (oldprio < p->prio)
			rt_queue_pull_task(rq);

		/*
		 * If there's a higher priority task waiting to run
		 * then reschedule.
		 */
		if (p->prio > rq->rt.highest_prio.curr)
			resched_curr(rq);
#else
		/* For UP simply resched on drop of prio */
		if (oldprio < p->prio)
			resched_curr(rq);
#endif /* CONFIG_SMP */
	} else {
		/*
		 * This task is not running, but if its priority is now
		 * higher than that of the current running task
		 * then reschedule.
		 */
		if (p->prio < rq->curr->prio)
			resched_curr(rq);
	}
}
#ifdef CONFIG_POSIX_TIMERS
static void watchdog(struct rq *rq, struct task_struct *p)
{
	unsigned long soft, hard;

	/* max may change after cur was read, this will be fixed next tick */
	soft = task_rlimit(p, RLIMIT_RTTIME);
	hard = task_rlimit_max(p, RLIMIT_RTTIME);

	if (soft != RLIM_INFINITY) {
		unsigned long next;

		if (p->rt.watchdog_stamp != jiffies) {
			p->rt.timeout++;
			p->rt.watchdog_stamp = jiffies;
		}
		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
		if (p->rt.timeout > next)
			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
	}
}
#else
static inline void watchdog(struct rq *rq, struct task_struct *p) { }
#endif
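
/*
 * Illustrative standalone sketch (not part of this file's build): the
 * RLIMIT_RTTIME limit is given in microseconds while the watchdog above
 * counts scheduler ticks, so the limit is converted with a round-up division
 * by the tick length in microseconds (USEC_PER_SEC / HZ). The program below
 * redoes that conversion; HZ is assumed to be 250 purely for the example,
 * and the EXAMPLE_* names are hypothetical.
 */
#include <stdio.h>

#define EXAMPLE_HZ		250UL
#define EXAMPLE_USEC_PER_SEC	1000000UL
#define EXAMPLE_DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static unsigned long rttime_limit_in_ticks(unsigned long soft_us,
					   unsigned long hard_us)
{
	unsigned long limit_us = soft_us < hard_us ? soft_us : hard_us;

	return EXAMPLE_DIV_ROUND_UP(limit_us, EXAMPLE_USEC_PER_SEC / EXAMPLE_HZ);
}

int main(void)
{
	/* A 10ms soft limit with 4000us ticks rounds up to 3 ticks. */
	printf("%lu ticks\n", rttime_limit_in_ticks(10000, 20000));
	return 0;
}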
/*
 * scheduler tick hitting a task of our scheduling class.
 *
 * NOTE: This function can be called remotely by the tick offload that
 * goes along full dynticks. Therefore no local assumption can be made
 * and everything must be accessed through the @rq and @curr passed in
 * parameters.
 */
static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	update_rt_rq_load_avg(rq_clock_task(rq), rq, 1);

	watchdog(rq, p);

	/*
	 * RR tasks need a special form of timeslice management.
	 * FIFO tasks have no timeslices.
	 */
	if (p->policy != SCHED_RR)
		return;

	if (--p->rt.time_slice)
		return;

	p->rt.time_slice = sched_rr_timeslice;

	/*
	 * Requeue to the end of queue if we (and all of our ancestors) are not
	 * the only element on the queue
	 */
	for_each_sched_rt_entity(rt_se) {
		if (rt_se->run_list.prev != rt_se->run_list.next) {
			requeue_task_rt(rq, p, 0);
			resched_curr(rq);
			return;
		}
	}
}
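
/*
 * Illustrative standalone sketch (not part of this file's build): the
 * SCHED_RR handling above burns one unit of timeslice per tick and, only
 * when the slice is used up and the task is not alone at its level, refills
 * the slice and rotates the task to the tail of the queue. The model below
 * shows the same rotation on a simple array-backed queue; all example_*
 * names are hypothetical.
 */
#include <stdio.h>

#define EXAMPLE_RR_TIMESLICE 4	/* ticks per slice, chosen for the example */

struct example_task {
	const char *name;
	int time_slice;
};

/* Called once per tick for the task at the head of @queue. */
static void example_rr_tick(struct example_task **queue, int nr)
{
	struct example_task *curr = queue[0];

	if (--curr->time_slice)
		return;			/* slice not used up yet */

	curr->time_slice = EXAMPLE_RR_TIMESLICE;

	if (nr > 1) {			/* only rotate if someone else waits */
		for (int i = 0; i < nr - 1; i++)
			queue[i] = queue[i + 1];
		queue[nr - 1] = curr;
	}
}

int main(void)
{
	struct example_task a = { "A", EXAMPLE_RR_TIMESLICE };
	struct example_task b = { "B", EXAMPLE_RR_TIMESLICE };
	struct example_task *queue[] = { &a, &b };

	for (int tick = 0; tick < 8; tick++)
		example_rr_tick(queue, 2);

	printf("head after 8 ticks: %s\n", queue[0]->name);	/* back to A */
	return 0;
}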
static void set_curr_task_rt(struct rq *rq)
{
	struct task_struct *p = rq->curr;

	p->se.exec_start = rq_clock_task(rq);

	/* The running task is never eligible for pushing */
	dequeue_pushable_task(rq, p);
}

static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
	/*
	 * Time slice is 0 for SCHED_FIFO tasks
	 */
	if (task->policy == SCHED_RR)
		return sched_rr_timeslice;
	else
		return 0;
}
const struct sched_class rt_sched_class = {
	.next			= &fair_sched_class,
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	.check_preempt_curr	= check_preempt_curr_rt,

	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_rt,

	.set_cpus_allowed	= set_cpus_allowed_common,
	.rq_online		= rq_online_rt,
	.rq_offline		= rq_offline_rt,
	.task_woken		= task_woken_rt,
	.switched_from		= switched_from_rt,
#endif

	.set_curr_task		= set_curr_task_rt,
	.task_tick		= task_tick_rt,

	.get_rr_interval	= get_rr_interval_rt,

	.prio_changed		= prio_changed_rt,
	.switched_to		= switched_to_rt,

	.update_curr		= update_curr_rt,
};
#ifdef CONFIG_RT_GROUP_SCHED
/*
 * Ensure that the real time constraints are schedulable.
 */
static DEFINE_MUTEX(rt_constraints_mutex);

/* Must be called with tasklist_lock held */
static inline int tg_has_rt_tasks(struct task_group *tg)
{
	struct task_struct *g, *p;

	/*
	 * Autogroups do not have RT tasks; see autogroup_create().
	 */
	if (task_group_is_autogroup(tg))
		return 0;

	for_each_process_thread(g, p) {
		if (rt_task(p) && task_group(p) == tg)
			return 1;
	}

	return 0;
}

struct rt_schedulable_data {
	struct task_group *tg;
	u64 rt_period;
	u64 rt_runtime;
};

static int tg_rt_schedulable(struct task_group *tg, void *data)
{
	struct rt_schedulable_data *d = data;
	struct task_group *child;
	unsigned long total, sum = 0;
	u64 period, runtime;

	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
	runtime = tg->rt_bandwidth.rt_runtime;

	if (tg == d->tg) {
		period = d->rt_period;
		runtime = d->rt_runtime;
	}

	/*
	 * Cannot have more runtime than the period.
	 */
	if (runtime > period && runtime != RUNTIME_INF)
		return -EINVAL;

	/*
	 * Ensure we don't starve existing RT tasks.
	 */
	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
		return -EBUSY;

	total = to_ratio(period, runtime);

	/*
	 * Nobody can have more than the global setting allows.
	 */
	if (total > to_ratio(global_rt_period(), global_rt_runtime()))
		return -EINVAL;

	/*
	 * The sum of our children's runtime should not exceed our own.
	 */
	list_for_each_entry_rcu(child, &tg->children, siblings) {
		period = ktime_to_ns(child->rt_bandwidth.rt_period);
		runtime = child->rt_bandwidth.rt_runtime;

		if (child == d->tg) {
			period = d->rt_period;
			runtime = d->rt_runtime;
		}

		sum += to_ratio(period, runtime);
	}

	if (sum > total)
		return -EINVAL;

	return 0;
}
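
/*
 * Illustrative standalone sketch (not part of this file's build): the walk
 * above compares fixed-point utilization ratios -- each group's
 * runtime/period, the global runtime/period, and the sum over the children.
 * The helper below redoes that arithmetic for one parent and its children;
 * the scale factor of 1 << 20 mirrors the kernel's BW_SHIFT, and all
 * example_* names are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_BW_SHIFT 20

static uint64_t example_to_ratio(uint64_t period_ns, uint64_t runtime_ns)
{
	if (!period_ns)
		return 0;
	return (runtime_ns << EXAMPLE_BW_SHIFT) / period_ns;
}

/* Returns 0 if the configuration fits, -1 otherwise. */
static int example_check(uint64_t parent_period, uint64_t parent_runtime,
			 const uint64_t child_runtime[], int nr_children,
			 uint64_t child_period)
{
	uint64_t total = example_to_ratio(parent_period, parent_runtime);
	uint64_t sum = 0;

	if (parent_runtime > parent_period)
		return -1;			/* more runtime than period */

	for (int i = 0; i < nr_children; i++)
		sum += example_to_ratio(child_period, child_runtime[i]);

	return sum > total ? -1 : 0;		/* children must fit in parent */
}

int main(void)
{
	/* Parent: 950ms out of every 1s; two children asking 400ms each. */
	uint64_t kids[] = { 400000000ULL, 400000000ULL };

	printf("fits: %d\n",
	       example_check(1000000000ULL, 950000000ULL, kids, 2,
			     1000000000ULL) == 0);
	return 0;
}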
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
	int ret;

	struct rt_schedulable_data data = {
		.tg = tg,
		.rt_period = period,
		.rt_runtime = runtime,
	};

	rcu_read_lock();
	ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
	rcu_read_unlock();

	return ret;
}

static int tg_set_rt_bandwidth(struct task_group *tg,
		u64 rt_period, u64 rt_runtime)
{
	int i, err = 0;

	/*
	 * Disallowing the root group RT runtime is BAD, it would disallow the
	 * kernel creating (and/or operating) RT threads.
	 */
	if (tg == &root_task_group && rt_runtime == 0)
		return -EINVAL;

	/* No period doesn't make any sense. */
	if (rt_period == 0)
		return -EINVAL;

	mutex_lock(&rt_constraints_mutex);
	read_lock(&tasklist_lock);
	err = __rt_schedulable(tg, rt_period, rt_runtime);
	if (err)
		goto unlock;

	raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
	tg->rt_bandwidth.rt_runtime = rt_runtime;

	for_each_possible_cpu(i) {
		struct rt_rq *rt_rq = tg->rt_rq[i];

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_runtime;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
	}
	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
unlock:
	read_unlock(&tasklist_lock);
	mutex_unlock(&rt_constraints_mutex);

	return err;
}
int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
{
	u64 rt_runtime, rt_period;

	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
	if (rt_runtime_us < 0)
		rt_runtime = RUNTIME_INF;
	else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
		return -EINVAL;

	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
}

long sched_group_rt_runtime(struct task_group *tg)
{
	u64 rt_runtime_us;

	if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
		return -1;

	rt_runtime_us = tg->rt_bandwidth.rt_runtime;
	do_div(rt_runtime_us, NSEC_PER_USEC);
	return rt_runtime_us;
}

int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
{
	u64 rt_runtime, rt_period;

	if (rt_period_us > U64_MAX / NSEC_PER_USEC)
		return -EINVAL;

	rt_period = rt_period_us * NSEC_PER_USEC;
	rt_runtime = tg->rt_bandwidth.rt_runtime;

	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
}

long sched_group_rt_period(struct task_group *tg)
{
	u64 rt_period_us;

	rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
	do_div(rt_period_us, NSEC_PER_USEC);
	return rt_period_us;
}
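
/*
 * Illustrative standalone sketch (not part of this file's build): the
 * setters above accept microseconds, reject anything that would overflow a
 * u64 once multiplied by NSEC_PER_USEC (1000), and treat a negative runtime
 * as "unlimited". The helper below mirrors that validation; the example_*
 * names and the UINT64_MAX sentinel for "unlimited" are choices made for
 * the example only.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_NSEC_PER_USEC	1000ULL
#define EXAMPLE_RUNTIME_INF	UINT64_MAX

/* Returns 0 and stores the nanosecond value, or -1 on overflow. */
static int example_us_to_ns(long runtime_us, uint64_t *runtime_ns)
{
	if (runtime_us < 0) {
		*runtime_ns = EXAMPLE_RUNTIME_INF;
		return 0;
	}
	if ((uint64_t)runtime_us > UINT64_MAX / EXAMPLE_NSEC_PER_USEC)
		return -1;			/* would overflow u64 */

	*runtime_ns = (uint64_t)runtime_us * EXAMPLE_NSEC_PER_USEC;
	return 0;
}

int main(void)
{
	uint64_t ns;

	if (example_us_to_ns(950000, &ns) == 0)
		printf("950000us = %llu ns\n", (unsigned long long)ns);
	return 0;
}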
static int sched_rt_global_constraints(void)
{
	int ret = 0;

	mutex_lock(&rt_constraints_mutex);
	read_lock(&tasklist_lock);
	ret = __rt_schedulable(NULL, 0, 0);
	read_unlock(&tasklist_lock);
	mutex_unlock(&rt_constraints_mutex);

	return ret;
}

int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
	/* Don't accept realtime tasks when there is no way for them to run */
	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
		return 0;

	return 1;
}

#else /* !CONFIG_RT_GROUP_SCHED */
static int sched_rt_global_constraints(void)
{
	unsigned long flags;
	int i;

	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
	for_each_possible_cpu(i) {
		struct rt_rq *rt_rq = &cpu_rq(i)->rt;

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = global_rt_runtime();
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
	}
	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);

	return 0;
}
#endif /* CONFIG_RT_GROUP_SCHED */
static int sched_rt_global_validate(void)
{
	if (sysctl_sched_rt_period <= 0)
		return -EINVAL;

	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
		(sysctl_sched_rt_runtime > sysctl_sched_rt_period))
		return -EINVAL;

	return 0;
}

static void sched_rt_do_global(void)
{
	def_rt_bandwidth.rt_runtime = global_rt_runtime();
	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
}

int sched_rt_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int old_period, old_runtime;
	static DEFINE_MUTEX(mutex);
	int ret;

	mutex_lock(&mutex);
	old_period = sysctl_sched_rt_period;
	old_runtime = sysctl_sched_rt_runtime;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (!ret && write) {
		ret = sched_rt_global_validate();
		if (ret)
			goto undo;

		ret = sched_dl_global_validate();
		if (ret)
			goto undo;

		ret = sched_rt_global_constraints();
		if (ret)
			goto undo;

		sched_rt_do_global();
		sched_dl_do_global();
	}
	if (0) {
undo:
		sysctl_sched_rt_period = old_period;
		sysctl_sched_rt_runtime = old_runtime;
	}
	mutex_unlock(&mutex);

	return ret;
}
int sched_rr_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
{
	int ret;
	static DEFINE_MUTEX(mutex);

	mutex_lock(&mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	/*
	 * Make sure that internally we keep jiffies.
	 * Also, writing zero resets the timeslice to default:
	 */
	if (!ret && write) {
		sched_rr_timeslice =
			sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
			msecs_to_jiffies(sysctl_sched_rr_timeslice);
	}
	mutex_unlock(&mutex);

	return ret;
}

#ifdef CONFIG_SCHED_DEBUG
void print_rt_stats(struct seq_file *m, int cpu)
{
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	rcu_read_lock();
	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
		print_rt_rq(m, cpu, rt_rq);
	rcu_read_unlock();
}
#endif /* CONFIG_SCHED_DEBUG */