rstat.c

// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup-internal.h"

#include <linux/sched/cputime.h>

#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>

#include <trace/events/cgroup.h>

static DEFINE_SPINLOCK(cgroup_rstat_lock);
static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);

static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
{
	return per_cpu_ptr(cgrp->rstat_cpu, cpu);
}

/*
 * Helper functions for the rstat per-CPU lock (cgroup_rstat_cpu_lock).
 *
 * This makes it easier to diagnose locking issues and contention in
 * production environments. The parameter @fast_path determines which
 * tracepoints are used, allowing us to diagnose "flush" related
 * operations without tracing high-frequency fast-path "update" events.
 */
static __always_inline
unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
				     struct cgroup *cgrp, const bool fast_path)
{
	unsigned long flags;
	bool contended;

	/*
	 * The _irqsave() is needed because cgroup_rstat_lock is a
	 * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
	 * this lock with the _irq() suffix only disables interrupts on
	 * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
	 * interrupts on both configurations. The _irqsave() ensures
	 * that interrupts are always disabled and later restored.
	 */
	contended = !raw_spin_trylock_irqsave(cpu_lock, flags);
	if (contended) {
		if (fast_path)
			trace_cgroup_rstat_cpu_lock_contended_fastpath(cgrp, cpu, contended);
		else
			trace_cgroup_rstat_cpu_lock_contended(cgrp, cpu, contended);

		raw_spin_lock_irqsave(cpu_lock, flags);
	}

	if (fast_path)
		trace_cgroup_rstat_cpu_locked_fastpath(cgrp, cpu, contended);
	else
		trace_cgroup_rstat_cpu_locked(cgrp, cpu, contended);

	return flags;
}

static __always_inline
void _cgroup_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
			      struct cgroup *cgrp, unsigned long flags,
			      const bool fast_path)
{
	if (fast_path)
		trace_cgroup_rstat_cpu_unlock_fastpath(cgrp, cpu, false);
	else
		trace_cgroup_rstat_cpu_unlock(cgrp, cpu, false);

	raw_spin_unlock_irqrestore(cpu_lock, flags);
}
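
/*
 * For reference (assumed tracefs layout, not verified here): the contention
 * tracepoints used above are normally exposed under the "cgroup" event
 * group, so lock behaviour can be observed in production with something
 * like:
 *
 *	# cd /sys/kernel/tracing
 *	# echo 1 > events/cgroup/cgroup_rstat_cpu_lock_contended/enable
 *	# echo 1 > events/cgroup/cgroup_rstat_lock_contended/enable
 *	# cat trace_pipe
 */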

/**
 * cgroup_rstat_updated - keep track of updated rstat_cpu
 * @cgrp: target cgroup
 * @cpu: cpu on which rstat_cpu was updated
 *
 * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching
 * rstat_cpu->updated_children list. See the comment on top of
 * cgroup_rstat_cpu definition for details.
 */
__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
{
	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
	unsigned long flags;

	/*
	 * Speculative already-on-list test. This may race leading to
	 * temporary inaccuracies, which is fine.
	 *
	 * Because @parent's updated_children is terminated with @parent
	 * instead of NULL, we can tell whether @cgrp is on the list by
	 * testing the next pointer for NULL.
	 */
	if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
		return;

	flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, cgrp, true);

	/* put @cgrp and all ancestors on the corresponding updated lists */
	while (true) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
		struct cgroup *parent = cgroup_parent(cgrp);
		struct cgroup_rstat_cpu *prstatc;

		/*
		 * Both additions and removals are bottom-up. If a cgroup
		 * is already in the tree, all ancestors are.
		 */
		if (rstatc->updated_next)
			break;

		/* Root has no parent to link it to, but mark it busy */
		if (!parent) {
			rstatc->updated_next = cgrp;
			break;
		}

		prstatc = cgroup_rstat_cpu(parent, cpu);
		rstatc->updated_next = prstatc->updated_children;
		prstatc->updated_children = cgrp;

		cgrp = parent;
	}

	_cgroup_rstat_cpu_unlock(cpu_lock, cpu, cgrp, flags, true);
}
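
/*
 * Illustrative sketch (kept as a comment, not part of the kernel API): a
 * controller that bumps a per-cpu counter on a hot path typically pairs
 * the update with cgroup_rstat_updated() and lets a later flush do the
 * aggregation. struct my_ss_pcpu and my_ss_charge() are hypothetical.
 *
 *	struct my_ss_pcpu {
 *		u64 bytes;
 *	};
 *
 *	static void my_ss_charge(struct cgroup *cgrp,
 *				 struct my_ss_pcpu __percpu *pcpu, u64 bytes)
 *	{
 *		int cpu = get_cpu();
 *
 *		this_cpu_add(pcpu->bytes, bytes);	// cheap per-cpu update
 *		cgroup_rstat_updated(cgrp, cpu);	// mark @cgrp dirty on @cpu
 *		put_cpu();
 *	}
 *
 * The speculative updated_next test above makes repeated calls on an
 * already-dirty cgroup nearly free.
 */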

/**
 * cgroup_rstat_push_children - push children cgroups into the given list
 * @head: current head of the list (= subtree root)
 * @child: first child of the root
 * @cpu: target cpu
 * Return: A new singly linked list of cgroups to be flushed
 *
 * Iteratively traverse down the cgroup_rstat_cpu updated tree level by
 * level and push all the parents first before their next level children
 * into a singly linked list built from the tail backward like "pushing"
 * cgroups into a stack. The root is pushed by the caller.
 */
static struct cgroup *cgroup_rstat_push_children(struct cgroup *head,
						 struct cgroup *child, int cpu)
{
	struct cgroup *chead = child;	/* Head of child cgroup level */
	struct cgroup *ghead = NULL;	/* Head of grandchild cgroup level */
	struct cgroup *parent, *grandchild;
	struct cgroup_rstat_cpu *crstatc;

	child->rstat_flush_next = NULL;
next_level:
	while (chead) {
		child = chead;
		chead = child->rstat_flush_next;
		parent = cgroup_parent(child);

		/* updated_next is parent cgroup terminated */
		while (child != parent) {
			child->rstat_flush_next = head;
			head = child;
			crstatc = cgroup_rstat_cpu(child, cpu);
			grandchild = crstatc->updated_children;
			if (grandchild != child) {
				/* Push the grandchild to the next level */
				crstatc->updated_children = child;
				grandchild->rstat_flush_next = ghead;
				ghead = grandchild;
			}
			child = crstatc->updated_next;
			crstatc->updated_next = NULL;
		}
	}

	if (ghead) {
		chead = ghead;
		ghead = NULL;
		goto next_level;
	}
	return head;
}
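
/*
 * Worked example (hypothetical hierarchy, for illustration): assume the
 * updated tree on a CPU covers cgroup A with children B and C, where B
 * also has an updated child D. cgroup_rstat_updated_list() below pushes A
 * first and then hands B to this function. The child level pushes B and C,
 * the grandchild level pushes D, and the resulting flush list is
 *
 *	D -> C -> B -> A
 *
 * i.e. every cgroup appears before its parent, which is the only ordering
 * the flush path relies on.
 */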

/**
 * cgroup_rstat_updated_list - return a list of updated cgroups to be flushed
 * @root: root of the cgroup subtree to traverse
 * @cpu: target cpu
 * Return: A singly linked list of cgroups to be flushed
 *
 * Walks the updated rstat_cpu tree on @cpu from @root. During traversal,
 * each returned cgroup is unlinked from the updated tree.
 *
 * The only ordering guarantee is that, for a parent and a child pair
 * covered by a given traversal, the child is before its parent in
 * the list.
 *
 * Note that updated_children is self-terminated and points to a list of
 * child cgroups if not empty, whereas updated_next is a sibling link
 * within the children list, terminated by the parent cgroup. An exception
 * is the cgroup root, whose updated_next can be self-terminated.
 */
static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
{
	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(root, cpu);
	struct cgroup *head = NULL, *parent, *child;
	unsigned long flags;

	flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, root, false);

	/* Return NULL if this subtree is not on-list */
	if (!rstatc->updated_next)
		goto unlock_ret;

	/*
	 * Unlink @root from its parent. As the updated_children list is
	 * singly linked, we have to walk it to find the removal point.
	 */
	parent = cgroup_parent(root);
	if (parent) {
		struct cgroup_rstat_cpu *prstatc;
		struct cgroup **nextp;

		prstatc = cgroup_rstat_cpu(parent, cpu);
		nextp = &prstatc->updated_children;
		while (*nextp != root) {
			struct cgroup_rstat_cpu *nrstatc;

			nrstatc = cgroup_rstat_cpu(*nextp, cpu);
			WARN_ON_ONCE(*nextp == parent);
			nextp = &nrstatc->updated_next;
		}
		*nextp = rstatc->updated_next;
	}

	rstatc->updated_next = NULL;

	/* Push @root to the list first before pushing the children */
	head = root;
	root->rstat_flush_next = NULL;
	child = rstatc->updated_children;
	rstatc->updated_children = root;
	if (child != root)
		head = cgroup_rstat_push_children(head, child, cpu);
unlock_ret:
	_cgroup_rstat_cpu_unlock(cpu_lock, cpu, root, flags, false);
	return head;
}
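
/*
 * Illustration of the per-cpu link encoding for the same hypothetical
 * A/B/C/D hierarchy, before the walk above unlinks it (the fields shown
 * live in each cgroup's cgroup_rstat_cpu for that CPU):
 *
 *	A->updated_children = B		first updated child of A
 *	B->updated_next     = C		sibling link
 *	C->updated_next     = A		terminated by the parent, A
 *	B->updated_children = D
 *	D->updated_next     = B		terminated by the parent, B
 *	C->updated_children = C		self-terminated: no updated children
 *	D->updated_children = D
 *
 * A non-NULL updated_next therefore means "already on the list", which is
 * exactly what the speculative check in cgroup_rstat_updated() tests.
 */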

/*
 * A hook for bpf stat collectors to attach to and flush their stats.
 * Together with providing bpf kfuncs for cgroup_rstat_updated() and
 * cgroup_rstat_flush(), this enables a complete workflow where bpf progs that
 * collect cgroup stats can integrate with rstat for efficient flushing.
 *
 * A static noinline declaration here could cause the compiler to optimize away
 * the function. A global noinline declaration will keep the definition, but may
 * optimize away the callsite. Therefore, __weak is needed to ensure that the
 * call is still emitted, by telling the compiler that we don't know what the
 * function might eventually be.
 */
__bpf_hook_start();
__weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
				     struct cgroup *parent, int cpu)
{
}
__bpf_hook_end();
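
/*
 * Sketch of the intended BPF side (illustrative only; the map layout and
 * names are hypothetical): a tracing program attaches to bpf_rstat_flush()
 * with fentry and folds its own per-cpu data whenever rstat flushes a
 * cgroup.
 *
 *	SEC("fentry/bpf_rstat_flush")
 *	int BPF_PROG(my_flush, struct cgroup *cgrp, struct cgroup *parent,
 *		     int cpu)
 *	{
 *		// Look up this cgroup's per-cpu counters in a map keyed by
 *		// cgroup id, add them into the aggregated entry, and
 *		// propagate the delta to @parent, mirroring what
 *		// cgroup_base_stat_flush() does for the base stats.
 *		return 0;
 *	}
 *
 * The update side calls the cgroup_rstat_updated() kfunc and readers call
 * the cgroup_rstat_flush() kfunc before reporting, so flushing stays lazy.
 */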

/*
 * Helper functions for locking cgroup_rstat_lock.
 *
 * This makes it easier to diagnose locking issues and contention in
 * production environments. The parameter @cpu_in_loop indicates that the
 * lock was released and re-taken while collecting data from the CPUs. The
 * value -1 is used when obtaining the main lock; otherwise it is the CPU
 * number processed last.
 */
static inline void __cgroup_rstat_lock(struct cgroup *cgrp, int cpu_in_loop)
	__acquires(&cgroup_rstat_lock)
{
	bool contended;

	contended = !spin_trylock_irq(&cgroup_rstat_lock);
	if (contended) {
		trace_cgroup_rstat_lock_contended(cgrp, cpu_in_loop, contended);
		spin_lock_irq(&cgroup_rstat_lock);
	}
	trace_cgroup_rstat_locked(cgrp, cpu_in_loop, contended);
}

static inline void __cgroup_rstat_unlock(struct cgroup *cgrp, int cpu_in_loop)
	__releases(&cgroup_rstat_lock)
{
	trace_cgroup_rstat_unlock(cgrp, cpu_in_loop, false);
	spin_unlock_irq(&cgroup_rstat_lock);
}

/* see cgroup_rstat_flush() */
static void cgroup_rstat_flush_locked(struct cgroup *cgrp)
	__releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
{
	int cpu;

	lockdep_assert_held(&cgroup_rstat_lock);

	for_each_possible_cpu(cpu) {
		struct cgroup *pos = cgroup_rstat_updated_list(cgrp, cpu);

		for (; pos; pos = pos->rstat_flush_next) {
			struct cgroup_subsys_state *css;

			cgroup_base_stat_flush(pos, cpu);
			bpf_rstat_flush(pos, cgroup_parent(pos), cpu);

			rcu_read_lock();
			list_for_each_entry_rcu(css, &pos->rstat_css_list,
						rstat_css_node)
				css->ss->css_rstat_flush(css, cpu);
			rcu_read_unlock();
		}

		/* play nice and yield if necessary */
		if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) {
			__cgroup_rstat_unlock(cgrp, cpu);
			if (!cond_resched())
				cpu_relax();
			__cgroup_rstat_lock(cgrp, cpu);
		}
	}
}

/**
 * cgroup_rstat_flush - flush stats in @cgrp's subtree
 * @cgrp: target cgroup
 *
 * Collect all per-cpu stats in @cgrp's subtree into the global counters
 * and propagate them upwards. After this function returns, all cgroups in
 * the subtree have up-to-date ->stat.
 *
 * This also gets all cgroups in the subtree including @cgrp off the
 * ->updated_children lists.
 *
 * This function may block.
 */
__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
{
	might_sleep();

	__cgroup_rstat_lock(cgrp, -1);
	cgroup_rstat_flush_locked(cgrp);
	__cgroup_rstat_unlock(cgrp, -1);
}
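
/*
 * Illustrative reader-side pattern (hypothetical helper, not part of this
 * file): a controller that reports an aggregated counter flushes first so
 * that cgroup_rstat_flush_locked() has folded all per-cpu deltas into the
 * cgroup-level total before it is read.
 *
 *	static u64 my_ss_read_total(struct cgroup *cgrp)
 *	{
 *		cgroup_rstat_flush(cgrp);		// may sleep
 *		return READ_ONCE(*my_ss_total(cgrp));	// hypothetical accessor
 *	}
 */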

/**
 * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
 * @cgrp: target cgroup
 *
 * Flush stats in @cgrp's subtree and prevent further flushes. Must be
 * paired with cgroup_rstat_flush_release().
 *
 * This function may block.
 */
void cgroup_rstat_flush_hold(struct cgroup *cgrp)
	__acquires(&cgroup_rstat_lock)
{
	might_sleep();
	__cgroup_rstat_lock(cgrp, -1);
	cgroup_rstat_flush_locked(cgrp);
}

/**
 * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
 * @cgrp: cgroup used by tracepoint
 */
void cgroup_rstat_flush_release(struct cgroup *cgrp)
	__releases(&cgroup_rstat_lock)
{
	__cgroup_rstat_unlock(cgrp, -1);
}
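
/*
 * The hold/release pair is for readers that must sample several flushed
 * fields consistently; cgroup_base_stat_cputime_show() below uses it to
 * read sum_exec_runtime and the adjusted utime/stime under the same hold.
 * A minimal sketch of the pattern:
 *
 *	cgroup_rstat_flush_hold(cgrp);
 *	// read multiple cgrp-level counters without racing other flushers
 *	cgroup_rstat_flush_release(cgrp);
 */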

int cgroup_rstat_init(struct cgroup *cgrp)
{
	int cpu;

	/* the root cgrp has rstat_cpu preallocated */
	if (!cgrp->rstat_cpu) {
		cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
		if (!cgrp->rstat_cpu)
			return -ENOMEM;
	}

	/* ->updated_children list is self terminated */
	for_each_possible_cpu(cpu) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

		rstatc->updated_children = cgrp;
		u64_stats_init(&rstatc->bsync);
	}

	return 0;
}

void cgroup_rstat_exit(struct cgroup *cgrp)
{
	int cpu;

	cgroup_rstat_flush(cgrp);

	/* sanity check */
	for_each_possible_cpu(cpu) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

		if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
		    WARN_ON_ONCE(rstatc->updated_next))
			return;
	}

	free_percpu(cgrp->rstat_cpu);
	cgrp->rstat_cpu = NULL;
}

void __init cgroup_rstat_boot(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
}

/*
 * Functions for cgroup basic resource statistics implemented on top of
 * rstat.
 */
static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
				 struct cgroup_base_stat *src_bstat)
{
	dst_bstat->cputime.utime += src_bstat->cputime.utime;
	dst_bstat->cputime.stime += src_bstat->cputime.stime;
	dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
#ifdef CONFIG_SCHED_CORE
	dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
#endif
}

static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
				 struct cgroup_base_stat *src_bstat)
{
	dst_bstat->cputime.utime -= src_bstat->cputime.utime;
	dst_bstat->cputime.stime -= src_bstat->cputime.stime;
	dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime;
#ifdef CONFIG_SCHED_CORE
	dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
#endif
}

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
{
	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup_rstat_cpu *prstatc;
	struct cgroup_base_stat delta;
	unsigned seq;

	/* Root-level stats are sourced from system-wide CPU stats */
	if (!parent)
		return;

	/* fetch the current per-cpu values */
	do {
		seq = __u64_stats_fetch_begin(&rstatc->bsync);
		delta = rstatc->bstat;
	} while (__u64_stats_fetch_retry(&rstatc->bsync, seq));

	/* propagate per-cpu delta to cgroup and per-cpu global statistics */
	cgroup_base_stat_sub(&delta, &rstatc->last_bstat);
	cgroup_base_stat_add(&cgrp->bstat, &delta);
	cgroup_base_stat_add(&rstatc->last_bstat, &delta);
	cgroup_base_stat_add(&rstatc->subtree_bstat, &delta);

	/* propagate cgroup and per-cpu global delta to parent (unless that's root) */
	if (cgroup_parent(parent)) {
		delta = cgrp->bstat;
		cgroup_base_stat_sub(&delta, &cgrp->last_bstat);
		cgroup_base_stat_add(&parent->bstat, &delta);
		cgroup_base_stat_add(&cgrp->last_bstat, &delta);

		delta = rstatc->subtree_bstat;
		prstatc = cgroup_rstat_cpu(parent, cpu);
		cgroup_base_stat_sub(&delta, &rstatc->last_subtree_bstat);
		cgroup_base_stat_add(&prstatc->subtree_bstat, &delta);
		cgroup_base_stat_add(&rstatc->last_subtree_bstat, &delta);
	}
}
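
/*
 * Worked example of the delta bookkeeping above (made-up numbers, utime
 * only): suppose on this CPU rstatc->bstat.utime has grown to 100 while
 * rstatc->last_bstat.utime is still 70 from the previous flush. The flush
 * computes delta = 100 - 70 = 30, adds 30 to cgrp->bstat.utime, and bumps
 * last_bstat.utime to 100 so the next flush only forwards whatever
 * accumulates after this point. The same snapshot-and-subtract scheme is
 * then applied one level up with cgrp->bstat vs cgrp->last_bstat to feed
 * the parent, so each unit of cputime is propagated exactly once.
 */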

static struct cgroup_rstat_cpu *
cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags)
{
	struct cgroup_rstat_cpu *rstatc;

	rstatc = get_cpu_ptr(cgrp->rstat_cpu);
	*flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
	return rstatc;
}

static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
						 struct cgroup_rstat_cpu *rstatc,
						 unsigned long flags)
{
	u64_stats_update_end_irqrestore(&rstatc->bsync, flags);
	cgroup_rstat_updated(cgrp, smp_processor_id());
	put_cpu_ptr(rstatc);
}

void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
	struct cgroup_rstat_cpu *rstatc;
	unsigned long flags;

	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
	rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
	cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}

void __cgroup_account_cputime_field(struct cgroup *cgrp,
				    enum cpu_usage_stat index, u64 delta_exec)
{
	struct cgroup_rstat_cpu *rstatc;
	unsigned long flags;

	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);

	switch (index) {
	case CPUTIME_USER:
	case CPUTIME_NICE:
		rstatc->bstat.cputime.utime += delta_exec;
		break;
	case CPUTIME_SYSTEM:
	case CPUTIME_IRQ:
	case CPUTIME_SOFTIRQ:
		rstatc->bstat.cputime.stime += delta_exec;
		break;
#ifdef CONFIG_SCHED_CORE
	case CPUTIME_FORCEIDLE:
		rstatc->bstat.forceidle_sum += delta_exec;
		break;
#endif
	default:
		break;
	}

	cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}

/*
 * compute the cputime for the root cgroup by getting the per cpu data
 * at a global level, then categorizing the fields in a manner consistent
 * with how it is done by __cgroup_account_cputime_field for each bit of
 * cpu time attributed to a cgroup.
 */
static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
{
	struct task_cputime *cputime = &bstat->cputime;
	int i;

	memset(bstat, 0, sizeof(*bstat));
	for_each_possible_cpu(i) {
		struct kernel_cpustat kcpustat;
		u64 *cpustat = kcpustat.cpustat;
		u64 user = 0;
		u64 sys = 0;

		kcpustat_cpu_fetch(&kcpustat, i);

		user += cpustat[CPUTIME_USER];
		user += cpustat[CPUTIME_NICE];
		cputime->utime += user;

		sys += cpustat[CPUTIME_SYSTEM];
		sys += cpustat[CPUTIME_IRQ];
		sys += cpustat[CPUTIME_SOFTIRQ];
		cputime->stime += sys;

		cputime->sum_exec_runtime += user;
		cputime->sum_exec_runtime += sys;

#ifdef CONFIG_SCHED_CORE
		bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
#endif
	}
}

static void cgroup_force_idle_show(struct seq_file *seq, struct cgroup_base_stat *bstat)
{
#ifdef CONFIG_SCHED_CORE
	u64 forceidle_time = bstat->forceidle_sum;

	do_div(forceidle_time, NSEC_PER_USEC);
	seq_printf(seq, "core_sched.force_idle_usec %llu\n", forceidle_time);
#endif
}

void cgroup_base_stat_cputime_show(struct seq_file *seq)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	u64 usage, utime, stime;

	if (cgroup_parent(cgrp)) {
		cgroup_rstat_flush_hold(cgrp);
		usage = cgrp->bstat.cputime.sum_exec_runtime;
		cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
			       &utime, &stime);
		cgroup_rstat_flush_release(cgrp);
	} else {
		/* cgrp->bstat of root is not actually used, reuse it */
		root_cgroup_cputime(&cgrp->bstat);
		usage = cgrp->bstat.cputime.sum_exec_runtime;
		utime = cgrp->bstat.cputime.utime;
		stime = cgrp->bstat.cputime.stime;
	}

	do_div(usage, NSEC_PER_USEC);
	do_div(utime, NSEC_PER_USEC);
	do_div(stime, NSEC_PER_USEC);

	seq_printf(seq, "usage_usec %llu\n"
		   "user_usec %llu\n"
		   "system_usec %llu\n",
		   usage, utime, stime);

	cgroup_force_idle_show(seq, &cgrp->bstat);
}
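
/*
 * For reference, the fields emitted above appear at the top of a cgroup's
 * cpu.stat file in cgroup v2; the values below are made up for
 * illustration (the force-idle line only shows up with CONFIG_SCHED_CORE):
 *
 *	usage_usec 112233
 *	user_usec 81122
 *	system_usec 31111
 *	core_sched.force_idle_usec 0
 */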

/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
BTF_KFUNCS_START(bpf_rstat_kfunc_ids)
BTF_ID_FLAGS(func, cgroup_rstat_updated)
BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_rstat_kfunc_ids)

static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
	.owner		= THIS_MODULE,
	.set		= &bpf_rstat_kfunc_ids,
};

static int __init bpf_rstat_kfunc_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					 &bpf_rstat_kfunc_set);
}
late_initcall(bpf_rstat_kfunc_init);