/*
 * Simple CPU accounting cgroup controller
 */
#include "sched.h"

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and can
 * race with irq/vtime_account on this CPU. We would either get the old
 * or the new value, with a side effect of accounting a slice of irq time
 * to the wrong task when an irq is in progress while we read rq->clock.
 * That is a worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);

static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 0;
}

static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
				  enum cpu_usage_stat idx)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	u64_stats_update_begin(&irqtime->sync);
	cpustat[idx] += delta;
	irqtime->total += delta;
	irqtime->tick_delta += delta;
	u64_stats_update_end(&irqtime->sync);
}
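
/*
 * Reader side (illustrative sketch): code on another CPU that wants a
 * consistent snapshot of the 64-bit counters updated above is expected to
 * pair the u64_stats_update_begin()/end() calls with a fetch/retry loop,
 * along the lines of irq_time_read() in sched.h:
 *
 *	do {
 *		seq   = u64_stats_fetch_begin(&irqtime->sync);
 *		total = irqtime->total;
 *	} while (u64_stats_fetch_retry(&irqtime->sync, seq));
 *
 * On 64-bit kernels the sync member is a no-op and the loads are plain;
 * on 32-bit it guards against torn 64-bit reads.
 */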

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
	s64 delta;
	int cpu;

	if (!sched_clock_irqtime)
		return;

	cpu = smp_processor_id();
	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
	irqtime->irq_start_time += delta;

	/*
	 * We do not account for softirq time from ksoftirqd here.
	 * We want to continue accounting softirq time to the ksoftirqd thread
	 * in that case, so as not to confuse the scheduler with a special task
	 * that does not consume any time but still wants to run.
	 */
	if (hardirq_count())
		irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
		irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);

static u64 irqtime_tick_accounted(u64 maxtime)
{
	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
	u64 delta;

	delta = min(irqtime->tick_delta, maxtime);
	irqtime->tick_delta -= delta;

	return delta;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime	(0)

static u64 irqtime_tick_accounted(u64 dummy)
{
	return 0;
}

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
	/*
	 * Since all updates are sure to touch the root cgroup, we
	 * get ourselves ahead and touch it first. If the root cgroup
	 * is the only cgroup, then nothing else should be necessary.
	 */
	__this_cpu_add(kernel_cpustat.cpustat[index], tmp);

	cgroup_account_cputime_field(p, index, tmp);
}

/*
 * Account user CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in user space since the last update
 */
void account_user_time(struct task_struct *p, u64 cputime)
{
	int index;

	/* Add user time to process. */
	p->utime += cputime;
	account_group_user_time(p, cputime);

	index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

	/* Add user time to cpustat. */
	task_group_account_field(p, index, cputime);

	/* Account for user time used */
	acct_account_cputime(p);
}

/*
 * Account guest CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in virtual machine since the last update
 */
void account_guest_time(struct task_struct *p, u64 cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Add guest time to process. */
	p->utime += cputime;
	account_group_user_time(p, cputime);
	p->gtime += cputime;

	/* Add guest time to cpustat. */
	if (task_nice(p) > 0) {
		cpustat[CPUTIME_NICE] += cputime;
		cpustat[CPUTIME_GUEST_NICE] += cputime;
	} else {
		cpustat[CPUTIME_USER] += cputime;
		cpustat[CPUTIME_GUEST] += cputime;
	}
}
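
/*
 * Note: because guest time is added to both the USER/NICE and the
 * GUEST/GUEST_NICE buckets above, the "guest" columns exported through
 * kcpustat (e.g. /proc/stat) describe a subset of user/nice time rather
 * than an additional bucket; consumers summing all columns should not
 * count guest time twice.
 */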

/*
 * Account system CPU time to a process and desired cpustat field
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in kernel space since the last update
 * @index: the cpustat field that has to be updated
 */
void account_system_index_time(struct task_struct *p,
			       u64 cputime, enum cpu_usage_stat index)
{
	/* Add system time to process. */
	p->stime += cputime;
	account_group_system_time(p, cputime);

	/* Add system time to cpustat. */
	task_group_account_field(p, index, cputime);

	/* Account for system time used */
	acct_account_cputime(p);
}

/*
 * Account system CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the CPU time spent in kernel space since the last update
 */
void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
{
	int index;

	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
		account_guest_time(p, cputime);
		return;
	}

	if (hardirq_count() - hardirq_offset)
		index = CPUTIME_IRQ;
	else if (in_serving_softirq())
		index = CPUTIME_SOFTIRQ;
	else
		index = CPUTIME_SYSTEM;

	account_system_index_time(p, cputime, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the CPU time spent in involuntary wait
 */
void account_steal_time(u64 cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	cpustat[CPUTIME_STEAL] += cputime;
}

/*
 * Account for idle time.
 * @cputime: the CPU time spent in idle wait
 */
void account_idle_time(u64 cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	struct rq *rq = this_rq();

	if (atomic_read(&rq->nr_iowait) > 0)
		cpustat[CPUTIME_IOWAIT] += cputime;
	else
		cpustat[CPUTIME_IDLE] += cputime;
}

/*
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may on
 * occasion account more time than the calling functions think elapsed.
 */
static __always_inline u64 steal_account_process_time(u64 maxtime)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		u64 steal;

		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;
		steal = min(steal, maxtime);
		account_steal_time(steal);
		this_rq()->prev_steal_time += steal;

		return steal;
	}
#endif
	return 0;
}

/*
 * Account how much elapsed time was spent in steal, irq, or softirq time.
 */
static inline u64 account_other_time(u64 max)
{
	u64 accounted;

	lockdep_assert_irqs_disabled();

	accounted = steal_account_process_time(max);

	if (accounted < max)
		accounted += irqtime_tick_accounted(max - accounted);

	return accounted;
}
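
/*
 * Worked example (illustrative): if account_other_time() is called with
 * max = 1,000,000 ns while 300,000 ns of fresh steal time and 900,000 ns of
 * pending irq/softirq time have accumulated, it accounts the 300,000 ns of
 * steal, then caps the irq contribution at the remaining 700,000 ns and
 * returns 1,000,000; the 200,000 ns left in tick_delta will be picked up by
 * a later call.
 */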

#ifdef CONFIG_64BIT
static inline u64 read_sum_exec_runtime(struct task_struct *t)
{
	return t->se.sum_exec_runtime;
}
#else
static u64 read_sum_exec_runtime(struct task_struct *t)
{
	u64 ns;
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(t, &rf);
	ns = t->se.sum_exec_runtime;
	task_rq_unlock(rq, t, &rf);

	return ns;
}
#endif

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	u64 utime, stime;
	struct task_struct *t;
	unsigned int seq, nextseq;
	unsigned long flags;

	/*
	 * Update current task runtime to account pending time since last
	 * scheduler action or thread_group_cputime() call. This thread group
	 * might have other running tasks on different CPUs, but updating
	 * their runtime can affect syscall performance, so we skip accounting
	 * those pending times and rely only on values updated on tick or
	 * other scheduler action.
	 */
	if (same_thread_group(current, tsk))
		(void) task_sched_runtime(current);

	rcu_read_lock();
	/* Attempt a lockless read on the first round. */
	nextseq = 0;
	do {
		seq = nextseq;
		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
		times->utime = sig->utime;
		times->stime = sig->stime;
		times->sum_exec_runtime = sig->sum_sched_runtime;

		for_each_thread(tsk, t) {
			task_cputime(t, &utime, &stime);
			times->utime += utime;
			times->stime += stime;
			times->sum_exec_runtime += read_sum_exec_runtime(t);
		}
		/* If lockless access failed, take the lock. */
		nextseq = 1;
	} while (need_seqretry(&sig->stats_lock, seq));
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
	rcu_read_unlock();
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the CPU time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as there is
 * no timer going off while we are on hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on
 * irq/softirq, as those do not count in task exec_runtime any more.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
					 struct rq *rq, int ticks)
{
	u64 other, cputime = TICK_NSEC * ticks;

	/*
	 * When returning from idle, many ticks can get accounted at
	 * once, including some ticks of steal, irq, and softirq time.
	 * Subtract those ticks from the amount of time accounted to
	 * idle, or potentially user or system time. Due to rounding,
	 * other time can exceed ticks occasionally.
	 */
	other = account_other_time(ULONG_MAX);
	if (other >= cputime)
		return;

	cputime -= other;

	if (this_cpu_ksoftirqd() == p) {
		/*
		 * ksoftirqd time does not get accounted in cpu_softirq_time.
		 * So, we have to handle it separately here.
		 * Also, p->stime needs to be updated for ksoftirqd.
		 */
		account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
	} else if (user_tick) {
		account_user_time(p, cputime);
	} else if (p == rq->idle) {
		account_idle_time(cputime);
	} else if (p->flags & PF_VCPU) { /* System time or guest time */
		account_guest_time(p, cputime);
	} else {
		account_system_index_time(p, cputime, CPUTIME_SYSTEM);
	}
}
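
/*
 * Illustrative example: when a nohz-idle CPU catches up on 5 missed ticks
 * (ticks = 5, cputime = 5 * TICK_NSEC) and account_other_time() reports that
 * 2 ticks' worth of that window was really steal/irq/softirq time, only the
 * remaining 3 ticks' worth reaches the demultiplexing chain above (idle time
 * in this case, since irqtime_account_idle_ticks() passes what is typically
 * the idle task). If "other" covers the whole window, nothing more is
 * accounted here.
 */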

static void irqtime_account_idle_ticks(int ticks)
{
	struct rq *rq = this_rq();

	irqtime_account_process_tick(current, 0, rq, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) { }
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq, int nr_ticks) { }
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
# ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_common_task_switch(struct task_struct *prev)
{
	if (is_idle_task(prev))
		vtime_account_idle(prev);
	else
		vtime_account_system(prev);

	vtime_flush(prev);
	arch_vtime_task_switch(prev);
}
# endif
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * define idle time differently (s390 only includes the time spent
 * by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account_irq_enter(struct task_struct *tsk)
{
	if (!in_interrupt() && is_idle_task(tsk))
		vtime_account_idle(tsk);
	else
		vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */

void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
		    u64 *ut, u64 *st)
{
	*ut = curr->utime;
	*st = curr->stime;
}

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	*ut = p->utime;
	*st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);

	*ut = cputime.utime;
	*st = cputime.stime;
}

#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */

/*
 * Account a single tick of CPU time.
 * @p: the process that the CPU time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	u64 cputime, steal;
	struct rq *rq = this_rq();

	if (vtime_accounting_cpu_enabled())
		return;

	if (sched_clock_irqtime) {
		irqtime_account_process_tick(p, user_tick, rq, 1);
		return;
	}

	cputime = TICK_NSEC;
	steal = steal_account_process_time(ULONG_MAX);

	if (steal >= cputime)
		return;

	cputime -= steal;

	if (user_tick)
		account_user_time(p, cputime);
	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
		account_system_time(p, HARDIRQ_OFFSET, cputime);
	else
		account_idle_time(cputime);
}
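
/*
 * Context (illustrative): the periodic timer tick is expected to drive this
 * path; update_process_times() typically calls
 * account_process_tick(current, user_tick) once per tick on each CPU, so
 * every tick of a task's time lands in exactly one of the user, system or
 * idle buckets above, with steal carved out first.
 */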

/*
 * Account multiple ticks of idle time.
 * @ticks: number of stolen ticks
 */
void account_idle_ticks(unsigned long ticks)
{
	u64 cputime, steal;

	if (sched_clock_irqtime) {
		irqtime_account_idle_ticks(ticks);
		return;
	}

	cputime = ticks * TICK_NSEC;
	steal = steal_account_process_time(ULONG_MAX);

	if (steal >= cputime)
		return;

	cputime -= steal;
	account_idle_time(cputime);
}

/*
 * Perform (stime * rtime) / total, but avoid multiplication overflow by
 * losing precision when the numbers are big.
 */
static u64 scale_stime(u64 stime, u64 rtime, u64 total)
{
	u64 scaled;

	for (;;) {
		/* Make sure "rtime" is the bigger of stime/rtime */
		if (stime > rtime)
			swap(rtime, stime);

		/* Make sure 'total' fits in 32 bits */
		if (total >> 32)
			goto drop_precision;

		/* Does rtime (and thus stime) fit in 32 bits? */
		if (!(rtime >> 32))
			break;

		/* Can we just balance rtime/stime rather than dropping bits? */
		if (stime >> 31)
			goto drop_precision;

		/* We can grow stime and shrink rtime and try to make them both fit */
		stime <<= 1;
		rtime >>= 1;
		continue;

drop_precision:
		/* We drop from rtime, it has more bits than stime */
		rtime >>= 1;
		total >>= 1;
	}

	/*
	 * Make sure gcc understands that this is a 32x32->64 multiply,
	 * followed by a 64/32->64 divide.
	 */
	scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
	return scaled;
}
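
/*
 * Worked example (illustrative): with stime = 2s, utime = 6s and rtime = 4s,
 * all in nanoseconds, cputime_adjust() below ends up calling
 * scale_stime(2e9, 4e9, 8e9).  The total (8e9) does not fit in 32 bits, so
 * one drop_precision pass halves rtime and total; after that everything fits
 * and the result is 2e9 * 2e9 / 4e9 = 1e9, i.e. stime becomes a quarter of
 * rtime, matching the observed 1:3 stime:utime ratio.
 */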

/*
 * Adjust tick based cputime random precision against scheduler runtime
 * accounting.
 *
 * Tick based cputime accounting depends on random scheduling timeslices of a
 * task being interrupted or not by the timer. Depending on these
 * circumstances, the number of these interrupts may over- or under-estimate
 * the real user and system cputime with a variable precision.
 *
 * Fix this by scaling these tick based values against the total runtime
 * accounted by the CFS scheduler.
 *
 * This code provides the following guarantees:
 *
 *   stime + utime == rtime
 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 *
 * Assuming that rtime_i+1 >= rtime_i.
 */
void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
		    u64 *ut, u64 *st)
{
	u64 rtime, stime, utime;
	unsigned long flags;

	/* Serialize concurrent callers such that we can honour our guarantees */
	raw_spin_lock_irqsave(&prev->lock, flags);
	rtime = curr->sum_exec_runtime;

	/*
	 * This is possible under two circumstances:
	 *  - rtime isn't monotonic after all (a bug);
	 *  - we got reordered by the lock.
	 *
	 * In both cases this acts as a filter such that the rest of the code
	 * can assume it is monotonic regardless of anything else.
	 */
	if (prev->stime + prev->utime >= rtime)
		goto out;

	stime = curr->stime;
	utime = curr->utime;

	/*
	 * If either stime or utime is 0, attribute all of rtime to the other
	 * one. Once a task gets some ticks, the monotonicity code at 'update:'
	 * will ensure things converge to the observed ratio.
	 */
	if (stime == 0) {
		utime = rtime;
		goto update;
	}

	if (utime == 0) {
		stime = rtime;
		goto update;
	}

	stime = scale_stime(stime, rtime, stime + utime);

update:
	/*
	 * Make sure stime doesn't go backwards; this preserves monotonicity
	 * for utime because rtime is monotonic.
	 *
	 *  utime_i+1 = rtime_i+1 - stime_i
	 *            = rtime_i+1 - (rtime_i - utime_i)
	 *            = (rtime_i+1 - rtime_i) + utime_i
	 *            >= utime_i
	 */
	if (stime < prev->stime)
		stime = prev->stime;
	utime = rtime - stime;

	/*
	 * Make sure utime doesn't go backwards; this still preserves
	 * monotonicity for stime, analogous argument to above.
	 */
	if (utime < prev->utime) {
		utime = prev->utime;
		stime = rtime - utime;
	}

	prev->stime = stime;
	prev->utime = utime;
out:
	*ut = prev->utime;
	*st = prev->stime;
	raw_spin_unlock_irqrestore(&prev->lock, flags);
}
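
/*
 * Illustrative walk-through of the guarantees above (arbitrary time units):
 * suppose the previously reported values were prev->stime = 2 and
 * prev->utime = 3 (so 5 total), and the new snapshot is curr->stime = 4,
 * curr->utime = 4 with rtime = 10.  scale_stime(4, 10, 8) yields 5, which is
 * not below prev->stime, so the function reports stime = 5 and
 * utime = rtime - stime = 5: both values grew and their sum equals rtime.
 * Had the scaled stime come out below prev->stime, it would have been
 * clamped up to prev->stime and utime recomputed from rtime instead.
 */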

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	struct task_cputime cputime = {
		.sum_exec_runtime = p->se.sum_exec_runtime,
	};

	task_cputime(p, &cputime.utime, &cputime.stime);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);
	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static u64 vtime_delta(struct vtime *vtime)
{
	unsigned long long clock;

	clock = sched_clock();
	if (clock < vtime->starttime)
		return 0;

	return clock - vtime->starttime;
}

static u64 get_vtime_delta(struct vtime *vtime)
{
	u64 delta = vtime_delta(vtime);
	u64 other;

	/*
	 * Unlike tick based timing, vtime based timing never has lost
	 * ticks, and there is no need for steal time accounting to make
	 * up for lost ticks. Vtime accounts a rounded version of actual
	 * elapsed time. Limit account_other_time to prevent rounding
	 * errors from causing elapsed vtime to go negative.
	 */
	other = account_other_time(delta);
	WARN_ON_ONCE(vtime->state == VTIME_INACTIVE);
	vtime->starttime += delta;

	return delta - other;
}
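
/*
 * Illustrative example: if 4,000,000 ns elapsed since vtime->starttime and
 * account_other_time() attributes 1,000,000 ns of that window to
 * steal/irq/softirq, get_vtime_delta() advances starttime by the full
 * 4,000,000 ns but returns only 3,000,000 ns for the caller to add to
 * stime/utime/gtime.  Because "other" is capped at delta, the result can
 * never go negative.
 */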

static void __vtime_account_system(struct task_struct *tsk,
				   struct vtime *vtime)
{
	vtime->stime += get_vtime_delta(vtime);
	if (vtime->stime >= TICK_NSEC) {
		account_system_time(tsk, irq_count(), vtime->stime);
		vtime->stime = 0;
	}
}

static void vtime_account_guest(struct task_struct *tsk,
				struct vtime *vtime)
{
	vtime->gtime += get_vtime_delta(vtime);
	if (vtime->gtime >= TICK_NSEC) {
		account_guest_time(tsk, vtime->gtime);
		vtime->gtime = 0;
	}
}

void vtime_account_system(struct task_struct *tsk)
{
	struct vtime *vtime = &tsk->vtime;

	if (!vtime_delta(vtime))
		return;

	write_seqcount_begin(&vtime->seqcount);
	/* We might have scheduled out from guest path */
	if (tsk->flags & PF_VCPU)
		vtime_account_guest(tsk, vtime);
	else
		__vtime_account_system(tsk, vtime);
	write_seqcount_end(&vtime->seqcount);
}

void vtime_user_enter(struct task_struct *tsk)
{
	struct vtime *vtime = &tsk->vtime;

	write_seqcount_begin(&vtime->seqcount);
	__vtime_account_system(tsk, vtime);
	vtime->state = VTIME_USER;
	write_seqcount_end(&vtime->seqcount);
}

void vtime_user_exit(struct task_struct *tsk)
{
	struct vtime *vtime = &tsk->vtime;

	write_seqcount_begin(&vtime->seqcount);
	vtime->utime += get_vtime_delta(vtime);
	if (vtime->utime >= TICK_NSEC) {
		account_user_time(tsk, vtime->utime);
		vtime->utime = 0;
	}
	vtime->state = VTIME_SYS;
	write_seqcount_end(&vtime->seqcount);
}

void vtime_guest_enter(struct task_struct *tsk)
{
	struct vtime *vtime = &tsk->vtime;

	/*
	 * The flags must be updated under the lock, together with
	 * the vtime_starttime flush and update.
	 * That enforces the right ordering and update sequence
	 * synchronization against the reader (task_gtime()),
	 * which can thus safely catch up with a tickless delta.
	 */
	write_seqcount_begin(&vtime->seqcount);
	__vtime_account_system(tsk, vtime);
	tsk->flags |= PF_VCPU;
	write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

void vtime_guest_exit(struct task_struct *tsk)
{
	struct vtime *vtime = &tsk->vtime;

	write_seqcount_begin(&vtime->seqcount);
	vtime_account_guest(tsk, vtime);
	tsk->flags &= ~PF_VCPU;
	write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
	account_idle_time(get_vtime_delta(&tsk->vtime));
}

void arch_vtime_task_switch(struct task_struct *prev)
{
	struct vtime *vtime = &prev->vtime;

	write_seqcount_begin(&vtime->seqcount);
	vtime->state = VTIME_INACTIVE;
	write_seqcount_end(&vtime->seqcount);

	vtime = &current->vtime;

	write_seqcount_begin(&vtime->seqcount);
	vtime->state = VTIME_SYS;
	vtime->starttime = sched_clock();
	write_seqcount_end(&vtime->seqcount);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
	struct vtime *vtime = &t->vtime;
	unsigned long flags;

	local_irq_save(flags);
	write_seqcount_begin(&vtime->seqcount);
	vtime->state = VTIME_SYS;
	vtime->starttime = sched_clock();
	write_seqcount_end(&vtime->seqcount);
	local_irq_restore(flags);
}

u64 task_gtime(struct task_struct *t)
{
	struct vtime *vtime = &t->vtime;
	unsigned int seq;
	u64 gtime;

	if (!vtime_accounting_enabled())
		return t->gtime;

	do {
		seq = read_seqcount_begin(&vtime->seqcount);

		gtime = t->gtime;
		if (vtime->state == VTIME_SYS && t->flags & PF_VCPU)
			gtime += vtime->gtime + vtime_delta(vtime);

	} while (read_seqcount_retry(&vtime->seqcount, seq));

	return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
{
	struct vtime *vtime = &t->vtime;
	unsigned int seq;
	u64 delta;

	if (!vtime_accounting_enabled()) {
		*utime = t->utime;
		*stime = t->stime;
		return;
	}

	do {
		seq = read_seqcount_begin(&vtime->seqcount);

		*utime = t->utime;
		*stime = t->stime;

		/* Task is sleeping, nothing to add */
		if (vtime->state == VTIME_INACTIVE || is_idle_task(t))
			continue;

		delta = vtime_delta(vtime);

		/*
		 * Task runs either in user or kernel space, add pending nohz
		 * time to the right place.
		 */
		if (vtime->state == VTIME_USER || t->flags & PF_VCPU)
			*utime += vtime->utime + delta;
		else if (vtime->state == VTIME_SYS)
			*stime += vtime->stime + delta;
	} while (read_seqcount_retry(&vtime->seqcount, seq));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */