timings.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. // SPDX-License-Identifier: GPL-2.0
  2. // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
  3. #include <linux/kernel.h>
  4. #include <linux/percpu.h>
  5. #include <linux/slab.h>
  6. #include <linux/static_key.h>
  7. #include <linux/interrupt.h>
  8. #include <linux/idr.h>
  9. #include <linux/irq.h>
  10. #include <linux/math64.h>
  11. #include <trace/events/irq.h>
  12. #include "internals.h"
  13. DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
  14. DEFINE_PER_CPU(struct irq_timings, irq_timings);
  15. struct irqt_stat {
  16. u64 next_evt;
  17. u64 last_ts;
  18. u64 variance;
  19. u32 avg;
  20. u32 nr_samples;
  21. int anomalies;
  22. int valid;
  23. };
  /*
   * Maps an interrupt number to its per-CPU 'struct irqt_stat'. Entries
   * are inserted by irq_timings_alloc() and removed by irq_timings_free().
   */
  static DEFINE_IDR(irqt_stats);
  25. void irq_timings_enable(void)
  26. {
  27. static_branch_enable(&irq_timing_enabled);
  28. }
  29. void irq_timings_disable(void)
  30. {
  31. static_branch_disable(&irq_timing_enabled);
  32. }
  33. /**
  34. * irqs_update - update the irq timing statistics with a new timestamp
  35. *
  36. * @irqs: an irqt_stat struct pointer
  37. * @ts: the new timestamp
  38. *
  39. * The statistics are computed online, in other words, the code is
  40. * designed to compute the statistics on a stream of values rather
  41. * than doing multiple passes on the values to compute the average,
  42. * then the variance. The integer division introduces a loss of
  43. * precision but with an acceptable error margin regarding the results
  44. * we would have with the double floating precision: we are dealing
  45. * with nanosec, so big numbers, consequently the mantisse is
  46. * negligeable, especially when converting the time in usec
  47. * afterwards.
  48. *
  49. * The computation happens at idle time. When the CPU is not idle, the
  50. * interrupts' timestamps are stored in the circular buffer, when the
  51. * CPU goes idle and this routine is called, all the buffer's values
  52. * are injected in the statistical model continuying to extend the
  53. * statistics from the previous busy-idle cycle.
  54. *
  55. * The observations showed a device will trigger a burst of periodic
  56. * interrupts followed by one or two peaks of longer time, for
  57. * instance when a SD card device flushes its cache, then the periodic
  58. * intervals occur again. A one second inactivity period resets the
  59. * stats, that gives us the certitude the statistical values won't
  60. * exceed 1x10^9, thus the computation won't overflow.
  61. *
  62. * Basically, the purpose of the algorithm is to watch the periodic
  63. * interrupts and eliminate the peaks.
  64. *
  65. * An interrupt is considered periodically stable if the interval of
  66. * its occurences follow the normal distribution, thus the values
  67. * comply with:
  68. *
  69. * avg - 3 x stddev < value < avg + 3 x stddev
  70. *
  71. * Which can be simplified to:
  72. *
  73. * -3 x stddev < value - avg < 3 x stddev
  74. *
  75. * abs(value - avg) < 3 x stddev
  76. *
  77. * In order to save a costly square root computation, we use the
  78. * variance. For the record, stddev = sqrt(variance). The equation
  79. * above becomes:
  80. *
  81. * abs(value - avg) < 3 x sqrt(variance)
  82. *
  83. * And finally we square it:
  84. *
  85. * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
  86. *
  87. * (value - avg) x (value - avg) < 9 x variance
  88. *
  89. * Statistically speaking, any values out of this interval is
  90. * considered as an anomaly and is discarded. However, a normal
  91. * distribution appears when the number of samples is 30 (it is the
  92. * rule of thumb in statistics, cf. "30 samples" on Internet). When
  93. * there are three consecutive anomalies, the statistics are resetted.
  94. *
  95. */
  96. static void irqs_update(struct irqt_stat *irqs, u64 ts)
  97. {
  98. u64 old_ts = irqs->last_ts;
  99. u64 variance = 0;
  100. u64 interval;
  101. s64 diff;
  102. /*
  103. * The timestamps are absolute time values, we need to compute
  104. * the timing interval between two interrupts.
  105. */
  106. irqs->last_ts = ts;
  107. /*
  108. * The interval type is u64 in order to deal with the same
  109. * type in our computation, that prevent mindfuck issues with
  110. * overflow, sign and division.
  111. */
  112. interval = ts - old_ts;
  113. /*
  114. * The interrupt triggered more than one second apart, that
  115. * ends the sequence as predictible for our purpose. In this
  116. * case, assume we have the beginning of a sequence and the
  117. * timestamp is the first value. As it is impossible to
  118. * predict anything at this point, return.
  119. *
  120. * Note the first timestamp of the sequence will always fall
  121. * in this test because the old_ts is zero. That is what we
  122. * want as we need another timestamp to compute an interval.
  123. */
  124. if (interval >= NSEC_PER_SEC) {
  125. memset(irqs, 0, sizeof(*irqs));
  126. irqs->last_ts = ts;
  127. return;
  128. }
  129. /*
  130. * Pre-compute the delta with the average as the result is
  131. * used several times in this function.
  132. */
  133. diff = interval - irqs->avg;
  134. /*
  135. * Increment the number of samples.
  136. */
  137. irqs->nr_samples++;
  138. /*
  139. * Online variance divided by the number of elements if there
  140. * is more than one sample. Normally the formula is division
  141. * by nr_samples - 1 but we assume the number of element will be
  142. * more than 32 and dividing by 32 instead of 31 is enough
  143. * precise.
  144. */
  145. if (likely(irqs->nr_samples > 1))
  146. variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
  147. /*
  148. * The rule of thumb in statistics for the normal distribution
  149. * is having at least 30 samples in order to have the model to
  150. * apply. Values outside the interval are considered as an
  151. * anomaly.
  152. */
  153. if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
  154. /*
  155. * After three consecutive anomalies, we reset the
  156. * stats as it is no longer stable enough.
  157. */
  158. if (irqs->anomalies++ >= 3) {
  159. memset(irqs, 0, sizeof(*irqs));
  160. irqs->last_ts = ts;
  161. return;
  162. }
  163. } else {
  164. /*
  165. * The anomalies must be consecutives, so at this
  166. * point, we reset the anomalies counter.
  167. */
  168. irqs->anomalies = 0;
  169. }
  170. /*
  171. * The interrupt is considered stable enough to try to predict
  172. * the next event on it.
  173. */
  174. irqs->valid = 1;
  175. /*
  176. * Online average algorithm:
  177. *
  178. * new_average = average + ((value - average) / count)
  179. *
  180. * The variance computation depends on the new average
  181. * to be computed here first.
  182. *
  183. */
  184. irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
  185. /*
  186. * Online variance algorithm:
  187. *
  188. * new_variance = variance + (value - average) x (value - new_average)
  189. *
  190. * Warning: irqs->avg is updated with the line above, hence
  191. * 'interval - irqs->avg' is no longer equal to 'diff'
  192. */
  193. irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
  194. /*
  195. * Update the next event
  196. */
  197. irqs->next_evt = ts + irqs->avg;
  198. }
  199. /**
  200. * irq_timings_next_event - Return when the next event is supposed to arrive
  201. *
  202. * During the last busy cycle, the number of interrupts is incremented
  203. * and stored in the irq_timings structure. This information is
  204. * necessary to:
  205. *
  206. * - know if the index in the table wrapped up:
  207. *
  208. * If more than the array size interrupts happened during the
  209. * last busy/idle cycle, the index wrapped up and we have to
  210. * begin with the next element in the array which is the last one
  211. * in the sequence, otherwise it is a the index 0.
  212. *
  213. * - have an indication of the interrupts activity on this CPU
  214. * (eg. irq/sec)
  215. *
  216. * The values are 'consumed' after inserting in the statistical model,
  217. * thus the count is reinitialized.
  218. *
  219. * The array of values **must** be browsed in the time direction, the
  220. * timestamp must increase between an element and the next one.
  221. *
  222. * Returns a nanosec time based estimation of the earliest interrupt,
  223. * U64_MAX otherwise.
  224. */
  225. u64 irq_timings_next_event(u64 now)
  226. {
  227. struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
  228. struct irqt_stat *irqs;
  229. struct irqt_stat __percpu *s;
  230. u64 ts, next_evt = U64_MAX;
  231. int i, irq = 0;
  232. /*
  233. * This function must be called with the local irq disabled in
  234. * order to prevent the timings circular buffer to be updated
  235. * while we are reading it.
  236. */
  237. lockdep_assert_irqs_disabled();
  238. /*
  239. * Number of elements in the circular buffer: If it happens it
  240. * was flushed before, then the number of elements could be
  241. * smaller than IRQ_TIMINGS_SIZE, so the count is used,
  242. * otherwise the array size is used as we wrapped. The index
  243. * begins from zero when we did not wrap. That could be done
  244. * in a nicer way with the proper circular array structure
  245. * type but with the cost of extra computation in the
  246. * interrupt handler hot path. We choose efficiency.
  247. *
  248. * Inject measured irq/timestamp to the statistical model
  249. * while decrementing the counter because we consume the data
  250. * from our circular buffer.
  251. */
  252. for (i = irqts->count & IRQ_TIMINGS_MASK,
  253. irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
  254. irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
  255. irq = irq_timing_decode(irqts->values[i], &ts);
  256. s = idr_find(&irqt_stats, irq);
  257. if (s) {
  258. irqs = this_cpu_ptr(s);
  259. irqs_update(irqs, ts);
  260. }
  261. }
  262. /*
  263. * Look in the list of interrupts' statistics, the earliest
  264. * next event.
  265. */
  266. idr_for_each_entry(&irqt_stats, s, i) {
  267. irqs = this_cpu_ptr(s);
  268. if (!irqs->valid)
  269. continue;
  270. if (irqs->next_evt <= now) {
  271. irq = i;
  272. next_evt = now;
  273. /*
  274. * This interrupt mustn't use in the future
  275. * until new events occur and update the
  276. * statistics.
  277. */
  278. irqs->valid = 0;
  279. break;
  280. }
  281. if (irqs->next_evt < next_evt) {
  282. irq = i;
  283. next_evt = irqs->next_evt;
  284. }
  285. }
  286. return next_evt;
  287. }
  288. void irq_timings_free(int irq)
  289. {
  290. struct irqt_stat __percpu *s;
  291. s = idr_find(&irqt_stats, irq);
  292. if (s) {
  293. free_percpu(s);
  294. idr_remove(&irqt_stats, irq);
  295. }
  296. }
  297. int irq_timings_alloc(int irq)
  298. {
  299. struct irqt_stat __percpu *s;
  300. int id;
  301. /*
  302. * Some platforms can have the same private interrupt per cpu,
  303. * so this function may be be called several times with the
  304. * same interrupt number. Just bail out in case the per cpu
  305. * stat structure is already allocated.
  306. */
  307. s = idr_find(&irqt_stats, irq);
  308. if (s)
  309. return 0;
  310. s = alloc_percpu(*s);
  311. if (!s)
  312. return -ENOMEM;
  313. idr_preload(GFP_KERNEL);
  314. id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
  315. idr_preload_end();
  316. if (id < 0) {
  317. free_percpu(s);
  318. return id;
  319. }
  320. return 0;
  321. }