// SPDX-License-Identifier: GPL-2.0
/*
 * Detect hard lockups on a system using perf
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */
#define pr_fmt(fmt) "NMI watchdog: " fmt

#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/sched/debug.h>

#include <asm/irq_regs.h>
#include <linux/perf_event.h>
  19. static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
  20. static atomic_t watchdog_cpus = ATOMIC_INIT(0);
  21. #ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
  22. static DEFINE_PER_CPU(ktime_t, last_timestamp);
  23. static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
  24. static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
  25. void watchdog_update_hrtimer_threshold(u64 period)
  26. {
  27. /*
  28. * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
  29. *
  30. * So it runs effectively with 2.5 times the rate of the NMI
  31. * watchdog. That means the hrtimer should fire 2-3 times before
  32. * the NMI watchdog expires. The NMI watchdog on x86 is based on
  33. * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
  34. * might run way faster than expected and the NMI fires in a
  35. * smaller period than the one deduced from the nominal CPU
  36. * frequency. Depending on the Turbo-Mode factor this might be fast
  37. * enough to get the NMI period smaller than the hrtimer watchdog
  38. * period and trigger false positives.
  39. *
  40. * The sample threshold is used to check in the NMI handler whether
  41. * the minimum time between two NMI samples has elapsed. That
  42. * prevents false positives.
  43. *
  44. * Set this to 4/5 of the actual watchdog threshold period so the
  45. * hrtimer is guaranteed to fire at least once within the real
  46. * watchdog threshold.
  47. */
  48. watchdog_hrtimer_sample_threshold = period * 2;
  49. }
  50. static bool watchdog_check_timestamp(void)
  51. {
  52. ktime_t delta, now = ktime_get_mono_fast_ns();
  53. delta = now - __this_cpu_read(last_timestamp);
  54. if (delta < watchdog_hrtimer_sample_threshold) {
  55. /*
  56. * If ktime is jiffies based, a stalled timer would prevent
  57. * jiffies from being incremented and the filter would look
  58. * at a stale timestamp and never trigger.
  59. */
  60. if (__this_cpu_inc_return(nmi_rearmed) < 10)
  61. return false;
  62. }
  63. __this_cpu_write(nmi_rearmed, 0);
  64. __this_cpu_write(last_timestamp, now);
  65. return true;
  66. }
  67. static void watchdog_init_timestamp(void)
  68. {
  69. __this_cpu_write(nmi_rearmed, 0);
  70. __this_cpu_write(last_timestamp, ktime_get_mono_fast_ns());
  71. }
  72. #else
  73. static inline bool watchdog_check_timestamp(void) { return true; }
  74. static inline void watchdog_init_timestamp(void) { }
  75. #endif
  76. static struct perf_event_attr wd_hw_attr = {
  77. .type = PERF_TYPE_HARDWARE,
  78. .config = PERF_COUNT_HW_CPU_CYCLES,
  79. .size = sizeof(struct perf_event_attr),
  80. .pinned = 1,
  81. .disabled = 1,
  82. };
  83. static struct perf_event_attr fallback_wd_hw_attr = {
  84. .type = PERF_TYPE_HARDWARE,
  85. .config = PERF_COUNT_HW_CPU_CYCLES,
  86. .size = sizeof(struct perf_event_attr),
  87. .pinned = 1,
  88. .disabled = 1,
  89. };
  90. /* Callback function for perf event subsystem */
  91. static void watchdog_overflow_callback(struct perf_event *event,
  92. struct perf_sample_data *data,
  93. struct pt_regs *regs)
  94. {
  95. /* Ensure the watchdog never gets throttled */
  96. event->hw.interrupts = 0;
  97. if (!watchdog_check_timestamp())
  98. return;
  99. watchdog_hardlockup_check(smp_processor_id(), regs);
  100. }
  101. static int hardlockup_detector_event_create(void)
  102. {
  103. unsigned int cpu;
  104. struct perf_event_attr *wd_attr;
  105. struct perf_event *evt;
  106. /*
  107. * Preemption is not disabled because memory will be allocated.
  108. * Ensure CPU-locality by calling this in per-CPU kthread.
  109. */
  110. WARN_ON(!is_percpu_thread());
  111. cpu = raw_smp_processor_id();
  112. wd_attr = &wd_hw_attr;
  113. wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
  114. /* Try to register using hardware perf events */
  115. evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
  116. watchdog_overflow_callback, NULL);
  117. if (IS_ERR(evt)) {
  118. wd_attr = &fallback_wd_hw_attr;
  119. wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
  120. evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
  121. watchdog_overflow_callback, NULL);
  122. }
  123. if (IS_ERR(evt)) {
  124. pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
  125. PTR_ERR(evt));
  126. return PTR_ERR(evt);
  127. }
  128. this_cpu_write(watchdog_ev, evt);
  129. return 0;
  130. }
  131. /**
  132. * watchdog_hardlockup_enable - Enable the local event
  133. * @cpu: The CPU to enable hard lockup on.
  134. */
  135. void watchdog_hardlockup_enable(unsigned int cpu)
  136. {
  137. WARN_ON_ONCE(cpu != smp_processor_id());
  138. if (hardlockup_detector_event_create())
  139. return;
  140. /* use original value for check */
  141. if (!atomic_fetch_inc(&watchdog_cpus))
  142. pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
  143. watchdog_init_timestamp();
  144. perf_event_enable(this_cpu_read(watchdog_ev));
  145. }
  146. /**
  147. * watchdog_hardlockup_disable - Disable the local event
  148. * @cpu: The CPU to enable hard lockup on.
  149. */
  150. void watchdog_hardlockup_disable(unsigned int cpu)
  151. {
  152. struct perf_event *event = this_cpu_read(watchdog_ev);
  153. WARN_ON_ONCE(cpu != smp_processor_id());
  154. if (event) {
  155. perf_event_disable(event);
  156. perf_event_release_kernel(event);
  157. this_cpu_write(watchdog_ev, NULL);
  158. atomic_dec(&watchdog_cpus);
  159. }
  160. }
  161. /**
  162. * hardlockup_detector_perf_stop - Globally stop watchdog events
  163. *
  164. * Special interface for x86 to handle the perf HT bug.
  165. */
  166. void __init hardlockup_detector_perf_stop(void)
  167. {
  168. int cpu;
  169. lockdep_assert_cpus_held();
  170. for_each_online_cpu(cpu) {
  171. struct perf_event *event = per_cpu(watchdog_ev, cpu);
  172. if (event)
  173. perf_event_disable(event);
  174. }
  175. }
  176. /**
  177. * hardlockup_detector_perf_restart - Globally restart watchdog events
  178. *
  179. * Special interface for x86 to handle the perf HT bug.
  180. */
  181. void __init hardlockup_detector_perf_restart(void)
  182. {
  183. int cpu;
  184. lockdep_assert_cpus_held();
  185. if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
  186. return;
  187. for_each_online_cpu(cpu) {
  188. struct perf_event *event = per_cpu(watchdog_ev, cpu);
  189. if (event)
  190. perf_event_enable(event);
  191. }
  192. }
  193. bool __weak __init arch_perf_nmi_is_available(void)
  194. {
  195. return true;
  196. }
  197. /**
  198. * watchdog_hardlockup_probe - Probe whether NMI event is available at all
  199. */
  200. int __init watchdog_hardlockup_probe(void)
  201. {
  202. int ret;
  203. if (!arch_perf_nmi_is_available())
  204. return -ENODEV;
  205. ret = hardlockup_detector_event_create();
  206. if (ret) {
  207. pr_info("Perf NMI watchdog permanently disabled\n");
  208. } else {
  209. perf_event_release_kernel(this_cpu_read(watchdog_ev));
  210. this_cpu_write(watchdog_ev, NULL);
  211. }
  212. return ret;
  213. }
  214. /**
  215. * hardlockup_config_perf_event - Overwrite config of wd_hw_attr.
  216. * @str: number which identifies the raw perf event to use
  217. */
  218. void __init hardlockup_config_perf_event(const char *str)
  219. {
  220. u64 config;
  221. char buf[24];
  222. char *comma = strchr(str, ',');
  223. if (!comma) {
  224. if (kstrtoull(str, 16, &config))
  225. return;
  226. } else {
  227. unsigned int len = comma - str;
  228. if (len >= sizeof(buf))
  229. return;
  230. if (strscpy(buf, str, sizeof(buf)) < 0)
  231. return;
  232. buf[len] = 0;
  233. if (kstrtoull(buf, 16, &config))
  234. return;
  235. }
  236. wd_hw_attr.type = PERF_TYPE_RAW;
  237. wd_hw_attr.config = config;
  238. }