irq_work.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
 *
 * Provides a framework for enqueueing and running callbacks from hardirq
 * context. The enqueueing is NMI-safe.
 */

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <asm/processor.h>
#include <linux/kasan.h>

#include <trace/events/ipi.h>
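
/*
 * Per-CPU queues used below: work on raised_list runs from the irq_work
 * interrupt raised via arch_irq_work_raise() (or from the timer tick on
 * architectures without such an interrupt). Work on lazy_list runs from the
 * next timer tick or, on PREEMPT_RT, from the per-CPU "irq_work/%u" thread
 * stored in irq_workd; on PREEMPT_RT, items not marked IRQ_WORK_HARD_IRQ are
 * also routed to lazy_list.
 */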

static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(struct llist_head, lazy_list);
static DEFINE_PER_CPU(struct task_struct *, irq_workd);

static void wake_irq_workd(void)
{
	struct task_struct *tsk = __this_cpu_read(irq_workd);

	if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
		wake_up_process(tsk);
}

#ifdef CONFIG_SMP
static void irq_work_wake(struct irq_work *entry)
{
	wake_irq_workd();
}

static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
	IRQ_WORK_INIT_HARD(irq_work_wake);
#endif

static int irq_workd_should_run(unsigned int cpu)
{
	return !llist_empty(this_cpu_ptr(&lazy_list));
}

/*
 * Claim the entry so that no one else will poke at it.
 */
static bool irq_work_claim(struct irq_work *work)
{
	int oflags;

	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags);
	/*
	 * If the work is already pending, no need to raise the IPI.
	 * The pairing smp_mb() in irq_work_single() makes sure
	 * everything we did before is visible.
	 */
	if (oflags & IRQ_WORK_PENDING)
		return false;

	return true;
}
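
/*
 * Flag life cycle (summary of the logic in this file): irq_work_claim() sets
 * IRQ_WORK_CLAIMED (PENDING | BUSY) atomically; irq_work_single() clears
 * PENDING before invoking the callback, so the work can be re-queued from
 * within its own callback, and then drops BUSY unless someone has claimed
 * the work again in the meantime.
 */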

void __weak arch_irq_work_raise(void)
{
	/*
	 * Lame architectures will get the timer tick callback
	 */
}

static __always_inline void irq_work_raise(struct irq_work *work)
{
	if (trace_ipi_send_cpu_enabled() && arch_irq_work_has_interrupt())
		trace_ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func);

	arch_irq_work_raise();
}

/* Enqueue on current CPU, work must already be claimed and preempt disabled */
static void __irq_work_queue_local(struct irq_work *work)
{
	struct llist_head *list;
	bool rt_lazy_work = false;
	bool lazy_work = false;
	int work_flags;

	work_flags = atomic_read(&work->node.a_flags);
	if (work_flags & IRQ_WORK_LAZY)
		lazy_work = true;
	else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
		 !(work_flags & IRQ_WORK_HARD_IRQ))
		rt_lazy_work = true;

	if (lazy_work || rt_lazy_work)
		list = this_cpu_ptr(&lazy_list);
	else
		list = this_cpu_ptr(&raised_list);

	if (!llist_add(&work->node.llist, list))
		return;

	/* If the work is "lazy", handle it from next tick if any */
	if (!lazy_work || tick_nohz_tick_stopped())
		irq_work_raise(work);
}

/* Enqueue the irq work @work on the current CPU */
bool irq_work_queue(struct irq_work *work)
{
	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return false;

	/* Queue the entry and raise the IPI if needed. */
	preempt_disable();
	__irq_work_queue_local(work);
	preempt_enable();

	return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue);
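
/*
 * Illustrative usage sketch (not part of the original file; all names below
 * are hypothetical). A caller typically defines a struct irq_work with its
 * callback and queues it from a context that cannot sleep or take sleeping
 * locks, e.g. NMI or hardirq. The callback then runs later on the same CPU:
 * from hardirq context on a !PREEMPT_RT kernel, or from the irq_workd thread
 * on PREEMPT_RT since the item below is not marked IRQ_WORK_HARD_IRQ.
 *
 *	static void my_work_fn(struct irq_work *work)
 *	{
 *		pr_info("irq_work callback ran\n");
 *	}
 *	static DEFINE_IRQ_WORK(my_work, my_work_fn);
 *
 *	...
 *	irq_work_queue(&my_work);
 *
 * irq_work_queue() returns false if @my_work was already pending.
 */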

/*
 * Enqueue the irq_work @work on @cpu unless it's already pending
 * somewhere.
 *
 * Can be re-enqueued while the callback is still in progress.
 */
bool irq_work_queue_on(struct irq_work *work, int cpu)
{
#ifndef CONFIG_SMP
	return irq_work_queue(work);

#else /* CONFIG_SMP: */
	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(cpu));

	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return false;

	kasan_record_aux_stack_noalloc(work);

	preempt_disable();
	if (cpu != smp_processor_id()) {
		/* Arch remote IPI send/receive backend aren't NMI safe */
		WARN_ON_ONCE(in_nmi());

		/*
		 * On PREEMPT_RT the items which are not marked as
		 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
		 * item is used on the remote CPU to wake the thread.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
		    !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {

			if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
				goto out;

			work = &per_cpu(irq_work_wakeup, cpu);
			if (!irq_work_claim(work))
				goto out;
		}

		__smp_call_single_queue(cpu, &work->node.llist);
	} else {
		__irq_work_queue_local(work);
	}
out:
	preempt_enable();

	return true;
#endif /* CONFIG_SMP */
}
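
/*
 * Illustrative sketch (hypothetical names, not part of the original file):
 * running a callback on the CPU that owns some per-CPU state. The return
 * value is false if the work was already pending somewhere.
 *
 *	static DEFINE_IRQ_WORK(remote_work, my_remote_fn);
 *	...
 *	queued = irq_work_queue_on(&remote_work, target_cpu);
 */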

bool irq_work_needs_cpu(void)
{
	struct llist_head *raised, *lazy;

	raised = this_cpu_ptr(&raised_list);
	lazy = this_cpu_ptr(&lazy_list);

	if (llist_empty(raised) || arch_irq_work_has_interrupt())
		if (llist_empty(lazy))
			return false;

	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));

	return true;
}

void irq_work_single(void *arg)
{
	struct irq_work *work = arg;
	int flags;

	/*
	 * Clear the PENDING bit, after this point the @work can be re-used.
	 * The PENDING bit acts as a lock, and we own it, so we can clear it
	 * without atomic ops.
	 */
	flags = atomic_read(&work->node.a_flags);
	flags &= ~IRQ_WORK_PENDING;
	atomic_set(&work->node.a_flags, flags);

	/*
	 * See irq_work_claim().
	 */
	smp_mb();

	lockdep_irq_work_enter(flags);
	work->func(work);
	lockdep_irq_work_exit(flags);

	/*
	 * Clear the BUSY bit, if set, and return to the free state if no-one
	 * else claimed it meanwhile.
	 */
	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);

	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
	    !arch_irq_work_has_interrupt())
		rcuwait_wake_up(&work->irqwait);
}

static void irq_work_run_list(struct llist_head *list)
{
	struct irq_work *work, *tmp;
	struct llist_node *llnode;

	/*
	 * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
	 * in a per-CPU thread in preemptible context. Only the items which are
	 * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
	 */
	BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));

	if (llist_empty(list))
		return;

	llnode = llist_del_all(list);
	llist_for_each_entry_safe(work, tmp, llnode, node.llist)
		irq_work_single(work);
}

/*
 * hotplug calls this through:
 *  hotplug_cfd() -> flush_smp_call_function_queue()
 */
void irq_work_run(void)
{
	irq_work_run_list(this_cpu_ptr(&raised_list));
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		irq_work_run_list(this_cpu_ptr(&lazy_list));
	else
		wake_irq_workd();
}
EXPORT_SYMBOL_GPL(irq_work_run);

void irq_work_tick(void)
{
	struct llist_head *raised = this_cpu_ptr(&raised_list);

	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
		irq_work_run_list(raised);

	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		irq_work_run_list(this_cpu_ptr(&lazy_list));
	else
		wake_irq_workd();
}

/*
 * Synchronize against the irq_work @entry, ensures the entry is not
 * currently in use.
 */
void irq_work_sync(struct irq_work *work)
{
	lockdep_assert_irqs_enabled();
	might_sleep();

	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
	    !arch_irq_work_has_interrupt()) {
		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
				   TASK_UNINTERRUPTIBLE);
		return;
	}

	while (irq_work_is_busy(work))
		cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
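
/*
 * Illustrative teardown sketch (hypothetical names, not part of the original
 * file): before freeing an object that embeds a struct irq_work, wait for a
 * possibly in-flight callback to finish. Must be called from a context that
 * is allowed to sleep, see might_sleep() above.
 *
 *	irq_work_sync(&obj->work);
 *	kfree(obj);
 */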

static void run_irq_workd(unsigned int cpu)
{
	irq_work_run_list(this_cpu_ptr(&lazy_list));
}

static void irq_workd_setup(unsigned int cpu)
{
	sched_set_fifo_low(current);
}

static struct smp_hotplug_thread irqwork_threads = {
	.store			= &irq_workd,
	.setup			= irq_workd_setup,
	.thread_should_run	= irq_workd_should_run,
	.thread_fn		= run_irq_workd,
	.thread_comm		= "irq_work/%u",
};

static __init int irq_work_init_threads(void)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
	return 0;
}
early_initcall(irq_work_init_threads);