tick-broadcast.c
// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
static int tick_broadcast_forced;

static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);

static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
static inline void
tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
# endif
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
        return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
        return tick_broadcast_mask;
}

static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);

const struct clock_event_device *tick_get_wakeup_device(int cpu)
{
        return tick_get_oneshot_wakeup_device(cpu);
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
        if (bc)
                tick_setup_periodic(bc, 1);
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
                                        struct clock_event_device *newdev)
{
        if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
            (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
            (newdev->features & CLOCK_EVT_FEAT_C3STOP))
                return false;

        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
            !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
                return false;

        return !curdev || newdev->rating > curdev->rating;
}

#ifdef CONFIG_TICK_ONESHOT
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
        return per_cpu(tick_oneshot_wakeup_device, cpu);
}

static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
{
        /*
         * If we woke up early and the tick was reprogrammed in the
         * meantime then this may be spurious but harmless.
         */
        tick_receive_broadcast();
}
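
/*
 * A oneshot wakeup device is in a sense the opposite of a broadcast
 * device: it is strictly per-CPU, oneshot capable and does not stop in
 * deep idle states, so it can wake its own CPU without an IPI. The
 * checks below mirror that: reject dummy and C3STOP devices, require
 * PERCPU and ONESHOT, and only replace an existing wakeup device with
 * a higher rated one.
 */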
static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
                                           int cpu)
{
        struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);

        if (!newdev)
                goto set_device;

        if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
            (newdev->features & CLOCK_EVT_FEAT_C3STOP))
                return false;

        if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
            !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
                return false;

        if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
                return false;

        if (curdev && newdev->rating <= curdev->rating)
                return false;

        if (!try_module_get(newdev->owner))
                return false;

        newdev->event_handler = tick_oneshot_wakeup_handler;
set_device:
        clockevents_exchange_device(curdev, newdev);
        per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
        return true;
}
#else
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
        return NULL;
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
                                           int cpu)
{
        return false;
}
#endif

/*
 * Conditionally install/replace broadcast device
 */
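/*
 * Reached from the clockevents core (via tick_check_new_device()) for
 * every newly registered clock event device.
 */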
void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
{
        struct clock_event_device *cur = tick_broadcast_device.evtdev;

        if (tick_set_oneshot_wakeup_device(dev, cpu))
                return;

        if (!tick_check_broadcast_device(cur, dev))
                return;

        if (!try_module_get(dev->owner))
                return;

        clockevents_exchange_device(cur, dev);
        if (cur)
                cur->event_handler = clockevents_handle_noop;
        tick_broadcast_device.evtdev = dev;
        if (!cpumask_empty(tick_broadcast_mask))
                tick_broadcast_start_periodic(dev);

        if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
                return;

        /*
         * If the system already runs in oneshot mode, switch the newly
         * registered broadcast device to oneshot mode explicitly.
         */
        if (tick_broadcast_oneshot_active()) {
                tick_broadcast_switch_to_oneshot();
                return;
        }

        /*
         * Inform all cpus about this. We might be in a situation
         * where we did not switch to oneshot mode because the per cpu
         * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
         * of a oneshot capable broadcast device. Without that
         * notification the system stays stuck in periodic mode
         * forever.
         */
        tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
        return (dev && tick_broadcast_device.evtdev == dev);
}

int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
        int ret = -ENODEV;

        if (tick_is_broadcast_device(dev)) {
                raw_spin_lock(&tick_broadcast_lock);
                ret = __clockevents_update_freq(dev, freq);
                raw_spin_unlock(&tick_broadcast_lock);
        }
        return ret;
}

static void err_broadcast(const struct cpumask *mask)
{
        pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
        if (!dev->broadcast)
                dev->broadcast = tick_broadcast;
        if (!dev->broadcast) {
                pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
                             dev->name);
                dev->broadcast = err_broadcast;
        }
}

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;
        unsigned long flags;
        int ret = 0;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Devices might be registered with both periodic and oneshot
         * mode disabled. This signals, that the device needs to be
         * operated from the broadcast device and is a placeholder for
         * the cpu local device.
         */
        if (!tick_device_is_functional(dev)) {
                dev->event_handler = tick_handle_periodic;
                tick_device_setup_broadcast_func(dev);
                cpumask_set_cpu(cpu, tick_broadcast_mask);
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                        tick_broadcast_start_periodic(bc);
                else
                        tick_broadcast_setup_oneshot(bc, false);
                ret = 1;
        } else {
                /*
                 * Clear the broadcast bit for this cpu if the
                 * device is not power state affected.
                 */
                if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);
                else
                        tick_device_setup_broadcast_func(dev);

                /*
                 * Clear the broadcast bit if the CPU is not in
                 * periodic broadcast on state.
                 */
                if (!cpumask_test_cpu(cpu, tick_broadcast_on))
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_ONESHOT:
                        /*
                         * If the system is in oneshot mode we can
                         * unconditionally clear the oneshot mask bit,
                         * because the CPU is running and therefore
                         * not in an idle state which causes the power
                         * state affected device to stop. Let the
                         * caller initialize the device.
                         */
                        tick_broadcast_clear_oneshot(cpu);
                        ret = 0;
                        break;

                case TICKDEV_MODE_PERIODIC:
                        /*
                         * If the system is in periodic mode, check
                         * whether the broadcast device can be
                         * switched off now.
                         */
                        if (cpumask_empty(tick_broadcast_mask) && bc)
                                clockevents_shutdown(bc);
                        /*
                         * If we kept the cpu in the broadcast mask,
                         * tell the caller to leave the per cpu device
                         * in shutdown state. The periodic interrupt
                         * is delivered by the broadcast device, if
                         * the broadcast device exists and is not
                         * hrtimer based.
                         */
                        if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
                                ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
                        break;
                default:
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
        return ret;
}
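
/*
 * Called from the architecture specific broadcast IPI handler (e.g.
 * IPI_TIMER on ARM) to run the event handler of the CPU local clock
 * event device.
 */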
int tick_receive_broadcast(void)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        struct clock_event_device *evt = td->evtdev;

        if (!evt)
                return -ENODEV;

        if (!evt->event_handler)
                return -EINVAL;

        evt->event_handler(evt);
        return 0;
}

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
        int cpu = smp_processor_id();
        struct tick_device *td;
        bool local = false;

        /*
         * Check, if the current cpu is in the mask
         */
        if (cpumask_test_cpu(cpu, mask)) {
                struct clock_event_device *bc = tick_broadcast_device.evtdev;

                cpumask_clear_cpu(cpu, mask);
                /*
                 * We only run the local handler, if the broadcast
                 * device is not hrtimer based. Otherwise we run into
                 * a hrtimer recursion.
                 *
                 * local timer_interrupt()
                 *   local_handler()
                 *     expire_hrtimers()
                 *       bc_handler()
                 *         local_handler()
                 *           expire_hrtimers()
                 */
                local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
        }

        if (!cpumask_empty(mask)) {
                /*
                 * It might be necessary to actually check whether the devices
                 * have different broadcast functions. For now, just use the
                 * one of the first device. This works as long as we have this
                 * misfeature only on x86 (lapic)
                 */
                td = &per_cpu(tick_cpu_device, cpumask_first(mask));
                td->evtdev->broadcast(mask);
        }
        return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
        cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
        return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        bool bc_local;

        raw_spin_lock(&tick_broadcast_lock);

        /* Handle spurious interrupts gracefully */
        if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
                raw_spin_unlock(&tick_broadcast_lock);
                return;
        }

        bc_local = tick_do_periodic_broadcast();

        if (clockevent_state_oneshot(dev)) {
                ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);

                clockevents_program_event(dev, next, true);
        }
        raw_spin_unlock(&tick_broadcast_lock);

        /*
         * We run the handler of the local cpu after dropping
         * tick_broadcast_lock because the handler might deadlock when
         * trying to switch to oneshot mode.
         */
        if (bc_local)
                td->evtdev->event_handler(td->evtdev);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:       The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        int cpu, bc_stopped;
        unsigned long flags;

        /* Protects also the local clockevent device. */
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
        td = this_cpu_ptr(&tick_cpu_device);
        dev = td->evtdev;

        /*
         * Is the device not affected by the powerstate ?
         */
        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
                goto out;

        if (!tick_device_is_functional(dev))
                goto out;

        cpu = smp_processor_id();
        bc = tick_broadcast_device.evtdev;
        bc_stopped = cpumask_empty(tick_broadcast_mask);

        switch (mode) {
        case TICK_BROADCAST_FORCE:
                tick_broadcast_forced = 1;
                fallthrough;
        case TICK_BROADCAST_ON:
                cpumask_set_cpu(cpu, tick_broadcast_on);
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
                        /*
                         * Only shutdown the cpu local device, if:
                         *
                         * - the broadcast device exists
                         * - the broadcast device is not a hrtimer based one
                         * - the broadcast device is in periodic mode to
                         *   avoid a hiccup during switch to oneshot mode
                         */
                        if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
                            tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
                }
                break;

        case TICK_BROADCAST_OFF:
                if (tick_broadcast_forced)
                        break;
                cpumask_clear_cpu(cpu, tick_broadcast_on);
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                tick_setup_periodic(dev, 0);
                }
                break;
        }

        if (bc) {
                if (cpumask_empty(tick_broadcast_mask)) {
                        if (!bc_stopped)
                                clockevents_shutdown(bc);
                } else if (bc_stopped) {
                        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                                tick_broadcast_start_periodic(bc);
                        else
                                tick_broadcast_setup_oneshot(bc, false);
                }
        }
out:
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);
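
/*
 * Callers normally use the convenience wrappers from <linux/tick.h>.
 * A sketch of those existing inline helpers:
 *
 *      static inline void tick_broadcast_enable(void)
 *      {
 *              tick_broadcast_control(TICK_BROADCAST_ON);
 *      }
 *
 * with tick_broadcast_disable() and tick_broadcast_force() doing the
 * same for TICK_BROADCAST_OFF and TICK_BROADCAST_FORCE.
 */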
/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
        if (!broadcast)
                dev->event_handler = tick_handle_periodic;
        else
                dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
static void tick_shutdown_broadcast(void)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
                if (bc && cpumask_empty(tick_broadcast_mask))
                        clockevents_shutdown(bc);
        }
}

/*
 * Remove a CPU from broadcasting
 */
void tick_broadcast_offline(unsigned int cpu)
{
        raw_spin_lock(&tick_broadcast_lock);
        cpumask_clear_cpu(cpu, tick_broadcast_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_on);
        tick_broadcast_oneshot_offline(cpu);
        tick_shutdown_broadcast();
        raw_spin_unlock(&tick_broadcast_lock);
}
#endif

void tick_suspend_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        if (bc)
                clockevents_shutdown(bc);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
                return false;
        else
                return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

void tick_resume_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;

        if (bc) {
                clockevents_tick_resume(bc);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_PERIODIC:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_broadcast_start_periodic(bc);
                        break;
                case TICKDEV_MODE_ONESHOT:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_resume_broadcast_oneshot(bc);
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
        return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
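/*
 * This runs on the noinstr idle path. When the instrumented bitops
 * header is in effect, cpumask_test_cpu() would pull KASAN/KCSAN
 * instrumentation into a noinstr section, so the raw arch_test_bit()
 * is used instead.
 */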
noinstr int tick_check_broadcast_expired(void)
{
#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
        return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
#else
        return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
#endif
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
                                        const struct cpumask *cpumask)
{
        if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
                return;

        if (cpumask_equal(bc->cpumask, cpumask))
                return;

        bc->cpumask = cpumask;
        irq_set_affinity(bc->irq, bc->cpumask);
}

static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
                                     ktime_t expires)
{
        if (!clockevent_state_oneshot(bc))
                clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

        clockevents_program_event(bc, expires, 1);
        tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
        clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
        if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
                struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

                /*
                 * We might be in the middle of switching over from
                 * periodic to oneshot. If the CPU has not yet
                 * switched over, leave the device alone.
                 */
                if (td->mode == TICKDEV_MODE_ONESHOT) {
                        clockevents_switch_state(td->evtdev,
                                                 CLOCK_EVT_STATE_ONESHOT);
                }
        }
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
        struct tick_device *td;
        ktime_t now, next_event;
        int cpu, next_cpu = 0;
        bool bc_local;

        raw_spin_lock(&tick_broadcast_lock);
        dev->next_event = KTIME_MAX;
        next_event = KTIME_MAX;
        cpumask_clear(tmpmask);
        now = ktime_get();
        /* Find all expired events */
        for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
                /*
                 * Required for !SMP because for_each_cpu() unconditionally
                 * reports CPU0 as set on UP kernels.
                 */
                if (!IS_ENABLED(CONFIG_SMP) &&
                    cpumask_empty(tick_broadcast_oneshot_mask))
                        break;

                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev->next_event <= now) {
                        cpumask_set_cpu(cpu, tmpmask);
                        /*
                         * Mark the remote cpu in the pending mask, so
                         * it can avoid reprogramming the cpu local
                         * timer in tick_broadcast_oneshot_control().
                         */
                        cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
                } else if (td->evtdev->next_event < next_event) {
                        next_event = td->evtdev->next_event;
                        next_cpu = cpu;
                }
        }

        /*
         * Remove the current cpu from the pending mask. The event is
         * delivered immediately in tick_do_broadcast() !
         */
        cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

        /* Take care of enforced broadcast requests */
        cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
        cpumask_clear(tick_broadcast_force_mask);

        /*
         * Sanity check. Catch the case where we try to broadcast to
         * offline cpus.
         */
        if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
                cpumask_and(tmpmask, tmpmask, cpu_online_mask);

        /*
         * Wakeup the cpus which have an expired event.
         */
        bc_local = tick_do_broadcast(tmpmask);

        /*
         * Two reasons for reprogramming:
         *
         * - The global event did not expire any CPU local
         *   events. This happens in dyntick mode, as the maximum PIT
         *   delta is quite small.
         *
         * - There are pending events on sleeping CPUs which were not
         *   in the event mask
         */
        if (next_event != KTIME_MAX)
                tick_broadcast_set_event(dev, next_cpu, next_event);

        raw_spin_unlock(&tick_broadcast_lock);

        if (bc_local) {
                td = this_cpu_ptr(&tick_cpu_device);
                td->evtdev->event_handler(td->evtdev);
        }
}
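
/*
 * For a hrtimer based broadcast device the "timer" is a hrtimer queued
 * on one particular CPU (bc->bound_on). That CPU has to stay out of
 * deep idle to service it, so it gets -EBUSY here once an event is
 * armed.
 */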
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
        if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
                return 0;
        if (bc->next_event == KTIME_MAX)
                return 0;
        return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
                                     struct clock_event_device *dev)
{
        /*
         * For hrtimer based broadcasting we cannot shutdown the cpu
         * local device if our own event is the first one to expire or
         * if we own the broadcast timer.
         */
        if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
                if (broadcast_needs_cpu(bc, smp_processor_id()))
                        return;
                if (dev->next_event < bc->next_event)
                        return;
        }
        clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
                                             struct tick_device *td,
                                             int cpu)
{
        struct clock_event_device *bc, *dev = td->evtdev;
        int ret = 0;
        ktime_t now;

        raw_spin_lock(&tick_broadcast_lock);
        bc = tick_broadcast_device.evtdev;

        if (state == TICK_BROADCAST_ENTER) {
                /*
                 * If the current CPU owns the hrtimer broadcast
                 * mechanism, it cannot go deep idle and we do not add
                 * the CPU to the broadcast mask. We don't have to go
                 * through the EXIT path as the local timer is not
                 * shutdown.
                 */
                ret = broadcast_needs_cpu(bc, cpu);
                if (ret)
                        goto out;

                /*
                 * If the broadcast device is in periodic mode, we
                 * return.
                 */
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
                        /* If it is a hrtimer based broadcast, return busy */
                        if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
                                ret = -EBUSY;
                        goto out;
                }

                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

                        /* Conditionally shut down the local timer. */
                        broadcast_shutdown_local(bc, dev);

                        /*
                         * We only reprogram the broadcast timer if we
                         * did not mark ourselves in the force mask and
                         * if the cpu local event is earlier than the
                         * broadcast event. If the current CPU is in
                         * the force mask, then we are going to be
                         * woken by the IPI right away; we return
                         * busy, so the CPU does not try to go deep
                         * idle.
                         */
                        if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
                                ret = -EBUSY;
                        } else if (dev->next_event < bc->next_event) {
                                tick_broadcast_set_event(bc, cpu, dev->next_event);
                                /*
                                 * In case of hrtimer broadcasts the
                                 * programming might have moved the
                                 * timer to this cpu. If yes, remove
                                 * us from the broadcast mask and
                                 * return busy.
                                 */
                                ret = broadcast_needs_cpu(bc, cpu);
                                if (ret) {
                                        cpumask_clear_cpu(cpu,
                                                tick_broadcast_oneshot_mask);
                                }
                        }
                }
        } else {
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
                        /*
                         * The cpu which was handling the broadcast
                         * timer marked this cpu in the broadcast
                         * pending mask and fired the broadcast
                         * IPI. So we are going to handle the expired
                         * event anyway via the broadcast IPI
                         * handler. No need to reprogram the timer
                         * with an already expired event.
                         */
                        if (cpumask_test_and_clear_cpu(cpu,
                                               tick_broadcast_pending_mask))
                                goto out;

                        /*
                         * Bail out if there is no next event.
                         */
                        if (dev->next_event == KTIME_MAX)
                                goto out;
                        /*
                         * If the pending bit is not set, then we are
                         * either the CPU handling the broadcast
                         * interrupt or we got woken by something else.
                         *
                         * We are no longer in the broadcast mask, so
                         * if the cpu local expiry time is already
                         * reached, we would reprogram the cpu local
                         * timer with an already expired event.
                         *
                         * This can lead to a ping-pong when we return
                         * to idle and therefore rearm the broadcast
                         * timer before the cpu local timer was able
                         * to fire. This happens because the forced
                         * reprogramming makes sure that the event
                         * will happen in the future and depending on
                         * the min_delta setting this might be far
                         * enough out that the ping-pong starts.
                         *
                         * If the cpu local next_event has expired
                         * then we know that the broadcast timer
                         * next_event has expired as well and
                         * broadcast is about to be handled. So we
                         * avoid reprogramming and enforce that the
                         * broadcast handler, which did not run yet,
                         * will invoke the cpu local handler.
                         *
                         * We cannot call the handler directly from
                         * here, because we might be in a NOHZ phase
                         * and we did not go through the irq_enter()
                         * nohz fixups.
                         */
                        now = ktime_get();
                        if (dev->next_event <= now) {
                                cpumask_set_cpu(cpu, tick_broadcast_force_mask);
                                goto out;
                        }
                        /*
                         * We got woken by something else. Reprogram
                         * the cpu local timer device.
                         */
                        tick_program_event(dev->next_event, 1);
                }
        }
out:
        raw_spin_unlock(&tick_broadcast_lock);
        return ret;
}

static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
                                       struct tick_device *td,
                                       int cpu)
{
        struct clock_event_device *dev, *wd;

        dev = td->evtdev;
        if (td->mode != TICKDEV_MODE_ONESHOT)
                return -EINVAL;

        wd = tick_get_oneshot_wakeup_device(cpu);
        if (!wd)
                return -ENODEV;

        switch (state) {
        case TICK_BROADCAST_ENTER:
                clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
                clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
                clockevents_program_event(wd, dev->next_event, 1);
                break;
        case TICK_BROADCAST_EXIT:
                /* We may have transitioned to oneshot mode while idle */
                if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
                        return -ENODEV;
        }

        return 0;
}

int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        int cpu = smp_processor_id();

        if (!tick_oneshot_wakeup_control(state, td, cpu))
                return 0;

        if (tick_broadcast_device.evtdev)
                return ___tick_broadcast_oneshot_control(state, td, cpu);

        /*
         * If there is no broadcast or wakeup device, tell the caller not
         * to go into deep idle.
         */
        return -EBUSY;
}
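
/*
 * Callers typically reach this via the tick_broadcast_enter() and
 * tick_broadcast_exit() wrappers in <linux/tick.h>. A cpuidle driver
 * entering a state which stops the local tick device does roughly
 * the following (a sketch; enter_deep_idle_state() is a hypothetical
 * placeholder for the driver's idle entry):
 *
 *      if (tick_broadcast_enter())
 *              return -EBUSY;          deep idle not possible right now
 *      enter_deep_idle_state();
 *      tick_broadcast_exit();
 */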
/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

static void tick_broadcast_init_next_event(struct cpumask *mask,
                                           ktime_t expires)
{
        struct tick_device *td;
        int cpu;

        for_each_cpu(cpu, mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev)
                        td->evtdev->next_event = expires;
        }
}

static inline ktime_t tick_get_next_period(void)
{
        ktime_t next;

        /*
         * Protect against concurrent updates (store/load tearing on
         * 32bit). It does not matter if the time is already in the
         * past. The broadcast device which is about to be programmed will
         * fire in any case.
         */
        raw_spin_lock(&jiffies_lock);
        next = tick_next_period;
        raw_spin_unlock(&jiffies_lock);
        return next;
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 * @bc:                 the broadcast device
 * @from_periodic:      true when called from periodic mode
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc,
                                         bool from_periodic)
{
        int cpu = smp_processor_id();
        ktime_t nexttick = 0;

        if (!bc)
                return;

        /*
         * When the broadcast device was switched to oneshot by the first
         * CPU handling the NOHZ change, the other CPUs will reach this
         * code via hrtimer_run_queues() -> tick_check_oneshot_change()
         * too. Set up the broadcast device only once!
         */
        if (bc->event_handler == tick_handle_oneshot_broadcast) {
                /*
                 * The CPU which switched from periodic to oneshot mode
                 * set the broadcast oneshot bit for all other CPUs which
                 * are in the general (periodic) broadcast mask to ensure
                 * that CPUs which wait for the periodic broadcast are
                 * woken up.
                 *
                 * Clear the bit for the local CPU as the set bit would
                 * prevent the first tick_broadcast_enter() after this CPU
                 * switched to oneshot state to program the broadcast
                 * device.
                 *
                 * This code can also be reached via tick_broadcast_control(),
                 * but this cannot avoid the tick_broadcast_clear_oneshot()
                 * as that would break the periodic to oneshot transition of
                 * secondary CPUs. But that's harmless as the below only
                 * clears already cleared bits.
                 */
                tick_broadcast_clear_oneshot(cpu);
                return;
        }

        bc->event_handler = tick_handle_oneshot_broadcast;
        bc->next_event = KTIME_MAX;

        /*
         * When the tick mode is switched from periodic to oneshot it must
         * be ensured that CPUs which are waiting for periodic broadcast
         * get their wake-up at the next tick. This is achieved by ORing
         * tick_broadcast_mask into tick_broadcast_oneshot_mask.
         *
         * For other callers, e.g. broadcast device replacement,
         * tick_broadcast_oneshot_mask must not be touched as this would
         * set bits for CPUs which are already NOHZ, but not idle. Their
         * next tick_broadcast_enter() would observe the bit set and fail
         * to update the expiry time and the broadcast event device.
         */
        if (from_periodic) {
                cpumask_copy(tmpmask, tick_broadcast_mask);
                /* Remove the local CPU as it is obviously not idle */
                cpumask_clear_cpu(cpu, tmpmask);
                cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask);

                /*
                 * Ensure that the oneshot broadcast handler will wake the
                 * CPUs which are still waiting for periodic broadcast.
                 */
                nexttick = tick_get_next_period();
                tick_broadcast_init_next_event(tmpmask, nexttick);

                /*
                 * If the underlying broadcast clock event device is
                 * already in oneshot state, then there is nothing to do.
                 * The device was already armed for the next tick
                 * in tick_handle_periodic_broadcast().
                 */
                if (clockevent_state_oneshot(bc))
                        return;
        }

        /*
         * When switching from periodic to oneshot mode arm the broadcast
         * device for the next tick.
         *
         * If the broadcast device has been replaced in oneshot mode and
         * the oneshot broadcast mask is not empty, then arm it to expire
         * immediately in order to reevaluate the next expiring timer.
         * @nexttick is 0 and therefore in the past which will cause the
         * clockevent code to force an event.
         *
         * For both cases the programming can be avoided when the oneshot
         * broadcast mask is empty.
         *
         * tick_broadcast_set_event() implicitly switches the broadcast
         * device to oneshot state.
         */
        if (!cpumask_empty(tick_broadcast_oneshot_mask))
                tick_broadcast_set_event(bc, cpu, nexttick);
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
        struct clock_event_device *bc;
        enum tick_device_mode oldmode;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        oldmode = tick_broadcast_device.mode;
        tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
        bc = tick_broadcast_device.evtdev;
        if (bc)
                tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
        bc = tick_broadcast_device.evtdev;

        if (bc && broadcast_needs_cpu(bc, deadcpu)) {
                /*
                 * If the broadcast force bit of the current CPU is set,
                 * then the current CPU has not yet reprogrammed the local
                 * timer device to avoid a ping-pong race. See
                 * ___tick_broadcast_oneshot_control().
                 *
                 * If the broadcast device is hrtimer based then
                 * programming the broadcast event below does not have any
                 * effect because the local clockevent device is not
                 * running and not programmed because the broadcast event
                 * is not earlier than the pending event of the local clock
                 * event device. As a consequence all CPUs waiting for a
                 * broadcast event are stuck forever.
                 *
                 * Detect this condition and reprogram the cpu local timer
                 * device to avoid the starvation.
                 */
                if (tick_check_broadcast_expired()) {
                        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

                        cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask);
                        tick_program_event(td->evtdev->next_event, 1);
                }

                /* This moves the broadcast assignment to this CPU: */
                clockevents_program_event(bc, bc->next_event, 1);
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dying CPU from broadcasting
 */
static void tick_broadcast_oneshot_offline(unsigned int cpu)
{
        if (tick_get_oneshot_wakeup_device(cpu))
                tick_set_oneshot_wakeup_device(NULL, cpu);

        /*
         * Clear the broadcast masks for the dead cpu, but do not stop
         * the broadcast device!
         */
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
}
#endif

/*
 * Check, whether the broadcast device is in one shot mode
 */
int tick_broadcast_oneshot_active(void)
{
        return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#else
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
                return -EBUSY;

        return 0;
}
#endif
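
/*
 * The masks are allocated with GFP_NOWAIT because this runs during
 * early boot, from tick_init(), where blocking allocations are not
 * allowed.
 */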
void __init tick_broadcast_init(void)
{
        zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
        zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
        zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}