link_watch.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux network device link state notification
 *
 * Author:
 *	Stefan Rompf <sux@loplof.de>
 */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/types.h>

#include "dev.h"
enum lw_bits {
	LW_URGENT = 0,
};

static unsigned long linkwatch_flags;
static unsigned long linkwatch_nextevent;

static void linkwatch_event(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);

static LIST_HEAD(lweventlist);
static DEFINE_SPINLOCK(lweventlist_lock);
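
/*
 * Overview: linkwatch_fire_event() queues a device on lweventlist
 * (under lweventlist_lock) and the list is drained from the delayed
 * work item above.  Non-urgent events are batched and rate-limited
 * through linkwatch_nextevent; urgent ones set LW_URGENT so the work
 * runs immediately.
 */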
static unsigned int default_operstate(const struct net_device *dev)
{
	if (netif_testing(dev))
		return IF_OPER_TESTING;

	/* Some uppers (DSA) have additional sources for being down, so
	 * first check whether lower is indeed the source of its down state.
	 */
	if (!netif_carrier_ok(dev)) {
		struct net_device *peer;
		int iflink;

		/* If called from netdev_run_todo()/linkwatch_sync_dev(),
		 * dev_net(dev) can be already freed, and RTNL is not held.
		 */
		if (dev->reg_state <= NETREG_REGISTERED)
			iflink = dev_get_iflink(dev);
		else
			iflink = dev->ifindex;

		if (iflink == dev->ifindex)
			return IF_OPER_DOWN;

		ASSERT_RTNL();
		peer = __dev_get_by_index(dev_net(dev), iflink);
		if (!peer)
			return IF_OPER_DOWN;

		return netif_carrier_ok(peer) ? IF_OPER_DOWN :
						IF_OPER_LOWERLAYERDOWN;
	}

	if (netif_dormant(dev))
		return IF_OPER_DORMANT;

	return IF_OPER_UP;
}
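
/* Map the raw carrier/dormant state to the RFC 2863 operstate reported
 * to userspace, honouring the administratively configured link mode:
 * in dormant (resp. testing) mode the device is never reported better
 * than IF_OPER_DORMANT (resp. IF_OPER_TESTING), so e.g. a supplicant
 * can hold the interface down until authentication completes.
 */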
static void rfc2863_policy(struct net_device *dev)
{
	unsigned int operstate = default_operstate(dev);

	if (operstate == READ_ONCE(dev->operstate))
		return;

	switch (dev->link_mode) {
	case IF_LINK_MODE_TESTING:
		if (operstate == IF_OPER_UP)
			operstate = IF_OPER_TESTING;
		break;
	case IF_LINK_MODE_DORMANT:
		if (operstate == IF_OPER_UP)
			operstate = IF_OPER_DORMANT;
		break;
	case IF_LINK_MODE_DEFAULT:
	default:
		break;
	}

	WRITE_ONCE(dev->operstate, operstate);
}
void linkwatch_init_dev(struct net_device *dev)
{
	/* Handle pre-registration link state changes */
	if (!netif_carrier_ok(dev) || netif_dormant(dev) ||
	    netif_testing(dev))
		rfc2863_policy(dev);
}
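
/* Decide whether an event may bypass the one-per-second batching:
 * stacked devices (iflink != ifindex), LAG ports/masters and
 * carrier-up transitions that change the qdisc state are handled
 * without delay.
 */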
static bool linkwatch_urgent_event(struct net_device *dev)
{
	if (!netif_running(dev))
		return false;

	if (dev->ifindex != dev_get_iflink(dev))
		return true;

	if (netif_is_lag_port(dev) || netif_is_lag_master(dev))
		return true;

	return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
}
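
/* Queue the device on lweventlist (at most once) and pin it with a
 * tracked reference; the reference is dropped again in
 * linkwatch_do_dev() once the event has been handled.
 */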
static void linkwatch_add_event(struct net_device *dev)
{
	unsigned long flags;

	spin_lock_irqsave(&lweventlist_lock, flags);
	if (list_empty(&dev->link_watch_list)) {
		list_add_tail(&dev->link_watch_list, &lweventlist);
		netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);
	}
	spin_unlock_irqrestore(&lweventlist_lock, flags);
}
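
/* (Re)arm the delayed work.  The delay is the time left until
 * linkwatch_nextevent; urgent requests flip LW_URGENT and use
 * mod_delayed_work() so an already queued non-urgent run is pulled
 * forward to "now" instead of being queued a second time.
 */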
static void linkwatch_schedule_work(int urgent)
{
	unsigned long delay = linkwatch_nextevent - jiffies;

	if (test_bit(LW_URGENT, &linkwatch_flags))
		return;

	/* Minimise down-time: drop delay for up event. */
	if (urgent) {
		if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
			return;
		delay = 0;
	}

	/* If we wrap around we'll delay it by at most HZ. */
	if (delay > HZ)
		delay = 0;

	/*
	 * If urgent, schedule immediate execution; otherwise, don't
	 * override the existing timer.
	 */
	if (test_bit(LW_URGENT, &linkwatch_flags))
		mod_delayed_work(system_unbound_wq, &linkwatch_work, 0);
	else
		queue_delayed_work(system_unbound_wq, &linkwatch_work, delay);
}
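
/* Apply the pending state change for one device: recompute the
 * operstate, (de)activate its qdiscs and send the rtnetlink
 * notification via netdev_state_change().
 */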
static void linkwatch_do_dev(struct net_device *dev)
{
	/*
	 * Make sure the above read is complete since it can be
	 * rewritten as soon as we clear the bit below.
	 */
	smp_mb__before_atomic();

	/* We are about to handle this device,
	 * so new events can be accepted
	 */
	clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);

	rfc2863_policy(dev);
	if (dev->flags & IFF_UP) {
		if (netif_carrier_ok(dev))
			dev_activate(dev);
		else
			dev_deactivate(dev);

		netdev_state_change(dev);
	}
	/* Note: our callers are responsible for calling netdev_tracker_free().
	 * This is the reason we use __dev_put() instead of dev_put().
	 */
	__dev_put(dev);
}
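
/* Drain up to the per-loop budget of queued events.  With
 * urgent_only=1 non-urgent devices are put back on the global list and
 * handled once the one-second window expires.
 */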
static void __linkwatch_run_queue(int urgent_only)
{
#define MAX_DO_DEV_PER_LOOP	100

	int do_dev = MAX_DO_DEV_PER_LOOP;
	/* Use a local list here since we add non-urgent
	 * events back to the global one when called with
	 * urgent_only=1.
	 */
	LIST_HEAD(wrk);

	/* Give urgent case more budget */
	if (urgent_only)
		do_dev += MAX_DO_DEV_PER_LOOP;

	/*
	 * Limit the number of linkwatch events to one
	 * per second so that a runaway driver does not
	 * cause a storm of messages on the netlink
	 * socket.  This limit does not apply to up events
	 * while the device qdisc is down.
	 */
	if (!urgent_only)
		linkwatch_nextevent = jiffies + HZ;
	/* Limit wrap-around effect on delay. */
	else if (time_after(linkwatch_nextevent, jiffies + HZ))
		linkwatch_nextevent = jiffies;

	clear_bit(LW_URGENT, &linkwatch_flags);

	spin_lock_irq(&lweventlist_lock);
	list_splice_init(&lweventlist, &wrk);

	while (!list_empty(&wrk) && do_dev > 0) {
		struct net_device *dev;

		dev = list_first_entry(&wrk, struct net_device, link_watch_list);
		list_del_init(&dev->link_watch_list);

		if (!netif_device_present(dev) ||
		    (urgent_only && !linkwatch_urgent_event(dev))) {
			list_add_tail(&dev->link_watch_list, &lweventlist);
			continue;
		}
		/* We must free netdev tracker under
		 * the spinlock protection.
		 */
		netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
		spin_unlock_irq(&lweventlist_lock);
		linkwatch_do_dev(dev);
		do_dev--;
		spin_lock_irq(&lweventlist_lock);
	}

	/* Add the remaining work back to lweventlist */
	list_splice_init(&wrk, &lweventlist);

	if (!list_empty(&lweventlist))
		linkwatch_schedule_work(0);
	spin_unlock_irq(&lweventlist_lock);
}
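
/* Flush a pending linkwatch event for one device synchronously, e.g.
 * from the unregister path (netdev_run_todo()), so the device no
 * longer sits on lweventlist when it goes away.
 */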
void linkwatch_sync_dev(struct net_device *dev)
{
	unsigned long flags;
	int clean = 0;

	spin_lock_irqsave(&lweventlist_lock, flags);
	if (!list_empty(&dev->link_watch_list)) {
		list_del_init(&dev->link_watch_list);
		clean = 1;
		/* We must release netdev tracker under
		 * the spinlock protection.
		 */
		netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
	}
	spin_unlock_irqrestore(&lweventlist_lock, flags);
	if (clean)
		linkwatch_do_dev(dev);
}
/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
{
	__linkwatch_run_queue(0);
}
static void linkwatch_event(struct work_struct *dummy)
{
	rtnl_lock();
	__linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies));
	rtnl_unlock();
}
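
/**
 * linkwatch_fire_event - schedule link state change handling for a device
 * @dev: device whose carrier/dormant state changed
 *
 * Marks @dev pending, queues it on the event list on the first call
 * and arms the linkwatch work; urgent events (see
 * linkwatch_urgent_event()) are processed without the usual batching
 * delay.
 */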
void linkwatch_fire_event(struct net_device *dev)
{
	bool urgent = linkwatch_urgent_event(dev);

	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
		linkwatch_add_event(dev);
	} else if (!urgent)
		return;

	linkwatch_schedule_work(urgent);
}
EXPORT_SYMBOL(linkwatch_fire_event);
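
/*
 * Illustrative sketch (hypothetical driver, not part of this file):
 * drivers normally do not call linkwatch_fire_event() directly but go
 * through netif_carrier_on()/netif_carrier_off(), which fire the event
 * when the carrier state actually changes.
 */
#if 0	/* example only */
static void example_phy_link_change(struct net_device *dev, bool link_up)
{
	if (link_up)
		netif_carrier_on(dev);	/* ends up in linkwatch_fire_event() */
	else
		netif_carrier_off(dev);
}
#endif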