eventfd.c

// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"
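
/*
 * Per-ring eventfd state, published via RCU through ctx->io_ev_fd.
 * @cq_ev_fd:		registered eventfd context to signal when CQEs are posted
 * @eventfd_async:	only signal from io-wq worker ("async") context
 * @rcu:		used both for RCU-deferred freeing and for deferring the
 *			signal itself (see io_eventfd_do_signal)
 * @refs:		the registration holds one reference; each in-flight
 *			signal attempt takes another
 * @ops:		bitmask of pending deferred operations (IO_EVENTFD_OP_*)
 */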
struct io_ev_fd {
	struct eventfd_ctx	*cq_ev_fd;
	unsigned int		eventfd_async: 1;
	struct rcu_head		rcu;
	refcount_t		refs;
	atomic_t		ops;
};

enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};
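
/* RCU callback: drop the eventfd reference and free the io_ev_fd itself. */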
static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}
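
/*
 * RCU callback used when the eventfd could not be signalled directly from
 * io_eventfd_signal(): deliver the deferred signal, then drop the reference
 * that was taken before the callback was queued.
 */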
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);

	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}
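
/*
 * Signal the registered eventfd that completions are available, if signalling
 * is currently enabled. The io_ev_fd is looked up under RCU and pinned with a
 * reference so a concurrent io_eventfd_unregister() cannot free it underneath
 * us. If the signal cannot be issued from the current context, it is deferred
 * to an RCU callback (io_eventfd_do_signal).
 */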
void io_eventfd_signal(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd = NULL;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;

	guard(rcu)();

	/*
	 * rcu_dereference ctx->io_ev_fd once and use it both for the NULL
	 * check and for eventfd_signal
	 */
	ev_fd = rcu_dereference(ctx->io_ev_fd);

	/*
	 * Check whether ev_fd exists, in case an io_eventfd_unregister call
	 * completed and cleared ctx->io_ev_fd before the RCU read-side
	 * critical section was entered.
	 */
	if (unlikely(!ev_fd))
		return;
	if (!refcount_inc_not_zero(&ev_fd->refs))
		return;
	if (ev_fd->eventfd_async && !io_wq_current_is_worker())
		goto out;

	if (likely(eventfd_signal_allowed())) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
	} else {
		if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
			call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
			return;
		}
	}
out:
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}
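
/*
 * Signal the eventfd only if new CQEs have been posted since the last time it
 * was signalled, tracked via ctx->evfd_last_cq_tail under the completion lock.
 */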
void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
{
	bool skip;

	spin_lock(&ctx->completion_lock);

	/*
	 * Eventfd should only get triggered when at least one event has been
	 * posted. Some applications rely on the eventfd notification count
	 * only changing IFF a new CQE has been added to the CQ ring. There's
	 * no dependency on a 1:1 relationship between how many times this
	 * function is called (and hence the eventfd count) and the number of
	 * CQEs posted to the CQ ring.
	 */
	skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	if (skip)
		return;

	io_eventfd_signal(ctx);
}
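
/*
 * Register an eventfd with the ring so CQE arrival can be signalled to
 * userspace (the io_uring_register(2) IORING_REGISTER_EVENTFD /
 * IORING_REGISTER_EVENTFD_ASYNC path, with @eventfd_async distinguishing the
 * two). Expects @arg to point at a single __s32 eventfd descriptor and the
 * caller to hold ctx->uring_lock.
 */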
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}
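
/*
 * Tear down a previously registered eventfd: clear ctx->io_ev_fd and drop the
 * registration reference, deferring the actual free to RCU so that concurrent
 * io_eventfd_signal() callers remain safe. Also expects ctx->uring_lock to be
 * held. Returns -ENXIO if no eventfd was registered.
 */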
int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		if (refcount_dec_and_test(&ev_fd->refs))
			call_rcu(&ev_fd->rcu, io_eventfd_free);
		return 0;
	}

	return -ENXIO;
}
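
/*
 * Userspace usage sketch (illustration only, not part of this file): register
 * an eventfd so that reads on it report CQE arrival, e.g. via liburing:
 *
 *	int efd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 *	io_uring_register_eventfd(&ring, efd);	// or io_uring_register_eventfd_async()
 *	...
 *	uint64_t cnt;
 *	read(efd, &cnt, sizeof(cnt));		// woken up: go reap CQEs
 */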