virqfd.c 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO generic eventfd code for IRQFD support.
 * Derived from drivers/vfio/pci/vfio_pci_intrs.c
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 */
  9. #include <linux/vfio.h>
  10. #include <linux/eventfd.h>
  11. #include <linux/file.h>
  12. #include <linux/module.h>
  13. #include <linux/slab.h>
  14. #include "vfio.h"
  15. static struct workqueue_struct *vfio_irqfd_cleanup_wq;
  16. static DEFINE_SPINLOCK(virqfd_lock);
  17. int __init vfio_virqfd_init(void)
  18. {
  19. vfio_irqfd_cleanup_wq =
  20. create_singlethread_workqueue("vfio-irqfd-cleanup");
  21. if (!vfio_irqfd_cleanup_wq)
  22. return -ENOMEM;
  23. return 0;
  24. }
  25. void vfio_virqfd_exit(void)
  26. {
  27. destroy_workqueue(vfio_irqfd_cleanup_wq);
  28. }
  29. static void virqfd_deactivate(struct virqfd *virqfd)
  30. {
  31. queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
  32. }
  33. static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
  34. {
  35. struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
  36. __poll_t flags = key_to_poll(key);
  37. if (flags & EPOLLIN) {
  38. u64 cnt;
  39. eventfd_ctx_do_read(virqfd->eventfd, &cnt);
  40. /* An event has been signaled, call function */
  41. if ((!virqfd->handler ||
  42. virqfd->handler(virqfd->opaque, virqfd->data)) &&
  43. virqfd->thread)
  44. schedule_work(&virqfd->inject);
  45. }
  46. if (flags & EPOLLHUP) {
  47. unsigned long flags;
  48. spin_lock_irqsave(&virqfd_lock, flags);
  49. /*
  50. * The eventfd is closing, if the virqfd has not yet been
  51. * queued for release, as determined by testing whether the
  52. * virqfd pointer to it is still valid, queue it now. As
  53. * with kvm irqfds, we know we won't race against the virqfd
  54. * going away because we hold the lock to get here.
  55. */
  56. if (*(virqfd->pvirqfd) == virqfd) {
  57. *(virqfd->pvirqfd) = NULL;
  58. virqfd_deactivate(virqfd);
  59. }
  60. spin_unlock_irqrestore(&virqfd_lock, flags);
  61. }
  62. return 0;
  63. }
  64. static void virqfd_ptable_queue_proc(struct file *file,
  65. wait_queue_head_t *wqh, poll_table *pt)
  66. {
  67. struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
  68. add_wait_queue(wqh, &virqfd->wait);
  69. }
  70. static void virqfd_shutdown(struct work_struct *work)
  71. {
  72. struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
  73. u64 cnt;
  74. eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
  75. flush_work(&virqfd->inject);
  76. eventfd_ctx_put(virqfd->eventfd);
  77. kfree(virqfd);
  78. }
  79. static void virqfd_inject(struct work_struct *work)
  80. {
  81. struct virqfd *virqfd = container_of(work, struct virqfd, inject);
  82. if (virqfd->thread)
  83. virqfd->thread(virqfd->opaque, virqfd->data);
  84. }
  85. static void virqfd_flush_inject(struct work_struct *work)
  86. {
  87. struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject);
  88. flush_work(&virqfd->inject);
  89. }
  90. int vfio_virqfd_enable(void *opaque,
  91. int (*handler)(void *, void *),
  92. void (*thread)(void *, void *),
  93. void *data, struct virqfd **pvirqfd, int fd)
  94. {
  95. struct fd irqfd;
  96. struct eventfd_ctx *ctx;
  97. struct virqfd *virqfd;
  98. int ret = 0;
  99. __poll_t events;
  100. virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT);
  101. if (!virqfd)
  102. return -ENOMEM;
  103. virqfd->pvirqfd = pvirqfd;
  104. virqfd->opaque = opaque;
  105. virqfd->handler = handler;
  106. virqfd->thread = thread;
  107. virqfd->data = data;
  108. INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
  109. INIT_WORK(&virqfd->inject, virqfd_inject);
  110. INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject);
  111. irqfd = fdget(fd);
  112. if (!fd_file(irqfd)) {
  113. ret = -EBADF;
  114. goto err_fd;
  115. }
  116. ctx = eventfd_ctx_fileget(fd_file(irqfd));
  117. if (IS_ERR(ctx)) {
  118. ret = PTR_ERR(ctx);
  119. goto err_ctx;
  120. }
  121. virqfd->eventfd = ctx;
  122. /*
  123. * virqfds can be released by closing the eventfd or directly
  124. * through ioctl. These are both done through a workqueue, so
  125. * we update the pointer to the virqfd under lock to avoid
  126. * pushing multiple jobs to release the same virqfd.
  127. */
  128. spin_lock_irq(&virqfd_lock);
  129. if (*pvirqfd) {
  130. spin_unlock_irq(&virqfd_lock);
  131. ret = -EBUSY;
  132. goto err_busy;
  133. }
  134. *pvirqfd = virqfd;
  135. spin_unlock_irq(&virqfd_lock);
  136. /*
  137. * Install our own custom wake-up handling so we are notified via
  138. * a callback whenever someone signals the underlying eventfd.
  139. */
  140. init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
  141. init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
  142. events = vfs_poll(fd_file(irqfd), &virqfd->pt);
  143. /*
  144. * Check if there was an event already pending on the eventfd
  145. * before we registered and trigger it as if we didn't miss it.
  146. */
  147. if (events & EPOLLIN) {
  148. if ((!handler || handler(opaque, data)) && thread)
  149. schedule_work(&virqfd->inject);
  150. }
  151. /*
  152. * Do not drop the file until the irqfd is fully initialized,
  153. * otherwise we might race against the EPOLLHUP.
  154. */
  155. fdput(irqfd);
  156. return 0;
  157. err_busy:
  158. eventfd_ctx_put(ctx);
  159. err_ctx:
  160. fdput(irqfd);
  161. err_fd:
  162. kfree(virqfd);
  163. return ret;
  164. }
  165. EXPORT_SYMBOL_GPL(vfio_virqfd_enable);
  166. void vfio_virqfd_disable(struct virqfd **pvirqfd)
  167. {
  168. unsigned long flags;
  169. spin_lock_irqsave(&virqfd_lock, flags);
  170. if (*pvirqfd) {
  171. virqfd_deactivate(*pvirqfd);
  172. *pvirqfd = NULL;
  173. }
  174. spin_unlock_irqrestore(&virqfd_lock, flags);
  175. /*
  176. * Block until we know all outstanding shutdown jobs have completed.
  177. * Even if we don't queue the job, flush the wq to be sure it's
  178. * been released.
  179. */
  180. flush_workqueue(vfio_irqfd_cleanup_wq);
  181. }
  182. EXPORT_SYMBOL_GPL(vfio_virqfd_disable);
  183. void vfio_virqfd_flush_thread(struct virqfd **pvirqfd)
  184. {
  185. unsigned long flags;
  186. spin_lock_irqsave(&virqfd_lock, flags);
  187. if (*pvirqfd && (*pvirqfd)->thread)
  188. queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject);
  189. spin_unlock_irqrestore(&virqfd_lock, flags);
  190. flush_workqueue(vfio_irqfd_cleanup_wq);
  191. }
  192. EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread);