// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell. All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 */
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

#ifdef CONFIG_HAVE_KVM_IRQCHIP

static struct workqueue_struct *irqfd_cleanup_wq;

bool __attribute__((weak))
kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
{
	return true;
}
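
/*
 * Work-queue handler that performs the actual interrupt injection for an
 * irqfd.  A plain irqfd gets an assert/de-assert pulse on its GSI, while a
 * resampler irqfd only asserts the (level-triggered) line and relies on
 * irqfd_resampler_ack() to de-assert it once the guest acks the interrupt.
 */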
static void
irqfd_inject(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	if (!irqfd->resampler) {
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
				false);
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
				false);
	} else
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    irqfd->gsi, 1, false);
}
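
/*
 * Signal every resample eventfd attached to this resampler.  The caller
 * must hold kvm->irq_srcu; the list is walked under SRCU.
 */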
static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler)
{
	struct kvm_kernel_irqfd *irqfd;

	list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
				 srcu_read_lock_held(&resampler->kvm->irq_srcu))
		eventfd_signal(irqfd->resamplefd);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_kernel_irqfd_resampler *resampler;
	struct kvm *kvm;
	int idx;

	resampler = container_of(kian,
			struct kvm_kernel_irqfd_resampler, notifier);
	kvm = resampler->kvm;

	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		    resampler->notifier.gsi, 0, false);

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_resampler_notify(resampler);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}
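
/*
 * Detach an irqfd from its resampler.  If it was the last user, tear the
 * resampler down: unregister the ack notifier, de-assert the line and free
 * the structure.
 */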
static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
	struct kvm *kvm = resampler->kvm;

	mutex_lock(&kvm->irqfds.resampler_lock);

	list_del_rcu(&irqfd->resampler_link);

	if (list_empty(&resampler->list)) {
		list_del_rcu(&resampler->link);
		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
		/*
		 * synchronize_srcu_expedited(&kvm->irq_srcu) already called
		 * in kvm_unregister_irq_ack_notifier().
		 */
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    resampler->notifier.gsi, 0, false);
		kfree(resampler);
	} else {
		synchronize_srcu_expedited(&kvm->irq_srcu);
	}

	mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, shutdown);
	struct kvm *kvm = irqfd->kvm;
	u64 cnt;

	/* Make sure irqfd has been initialized in assign path. */
	synchronize_srcu_expedited(&kvm->irq_srcu);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed
	 */
	flush_work(&irqfd->inject);

	if (irqfd->resampler) {
		irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->resamplefd);
	}

	/*
	 * It is now safe to release the object's resources
	 */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}

/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
	return list_empty(&irqfd->list) ? false : true;
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}
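
/*
 * Weak default for kvm_arch_set_irq_inatomic(): architectures that cannot
 * inject this interrupt from atomic context return -EWOULDBLOCK, which makes
 * irqfd_wakeup() fall back to the inject work item.
 */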
int __attribute__((weak)) kvm_arch_set_irq_inatomic(
				struct kvm_kernel_irq_routing_entry *irq,
				struct kvm *kvm, int irq_source_id,
				int level,
				bool line_status)
{
	return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(wait, struct kvm_kernel_irqfd, wait);
	__poll_t flags = key_to_poll(key);
	struct kvm_kernel_irq_routing_entry irq;
	struct kvm *kvm = irqfd->kvm;
	unsigned seq;
	int idx;
	int ret = 0;

	if (flags & EPOLLIN) {
		u64 cnt;
		eventfd_ctx_do_read(irqfd->eventfd, &cnt);

		idx = srcu_read_lock(&kvm->irq_srcu);
		do {
			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
			irq = irqfd->irq_entry;
		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
		/* An event has been signaled, inject an interrupt */
		if (kvm_arch_set_irq_inatomic(&irq, kvm,
					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
					      false) == -EWOULDBLOCK)
			schedule_work(&irqfd->inject);
		srcu_read_unlock(&kvm->irq_srcu, idx);
		ret = 1;
	}

	if (flags & EPOLLHUP) {
		/* The eventfd is closing, detach from KVM */
		unsigned long iflags;

		spin_lock_irqsave(&kvm->irqfds.lock, iflags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
	}

	return ret;
}
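
/*
 * Poll-table callback: hook the irqfd onto the eventfd's wait queue so that
 * irqfd_wakeup() runs whenever the eventfd is signalled.
 */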
static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(pt, struct kvm_kernel_irqfd, pt);
	add_wait_queue_priority(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	int n_entries;

	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

	write_seqcount_begin(&irqfd->irq_entry_sc);

	e = entries;
	if (n_entries == 1)
		irqfd->irq_entry = *e;
	else
		irqfd->irq_entry.type = 0;

	write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
				struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
				struct irq_bypass_consumer *cons)
{
}

int __attribute__((weak)) kvm_arch_update_irqfd_routing(
				struct kvm *kvm, unsigned int host_irq,
				uint32_t guest_irq, bool set)
{
	return 0;
}

bool __attribute__((weak)) kvm_arch_irqfd_route_changed(
				struct kvm_kernel_irq_routing_entry *old,
				struct kvm_kernel_irq_routing_entry *new)
{
	return true;
}
#endif
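
/*
 * Wire an eventfd to a GSI: take a reference on the eventfd, optionally
 * join (or create) the resampler for this GSI, install the wakeup callback
 * on the eventfd's wait queue, cache the routing entry, and kick the inject
 * work if an event was already pending.  Fails with -EBUSY if the eventfd
 * is already bound to another irqfd.
 */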
static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct fd f;
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	int ret;
	__poll_t events;
	int idx;

	if (!kvm_arch_intc_initialized(kvm))
		return -EAGAIN;

	if (!kvm_arch_irqfd_allowed(kvm, args))
		return -EINVAL;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = args->gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
	seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock);

	f = fdget(args->fd);
	if (!fd_file(f)) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(fd_file(f));
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
		struct kvm_kernel_irqfd_resampler *resampler;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->resamplefd = resamplefd;
		INIT_LIST_HEAD(&irqfd->resampler_link);

		mutex_lock(&kvm->irqfds.resampler_lock);

		list_for_each_entry(resampler,
				    &kvm->irqfds.resampler_list, link) {
			if (resampler->notifier.gsi == irqfd->gsi) {
				irqfd->resampler = resampler;
				break;
			}
		}

		if (!irqfd->resampler) {
			resampler = kzalloc(sizeof(*resampler),
					    GFP_KERNEL_ACCOUNT);
			if (!resampler) {
				ret = -ENOMEM;
				mutex_unlock(&kvm->irqfds.resampler_lock);
				goto fail;
			}

			resampler->kvm = kvm;
			INIT_LIST_HEAD(&resampler->list);
			resampler->notifier.gsi = irqfd->gsi;
			resampler->notifier.irq_acked = irqfd_resampler_ack;
			INIT_LIST_HEAD(&resampler->link);

			list_add_rcu(&resampler->link, &kvm->irqfds.resampler_list);
			kvm_register_irq_ack_notifier(kvm,
						      &resampler->notifier);
			irqfd->resampler = resampler;
		}

		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
		synchronize_srcu_expedited(&kvm->irq_srcu);

		mutex_unlock(&kvm->irqfds.resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_update(kvm, irqfd);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = vfs_poll(fd_file(f), &irqfd->pt);

	if (events & EPOLLIN)
		schedule_work(&irqfd->inject);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}
#endif

	srcu_read_unlock(&kvm->irq_srcu, idx);

	/*
	 * do not drop the file until the irqfd is fully initialized, otherwise
	 * we might race against the EPOLLHUP
	 */
	fdput(f);
	return 0;

fail:
	if (irqfd->resampler)
		irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}
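
/*
 * Return true if an irq ack notifier is registered for the GSI behind the
 * given irqchip pin, false otherwise.
 */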
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
					  link, srcu_read_lock_held(&kvm->irq_srcu))
			if (kian->gsi == gsi) {
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}

	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
	struct kvm_irq_ack_notifier *kian;

	hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
				  link, srcu_read_lock_held(&kvm->irq_srcu))
		if (kian->gsi == gsi)
			kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	int gsi, idx;

	trace_kvm_ack_irq(irqchip, pin);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
	mutex_unlock(&kvm->irq_lock);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				     struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_init_rcu(&kian->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu_expedited(&kvm->irq_srcu);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}

/*
 * shutdown any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
			/*
			 * This clearing of irq_entry.type is needed for when
			 * another thread calls kvm_irq_routing_update before
			 * we flush workqueue below (we synchronize with
			 * kvm_irq_routing_update using irqfds.lock).
			 */
			write_seqcount_begin(&irqfd->irq_entry_sc);
			irqfd->irq_entry.type = 0;
			write_seqcount_end(&irqfd->irq_entry_sc);
			irqfd_deactivate(irqfd);
		}
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}
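
/*
 * Illustrative userspace flow (a rough sketch, not part of this file): the
 * VMM creates an eventfd and hands it to KVM with the KVM_IRQFD vm ioctl,
 * after which writing to the eventfd raises the chosen GSI in the guest:
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	struct kvm_irqfd irqfd = { .fd = efd, .gsi = gsi, .flags = 0 };
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);	// bind the eventfd to the GSI
 *	uint64_t one = 1;
 *	write(efd, &one, sizeof(one));		// inject the interrupt
 *
 * "vm_fd" and "gsi" are placeholders supplied by the VMM.
 */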

/*
 * This function is called as the kvm VM fd is being released. Shutdown all
 * irqfds that still remain open
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu_expedited(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
		/* Under irqfds.lock, so can read irq_entry safely */
		struct kvm_kernel_irq_routing_entry old = irqfd->irq_entry;
#endif

		irqfd_update(kvm, irqfd);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
		if (irqfd->producer &&
		    kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) {
			int ret = kvm_arch_update_irqfd_routing(
					irqfd->kvm, irqfd->producer->irq,
					irqfd->gsi, 1);
			WARN_ON(ret);
		}
#endif
	}

	spin_unlock_irq(&kvm->irqfds.lock);
}
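
/*
 * Signal the resample eventfds registered for the GSI behind the given
 * irqchip pin.  Returns true if a resampler was found (and notified),
 * false otherwise.
 */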
bool kvm_notify_irqfd_resampler(struct kvm *kvm,
				unsigned int irqchip,
				unsigned int pin)
{
	struct kvm_kernel_irqfd_resampler *resampler;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1) {
		list_for_each_entry_srcu(resampler,
					 &kvm->irqfds.resampler_list, link,
					 srcu_read_lock_held(&kvm->irq_srcu)) {
			if (resampler->notifier.gsi == gsi) {
				irqfd_resampler_notify(resampler);
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}
		}
	}
	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated
 * queue to ease flushing work items when a VM exits.
 */
int kvm_irqfd_init(void)
{
	irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void kvm_irqfd_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */
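
/*
 * One registered ioeventfd: a guest write to @addr on bus @bus_idx signals
 * @eventfd.  A @length of zero matches on the address alone; otherwise the
 * access size must equal @length, and unless @wildcard is set the written
 * value must also equal @datamatch.
 */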
struct _ioeventfd {
	struct list_head list;
	u64 addr;
	int length;
	struct eventfd_ctx *eventfd;
	u64 datamatch;
	struct kvm_io_device dev;
	u8 bus_idx;
	bool wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (addr != p->addr)
		/* address must be precise for a hit */
		return false;

	if (!p->length)
		/* length = 0 means only look at the address, so always a hit */
		return true;

	if (len != p->length)
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
		int len, const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->bus_idx == p->bus_idx &&
		    _p->addr == p->addr &&
		    (!_p->length || !p->length ||
		     (_p->length == p->length &&
		      (_p->wildcard || p->wildcard ||
		       _p->datamatch == p->datamatch))))
			return true;

	return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
	if (flags & KVM_IOEVENTFD_FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;
}
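
/*
 * Register one ioeventfd on the requested bus: take a reference on the
 * eventfd, reject duplicates of an existing registration, then hook the
 * device into the KVM I/O bus and track it on kvm->ioeventfds.
 */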
static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
				    enum kvm_bus bus_idx,
				    struct kvm_ioeventfd *args)
{
	struct eventfd_ctx *eventfd;
	struct _ioeventfd *p;
	int ret;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->bus_idx = bus_idx;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
				      &p->dev);
	if (ret < 0)
		goto unlock_fail;

	kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

unlock_fail:
	mutex_unlock(&kvm->slots_lock);
	kfree(p);

fail:
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
			   struct kvm_ioeventfd *args)
{
	struct _ioeventfd *p;
	struct eventfd_ctx *eventfd;
	struct kvm_io_bus *bus;
	int ret = -ENOENT;
	bool wildcard;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry(p, &kvm->ioeventfds, list) {
		if (p->bus_idx != bus_idx ||
		    p->eventfd != eventfd ||
		    p->addr != args->addr ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		bus = kvm_get_bus(kvm, bus_idx);
		if (bus)
			bus->ioeventfd_count--;
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

	if (!args->len && bus_idx == KVM_MMIO_BUS)
		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

	return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx;
	int ret;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized, or 0 to ignore length */
	switch (args->len) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	/* ioeventfd with no length can't be combined with DATAMATCH */
	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
		return -EINVAL;

	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
	if (ret)
		goto fail;

	/* When length is ignored, MMIO is also put on a separate bus, for
	 * faster lookups.
	 */
	if (!args->len && bus_idx == KVM_MMIO_BUS) {
		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
		if (ret < 0)
			goto fast_fail;
	}

	return 0;

fast_fail:
	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}
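
/*
 * Illustrative userspace flow (a rough sketch, not part of this file): the
 * VMM registers a guest MMIO/PIO address with the KVM_IOEVENTFD vm ioctl,
 * then waits on the eventfd instead of taking a full exit to userspace for
 * every write to that address:
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	struct kvm_ioeventfd io = {
 *		.addr  = notify_addr,	// guest-physical address to watch
 *		.len   = 2,		// or 0 to match on the address alone
 *		.fd    = efd,
 *		.flags = 0,		// KVM_IOEVENTFD_FLAG_DATAMATCH, _PIO, ...
 *	};
 *	ioctl(vm_fd, KVM_IOEVENTFD, &io);
 *
 * "vm_fd" and "notify_addr" are placeholders supplied by the VMM.
 */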

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQCHIP
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
	mutex_init(&kvm->irqfds.resampler_lock);
#endif
	INIT_LIST_HEAD(&kvm->ioeventfds);
}