async_pf.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm asynchronous fault support
 *
 * Copyright 2010 Red Hat, Inc.
 *
 * Author:
 *	Gleb Natapov <gleb@redhat.com>
 */

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include "async_pf.h"
#include <trace/events/kvm.h>
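
/*
 * Overview: kvm_setup_async_pf() queues a work item for a fault that cannot
 * be resolved immediately, async_pf_execute() runs on the system workqueue
 * and faults the page in via get_user_pages_remote(), and
 * kvm_check_async_pf_completion() later hands the completed item back to
 * arch code so the guest can be told the page is ready.  The slab cache
 * below backs the kvm_async_pf work items and is set up and torn down at
 * module init/exit by kvm_async_pf_init()/kvm_async_pf_deinit().
 */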
static struct kmem_cache *async_pf_cache;

int kvm_async_pf_init(void)
{
	async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);

	if (!async_pf_cache)
		return -ENOMEM;

	return 0;
}

void kvm_async_pf_deinit(void)
{
	kmem_cache_destroy(async_pf_cache);
	async_pf_cache = NULL;
}

void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
{
	INIT_LIST_HEAD(&vcpu->async_pf.done);
	INIT_LIST_HEAD(&vcpu->async_pf.queue);
	spin_lock_init(&vcpu->async_pf.lock);
}
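
/*
 * Work item callback, invoked in workqueue context: attempt to fault the
 * page in on behalf of the vCPU, move the item to the vCPU's "done" list,
 * notify arch code and wake the vCPU.
 */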
static void async_pf_execute(struct work_struct *work)
{
	struct kvm_async_pf *apf =
		container_of(work, struct kvm_async_pf, work);
	struct kvm_vcpu *vcpu = apf->vcpu;
	struct mm_struct *mm = vcpu->kvm->mm;
	unsigned long addr = apf->addr;
	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
	int locked = 1;
	bool first;

	might_sleep();

	/*
	 * Attempt to pin the VM's host address space, and simply skip gup() if
	 * acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
	 * holds a reference to its associated mm_struct until the very end of
	 * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
	 * work item is fully processed.
	 */
	if (mmget_not_zero(mm)) {
		mmap_read_lock(mm);
		get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
		if (locked)
			mmap_read_unlock(mm);
		mmput(mm);
	}

	/*
	 * Notify and kick the vCPU even if faulting in the page failed, e.g.
	 * so that the vCPU can retry the fault synchronously.
	 */
	if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
		kvm_arch_async_page_present(vcpu, apf);

	spin_lock(&vcpu->async_pf.lock);
	first = list_empty(&vcpu->async_pf.done);
	list_add_tail(&apf->link, &vcpu->async_pf.done);
	spin_unlock(&vcpu->async_pf.lock);

	/*
	 * The apf struct may be freed by kvm_check_async_pf_completion() as
	 * soon as the lock is dropped.  Nullify it to prevent improper usage.
	 */
	apf = NULL;

	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
		kvm_arch_async_page_present_queued(vcpu);

	trace_kvm_async_pf_completed(addr, cr2_or_gpa);

	__kvm_vcpu_wake_up(vcpu);
}

static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
{
	/*
	 * The async #PF is "done", but KVM must wait for the work item itself,
	 * i.e. async_pf_execute(), to run to completion.  If KVM is a module,
	 * KVM must ensure *no* code owned by KVM (the module) can be run after
	 * the last call to module_put().  Note, flushing the work item is
	 * always required when the item is taken off the completion queue.
	 * E.g. even if the vCPU handles the item in the "normal" path, the VM
	 * could be terminated before async_pf_execute() completes.
	 *
	 * Wake-all events skip the queue and go straight to the done list,
	 * i.e. don't need to be flushed (but sanity check that the work wasn't
	 * queued).
	 */
	if (work->wakeup_all)
		WARN_ON_ONCE(work->work.func);
	else
		flush_work(&work->work);

	kmem_cache_free(async_pf_cache, work);
}
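
/*
 * Cancel every work item still pending on the queue and flush/free everything
 * that has already completed, e.g. when the vCPU is being destroyed or the
 * guest disables async #PF.
 */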
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
	/* cancel outstanding work queue item */
	while (!list_empty(&vcpu->async_pf.queue)) {
		struct kvm_async_pf *work =
			list_first_entry(&vcpu->async_pf.queue,
					 typeof(*work), queue);
		list_del(&work->queue);

#ifdef CONFIG_KVM_ASYNC_PF_SYNC
		flush_work(&work->work);
#else
		if (cancel_work_sync(&work->work))
			kmem_cache_free(async_pf_cache, work);
#endif
	}

	spin_lock(&vcpu->async_pf.lock);
	while (!list_empty(&vcpu->async_pf.done)) {
		struct kvm_async_pf *work =
			list_first_entry(&vcpu->async_pf.done,
					 typeof(*work), link);
		list_del(&work->link);

		spin_unlock(&vcpu->async_pf.lock);
		kvm_flush_and_free_async_pf_work(work);
		spin_lock(&vcpu->async_pf.lock);
	}
	spin_unlock(&vcpu->async_pf.lock);

	vcpu->async_pf.queued = 0;
}
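
/*
 * Called by arch code from the vCPU run path: for each completed item the
 * arch says can be dequeued, tell arch code the page is ready (and, in the
 * non-SYNC case, deliver the "page ready" notification), then drop the item
 * from the per-vCPU queue and free it.
 */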
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;

	while (!list_empty_careful(&vcpu->async_pf.done) &&
	      kvm_arch_can_dequeue_async_page_present(vcpu)) {
		spin_lock(&vcpu->async_pf.lock);
		work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
					      link);
		list_del(&work->link);
		spin_unlock(&vcpu->async_pf.lock);

		kvm_arch_async_page_ready(vcpu, work);
		if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
			kvm_arch_async_page_present(vcpu, work);

		list_del(&work->queue);
		vcpu->async_pf.queued--;
		kvm_flush_and_free_async_pf_work(work);
	}
}

/*
 * Try to schedule a job to handle page fault asynchronously. Returns 'true' on
 * success, 'false' on failure (page fault has to be handled synchronously).
 */
bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
			unsigned long hva, struct kvm_arch_async_pf *arch)
{
	struct kvm_async_pf *work;

	if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
		return false;

	/* Arch specific code should not do async PF in this case */
	if (unlikely(kvm_is_error_hva(hva)))
		return false;

	/*
	 * Do a nowait allocation: if we would have to sleep here anyway, we
	 * may as well sleep faulting the page in synchronously instead.
	 */
	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
	if (!work)
		return false;

	work->wakeup_all = false;
	work->vcpu = vcpu;
	work->cr2_or_gpa = cr2_or_gpa;
	work->addr = hva;
	work->arch = *arch;

	INIT_WORK(&work->work, async_pf_execute);

	list_add_tail(&work->queue, &vcpu->async_pf.queue);
	vcpu->async_pf.queued++;
	work->notpresent_injected = kvm_arch_async_page_not_present(vcpu, work);

	schedule_work(&work->work);

	return true;
}
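
/*
 * Queue a special "wakeup all" item directly on the done list.  When it is
 * delivered, it effectively tells the guest to wake every task still waiting
 * on an async #PF, rather than reporting a specific page as ready.
 */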
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;
	bool first;

	if (!list_empty_careful(&vcpu->async_pf.done))
		return 0;

	work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
	if (!work)
		return -ENOMEM;

	work->wakeup_all = true;
	INIT_LIST_HEAD(&work->queue); /* for list_del to work */

	spin_lock(&vcpu->async_pf.lock);
	first = list_empty(&vcpu->async_pf.done);
	list_add_tail(&work->link, &vcpu->async_pf.done);
	spin_unlock(&vcpu->async_pf.lock);

	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
		kvm_arch_async_page_present_queued(vcpu);

	vcpu->async_pf.queued++;
	return 0;
}