/*
 * mmu.c - Hyper-V hypercall-based remote TLB flushing for x86 guests.
 */
  1. #define pr_fmt(fmt) "Hyper-V: " fmt
  2. #include <linux/hyperv.h>
  3. #include <linux/log2.h>
  4. #include <linux/slab.h>
  5. #include <linux/types.h>
  6. #include <asm/fpu/api.h>
  7. #include <asm/mshyperv.h>
  8. #include <asm/msr.h>
  9. #include <asm/tlbflush.h>
  10. #include <asm/tlb.h>
  11. #define CREATE_TRACE_POINTS
  12. #include <asm/trace/hyperv.h>
  13. /* Each gva in gva_list encodes up to 4096 pages to flush */
  14. #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
  15. static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
  16. const struct flush_tlb_info *info);
  17. /*
  18. * Fills in gva_list starting from offset. Returns the number of items added.
  19. */
  20. static inline int fill_gva_list(u64 gva_list[], int offset,
  21. unsigned long start, unsigned long end)
  22. {
  23. int gva_n = offset;
  24. unsigned long cur = start, diff;
  25. do {
  26. diff = end > cur ? end - cur : 0;
  27. gva_list[gva_n] = cur & PAGE_MASK;
  28. /*
  29. * Lower 12 bits encode the number of additional
  30. * pages to flush (in addition to the 'cur' page).
  31. */
  32. if (diff >= HV_TLB_FLUSH_UNIT) {
  33. gva_list[gva_n] |= ~PAGE_MASK;
  34. cur += HV_TLB_FLUSH_UNIT;
  35. } else if (diff) {
  36. gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
  37. cur = end;
  38. }
  39. gva_n++;
  40. } while (cur < end);
  41. return gva_n - offset;
  42. }
/*
 * Flush TLB entries on remote CPUs by asking the Hyper-V hypervisor to do
 * it via a hypercall, instead of sending flush IPIs ourselves.  Falls back
 * to native_flush_tlb_others() whenever the hypercall path is unavailable
 * or the hypercall reports failure.
 */
static void hyperv_flush_tlb_others(const struct cpumask *cpus,
				    const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_tlb_flush **flush_pcpu;
	struct hv_tlb_flush *flush;
	/* U64_MAX = "no hypercall made yet"; any nonzero low bits mean failure. */
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	/* No hypercall page means we cannot talk to the hypervisor at all. */
	if (!hv_hypercall_pg)
		goto do_native;

	local_irq_save(flags);

	/*
	 * Only check the mask _after_ interrupt has been disabled to avoid the
	 * mask changing under our feet.
	 */
	if (cpumask_empty(cpus)) {
		local_irq_restore(flags);
		return;
	}

	/* Per-cpu pre-mapped page used as the hypercall input argument. */
	flush_pcpu = (struct hv_tlb_flush **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		/* No mm: ask the hypervisor to flush every address space. */
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		/*
		 * From the supplied CPU set we need to figure out if we can get
		 * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
		 * hypercalls. This is possible when the highest VP number in
		 * the set is < 64. As VP numbers are usually in ascending order
		 * and match Linux CPU ids, here is an optimization: we check
		 * the VP number for the highest bit in the supplied set first
		 * so we can quickly find out if using *_EX hypercalls is a
		 * must. We will also check all VP numbers when walking the
		 * supplied CPU set to remain correct in all cases.
		 */
		if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
			goto do_ex_hypercall;

		for_each_cpu(cpu, cpus) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			if (vcpu == VP_INVAL) {
				local_irq_restore(flags);
				goto do_native;
			}

			/* A VP >= 64 cannot fit in the 64-bit processor_mask. */
			if (vcpu >= 64)
				goto do_ex_hypercall;

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}
	}

	/*
	 * We can flush not more than max_gvas with one hypercall. Flush the
	 * whole address space if we were asked to do more.
	 */
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		/* Range too large for the gva_list: flush the whole space. */
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		/* Per-range flush: encode [start, end) into gva_list entries. */
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}
	goto check_status;

do_ex_hypercall:
	status = hyperv_flush_tlb_others_ex(cpus, info);

check_status:
	local_irq_restore(flags);
	/* Low result bits zero => hypercall succeeded; nothing more to do. */
	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}
/*
 * Extended-hypercall variant used when some target VP number is >= 64 and
 * therefore cannot be represented in the simple 64-bit processor_mask.
 * Encodes the CPU set as a sparse VP set.  Returns the raw hypercall
 * status, or U64_MAX if the *_EX hypercalls are not available/usable
 * (caller then falls back to the native flush).
 */
static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				      const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_tlb_flush_ex **flush_pcpu;
	struct hv_tlb_flush_ex *flush;
	u64 status;

	/* The hypervisor must advertise support for sparse VP sets. */
	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
		return U64_MAX;

	flush_pcpu = (struct hv_tlb_flush_ex **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	/*
	 * NOTE(review): unlike hyperv_flush_tlb_others(), *flush_pcpu is not
	 * NULL-checked here — presumably the caller's check already ran on
	 * this CPU with interrupts disabled; confirm that invariant holds.
	 */
	flush = *flush_pcpu;

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		/* No mm: flush all address spaces. */
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

	/* Encode the CPU set as a sparse set of 64-VP banks. */
	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
	if (nr_bank < 0)
		return U64_MAX;

	/*
	 * We can flush not more than max_gvas with one hypercall. Flush the
	 * whole address space if we were asked to do more.
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				0, nr_bank, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		/* Range too large to enumerate: flush the whole space. */
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				0, nr_bank, flush, NULL);
	} else {
		/* gva_list lives after the VP-set banks, hence offset nr_bank. */
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
				gva_n, nr_bank, flush, NULL);
	}

	return status;
}
  196. void hyperv_setup_mmu_ops(void)
  197. {
  198. if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
  199. return;
  200. pr_info("Using hypercall for remote TLB flush\n");
  201. pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
  202. pv_mmu_ops.tlb_remove_table = tlb_remove_table;
  203. }