swiotlb-xen.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2010
 * by Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 *
 * This code provides an IOMMU for Xen PV guests with PCI passthrough.
 *
 * PV guests under Xen are running in a non-contiguous memory architecture.
 *
 * When PCI pass-through is utilized, this necessitates an IOMMU for
 * translating bus (DMA) addresses to virtual addresses and vice-versa, and
 * also providing a mechanism to have contiguous pages for device driver
 * operations (say, DMA operations).
 *
 * Specifically, under Xen the Linux idea of pages is an illusion. It
 * assumes that pages start at zero and go up to the available memory. To
 * help with that, the Linux Xen MMU provides a lookup mechanism to
 * translate the page frame numbers (PFN) to machine frame numbers (MFN)
 * and vice-versa. The MFNs are the "real" frame numbers. Furthermore,
 * memory is not contiguous: the Xen hypervisor stitches memory for guests
 * from different pools, which means there is no guarantee that PFN==MFN
 * and PFN+1==MFN+1. Lastly, with Xen 4.0, pages (in debug mode) are
 * allocated in descending order (high to low), meaning the guest might
 * never get any MFNs under the 4GB mark.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/memblock.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/hvc-console.h>

#include <asm/dma-mapping.h>

#include <trace/events/swiotlb.h>

#define MAX_DMA_BITS 32

/*
 * Quick lookup value of the bus address of the IOTLB.
 */

static inline phys_addr_t xen_phys_to_bus(struct device *dev, phys_addr_t paddr)
{
        unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr));
        phys_addr_t baddr = (phys_addr_t)bfn << XEN_PAGE_SHIFT;

        baddr |= paddr & ~XEN_PAGE_MASK;
        return baddr;
}
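
/*
 * Illustrative walk-through of the translation above, using made-up frame
 * numbers (not taken from any real domain): with XEN_PAGE_SHIFT == 12, a
 * paddr of 0x12345 splits into Xen PFN 0x12 and in-page offset 0x345; if
 * pfn_to_bfn(0x12) were to return 0xabcd, the resulting bus address would
 * be (0xabcd << 12) | 0x345 == 0xabcd345. Only the frame number is
 * remapped; the offset within the Xen page is carried over unchanged.
 */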

static inline dma_addr_t xen_phys_to_dma(struct device *dev, phys_addr_t paddr)
{
        return phys_to_dma(dev, xen_phys_to_bus(dev, paddr));
}

static inline phys_addr_t xen_bus_to_phys(struct device *dev,
                                          phys_addr_t baddr)
{
        unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr));
        phys_addr_t paddr = (xen_pfn << XEN_PAGE_SHIFT) |
                            (baddr & ~XEN_PAGE_MASK);

        return paddr;
}

static inline phys_addr_t xen_dma_to_phys(struct device *dev,
                                          dma_addr_t dma_addr)
{
        return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
}

static inline bool range_requires_alignment(phys_addr_t p, size_t size)
{
        phys_addr_t algn = 1ULL << (get_order(size) + PAGE_SHIFT);
        phys_addr_t bus_addr = pfn_to_bfn(XEN_PFN_DOWN(p)) << XEN_PAGE_SHIFT;

        return IS_ALIGNED(p, algn) && !IS_ALIGNED(bus_addr, algn);
}
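
/*
 * Note on the check above: the guest-physical address can be naturally
 * aligned for this allocation order while the bus (machine) address behind
 * it is not, because the PFN and BFN layouts differ under Xen. Callers use
 * this to decide whether the region must be re-established as a properly
 * aligned contiguous machine range before it is handed to a device.
 */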

static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
{
        unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p);
        unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size);

        next_bfn = pfn_to_bfn(xen_pfn);

        for (i = 1; i < nr_pages; i++)
                if (pfn_to_bfn(++xen_pfn) != ++next_bfn)
                        return 1;

        return 0;
}
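
/*
 * The loop above walks every Xen page touched by [p, p + size) and reports
 * a "straddle" as soon as two consecutive PFNs do not map to consecutive
 * BFNs, i.e. the buffer is contiguous in guest-physical space but not in
 * machine space, so the mapping paths must not hand it to the device as a
 * single DMA range.
 */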

static struct io_tlb_pool *xen_swiotlb_find_pool(struct device *dev,
                                                 dma_addr_t dma_addr)
{
        unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
        unsigned long xen_pfn = bfn_to_local_pfn(bfn);
        phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;

        /* If the address is outside our domain, it CAN
         * have the same virtual address as another address
         * in our domain. Therefore _only_ check address within our domain.
         */
        if (pfn_valid(PFN_DOWN(paddr)))
                return swiotlb_find_pool(dev, paddr);
        return NULL;
}

#ifdef CONFIG_X86
int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
{
        int rc;
        unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT);
        unsigned int i, dma_bits = order + PAGE_SHIFT;
        dma_addr_t dma_handle;
        phys_addr_t p = virt_to_phys(buf);

        BUILD_BUG_ON(IO_TLB_SEGSIZE & (IO_TLB_SEGSIZE - 1));
        BUG_ON(nslabs % IO_TLB_SEGSIZE);

        i = 0;
        do {
                do {
                        rc = xen_create_contiguous_region(
                                p + (i << IO_TLB_SHIFT), order,
                                dma_bits, &dma_handle);
                } while (rc && dma_bits++ < MAX_DMA_BITS);
                if (rc)
                        return rc;

                i += IO_TLB_SEGSIZE;
        } while (i < nslabs);
        return 0;
}
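
/*
 * The nested loops above exchange the swiotlb buffer with Xen one
 * IO_TLB_SEGSIZE-slab chunk at a time, so that every chunk ends up machine
 * contiguous. If a chunk cannot be placed under the current address width,
 * the requirement is relaxed one bit at a time, up to MAX_DMA_BITS, before
 * giving up and returning the error.
 */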

static void *
xen_swiotlb_alloc_coherent(struct device *dev, size_t size,
                dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
        u64 dma_mask = dev->coherent_dma_mask;
        int order = get_order(size);
        phys_addr_t phys;
        void *ret;

        /* Align the allocation to the Xen page size */
        size = ALIGN(size, XEN_PAGE_SIZE);

        ret = (void *)__get_free_pages(flags, get_order(size));
        if (!ret)
                return ret;
        phys = virt_to_phys(ret);

        *dma_handle = xen_phys_to_dma(dev, phys);
        if (*dma_handle + size - 1 > dma_mask ||
            range_straddles_page_boundary(phys, size) ||
            range_requires_alignment(phys, size)) {
                if (xen_create_contiguous_region(phys, order, fls64(dma_mask),
                                dma_handle) != 0)
                        goto out_free_pages;
                SetPageXenRemapped(virt_to_page(ret));
        }

        memset(ret, 0, size);
        return ret;

out_free_pages:
        free_pages((unsigned long)ret, get_order(size));
        return NULL;
}
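
/*
 * Rough shape of the allocation policy above: take whatever the page
 * allocator returns, and only if it is not addressable under the coherent
 * DMA mask, not machine contiguous, or not machine aligned, exchange it
 * with Xen for a region that is. PageXenRemapped records that the exchange
 * happened so xen_swiotlb_free_coherent() can undo it later.
 */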

static void
xen_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr,
                dma_addr_t dma_handle, unsigned long attrs)
{
        phys_addr_t phys = virt_to_phys(vaddr);
        int order = get_order(size);

        /* Convert the size to actually allocated. */
        size = ALIGN(size, XEN_PAGE_SIZE);

        if (WARN_ON_ONCE(dma_handle + size - 1 > dev->coherent_dma_mask) ||
            WARN_ON_ONCE(range_straddles_page_boundary(phys, size) ||
                         range_requires_alignment(phys, size)))
                return;

        if (TestClearPageXenRemapped(virt_to_page(vaddr)))
                xen_destroy_contiguous_region(phys, order);

        free_pages((unsigned long)vaddr, get_order(size));
}
#endif /* CONFIG_X86 */

/*
 * Map a single buffer of the indicated size for DMA in streaming mode. The
 * dma address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either xen_swiotlb_unmap_page or one of the xen_swiotlb_sync_single_for_*
 * calls is performed.
 */
static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
                                unsigned long offset, size_t size,
                                enum dma_data_direction dir,
                                unsigned long attrs)
{
        phys_addr_t map, phys = page_to_phys(page) + offset;
        dma_addr_t dev_addr = xen_phys_to_dma(dev, phys);

        BUG_ON(dir == DMA_NONE);
        /*
         * If the address happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (dma_capable(dev, dev_addr, size, true) &&
            !range_straddles_page_boundary(phys, size) &&
            !xen_arch_need_swiotlb(dev, phys, dev_addr) &&
            !is_swiotlb_force_bounce(dev))
                goto done;

        /*
         * Oh well, have to allocate and map a bounce buffer.
         */
        trace_swiotlb_bounced(dev, dev_addr, size);

        map = swiotlb_tbl_map_single(dev, phys, size, 0, dir, attrs);
        if (map == (phys_addr_t)DMA_MAPPING_ERROR)
                return DMA_MAPPING_ERROR;

        phys = map;
        dev_addr = xen_phys_to_dma(dev, map);

        /*
         * Ensure that the address returned is DMA'ble
         */
        if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
                __swiotlb_tbl_unmap_single(dev, map, size, dir,
                                attrs | DMA_ATTR_SKIP_CPU_SYNC,
                                swiotlb_find_pool(dev, map));
                return DMA_MAPPING_ERROR;
        }

done:
        if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
                        arch_sync_dma_for_device(phys, size, dir);
                else
                        xen_dma_sync_for_device(dev, dev_addr, size, dir);
        }
        return dev_addr;
}
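
/*
 * Summary of the mapping decision above: the buffer is used in place when
 * the device can address it, it is machine contiguous, the architecture
 * does not require bouncing, and swiotlb bouncing is not forced; otherwise
 * it is copied through a swiotlb bounce slot and the bounce slot's DMA
 * address is handed to the device instead.
 */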

/*
 * Unmap a single streaming mode DMA translation. The dma_addr and size must
 * match what was provided in a previous xen_swiotlb_map_page call. All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
                size_t size, enum dma_data_direction dir, unsigned long attrs)
{
        phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);
        struct io_tlb_pool *pool;

        BUG_ON(dir == DMA_NONE);

        if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
                        arch_sync_dma_for_cpu(paddr, size, dir);
                else
                        xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
        }

        /* NOTE: We use dev_addr here, not paddr! */
        pool = xen_swiotlb_find_pool(hwdev, dev_addr);
        if (pool)
                __swiotlb_tbl_unmap_single(hwdev, paddr, size, dir,
                                           attrs, pool);
}

static void
xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
                size_t size, enum dma_data_direction dir)
{
        phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
        struct io_tlb_pool *pool;

        if (!dev_is_dma_coherent(dev)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
                        arch_sync_dma_for_cpu(paddr, size, dir);
                else
                        xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
        }

        pool = xen_swiotlb_find_pool(dev, dma_addr);
        if (pool)
                __swiotlb_sync_single_for_cpu(dev, paddr, size, dir, pool);
}

static void
xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
                size_t size, enum dma_data_direction dir)
{
        phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
        struct io_tlb_pool *pool;

        pool = xen_swiotlb_find_pool(dev, dma_addr);
        if (pool)
                __swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);

        if (!dev_is_dma_coherent(dev)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
                        arch_sync_dma_for_device(paddr, size, dir);
                else
                        xen_dma_sync_for_device(dev, dma_addr, size, dir);
        }
}
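
/*
 * Note the mirrored ordering in the two sync helpers above: for the CPU
 * direction the cache maintenance runs before the swiotlb copy-back, while
 * for the device direction the swiotlb copy runs first and the cache
 * maintenance afterwards, so whichever side consumes the data next sees
 * the freshly copied contents.
 */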

/*
 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
 * concerning calls here are the same as for xen_swiotlb_unmap_page() above.
 */
static void
xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
                enum dma_data_direction dir, unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i)
                xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
                                dir, attrs);
}

static int
xen_swiotlb_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
                enum dma_data_direction dir, unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i) {
                sg->dma_address = xen_swiotlb_map_page(dev, sg_page(sg),
                                sg->offset, sg->length, dir, attrs);
                if (sg->dma_address == DMA_MAPPING_ERROR)
                        goto out_unmap;
                sg_dma_len(sg) = sg->length;
        }

        return nelems;
out_unmap:
        xen_swiotlb_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
        sg_dma_len(sgl) = 0;
        return -EIO;
}
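
/*
 * The scatter-gather path above is a plain loop over xen_swiotlb_map_page():
 * each entry is mapped (and possibly bounced) independently, and a failure
 * part-way through unwinds every entry already mapped before returning
 * -EIO. There is no attempt to merge entries into a single contiguous
 * mapping.
 */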

static void
xen_swiotlb_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
                            int nelems, enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nelems, i) {
                xen_swiotlb_sync_single_for_cpu(dev, sg->dma_address,
                                                sg->length, dir);
        }
}

static void
xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
                               int nelems, enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nelems, i) {
                xen_swiotlb_sync_single_for_device(dev, sg->dma_address,
                                                   sg->length, dir);
        }
}

/*
 * Return whether the given device DMA address mask can be supported
 * properly. For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
static int
xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
        return xen_phys_to_dma(hwdev, default_swiotlb_limit()) <= mask;
}

const struct dma_map_ops xen_swiotlb_dma_ops = {
#ifdef CONFIG_X86
        .alloc = xen_swiotlb_alloc_coherent,
        .free = xen_swiotlb_free_coherent,
#else
        .alloc = dma_direct_alloc,
        .free = dma_direct_free,
#endif
        .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
        .sync_single_for_device = xen_swiotlb_sync_single_for_device,
        .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
        .map_sg = xen_swiotlb_map_sg,
        .unmap_sg = xen_swiotlb_unmap_sg,
        .map_page = xen_swiotlb_map_page,
        .unmap_page = xen_swiotlb_unmap_page,
        .dma_supported = xen_swiotlb_dma_supported,
        .mmap = dma_common_mmap,
        .get_sgtable = dma_common_get_sgtable,
        .alloc_pages_op = dma_common_alloc_pages,
        .free_pages = dma_common_free_pages,
        .max_mapping_size = swiotlb_max_mapping_size,
};