// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic AMD IO page table v2 allocator.
 *
 * Copyright (C) 2022, 2023 Advanced Micro Devices, Inc.
 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 * Author: Vasant Hegde <vasant.hegde@amd.com>
 */

#define pr_fmt(fmt)	"AMD-Vi: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>

#include <asm/barrier.h>

#include "amd_iommu_types.h"
#include "amd_iommu.h"
#include "../iommu-pages.h"

#define IOMMU_PAGE_PRESENT	BIT_ULL(0)	/* Is present */
#define IOMMU_PAGE_RW		BIT_ULL(1)	/* Writeable */
#define IOMMU_PAGE_USER		BIT_ULL(2)	/* Userspace addressable */
#define IOMMU_PAGE_PWT		BIT_ULL(3)	/* Page write through */
#define IOMMU_PAGE_PCD		BIT_ULL(4)	/* Page cache disabled */
#define IOMMU_PAGE_ACCESS	BIT_ULL(5)	/* Was accessed (updated by IOMMU) */
#define IOMMU_PAGE_DIRTY	BIT_ULL(6)	/* Was written to (updated by IOMMU) */
#define IOMMU_PAGE_PSE		BIT_ULL(7)	/* Page Size Extensions */
#define IOMMU_PAGE_NX		BIT_ULL(63)	/* No execute */

#define MAX_PTRS_PER_PAGE	512

#define IOMMU_PAGE_SIZE_2M	BIT_ULL(21)
#define IOMMU_PAGE_SIZE_1G	BIT_ULL(30)
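
/*
 * Note: each table in the v2 page table tree is a single 4K page holding
 * 512 64-bit entries (hence MAX_PTRS_PER_PAGE), following the x86
 * long-mode paging layout. Leaf mappings may be 4K, 2M, or 1G; the two
 * large sizes are flagged with IOMMU_PAGE_PSE.
 */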

static inline int get_pgtable_level(void)
{
	return amd_iommu_gpt_level;
}

static inline bool is_large_pte(u64 pte)
{
	return (pte & IOMMU_PAGE_PSE);
}

static inline u64 set_pgtable_attr(u64 *page)
{
	u64 prot;

	prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER;
	prot |= IOMMU_PAGE_ACCESS;

	return (iommu_virt_to_phys(page) | prot);
}

static inline void *get_pgtable_pte(u64 pte)
{
	return iommu_phys_to_virt(pte & PM_ADDR_MASK);
}

static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot)
{
	u64 pte;

	pte = __sme_set(paddr & PM_ADDR_MASK);
	pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER;
	pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;

	if (prot & IOMMU_PROT_IW)
		pte |= IOMMU_PAGE_RW;

	/* Large page */
	if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M)
		pte |= IOMMU_PAGE_PSE;

	return pte;
}
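
/*
 * Example: for a writable 2M mapping, set_pte_attr() yields the (possibly
 * SME-encrypted) physical address OR'd with PRESENT | USER | ACCESS |
 * DIRTY | RW | PSE. ACCESS and DIRTY are set up front, presumably so the
 * IOMMU does not have to write them back on first use.
 */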

static inline u64 get_alloc_page_size(u64 size)
{
	if (size >= IOMMU_PAGE_SIZE_1G)
		return IOMMU_PAGE_SIZE_1G;

	if (size >= IOMMU_PAGE_SIZE_2M)
		return IOMMU_PAGE_SIZE_2M;

	return PAGE_SIZE;
}

static inline int page_size_to_level(u64 pg_size)
{
	if (pg_size == IOMMU_PAGE_SIZE_1G)
		return PAGE_MODE_3_LEVEL;
	if (pg_size == IOMMU_PAGE_SIZE_2M)
		return PAGE_MODE_2_LEVEL;

	return PAGE_MODE_1_LEVEL;
}
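
/*
 * Note: v2_alloc_pte() walks while (level >= end_level), so the slot it
 * returns sits one level below the value computed here. E.g. a 2M request
 * (PAGE_MODE_2_LEVEL) ends with its leaf written at a level-1 entry,
 * which set_pte_attr() marks with IOMMU_PAGE_PSE.
 */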

static void free_pgtable(u64 *pt, int level)
{
	u64 *p;
	int i;

	for (i = 0; i < MAX_PTRS_PER_PAGE; i++) {
		/* PTE present? */
		if (!IOMMU_PTE_PRESENT(pt[i]))
			continue;

		if (is_large_pte(pt[i]))
			continue;

		/*
		 * Free the next level. No need to look at l1 tables here since
		 * they can only contain leaf PTEs; just free them directly.
		 */
		p = get_pgtable_pte(pt[i]);
		if (level > 2)
			free_pgtable(p, level - 1);
		else
			iommu_free_page(p);
	}

	iommu_free_page(pt);
}
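
/*
 * Note on the allocation walk below: missing intermediate tables are
 * installed with try_cmpxchg64(), so two mappers racing on the same slot
 * cannot leak pages; the loser frees its freshly allocated page and
 * retries. Tearing down an existing entry (a stale large PTE or a
 * sub-tree being replaced) sets *updated so the caller knows a TLB flush
 * is required.
 */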
/* Allocate page table */
static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
			 unsigned long pg_size, gfp_t gfp, bool *updated)
{
	u64 *pte, *page;
	int level, end_level;

	level = get_pgtable_level() - 1;
	end_level = page_size_to_level(pg_size);
	pte = &pgd[PM_LEVEL_INDEX(level, iova)];
	iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE);

	while (level >= end_level) {
		u64 __pte, __npte;

		__pte = *pte;

		if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) {
			/* Unmap large pte */
			cmpxchg64(pte, *pte, 0ULL);
			*updated = true;
			continue;
		}

		if (!IOMMU_PTE_PRESENT(__pte)) {
			page = iommu_alloc_page_node(nid, gfp);
			if (!page)
				return NULL;

			__npte = set_pgtable_attr(page);
			/* pte could have been changed somewhere. */
			if (!try_cmpxchg64(pte, &__pte, __npte))
				iommu_free_page(page);
			else if (IOMMU_PTE_PRESENT(__pte))
				*updated = true;

			continue;
		}

		level -= 1;
		pte = get_pgtable_pte(__pte);
		pte = &pte[PM_LEVEL_INDEX(level, iova)];
	}

	/* Tear down existing pte entries */
	if (IOMMU_PTE_PRESENT(*pte)) {
		u64 *__pte;

		*updated = true;
		__pte = get_pgtable_pte(*pte);
		cmpxchg64(pte, *pte, 0ULL);
		if (pg_size == IOMMU_PAGE_SIZE_1G)
			free_pgtable(__pte, end_level - 1);
		else if (pg_size == IOMMU_PAGE_SIZE_2M)
			iommu_free_page(__pte);
	}

	return pte;
}

/*
 * This function checks if there is a PTE for a given dma address.
 * If there is one, it returns the pointer to it.
 */
static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
		      unsigned long iova, unsigned long *page_size)
{
	u64 *pte;
	int level;

	level = get_pgtable_level() - 1;
	pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)];
	/* Default page size is 4K */
	*page_size = PAGE_SIZE;

	while (level) {
		/* Not present */
		if (!IOMMU_PTE_PRESENT(*pte))
			return NULL;

		/* Walk to the next level */
		pte = get_pgtable_pte(*pte);
		pte = &pte[PM_LEVEL_INDEX(level - 1, iova)];

		/* Large page */
		if (is_large_pte(*pte)) {
			if (level == PAGE_MODE_3_LEVEL)
				*page_size = IOMMU_PAGE_SIZE_1G;
			else if (level == PAGE_MODE_2_LEVEL)
				*page_size = IOMMU_PAGE_SIZE_2M;
			else
				return NULL;	/* Wrongly set PSE bit in PTE */

			break;
		}

		level -= 1;
	}

	return pte;
}
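
/*
 * The two callbacks below implement the io_pgtable_ops contract: map
 * pgcount pages of pgsize bytes starting at iova, reporting partial
 * progress via *mapped (or the returned byte count for unmap) so the
 * core can unwind on failure.
 */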
static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
			      int prot, gfp_t gfp, size_t *mapped)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
	u64 *pte;
	unsigned long map_size;
	unsigned long mapped_size = 0;
	unsigned long o_iova = iova;
	size_t size = pgcount << __ffs(pgsize);
	int ret = 0;
	bool updated = false;

	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount)
		return -EINVAL;

	if (!(prot & IOMMU_PROT_MASK))
		return -EINVAL;

	while (mapped_size < size) {
		map_size = get_alloc_page_size(pgsize);
		pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd,
				   iova, map_size, gfp, &updated);
		if (!pte) {
			ret = -EINVAL;
			goto out;
		}

		*pte = set_pte_attr(paddr, map_size, prot);

		iova += map_size;
		paddr += map_size;
		mapped_size += map_size;
	}

out:
	if (updated) {
		struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
		unsigned long flags;

		spin_lock_irqsave(&pdom->lock, flags);
		amd_iommu_domain_flush_pages(pdom, o_iova, size);
		spin_unlock_irqrestore(&pdom->lock, flags);
	}

	if (mapped)
		*mapped += mapped_size;

	return ret;
}

static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops,
					  unsigned long iova,
					  size_t pgsize, size_t pgcount,
					  struct iommu_iotlb_gather *gather)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
	unsigned long unmap_size;
	unsigned long unmapped = 0;
	size_t size = pgcount << __ffs(pgsize);
	u64 *pte;

	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
		return 0;

	while (unmapped < size) {
		pte = fetch_pte(pgtable, iova, &unmap_size);
		if (!pte)
			return unmapped;

		*pte = 0ULL;

		iova = (iova & ~(unmap_size - 1)) + unmap_size;
		unmapped += unmap_size;
	}

	return unmapped;
}
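
/*
 * Note: unmap only clears leaf PTEs; intermediate tables are kept for
 * reuse, and the gather argument is unused here, leaving IOTLB
 * invalidation to the caller's iotlb_sync path.
 */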

static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long offset_mask, pte_pgsize;
	u64 *pte, __pte;

	pte = fetch_pte(pgtable, iova, &pte_pgsize);
	if (!pte || !IOMMU_PTE_PRESENT(*pte))
		return 0;

	offset_mask = pte_pgsize - 1;
	__pte = __sme_clr(*pte & PM_ADDR_MASK);

	return (__pte & ~offset_mask) | (iova & offset_mask);
}
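
/*
 * Example: with a 2M leaf, fetch_pte() reports pte_pgsize as
 * IOMMU_PAGE_SIZE_2M, so offset_mask is 0x1fffff and the low 21 bits of
 * the IOVA carry over into the returned physical address.
 */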

/*
 * ----------------------------------------------------
 */
static void v2_free_pgtable(struct io_pgtable *iop)
{
	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);

	if (!pgtable || !pgtable->pgd)
		return;

	/* Free page table */
	free_pgtable(pgtable->pgd, get_pgtable_level());
	pgtable->pgd = NULL;
}

static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
	int ias = IOMMU_IN_ADDR_BIT_SIZE;

	pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
	if (!pgtable->pgd)
		return NULL;

	if (get_pgtable_level() == PAGE_MODE_5_LEVEL)
		ias = 57;

	pgtable->pgtbl.ops.map_pages    = iommu_v2_map_pages;
	pgtable->pgtbl.ops.unmap_pages  = iommu_v2_unmap_pages;
	pgtable->pgtbl.ops.iova_to_phys = iommu_v2_iova_to_phys;

	cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
	cfg->ias           = ias;
	cfg->oas           = IOMMU_OUT_ADDR_BIT_SIZE;

	return &pgtable->pgtbl;
}
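
/*
 * This table is registered under the AMD_IOMMU_V2 format in io-pgtable.c;
 * the driver side instantiates it via
 * alloc_io_pgtable_ops(AMD_IOMMU_V2, cfg, cookie).
 */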
struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
	.alloc	= v2_alloc_pgtable,
	.free	= v2_free_pgtable,
};