io_pagetable.c
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The io_pagetable is the top of datastructure that maps IOVA's to PFNs. The
 * PFNs can be placed into an iommu_domain, or returned to the caller as a page
 * list for access by an in-kernel user.
 *
 * The datastructure uses the iopt_pages to optimize the storage of the PFNs
 * between the domains and xarray.
 */
  11. #include <linux/err.h>
  12. #include <linux/errno.h>
  13. #include <linux/iommu.h>
  14. #include <linux/iommufd.h>
  15. #include <linux/lockdep.h>
  16. #include <linux/sched/mm.h>
  17. #include <linux/slab.h>
  18. #include <uapi/linux/iommufd.h>
  19. #include "double_span.h"
  20. #include "io_pagetable.h"
/*
 * One entry of the pages_list handed to iopt_map_pages(). Describes the slice
 * [start_byte, start_byte + length) of @pages that will become one iopt_area
 * once the mapping completes.
 */
struct iopt_pages_list {
	struct iopt_pages *pages;
	/* Pre-allocated area, consumed by iopt_map_pages() on success */
	struct iopt_area *area;
	struct list_head next;
	/* Offset of the slice within pages, in bytes */
	unsigned long start_byte;
	unsigned long length;
};
/*
 * Start iterating the areas covering [iova, last_iova]. Returns the first
 * area, or NULL when the start of the range is not covered by a fully
 * initialized area. Caller must hold the iova_rwsem.
 */
struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
					struct io_pagetable *iopt,
					unsigned long iova,
					unsigned long last_iova)
{
	lockdep_assert_held(&iopt->iova_rwsem);

	iter->cur_iova = iova;
	iter->last_iova = last_iova;
	iter->area = iopt_area_iter_first(iopt, iova, iova);
	if (!iter->area)
		return NULL;
	/* A NULL pages means the area is still being set up or torn down */
	if (!iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}
/*
 * Advance the contiguous-area iterator. Returns the next area only if it
 * begins exactly where the previous one ended (no IOVA hole) and is fully
 * initialized; otherwise terminates the walk by returning NULL.
 */
struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
{
	unsigned long last_iova;

	if (!iter->area)
		return NULL;
	last_iova = iopt_area_last_iova(iter->area);
	/* The current area already covers the end of the requested range */
	if (iter->last_iova <= last_iova)
		return NULL;

	iter->cur_iova = last_iova + 1;
	iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
					 iter->last_iova);
	if (!iter->area)
		return NULL;
	/* Reject a gap before the next area, or an uninitialized area */
	if (iter->cur_iova != iopt_area_iova(iter->area) ||
	    !iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}
  65. static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
  66. unsigned long length,
  67. unsigned long iova_alignment,
  68. unsigned long page_offset)
  69. {
  70. if (span->is_used || span->last_hole - span->start_hole < length - 1)
  71. return false;
  72. span->start_hole = ALIGN(span->start_hole, iova_alignment) |
  73. page_offset;
  74. if (span->start_hole > span->last_hole ||
  75. span->last_hole - span->start_hole < length - 1)
  76. return false;
  77. return true;
  78. }
  79. static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
  80. unsigned long length,
  81. unsigned long iova_alignment,
  82. unsigned long page_offset)
  83. {
  84. if (span->is_hole || span->last_used - span->start_used < length - 1)
  85. return false;
  86. span->start_used = ALIGN(span->start_used, iova_alignment) |
  87. page_offset;
  88. if (span->start_used > span->last_used ||
  89. span->last_used - span->start_used < length - 1)
  90. return false;
  91. return true;
  92. }
/*
 * Automatically find a block of IOVA that is not being used and not reserved.
 * Does not return a 0 IOVA even if it is valid.
 */
static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
			   unsigned long uptr, unsigned long length)
{
	unsigned long page_offset = uptr % PAGE_SIZE;
	struct interval_tree_double_span_iter used_span;
	struct interval_tree_span_iter allowed_span;
	unsigned long max_alignment = PAGE_SIZE;
	unsigned long iova_alignment;

	lockdep_assert_held(&iopt->iova_rwsem);

	/* Protect roundup_pow_of_two() from overflow */
	if (length == 0 || length >= ULONG_MAX / 2)
		return -EOVERFLOW;

	/*
	 * Keep alignment present in the uptr when building the IOVA, this
	 * increases the chance we can map a THP.
	 */
	if (!uptr)
		iova_alignment = roundup_pow_of_two(length);
	else
		iova_alignment = min_t(unsigned long,
				       roundup_pow_of_two(length),
				       1UL << __ffs64(uptr));

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	max_alignment = HPAGE_SIZE;
#endif
	/* Protect against ALIGN() overflow */
	if (iova_alignment >= max_alignment)
		iova_alignment = max_alignment;

	if (iova_alignment < iopt->iova_alignment)
		return -EINVAL;

	/* Walk the allowed ranges; never hand out page 0 or the last page */
	interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
				    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
		if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
			/* An empty allowed tree means everything is allowed */
			allowed_span.start_used = PAGE_SIZE;
			allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
			allowed_span.is_hole = false;
		}

		if (!__alloc_iova_check_used(&allowed_span, length,
					     iova_alignment, page_offset))
			continue;

		/* Within the allowed span, find a hole free of both
		 * reserved ranges and existing areas.
		 */
		interval_tree_for_each_double_span(
			&used_span, &iopt->reserved_itree, &iopt->area_itree,
			allowed_span.start_used, allowed_span.last_used) {
			if (!__alloc_iova_check_hole(&used_span, length,
						     iova_alignment,
						     page_offset))
				continue;

			*iova = used_span.start_hole;
			return 0;
		}
	}
	return -ENOSPC;
}
  150. static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova,
  151. unsigned long length)
  152. {
  153. unsigned long last;
  154. lockdep_assert_held(&iopt->iova_rwsem);
  155. if ((iova & (iopt->iova_alignment - 1)))
  156. return -EINVAL;
  157. if (check_add_overflow(iova, length - 1, &last))
  158. return -EOVERFLOW;
  159. /* No reserved IOVA intersects the range */
  160. if (iopt_reserved_iter_first(iopt, iova, last))
  161. return -EINVAL;
  162. /* Check that there is not already a mapping in the range */
  163. if (iopt_area_iter_first(iopt, iova, last))
  164. return -EEXIST;
  165. return 0;
  166. }
/*
 * The area takes a slice of the pages from start_byte to start_byte + length
 */
static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
			    struct iopt_pages *pages, unsigned long iova,
			    unsigned long start_byte, unsigned long length,
			    int iommu_prot)
{
	lockdep_assert_held_write(&iopt->iova_rwsem);

	/* Cannot create a writable mapping over read-only pages */
	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
		return -EPERM;

	area->iommu_prot = iommu_prot;
	area->page_offset = start_byte % PAGE_SIZE;
	if (area->page_offset & (iopt->iova_alignment - 1))
		return -EINVAL;

	area->node.start = iova;
	if (check_add_overflow(iova, length - 1, &area->node.last))
		return -EOVERFLOW;

	/* pages_node spans the slice in units of whole pages */
	area->pages_node.start = start_byte / PAGE_SIZE;
	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
		return -EOVERFLOW;
	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
	if (WARN_ON(area->pages_node.last >= pages->npages))
		return -EOVERFLOW;

	/*
	 * The area is inserted with a NULL pages indicating it is not fully
	 * initialized yet.
	 */
	area->iopt = iopt;
	interval_tree_insert(&area->node, &iopt->area_itree);
	return 0;
}
  199. static struct iopt_area *iopt_area_alloc(void)
  200. {
  201. struct iopt_area *area;
  202. area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
  203. if (!area)
  204. return NULL;
  205. RB_CLEAR_NODE(&area->node.rb);
  206. RB_CLEAR_NODE(&area->pages_node.rb);
  207. return area;
  208. }
/*
 * Allocate an area for every list entry and reserve the IOVA space for the
 * whole mapping. On success each elm->area sits in the area_itree with NULL
 * pages (not yet initialized) and *dst_iova holds the start of the mapping.
 * On failure the caller is expected to clean up via iopt_free_pages_list().
 */
static int iopt_alloc_area_pages(struct io_pagetable *iopt,
				 struct list_head *pages_list,
				 unsigned long length, unsigned long *dst_iova,
				 int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	unsigned long iova;
	int rc = 0;

	/* Allocate everything up front so failure cannot happen mid-insert */
	list_for_each_entry(elm, pages_list, next) {
		elm->area = iopt_area_alloc();
		if (!elm->area)
			return -ENOMEM;
	}

	down_write(&iopt->iova_rwsem);
	if ((length & (iopt->iova_alignment - 1)) || !length) {
		rc = -EINVAL;
		goto out_unlock;
	}

	if (flags & IOPT_ALLOC_IOVA) {
		/* Use the first entry to guess the ideal IOVA alignment */
		elm = list_first_entry(pages_list, struct iopt_pages_list,
				       next);
		rc = iopt_alloc_iova(
			iopt, dst_iova,
			(uintptr_t)elm->pages->uptr + elm->start_byte, length);
		if (rc)
			goto out_unlock;
		/* Self-check: the allocator must produce a valid IOVA */
		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
		    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
			rc = -EINVAL;
			goto out_unlock;
		}
	} else {
		rc = iopt_check_iova(iopt, *dst_iova, length);
		if (rc)
			goto out_unlock;
	}

	/*
	 * Areas are created with a NULL pages so that the IOVA space is
	 * reserved and we can unlock the iova_rwsem.
	 */
	iova = *dst_iova;
	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
				      elm->start_byte, elm->length, iommu_prot);
		if (rc)
			goto out_unlock;
		iova += elm->length;
	}

out_unlock:
	up_write(&iopt->iova_rwsem);
	return rc;
}
/*
 * Destroy an area that never finished initialization (area->pages is still
 * NULL), removing it from the area_itree if it was already inserted.
 */
static void iopt_abort_area(struct iopt_area *area)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(area->pages);
	/* area->iopt is only set once the area is in the tree */
	if (area->iopt) {
		down_write(&area->iopt->iova_rwsem);
		interval_tree_remove(&area->node, &area->iopt->area_itree);
		up_write(&area->iopt->iova_rwsem);
	}
	kfree(area);
}
  273. void iopt_free_pages_list(struct list_head *pages_list)
  274. {
  275. struct iopt_pages_list *elm;
  276. while ((elm = list_first_entry_or_null(pages_list,
  277. struct iopt_pages_list, next))) {
  278. if (elm->area)
  279. iopt_abort_area(elm->area);
  280. if (elm->pages)
  281. iopt_put_pages(elm->pages);
  282. list_del(&elm->next);
  283. kfree(elm);
  284. }
  285. }
/*
 * Map every area on pages_list into all attached domains. On failure the
 * entries that were already filled are unfilled again so the domains are
 * left exactly as they were.
 */
static int iopt_fill_domains_pages(struct list_head *pages_list)
{
	struct iopt_pages_list *undo_elm;
	struct iopt_pages_list *elm;
	int rc;

	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_area_fill_domains(elm->area, elm->pages);
		if (rc)
			goto err_undo;
	}
	return 0;

err_undo:
	/* Unwind only the entries filled before the failing one */
	list_for_each_entry(undo_elm, pages_list, next) {
		if (undo_elm == elm)
			break;
		iopt_area_unfill_domains(undo_elm->area, undo_elm->pages);
	}
	return rc;
}
/*
 * Map the slices on pages_list into the io_pagetable, either at *dst_iova
 * or at an automatically chosen IOVA (IOPT_ALLOC_IOVA in flags). On success
 * ownership of the area and pages references moves from the list entries
 * into the io_pagetable (elm->area/elm->pages are NULLed).
 */
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	int rc;

	rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
				   iommu_prot, flags);
	if (rc)
		return rc;

	down_read(&iopt->domains_rwsem);
	rc = iopt_fill_domains_pages(pages_list);
	if (rc)
		goto out_unlock_domains;

	down_write(&iopt->iova_rwsem);
	list_for_each_entry(elm, pages_list, next) {
		/*
		 * area->pages must be set inside the domains_rwsem to ensure
		 * any newly added domains will get filled. Moves the reference
		 * in from the list.
		 */
		elm->area->pages = elm->pages;
		elm->pages = NULL;
		elm->area = NULL;
	}
	up_write(&iopt->iova_rwsem);
out_unlock_domains:
	up_read(&iopt->domains_rwsem);
	return rc;
}
/**
 * iopt_map_user_pages() - Map a user VA to an iova in the io page table
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise is the iova to map to on input
 * @uptr: User VA to map
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 *
 * iova, uptr, and length must be aligned to iova_alignment. For domain backed
 * page tables this will pin the pages and load them into the domain at iova.
 * For non-domain page tables this will only setup a lazy reference and the
 * caller must use iopt_access_pages() to touch them.
 *
 * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
 * destroyed.
 */
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags)
{
	struct iopt_pages_list elm = {};
	LIST_HEAD(pages_list);
	int rc;

	elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE);
	if (IS_ERR(elm.pages))
		return PTR_ERR(elm.pages);
	/* Upgrade the accounting mode to match the context's policy */
	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
	/* iopt_alloc_pages() may round uptr down to a page boundary */
	elm.start_byte = uptr - elm.pages->uptr;
	elm.length = length;
	list_add(&elm.next, &pages_list);

	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
	if (rc) {
		/* elm is on the stack; only its referenced objects need freeing */
		if (elm.area)
			iopt_abort_area(elm.area);
		if (elm.pages)
			iopt_put_pages(elm.pages);
		return rc;
	}
	return 0;
}
/* Context passed through iova_bitmap_for_each() to the dirty-harvest callback */
struct iova_bitmap_fn_arg {
	unsigned long flags;
	struct io_pagetable *iopt;
	struct iommu_domain *domain;
	struct iommu_dirty_bitmap *dirty;
};
/*
 * iova_bitmap_for_each() callback: read (and optionally clear) the dirty
 * bits for one IOVA window, walking the contiguous areas that cover it.
 * Returns -EINVAL if the window is not fully covered by mapped areas.
 */
static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
					unsigned long iova, size_t length,
					void *opaque)
{
	struct iopt_area *area;
	struct iopt_area_contig_iter iter;
	struct iova_bitmap_fn_arg *arg = opaque;
	struct iommu_domain *domain = arg->domain;
	struct iommu_dirty_bitmap *dirty = arg->dirty;
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	unsigned long last_iova = iova + length - 1;
	unsigned long flags = arg->flags;
	int ret;

	iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
		/* Clamp to the requested window, not the whole area */
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		ret = ops->read_and_clear_dirty(domain, iter.cur_iova,
						last - iter.cur_iova + 1, flags,
						dirty);
		if (ret)
			return ret;
	}

	if (!iopt_area_contig_done(&iter))
		return -EINVAL;
	return 0;
}
  412. static int
  413. iommu_read_and_clear_dirty(struct iommu_domain *domain,
  414. struct io_pagetable *iopt, unsigned long flags,
  415. struct iommu_hwpt_get_dirty_bitmap *bitmap)
  416. {
  417. const struct iommu_dirty_ops *ops = domain->dirty_ops;
  418. struct iommu_iotlb_gather gather;
  419. struct iommu_dirty_bitmap dirty;
  420. struct iova_bitmap_fn_arg arg;
  421. struct iova_bitmap *iter;
  422. int ret = 0;
  423. if (!ops || !ops->read_and_clear_dirty)
  424. return -EOPNOTSUPP;
  425. iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
  426. bitmap->page_size,
  427. u64_to_user_ptr(bitmap->data));
  428. if (IS_ERR(iter))
  429. return -ENOMEM;
  430. iommu_dirty_bitmap_init(&dirty, iter, &gather);
  431. arg.flags = flags;
  432. arg.iopt = iopt;
  433. arg.domain = domain;
  434. arg.dirty = &dirty;
  435. iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);
  436. if (!(flags & IOMMU_DIRTY_NO_CLEAR))
  437. iommu_iotlb_sync(domain, &gather);
  438. iova_bitmap_free(iter);
  439. return ret;
  440. }
  441. int iommufd_check_iova_range(struct io_pagetable *iopt,
  442. struct iommu_hwpt_get_dirty_bitmap *bitmap)
  443. {
  444. size_t iommu_pgsize = iopt->iova_alignment;
  445. u64 last_iova;
  446. if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
  447. return -EOVERFLOW;
  448. if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
  449. return -EOVERFLOW;
  450. if ((bitmap->iova & (iommu_pgsize - 1)) ||
  451. ((last_iova + 1) & (iommu_pgsize - 1)))
  452. return -EINVAL;
  453. if (!bitmap->page_size)
  454. return -EINVAL;
  455. if ((bitmap->iova & (bitmap->page_size - 1)) ||
  456. ((last_iova + 1) & (bitmap->page_size - 1)))
  457. return -EINVAL;
  458. return 0;
  459. }
/*
 * Validate the requested bitmap window and then harvest the domain's dirty
 * bits under the iova_rwsem read lock so areas cannot change underneath.
 */
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	int ret;

	ret = iommufd_check_iova_range(iopt, bitmap);
	if (ret)
		return ret;

	down_read(&iopt->iova_rwsem);
	ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
	up_read(&iopt->iova_rwsem);

	return ret;
}
/*
 * Clear the dirty bit on every mapped PTE in the domain. The iova_bitmap is
 * NULL so read_and_clear_dirty() only clears and records nothing.
 */
static int iopt_clear_dirty_data(struct io_pagetable *iopt,
				 struct iommu_domain *domain)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iopt_area *area;
	int ret = 0;

	lockdep_assert_held_read(&iopt->iova_rwsem);

	iommu_dirty_bitmap_init(&dirty, NULL, &gather);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		/* Skip areas that are still being set up or torn down */
		if (!area->pages)
			continue;

		ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area),
						iopt_area_length(area), 0,
						&dirty);
		if (ret)
			break;
	}

	/* Flush even on error so any PTEs already cleared become visible */
	iommu_iotlb_sync(domain, &gather);
	return ret;
}
/*
 * Enable or disable dirty tracking on the domain. When enabling, all PTE
 * dirty bits are cleared first so tracking starts from a clean snapshot.
 */
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	int ret = 0;

	if (!ops)
		return -EOPNOTSUPP;

	down_read(&iopt->iova_rwsem);

	/* Clear dirty bits from PTEs to ensure a clean snapshot */
	if (enable) {
		ret = iopt_clear_dirty_data(iopt, domain);
		if (ret)
			goto out_unlock;
	}

	ret = ops->set_dirty_tracking(domain, enable);

out_unlock:
	up_read(&iopt->iova_rwsem);
	return ret;
}
/*
 * Build a pages_list describing the slices of iopt_pages that back the
 * range [iova, iova + length). Each element takes a kref on its pages.
 * Fails with -ENOENT if the range is not fully covered by mapped areas;
 * on any failure the partially built list is freed.
 */
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list)
{
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		struct iopt_pages_list *elm;
		/* Clamp to the requested range, not the whole area */
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
		if (!elm) {
			rc = -ENOMEM;
			goto err_free;
		}
		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
		elm->pages = area->pages;
		elm->length = (last - iter.cur_iova) + 1;
		kref_get(&elm->pages->kref);
		list_add_tail(&elm->next, pages_list);
	}
	/* The walk must have reached the end of the requested range */
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_free;
	}
	up_read(&iopt->iova_rwsem);
	return 0;
err_free:
	up_read(&iopt->iova_rwsem);
	iopt_free_pages_list(pages_list);
	return rc;
}
/*
 * Unmap and destroy every area fully contained in [start, last]. Areas that
 * have active accesses are asked (via iommufd_access_notify_unmap()) to let
 * go, and the walk restarts; a bounded number of retries guards against a
 * non-responsive access holder. Returns -ENOENT if nothing was unmapped,
 * -ENOENT/-EBUSY for partially covered or mid-setup areas.
 */
static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
				 unsigned long last, unsigned long *unmapped)
{
	struct iopt_area *area;
	unsigned long unmapped_bytes = 0;
	unsigned int tries = 0;
	int rc = -ENOENT;

	/*
	 * The domains_rwsem must be held in read mode any time any area->pages
	 * is NULL. This prevents domain attach/detach from running
	 * concurrently with cleaning up the area.
	 */
again:
	down_read(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	while ((area = iopt_area_iter_first(iopt, start, last))) {
		unsigned long area_last = iopt_area_last_iova(area);
		unsigned long area_first = iopt_area_iova(area);
		struct iopt_pages *pages;

		/* Userspace should not race map/unmap's of the same area */
		if (!area->pages) {
			rc = -EBUSY;
			goto out_unlock_iova;
		}

		/* Areas must be entirely inside the requested range */
		if (area_first < start || area_last > last) {
			rc = -ENOENT;
			goto out_unlock_iova;
		}

		/* Reset the retry counter when we move to a new area */
		if (area_first != start)
			tries = 0;

		/*
		 * num_accesses writers must hold the iova_rwsem too, so we can
		 * safely read it under the write side of the iova_rwsem
		 * without the pages->mutex.
		 */
		if (area->num_accesses) {
			size_t length = iopt_area_length(area);

			start = area_first;
			area->prevent_access = true;
			up_write(&iopt->iova_rwsem);
			up_read(&iopt->domains_rwsem);

			iommufd_access_notify_unmap(iopt, area_first, length);
			/* Something is not responding to unmap requests. */
			tries++;
			if (WARN_ON(tries > 100))
				return -EDEADLOCK;
			goto again;
		}

		/* Take ownership of the pages and tear the area down unlocked */
		pages = area->pages;
		area->pages = NULL;
		up_write(&iopt->iova_rwsem);

		iopt_area_unfill_domains(area, pages);
		iopt_abort_area(area);
		iopt_put_pages(pages);

		unmapped_bytes += area_last - area_first + 1;

		down_write(&iopt->iova_rwsem);
	}
	if (unmapped_bytes)
		rc = 0;

out_unlock_iova:
	up_write(&iopt->iova_rwsem);
	up_read(&iopt->domains_rwsem);
	if (unmapped)
		*unmapped = unmapped_bytes;
	return rc;
}
  619. /**
  620. * iopt_unmap_iova() - Remove a range of iova
  621. * @iopt: io_pagetable to act on
  622. * @iova: Starting iova to unmap
  623. * @length: Number of bytes to unmap
  624. * @unmapped: Return number of bytes unmapped
  625. *
  626. * The requested range must be a superset of existing ranges.
  627. * Splitting/truncating IOVA mappings is not allowed.
  628. */
  629. int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
  630. unsigned long length, unsigned long *unmapped)
  631. {
  632. unsigned long iova_last;
  633. if (!length)
  634. return -EINVAL;
  635. if (check_add_overflow(iova, length - 1, &iova_last))
  636. return -EOVERFLOW;
  637. return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
  638. }
  639. int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
  640. {
  641. int rc;
  642. rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
  643. /* If the IOVAs are empty then unmap all succeeds */
  644. if (rc == -ENOENT)
  645. return 0;
  646. return rc;
  647. }
/* The caller must always free all the nodes in the allowed_iova rb_root. */
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova)
{
	struct iopt_allowed *allowed;

	down_write(&iopt->iova_rwsem);
	/* Install the new tree; the old one is handed back via allowed_iova */
	swap(*allowed_iova, iopt->allowed_itree);

	for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
	     allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
		if (iopt_reserved_iter_first(iopt, allowed->node.start,
					     allowed->node.last)) {
			/* Conflicts with a reserved range: restore old tree */
			swap(*allowed_iova, iopt->allowed_itree);
			up_write(&iopt->iova_rwsem);
			return -EADDRINUSE;
		}
	}
	up_write(&iopt->iova_rwsem);
	return 0;
}
/*
 * Mark [start, last] as reserved so the allocator will never place a mapping
 * there. Fails with -EADDRINUSE if the range already intersects a mapped
 * area or an allowed range. @owner tags the reservation for later removal.
 */
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner)
{
	struct iopt_reserved *reserved;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iopt_area_iter_first(iopt, start, last) ||
	    iopt_allowed_iter_first(iopt, start, last))
		return -EADDRINUSE;

	reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
	if (!reserved)
		return -ENOMEM;
	reserved->node.start = start;
	reserved->node.last = last;
	reserved->owner = owner;
	interval_tree_insert(&reserved->node, &iopt->reserved_itree);
	return 0;
}
/*
 * Remove every reserved range tagged with @owner. The next node is fetched
 * before removal so the iteration survives deleting the current one.
 */
static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	struct iopt_reserved *reserved, *next;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
	     reserved = next) {
		next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);

		if (reserved->owner == owner) {
			interval_tree_remove(&reserved->node,
					     &iopt->reserved_itree);
			kfree(reserved);
		}
	}
}
/* Locked wrapper around __iopt_remove_reserved_iova() */
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	down_write(&iopt->iova_rwsem);
	__iopt_remove_reserved_iova(iopt, owner);
	up_write(&iopt->iova_rwsem);
}
/* Initialize an empty io_pagetable: locks, interval trees, and xarrays. */
void iopt_init_table(struct io_pagetable *iopt)
{
	init_rwsem(&iopt->iova_rwsem);
	init_rwsem(&iopt->domains_rwsem);
	iopt->area_itree = RB_ROOT_CACHED;
	iopt->allowed_itree = RB_ROOT_CACHED;
	iopt->reserved_itree = RB_ROOT_CACHED;
	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);

	/*
	 * iopt's start as SW tables that can use the entire size_t IOVA space
	 * due to the use of size_t in the APIs. They have no alignment
	 * restriction.
	 */
	iopt->iova_alignment = 1;
}
/*
 * Tear down an io_pagetable. All areas, domains, and accesses must already
 * be gone; the WARN_ONs catch leaks. Only the allowed ranges (and, under
 * IOMMUFD_TEST, test-owned reservations) are freed here.
 */
void iopt_destroy_table(struct io_pagetable *iopt)
{
	struct interval_tree_node *node;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		iopt_remove_reserved_iova(iopt, NULL);

	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
						ULONG_MAX))) {
		interval_tree_remove(node, &iopt->allowed_itree);
		kfree(container_of(node, struct iopt_allowed, node));
	}

	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
	WARN_ON(!xa_empty(&iopt->domains));
	WARN_ON(!xa_empty(&iopt->access_list));
	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
}
/**
 * iopt_unfill_domain() - Unfill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to unfill
 *
 * This is used when removing a domain from the iopt. Every area in the iopt
 * will be unmapped from the domain. The domain must already be removed from the
 * domains xarray.
 */
static void iopt_unfill_domain(struct io_pagetable *iopt,
			       struct iommu_domain *domain)
{
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	/*
	 * Some other domain is holding all the pfns still, rapidly unmap this
	 * domain.
	 */
	if (iopt->next_domain_id != 0) {
		/* Pick an arbitrary remaining domain to act as storage */
		struct iommu_domain *storage_domain =
			xa_load(&iopt->domains, 0);

		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
			struct iopt_pages *pages = area->pages;

			if (!pages)
				continue;

			mutex_lock(&pages->mutex);
			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
				WARN_ON(!area->storage_domain);
			/* Retarget storage before unmapping from @domain */
			if (area->storage_domain == domain)
				area->storage_domain = storage_domain;
			mutex_unlock(&pages->mutex);

			iopt_area_unmap_domain(area, domain);
		}
		return;
	}

	/* This was the last domain; fully unfill and release the pfns */
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		WARN_ON(area->storage_domain != domain);
		area->storage_domain = NULL;
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
}
/**
 * iopt_fill_domain() - Fill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to fill
 *
 * Fill the domain with PFNs from every area in the iopt. On failure the domain
 * is left unchanged.
 */
static int iopt_fill_domain(struct io_pagetable *iopt,
			    struct iommu_domain *domain)
{
	struct iopt_area *end_area;
	struct iopt_area *area;
	int rc;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		rc = iopt_area_fill_domain(area, domain);
		if (rc) {
			mutex_unlock(&pages->mutex);
			goto out_unfill;
		}
		/*
		 * The first domain to be filled becomes the storage domain
		 * and the area is registered in the pages' domains_itree.
		 */
		if (!area->storage_domain) {
			WARN_ON(iopt->next_domain_id != 0);
			area->storage_domain = domain;
			interval_tree_insert(&area->pages_node,
					     &pages->domains_itree);
		}
		mutex_unlock(&pages->mutex);
	}
	return 0;

out_unfill:
	/*
	 * Unwind the partial fill: undo every area processed before the one
	 * that failed (end_area), restoring the domain to its prior state.
	 */
	end_area = area;
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (area == end_area)
			break;
		if (!pages)
			continue;
		mutex_lock(&pages->mutex);
		/* If this was to be the first domain, undo the storage claim */
		if (iopt->next_domain_id == 0) {
			interval_tree_remove(&area->pages_node,
					     &pages->domains_itree);
			area->storage_domain = NULL;
		}
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
	return rc;
}
  842. /* All existing area's conform to an increased page size */
  843. static int iopt_check_iova_alignment(struct io_pagetable *iopt,
  844. unsigned long new_iova_alignment)
  845. {
  846. unsigned long align_mask = new_iova_alignment - 1;
  847. struct iopt_area *area;
  848. lockdep_assert_held(&iopt->iova_rwsem);
  849. lockdep_assert_held(&iopt->domains_rwsem);
  850. for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
  851. area = iopt_area_iter_next(area, 0, ULONG_MAX))
  852. if ((iopt_area_iova(area) & align_mask) ||
  853. (iopt_area_length(area) & align_mask) ||
  854. (area->page_offset & align_mask))
  855. return -EADDRINUSE;
  856. if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
  857. struct iommufd_access *access;
  858. unsigned long index;
  859. xa_for_each(&iopt->access_list, index, access)
  860. if (WARN_ON(access->iova_alignment >
  861. new_iova_alignment))
  862. return -EADDRINUSE;
  863. }
  864. return 0;
  865. }
/*
 * Attach an iommu_domain to the iopt: tighten the IOVA alignment to the
 * domain's minimum page size, reserve the IOVA ranges outside the domain's
 * aperture, then map every existing area into the domain. On failure the
 * iopt is left unchanged.
 */
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain)
{
	const struct iommu_domain_geometry *geometry = &domain->geometry;
	struct iommu_domain *iter_domain;
	unsigned int new_iova_alignment;
	unsigned long index;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	/* The same domain must not be added twice */
	xa_for_each(&iopt->domains, index, iter_domain) {
		if (WARN_ON(iter_domain == domain)) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}

	/*
	 * The io page size drives the iova_alignment. Internally the iopt_pages
	 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
	 * objects into the iommu_domain.
	 *
	 * A iommu_domain must always be able to accept PAGE_SIZE to be
	 * compatible as we can't guarantee higher contiguity.
	 */
	new_iova_alignment = max_t(unsigned long,
				   1UL << __ffs(domain->pgsize_bitmap),
				   iopt->iova_alignment);
	if (new_iova_alignment > PAGE_SIZE) {
		rc = -EINVAL;
		goto out_unlock;
	}
	/* Raising the alignment requires every existing area to conform */
	if (new_iova_alignment != iopt->iova_alignment) {
		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			goto out_unlock;
	}

	/* No area exists that is outside the allowed domain aperture */
	if (geometry->aperture_start != 0) {
		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
				       domain);
		if (rc)
			goto out_reserved;
	}
	if (geometry->aperture_end != ULONG_MAX) {
		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
				       ULONG_MAX, domain);
		if (rc)
			goto out_reserved;
	}

	/* Reserve the slot first so the later xa_store cannot fail */
	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
	if (rc)
		goto out_reserved;

	rc = iopt_fill_domain(iopt, domain);
	if (rc)
		goto out_release;

	/* Commit: all failure points are behind us */
	iopt->iova_alignment = new_iova_alignment;
	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
	iopt->next_domain_id++;
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return 0;
out_release:
	xa_release(&iopt->domains, iopt->next_domain_id);
out_reserved:
	__iopt_remove_reserved_iova(iopt, domain);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}
  936. static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
  937. {
  938. unsigned long new_iova_alignment;
  939. struct iommufd_access *access;
  940. struct iommu_domain *domain;
  941. unsigned long index;
  942. lockdep_assert_held_write(&iopt->iova_rwsem);
  943. lockdep_assert_held(&iopt->domains_rwsem);
  944. /* See batch_iommu_map_small() */
  945. if (iopt->disable_large_pages)
  946. new_iova_alignment = PAGE_SIZE;
  947. else
  948. new_iova_alignment = 1;
  949. xa_for_each(&iopt->domains, index, domain)
  950. new_iova_alignment = max_t(unsigned long,
  951. 1UL << __ffs(domain->pgsize_bitmap),
  952. new_iova_alignment);
  953. xa_for_each(&iopt->access_list, index, access)
  954. new_iova_alignment = max_t(unsigned long,
  955. access->iova_alignment,
  956. new_iova_alignment);
  957. if (new_iova_alignment > iopt->iova_alignment) {
  958. int rc;
  959. rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
  960. if (rc)
  961. return rc;
  962. }
  963. iopt->iova_alignment = new_iova_alignment;
  964. return 0;
  965. }
/*
 * Detach an iommu_domain from the iopt: remove it from the domains xarray,
 * unmap/unfill every area from it, drop its reserved aperture ranges and
 * recompute the iova_alignment.
 */
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain)
{
	struct iommu_domain *iter_domain = NULL;
	unsigned long index;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	/* Locate the domain's slot in the xarray */
	xa_for_each(&iopt->domains, index, iter_domain)
		if (iter_domain == domain)
			break;
	if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
		goto out_unlock;

	/*
	 * Compress the xarray to keep it linear by swapping the entry to erase
	 * with the tail entry and shrinking the tail.
	 */
	iopt->next_domain_id--;
	iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
	if (index != iopt->next_domain_id)
		xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);

	iopt_unfill_domain(iopt, domain);
	__iopt_remove_reserved_iova(iopt, domain);

	/* Shrinking the alignment cannot fail */
	WARN_ON(iopt_calculate_iova_alignment(iopt));
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}
/**
 * iopt_area_split - Split an area into two parts at iova
 * @area: The area to split
 * @iova: Becomes the last of a new area
 *
 * This splits an area into two. It is part of the VFIO compatibility to allow
 * poking a hole in the mapping. The two areas continue to point at the same
 * iopt_pages, just with different starting bytes.
 */
static int iopt_area_split(struct iopt_area *area, unsigned long iova)
{
	unsigned long alignment = area->iopt->iova_alignment;
	unsigned long last_iova = iopt_area_last_iova(area);
	unsigned long start_iova = iopt_area_iova(area);
	unsigned long new_start = iova + 1;
	struct io_pagetable *iopt = area->iopt;
	struct iopt_pages *pages = area->pages;
	struct iopt_area *lhs;
	struct iopt_area *rhs;
	int rc;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	/* Splitting exactly at either edge is a no-op */
	if (iova == start_iova || iova == last_iova)
		return 0;

	if (!pages || area->prevent_access)
		return -EBUSY;

	/* Both halves must still meet the iopt's alignment */
	if (new_start & (alignment - 1) ||
	    iopt_area_start_byte(area, new_start) & (alignment - 1))
		return -EINVAL;

	lhs = iopt_area_alloc();
	if (!lhs)
		return -ENOMEM;

	rhs = iopt_area_alloc();
	if (!rhs) {
		rc = -ENOMEM;
		goto err_free_lhs;
	}

	mutex_lock(&pages->mutex);
	/*
	 * Splitting is not permitted if an access exists, we don't track enough
	 * information to split existing accesses.
	 */
	if (area->num_accesses) {
		rc = -EINVAL;
		goto err_unlock;
	}

	/*
	 * Splitting is not permitted if a domain could have been mapped with
	 * huge pages.
	 */
	if (area->storage_domain && !iopt->disable_large_pages) {
		rc = -EINVAL;
		goto err_unlock;
	}

	/* Swap the single node in area_itree for the two new halves */
	interval_tree_remove(&area->node, &iopt->area_itree);
	rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
			      iopt_area_start_byte(area, start_iova),
			      (new_start - 1) - start_iova + 1,
			      area->iommu_prot);
	if (WARN_ON(rc))
		goto err_insert;

	rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
			      iopt_area_start_byte(area, new_start),
			      last_iova - new_start + 1, area->iommu_prot);
	if (WARN_ON(rc))
		goto err_remove_lhs;

	/*
	 * If the original area has filled a domain, domains_itree has to be
	 * updated.
	 */
	if (area->storage_domain) {
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		interval_tree_insert(&lhs->pages_node, &pages->domains_itree);
		interval_tree_insert(&rhs->pages_node, &pages->domains_itree);
	}

	lhs->storage_domain = area->storage_domain;
	lhs->pages = area->pages;
	rhs->storage_domain = area->storage_domain;
	rhs->pages = area->pages;
	/* lhs inherits area's kref on the pages; rhs takes a new one */
	kref_get(&rhs->pages->kref);
	kfree(area);
	mutex_unlock(&pages->mutex);

	/*
	 * No change to domains or accesses because the pages hasn't been
	 * changed
	 */
	return 0;

err_remove_lhs:
	interval_tree_remove(&lhs->node, &iopt->area_itree);
err_insert:
	/* Restore the original area so the iopt is unchanged on failure */
	interval_tree_insert(&area->node, &iopt->area_itree);
err_unlock:
	mutex_unlock(&pages->mutex);
	kfree(rhs);
err_free_lhs:
	kfree(lhs);
	return rc;
}
  1090. int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
  1091. size_t num_iovas)
  1092. {
  1093. int rc = 0;
  1094. int i;
  1095. down_write(&iopt->iova_rwsem);
  1096. for (i = 0; i < num_iovas; i++) {
  1097. struct iopt_area *area;
  1098. area = iopt_area_iter_first(iopt, iovas[i], iovas[i]);
  1099. if (!area)
  1100. continue;
  1101. rc = iopt_area_split(area, iovas[i]);
  1102. if (rc)
  1103. break;
  1104. }
  1105. up_write(&iopt->iova_rwsem);
  1106. return rc;
  1107. }
  1108. void iopt_enable_large_pages(struct io_pagetable *iopt)
  1109. {
  1110. int rc;
  1111. down_write(&iopt->domains_rwsem);
  1112. down_write(&iopt->iova_rwsem);
  1113. WRITE_ONCE(iopt->disable_large_pages, false);
  1114. rc = iopt_calculate_iova_alignment(iopt);
  1115. WARN_ON(rc);
  1116. up_write(&iopt->iova_rwsem);
  1117. up_write(&iopt->domains_rwsem);
  1118. }
  1119. int iopt_disable_large_pages(struct io_pagetable *iopt)
  1120. {
  1121. int rc = 0;
  1122. down_write(&iopt->domains_rwsem);
  1123. down_write(&iopt->iova_rwsem);
  1124. if (iopt->disable_large_pages)
  1125. goto out_unlock;
  1126. /* Won't do it if domains already have pages mapped in them */
  1127. if (!xa_empty(&iopt->domains) &&
  1128. !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
  1129. rc = -EINVAL;
  1130. goto out_unlock;
  1131. }
  1132. WRITE_ONCE(iopt->disable_large_pages, true);
  1133. rc = iopt_calculate_iova_alignment(iopt);
  1134. if (rc)
  1135. WRITE_ONCE(iopt->disable_large_pages, false);
  1136. out_unlock:
  1137. up_write(&iopt->iova_rwsem);
  1138. up_write(&iopt->domains_rwsem);
  1139. return rc;
  1140. }
  1141. int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
  1142. {
  1143. u32 new_id;
  1144. int rc;
  1145. down_write(&iopt->domains_rwsem);
  1146. down_write(&iopt->iova_rwsem);
  1147. rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b,
  1148. GFP_KERNEL_ACCOUNT);
  1149. if (rc)
  1150. goto out_unlock;
  1151. rc = iopt_calculate_iova_alignment(iopt);
  1152. if (rc) {
  1153. xa_erase(&iopt->access_list, new_id);
  1154. goto out_unlock;
  1155. }
  1156. access->iopt_access_list_id = new_id;
  1157. out_unlock:
  1158. up_write(&iopt->iova_rwsem);
  1159. up_write(&iopt->domains_rwsem);
  1160. return rc;
  1161. }
  1162. void iopt_remove_access(struct io_pagetable *iopt,
  1163. struct iommufd_access *access,
  1164. u32 iopt_access_list_id)
  1165. {
  1166. down_write(&iopt->domains_rwsem);
  1167. down_write(&iopt->iova_rwsem);
  1168. WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access);
  1169. WARN_ON(iopt_calculate_iova_alignment(iopt));
  1170. up_write(&iopt->iova_rwsem);
  1171. up_write(&iopt->domains_rwsem);
  1172. }
/* Narrow the valid_iova_itree to include reserved ranges from a device. */
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start)
{
	struct iommu_resv_region *resv;
	LIST_HEAD(resv_regions);
	unsigned int num_hw_msi = 0;
	unsigned int num_sw_msi = 0;
	int rc;

	if (iommufd_should_fail())
		return -EINVAL;

	down_write(&iopt->iova_rwsem);
	/* FIXME: drivers allocate memory but there is no failure propagated */
	iommu_get_resv_regions(dev, &resv_regions);

	list_for_each_entry(resv, &resv_regions, list) {
		/* Relaxable direct regions do not need to be reserved */
		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
			num_hw_msi++;
		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
			*sw_msi_start = resv->start;
			num_sw_msi++;
		}

		rc = iopt_reserve_iova(iopt, resv->start,
				       resv->length - 1 + resv->start, dev);
		if (rc)
			goto out_reserved;
	}

	/* Drivers must offer sane combinations of regions */
	if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
		rc = -EINVAL;
		goto out_reserved;
	}

	rc = 0;
	goto out_free_resv;

out_reserved:
	/* Drop every reservation made above that is owned by this dev */
	__iopt_remove_reserved_iova(iopt, dev);
out_free_resv:
	iommu_put_resv_regions(dev, &resv_regions);
	up_write(&iopt->iova_rwsem);
	return rc;
}