// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>
#include <linux/workqueue.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR	~0UL

#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */

static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static void free_iova_rcaches(struct iova_domain *iovad);
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
static void free_global_cached_iovas(struct iova_domain *iovad);

static struct iova *to_iova(struct rb_node *node)
{
	return rb_entry(node, struct iova, node);
}

void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
	unsigned long start_pfn)
{
	/*
	 * IOVA granularity will normally be equal to the smallest
	 * supported IOMMU page size; both *must* be capable of
	 * representing individual CPU pages exactly.
	 */
	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

	spin_lock_init(&iovad->iova_rbtree_lock);
	iovad->rbroot = RB_ROOT;
	iovad->cached_node = &iovad->anchor.node;
	iovad->cached32_node = &iovad->anchor.node;
	iovad->granule = granule;
	iovad->start_pfn = start_pfn;
	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
}
EXPORT_SYMBOL_GPL(init_iova_domain);
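
/*
 * Illustrative sketch, not part of the original file: one plausible way for a
 * caller to set up a domain with a 4KiB granule whose allocations start above
 * a 1MiB reserved window. The helper name and constants are hypothetical.
 */
static int __maybe_unused example_iova_domain_setup(struct iova_domain *iovad)
{
	unsigned long granule = 1UL << 12;		/* power of two, <= PAGE_SIZE */
	unsigned long start_pfn = (1UL << 20) >> 12;	/* skip IOVAs below 1MiB */

	init_iova_domain(iovad, granule, start_pfn);
	/* Enable the per-CPU/depot caches used by the *_fast() paths below. */
	return iova_domain_init_rcaches(iovad);
}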

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{
	if (limit_pfn <= iovad->dma_32bit_pfn)
		return iovad->cached32_node;

	return iovad->cached_node;
}

static void
__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
{
	if (new->pfn_hi < iovad->dma_32bit_pfn)
		iovad->cached32_node = &new->node;
	else
		iovad->cached_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
	struct iova *cached_iova;

	cached_iova = to_iova(iovad->cached32_node);
	if (free == cached_iova ||
	    (free->pfn_hi < iovad->dma_32bit_pfn &&
	     free->pfn_lo >= cached_iova->pfn_lo))
		iovad->cached32_node = rb_next(&free->node);

	if (free->pfn_lo < iovad->dma_32bit_pfn)
		iovad->max32_alloc_size = iovad->dma_32bit_pfn;

	cached_iova = to_iova(iovad->cached_node);
	if (free->pfn_lo >= cached_iova->pfn_lo)
		iovad->cached_node = rb_next(&free->node);
}

static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
{
	struct rb_node *node, *next;
	/*
	 * Ideally what we'd like to judge here is whether limit_pfn is close
	 * enough to the highest-allocated IOVA that starting the allocation
	 * walk from the anchor node will be quicker than this initial work to
	 * find an exact starting point (especially if that ends up being the
	 * anchor node anyway). This is an incredibly crude approximation which
	 * only really helps the most likely case, but is at least trivially easy.
	 */
	if (limit_pfn > iovad->dma_32bit_pfn)
		return &iovad->anchor.node;

	node = iovad->rbroot.rb_node;
	while (to_iova(node)->pfn_hi < limit_pfn)
		node = node->rb_right;

search_left:
	while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
		node = node->rb_left;

	if (!node->rb_left)
		return node;

	next = node->rb_left;
	while (next->rb_right) {
		next = next->rb_right;
		if (to_iova(next)->pfn_lo >= limit_pfn) {
			node = next;
			goto search_left;
		}
	}

	return node;
}

/* Insert the iova into domain rbtree by holding writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
		   struct rb_node *start)
{
	struct rb_node **new, *parent = NULL;

	new = (start) ? &start : &(root->rb_node);
	/* Figure out where to put new node */
	while (*new) {
		struct iova *this = to_iova(*new);

		parent = *new;

		if (iova->pfn_lo < this->pfn_lo)
			new = &((*new)->rb_left);
		else if (iova->pfn_lo > this->pfn_lo)
			new = &((*new)->rb_right);
		else {
			WARN_ON(1); /* this should not happen */
			return;
		}
	}
	/* Add new node and rebalance tree. */
	rb_link_node(&iova->node, parent, new);
	rb_insert_color(&iova->node, root);
}

static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
		unsigned long size, unsigned long limit_pfn,
		struct iova *new, bool size_aligned)
{
	struct rb_node *curr, *prev;
	struct iova *curr_iova;
	unsigned long flags;
	unsigned long new_pfn, retry_pfn;
	unsigned long align_mask = ~0UL;
	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;

	if (size_aligned)
		align_mask <<= fls_long(size - 1);

	/* Walk the tree backwards */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	if (limit_pfn <= iovad->dma_32bit_pfn &&
			size >= iovad->max32_alloc_size)
		goto iova32_full;

	curr = __get_cached_rbnode(iovad, limit_pfn);
	curr_iova = to_iova(curr);
	retry_pfn = curr_iova->pfn_hi;

retry:
	do {
		high_pfn = min(high_pfn, curr_iova->pfn_lo);
		new_pfn = (high_pfn - size) & align_mask;
		prev = curr;
		curr = rb_prev(curr);
		curr_iova = to_iova(curr);
	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);

	if (high_pfn < size || new_pfn < low_pfn) {
		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
			high_pfn = limit_pfn;
			low_pfn = retry_pfn + 1;
			curr = iova_find_limit(iovad, limit_pfn);
			curr_iova = to_iova(curr);
			goto retry;
		}
		iovad->max32_alloc_size = size;
		goto iova32_full;
	}

	/* pfn_lo will point to size aligned address if size_aligned is set */
	new->pfn_lo = new_pfn;
	new->pfn_hi = new->pfn_lo + size - 1;

	/* If we have 'prev', it's a valid place to start the insertion. */
	iova_insert_rbtree(&iovad->rbroot, new, prev);
	__cached_rbnode_insert_update(iovad, new);

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return 0;

iova32_full:
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return -ENOMEM;
}

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

static struct iova *alloc_iova_mem(void)
{
	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
}

static void free_iova_mem(struct iova *iova)
{
	if (iova->pfn_lo != IOVA_ANCHOR)
		kmem_cache_free(iova_cache, iova);
}

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if size_aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_power_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
	unsigned long limit_pfn,
	bool size_aligned)
{
	struct iova *new_iova;
	int ret;

	new_iova = alloc_iova_mem();
	if (!new_iova)
		return NULL;

	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
			new_iova, size_aligned);

	if (ret) {
		free_iova_mem(new_iova);
		return NULL;
	}

	return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);
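
/*
 * Illustrative sketch, not part of the original file: allocate eight granules
 * below the 32-bit boundary via the slow path and release them again. The
 * helper name is hypothetical.
 */
static void __maybe_unused example_alloc_iova(struct iova_domain *iovad)
{
	struct iova *iova;

	iova = alloc_iova(iovad, 8, iovad->dma_32bit_pfn - 1, true);
	if (!iova)
		return;
	/* ... program the IOMMU for iova->pfn_lo .. iova->pfn_hi ... */
	__free_iova(iovad, iova);
}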

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct rb_node *node = iovad->rbroot.rb_node;

	assert_spin_locked(&iovad->iova_rbtree_lock);

	while (node) {
		struct iova *iova = to_iova(node);

		if (pfn < iova->pfn_lo)
			node = node->rb_left;
		else if (pfn > iova->pfn_hi)
			node = node->rb_right;
		else
			return iova;	/* pfn falls within iova's range */
	}

	return NULL;
}

static void remove_iova(struct iova_domain *iovad, struct iova *iova)
{
	assert_spin_locked(&iovad->iova_rbtree_lock);
	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	/* Take the lock so that no other thread is manipulating the rbtree */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
	unsigned long flags;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	if (!iova) {
		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
		return;
	}
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(free_iova);

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @flush_rcache: - set to flush rcache on regular allocation failure
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure. If regular allocation
 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
		unsigned long limit_pfn, bool flush_rcache)
{
	unsigned long iova_pfn;
	struct iova *new_iova;

	/*
	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
	 * will come back to bite us badly, so we have to waste a bit of space
	 * rounding up anything cacheable to make sure that can't happen. The
	 * order of the unadjusted size will still match upon freeing.
	 */
	if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
		size = roundup_pow_of_two(size);

	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
	if (iova_pfn)
		return iova_pfn;

retry:
	new_iova = alloc_iova(iovad, size, limit_pfn, true);
	if (!new_iova) {
		unsigned int cpu;

		if (!flush_rcache)
			return 0;

		/* Try replenishing IOVAs by flushing rcache. */
		flush_rcache = false;
		for_each_online_cpu(cpu)
			free_cpu_cached_iovas(cpu, iovad);
		free_global_cached_iovas(iovad);
		goto retry;
	}

	return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);
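
/*
 * Worked example (not part of the original file): with
 * IOVA_RANGE_CACHE_MAX_SIZE == 6, only sizes below 32 pages are rounded up;
 * a 5-page request becomes an 8-page allocation served from the order-3
 * rcache bucket, while a 40-page request bypasses the caches entirely.
 */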

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (iova_rcache_insert(iovad, pfn, size))
		return;

	free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);
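
/*
 * Illustrative sketch, not part of the original file: the *_fast() calls are
 * meant to be used as a pair, passing the same unadjusted size on both sides
 * so the free lands in the same rcache bucket as the allocation. The helper
 * name is hypothetical.
 */
static void __maybe_unused example_fast_path(struct iova_domain *iovad,
					     unsigned long npages,
					     unsigned long limit_pfn)
{
	unsigned long pfn;

	pfn = alloc_iova_fast(iovad, npages, limit_pfn, true);
	if (!pfn)
		return;
	/* ... map and use the range starting at pfn ... */
	free_iova_fast(iovad, pfn, npages);
}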

static void iova_domain_free_rcaches(struct iova_domain *iovad)
{
	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					    &iovad->cpuhp_dead);
	free_iova_rcaches(iovad);
}

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iova's in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
	struct iova *iova, *tmp;

	if (iovad->rcaches)
		iova_domain_free_rcaches(iovad);

	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
		free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova = to_iova(node);

	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
		return 1;
	return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_iova_mem();
	if (iova) {
		iova->pfn_lo = pfn_lo;
		iova->pfn_hi = pfn_hi;
	}

	return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
	if (iova)
		iova_insert_rbtree(&iovad->rbroot, iova, NULL);

	return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
	unsigned long *pfn_lo, unsigned long *pfn_hi)
{
	if (*pfn_lo < iova->pfn_lo)
		iova->pfn_lo = *pfn_lo;
	if (*pfn_hi > iova->pfn_hi)
		*pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher page frame address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that this range is not handed out as part of alloc_iova.
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct rb_node *node;
	unsigned long flags;
	struct iova *iova;
	unsigned int overlap = 0;

	/* Don't allow nonsensical pfns */
	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
		return NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
			iova = to_iova(node);
			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
			if ((pfn_lo >= iova->pfn_lo) &&
				(pfn_hi <= iova->pfn_hi))
				goto finish;
			overlap = 1;

		} else if (overlap)
				break;
	}

	/*
	 * We are here either because this is the first reserved node
	 * or because we need to insert the remaining non-overlapping
	 * address range.
	 */
	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
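
/*
 * Illustrative sketch, not part of the original file: carve a platform
 * reserved window (hypothetical start/len values supplied by the caller)
 * out of the allocatable IOVA space.
 */
static void __maybe_unused example_reserve_window(struct iova_domain *iovad,
						  phys_addr_t start, size_t len)
{
	unsigned long lo = start >> iova_shift(iovad);
	unsigned long hi = (start + len - 1) >> iova_shift(iovad);

	reserve_iova(iovad, lo, hi);
}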

/*
 * Magazine caches for IOVA ranges. For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

/*
 * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
 * assure size of 'iova_magazine' to be 1024 bytes, so that no memory
 * will be wasted. Since only full magazines are inserted into the depot,
 * we don't need to waste PFN capacity on a separate list head either.
 */
#define IOVA_MAG_SIZE 127

#define IOVA_DEPOT_DELAY msecs_to_jiffies(100)

struct iova_magazine {
	union {
		unsigned long size;
		struct iova_magazine *next;
	};
	unsigned long pfns[IOVA_MAG_SIZE];
};
static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1)));
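
/*
 * Worked example (not part of the original file): on a 64-bit build the
 * union header is 8 bytes and the pfns[] array is 127 * 8 = 1016 bytes,
 * giving sizeof(struct iova_magazine) == 1024, which the static_assert
 * above confirms is a power of two.
 */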

struct iova_cpu_rcache {
	spinlock_t lock;
	struct iova_magazine *loaded;
	struct iova_magazine *prev;
};

struct iova_rcache {
	spinlock_t lock;
	unsigned int depot_size;
	struct iova_magazine *depot;
	struct iova_cpu_rcache __percpu *cpu_rcaches;
	struct iova_domain *iovad;
	struct delayed_work work;
};

static struct kmem_cache *iova_magazine_cache;

unsigned long iova_rcache_range(void)
{
	return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
}
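
/*
 * Worked example (not part of the original file): with 4KiB pages this is
 * 4096 << 5 = 128KiB, i.e. the largest allocation size the rcaches will
 * ever serve or absorb.
 */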

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
	struct iova_magazine *mag;

	mag = kmem_cache_alloc(iova_magazine_cache, flags);
	if (mag)
		mag->size = 0;

	return mag;
}

static void iova_magazine_free(struct iova_magazine *mag)
{
	kmem_cache_free(iova_magazine_cache, mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

	for (i = 0 ; i < mag->size; ++i) {
		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

		if (WARN_ON(!iova))
			continue;

		remove_iova(iovad, iova);
		free_iova_mem(iova);
	}

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
	return mag->size == IOVA_MAG_SIZE;
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
	return mag->size == 0;
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
				       unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	/* Only fall back to the rbtree if we have no suitable pfns at all */
	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
		if (i == 0)
			return 0;

	/* Swap it to pop it */
	pfn = mag->pfns[i];
	mag->pfns[i] = mag->pfns[--mag->size];

	return pfn;
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
	mag->pfns[mag->size++] = pfn;
}

static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
{
	struct iova_magazine *mag = rcache->depot;

	rcache->depot = mag->next;
	mag->size = IOVA_MAG_SIZE;
	rcache->depot_size--;
	return mag;
}

static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag)
{
	mag->next = rcache->depot;
	rcache->depot = mag;
	rcache->depot_size++;
}

static void iova_depot_work_func(struct work_struct *work)
{
	struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work);
	struct iova_magazine *mag = NULL;
	unsigned long flags;

	spin_lock_irqsave(&rcache->lock, flags);
	if (rcache->depot_size > num_online_cpus())
		mag = iova_depot_pop(rcache);
	spin_unlock_irqrestore(&rcache->lock, flags);

	if (mag) {
		iova_magazine_free_pfns(mag, rcache->iovad);
		iova_magazine_free(mag);
		schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
	}
}

int iova_domain_init_rcaches(struct iova_domain *iovad)
{
	unsigned int cpu;
	int i, ret;

	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
				 sizeof(struct iova_rcache),
				 GFP_KERNEL);
	if (!iovad->rcaches)
		return -ENOMEM;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		struct iova_cpu_rcache *cpu_rcache;
		struct iova_rcache *rcache;

		rcache = &iovad->rcaches[i];
		spin_lock_init(&rcache->lock);
		rcache->iovad = iovad;
		INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func);
		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
						     cache_line_size());
		if (!rcache->cpu_rcaches) {
			ret = -ENOMEM;
			goto out_err;
		}
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);

			spin_lock_init(&cpu_rcache->lock);
			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
				ret = -ENOMEM;
				goto out_err;
			}
		}
	}

	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					       &iovad->cpuhp_dead);
	if (ret)
		goto out_err;
	return 0;

out_err:
	free_iova_rcaches(iovad);
	return ret;
}
EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success. Can fail if rcache is full and we can't free
 * space; free_iova_fast() (our caller) will then fall back to free_iova()
 * and return the IOVA range to the rbtree instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_full(cpu_rcache->loaded)) {
		can_insert = true;
	} else if (!iova_magazine_full(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		can_insert = true;
	} else {
		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

		if (new_mag) {
			spin_lock(&rcache->lock);
			iova_depot_push(rcache, cpu_rcache->loaded);
			spin_unlock(&rcache->lock);
			schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
		}
	}

	if (can_insert)
		iova_magazine_push(cpu_rcache->loaded, iova_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
			       unsigned long size)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return false;

	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'. If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
				       unsigned long limit_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	unsigned long iova_pfn = 0;
	bool has_pfn = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_empty(cpu_rcache->loaded)) {
		has_pfn = true;
	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		has_pfn = true;
	} else {
		spin_lock(&rcache->lock);
		if (rcache->depot) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = iova_depot_pop(rcache);
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
	}

	if (has_pfn)
		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache. Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return 0;

	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	struct iova_cpu_rcache *cpu_rcache;
	unsigned int cpu;

	for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		if (!rcache->cpu_rcaches)
			break;
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			iova_magazine_free(cpu_rcache->loaded);
			iova_magazine_free(cpu_rcache->prev);
		}
		free_percpu(rcache->cpu_rcaches);
		cancel_delayed_work_sync(&rcache->work);
		while (rcache->depot)
			iova_magazine_free(iova_depot_pop(rcache));
	}

	kfree(iovad->rcaches);
	iovad->rcaches = NULL;
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned long flags;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
		spin_lock_irqsave(&cpu_rcache->lock, flags);
		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
	}
}

/*
 * free all the IOVA ranges of global cache
 */
static void free_global_cached_iovas(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	unsigned long flags;

	for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		spin_lock_irqsave(&rcache->lock, flags);
		while (rcache->depot) {
			struct iova_magazine *mag = iova_depot_pop(rcache);

			iova_magazine_free_pfns(mag, iovad);
			iova_magazine_free(mag);
		}
		spin_unlock_irqrestore(&rcache->lock, flags);
	}
}

static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct iova_domain *iovad;

	iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);

	free_cpu_cached_iovas(cpu, iovad);
	return 0;
}

int iova_cache_get(void)
{
	int err = -ENOMEM;

	mutex_lock(&iova_cache_mutex);
	if (!iova_cache_users) {
		iova_cache = kmem_cache_create("iommu_iova", sizeof(struct iova), 0,
					       SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_cache)
			goto out_err;

		iova_magazine_cache = kmem_cache_create("iommu_iova_magazine",
							sizeof(struct iova_magazine),
							0, SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_magazine_cache)
			goto out_err;

		err = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead",
					      NULL, iova_cpuhp_dead);
		if (err) {
			pr_err("IOVA: Couldn't register cpuhp handler: %pe\n", ERR_PTR(err));
			goto out_err;
		}
	}

	iova_cache_users++;
	mutex_unlock(&iova_cache_mutex);

	return 0;

out_err:
	kmem_cache_destroy(iova_cache);
	kmem_cache_destroy(iova_magazine_cache);
	mutex_unlock(&iova_cache_mutex);

	return err;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
	mutex_lock(&iova_cache_mutex);
	if (WARN_ON(!iova_cache_users)) {
		mutex_unlock(&iova_cache_mutex);
		return;
	}
	iova_cache_users--;
	if (!iova_cache_users) {
		cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
		kmem_cache_destroy(iova_cache);
		kmem_cache_destroy(iova_magazine_cache);
	}
	mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);
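
/*
 * Illustrative sketch, not part of the original file: a module-style user
 * pairs iova_cache_get()/iova_cache_put() around the lifetime of its domain.
 * The helper names are hypothetical.
 */
static int __maybe_unused example_user_init(struct iova_domain *iovad)
{
	int ret;

	ret = iova_cache_get();
	if (ret)
		return ret;

	init_iova_domain(iovad, 1UL << 12, 1);
	return 0;
}

static void __maybe_unused example_user_exit(struct iova_domain *iovad)
{
	put_iova_domain(iovad);
	iova_cache_put();
}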

MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_DESCRIPTION("IOMMU I/O Virtual Address management");
MODULE_LICENSE("GPL");