cache.c

// SPDX-License-Identifier: GPL-2.0
/*
 * cache.c - Intel VT-d cache invalidation
 *
 * Copyright (C) 2024 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "trace.h"

/* Check if an existing cache tag can be reused for a new association. */
static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
			     struct intel_iommu *iommu, struct device *dev,
			     ioasid_t pasid, enum cache_tag_type type)
{
	if (tag->type != type)
		return false;

	if (tag->domain_id != domain_id || tag->pasid != pasid)
		return false;

	if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
		return tag->iommu == iommu;

	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
		return tag->dev == dev;

	return false;
}

/* Assign a cache tag with specified type to domain. */
static int cache_tag_assign(struct dmar_domain *domain, u16 did,
			    struct device *dev, ioasid_t pasid,
			    enum cache_tag_type type)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct cache_tag *tag, *temp;
	unsigned long flags;

	tag = kzalloc(sizeof(*tag), GFP_KERNEL);
	if (!tag)
		return -ENOMEM;

	tag->type = type;
	tag->iommu = iommu;
	tag->domain_id = did;
	tag->pasid = pasid;
	tag->users = 1;
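	/*
	 * Device TLB tags track the device itself; IOTLB tags are matched
	 * per IOMMU, so fall back to the IOMMU's struct device to keep
	 * tag->dev valid for both tag flavors.
	 */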
	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
		tag->dev = dev;
	else
		tag->dev = iommu->iommu.dev;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(temp, &domain->cache_tags, node) {
		if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
			temp->users++;
			spin_unlock_irqrestore(&domain->cache_lock, flags);
			kfree(tag);
			trace_cache_tag_assign(temp);
			return 0;
		}
	}
	list_add_tail(&tag->node, &domain->cache_tags);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
	trace_cache_tag_assign(tag);

	return 0;
}

/* Unassign a cache tag with specified type from domain. */
static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
			       struct device *dev, ioasid_t pasid,
			       enum cache_tag_type type)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct cache_tag *tag;
	unsigned long flags;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
			trace_cache_tag_unassign(tag);
			if (--tag->users == 0) {
				list_del(&tag->node);
				kfree(tag);
			}
			break;
		}
	}
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}

/* domain->qi_batch will be freed in iommu_free_domain() path. */
static int domain_qi_batch_alloc(struct dmar_domain *domain)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&domain->cache_lock, flags);
	if (domain->qi_batch)
		goto out_unlock;

	domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_ATOMIC);
	if (!domain->qi_batch)
		ret = -ENOMEM;

out_unlock:
	spin_unlock_irqrestore(&domain->cache_lock, flags);
	return ret;
}

static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
				     struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = domain_qi_batch_alloc(domain);
	if (ret)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
	if (ret || !info->ats_enabled)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
	if (ret)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	return ret;
}

static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
					struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	if (info->ats_enabled)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
}

static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
					    struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = domain_qi_batch_alloc(domain);
	if (ret)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
	if (ret || !info->ats_enabled)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
	if (ret)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	return ret;
}

static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
					       struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	if (info->ats_enabled)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
}

static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;

	/*
	 * The driver assigns different domain IDs for all domains except
	 * the SVA type.
	 */
	if (domain->domain.type == IOMMU_DOMAIN_SVA)
		return FLPT_DEFAULT_DID;

	return domain_id_iommu(domain, iommu);
}

/*
 * Assign cache tags to a domain when it's associated with a device's
 * PASID using a specific domain ID.
 *
 * On success (return value of 0), cache tags are created and added to the
 * domain's cache tag list. On failure (negative return value), an error
 * code is returned indicating the reason for the failure.
 */
int cache_tag_assign_domain(struct dmar_domain *domain,
			    struct device *dev, ioasid_t pasid)
{
	u16 did = domain_get_id_for_dev(domain, dev);
	int ret;

	ret = __cache_tag_assign_domain(domain, did, dev, pasid);
	if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
		return ret;

	ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
	if (ret)
		__cache_tag_unassign_domain(domain, did, dev, pasid);

	return ret;
}

/*
 * Remove the cache tags associated with a device's PASID when the domain is
 * detached from the device.
 *
 * The cache tags must be previously assigned to the domain by calling the
 * assign interface.
 */
void cache_tag_unassign_domain(struct dmar_domain *domain,
			       struct device *dev, ioasid_t pasid)
{
	u16 did = domain_get_id_for_dev(domain, dev);

	__cache_tag_unassign_domain(domain, did, dev, pasid);
	if (domain->domain.type == IOMMU_DOMAIN_NESTED)
		__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
}

static unsigned long calculate_psi_aligned_address(unsigned long start,
						   unsigned long end,
						   unsigned long *_pages,
						   unsigned long *_mask)
{
	unsigned long pages = aligned_nrpages(start, end - start + 1);
	unsigned long aligned_pages = __roundup_pow_of_two(pages);
	unsigned long bitmask = aligned_pages - 1;
	unsigned long mask = ilog2(aligned_pages);
	unsigned long pfn = IOVA_PFN(start);

	/*
	 * PSI masks the low order bits of the base address. If the
	 * address isn't aligned to the mask, then compute a mask value
	 * needed to ensure the target range is flushed.
	 */
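	/*
	 * For example, flushing [0x1000, 0x2fff] covers pfns 1-2. A
	 * two-page PSI at pfn 1 is not naturally aligned, so the carry
	 * analysis below widens it to a four-page PSI at pfn 0, which
	 * still covers the target range.
	 */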
	if (unlikely(bitmask & pfn)) {
		unsigned long end_pfn = pfn + pages - 1, shared_bits;

		/*
		 * Since end_pfn <= pfn + bitmask, the only way bits
		 * higher than bitmask can differ in pfn and end_pfn is
		 * by carrying. This means after masking out bitmask,
		 * high bits starting with the first set bit in
		 * shared_bits are all equal in both pfn and end_pfn.
		 */
		shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
		mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
		aligned_pages = 1UL << mask;
	}

	*_pages = aligned_pages;
	*_mask = mask;
	return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}
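
/*
 * Invalidation descriptors are accumulated in domain->qi_batch and
 * submitted to the invalidation queue in one shot: when the batch buffer
 * fills up, when the walk over the domain's cache tags moves on to a
 * different IOMMU, or once the walk completes.
 */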
static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
{
	if (!iommu || !batch->index)
		return;

	qi_submit_sync(iommu, batch->descs, batch->index, 0);

	/* Reset the index value and clean the whole batch buffer. */
	memset(batch, 0, sizeof(*batch));
}

static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
{
	if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
		qi_batch_flush_descs(iommu, batch);
}

static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
			       unsigned int size_order, u64 type,
			       struct qi_batch *batch)
{
	qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
				   u16 qdep, u64 addr, unsigned int mask,
				   struct qi_batch *batch)
{
	/*
	 * According to VT-d spec, software is recommended to not submit any
	 * Device-TLB invalidation requests while address remapping hardware
	 * is disabled.
	 */
	if (!(iommu->gcmd & DMA_GCMD_TE))
		return;

	qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
				u64 addr, unsigned long npages, bool ih,
				struct qi_batch *batch)
{
	/*
	 * npages == -1 means a PASID-selective invalidation, otherwise,
	 * a positive value for Page-selective-within-PASID invalidation.
	 * 0 is not a valid input.
	 */
	if (!npages)
		return;

	qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
					 u32 pasid, u16 qdep, u64 addr,
					 unsigned int size_order, struct qi_batch *batch)
{
	/*
	 * According to VT-d spec, software is recommended to not submit any
	 * Device-TLB invalidation requests while address remapping hardware
	 * is disabled.
	 */
	if (!(iommu->gcmd & DMA_GCMD_TE))
		return;

	qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
				&batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
				  unsigned long addr, unsigned long pages,
				  unsigned long mask, int ih)
{
	struct intel_iommu *iommu = tag->iommu;
	u64 type = DMA_TLB_PSI_FLUSH;

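	/*
	 * First-stage translations are tagged with the PASID, so they are
	 * invalidated with PASID-based IOTLB invalidation requests.
	 */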
	if (domain->use_first_level) {
		qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
				    pages, ih, domain->qi_batch);
		return;
	}

	/*
	 * Fallback to domain selective flush if no PSI support or the size
	 * is too big.
	 */
	if (!cap_pgsel_inv(iommu->cap) ||
	    mask > cap_max_amask_val(iommu->cap) || pages == -1) {
		addr = 0;
		mask = 0;
		ih = 0;
		type = DMA_TLB_DSI_FLUSH;
	}

	if (ecap_qis(iommu->ecap))
		qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
				   domain->qi_batch);
	else
		__iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
}

static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
				       unsigned long addr, unsigned long mask)
{
	struct intel_iommu *iommu = tag->iommu;
	struct device_domain_info *info;
	u16 sid;

	info = dev_iommu_priv_get(tag->dev);
	sid = PCI_DEVID(info->bus, info->devfn);

	if (tag->pasid == IOMMU_NO_PASID) {
		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
				       addr, mask, domain->qi_batch);
		if (info->dtlb_extra_inval)
			qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
					       addr, mask, domain->qi_batch);
		return;
	}

	qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
				     info->ats_qdep, addr, mask, domain->qi_batch);
	if (info->dtlb_extra_inval)
		qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
					     info->ats_qdep, addr, mask,
					     domain->qi_batch);
}

static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
{
	struct intel_iommu *iommu = tag->iommu;
	struct device_domain_info *info;
	u16 sid;

	info = dev_iommu_priv_get(tag->dev);
	sid = PCI_DEVID(info->bus, info->devfn);

	qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
			       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
	if (info->dtlb_extra_inval)
		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
				       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
}

/*
 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
 * when the memory mappings in the target domain have been modified.
 */
void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
			   unsigned long end, int ih)
{
	struct intel_iommu *iommu = NULL;
	unsigned long pages, mask, addr;
	struct cache_tag *tag;
	unsigned long flags;

	addr = calculate_psi_aligned_address(start, end, &pages, &mask);

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		switch (tag->type) {
		case CACHE_TAG_IOTLB:
		case CACHE_TAG_NESTING_IOTLB:
			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
			break;
		case CACHE_TAG_NESTING_DEVTLB:
			/*
			 * Address translation cache in device side caches the
			 * result of nested translation. There is no easy way
			 * to identify the exact set of nested translations
			 * affected by a change in S2. So just flush the entire
			 * device cache.
			 */
			addr = 0;
			mask = MAX_AGAW_PFN_WIDTH;
			fallthrough;
		case CACHE_TAG_DEVTLB:
			cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
			break;
		}

		trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}

/*
 * Invalidates all ranges of IOVA when the memory mappings in the target
 * domain have been modified.
 */
void cache_tag_flush_all(struct dmar_domain *domain)
{
	struct intel_iommu *iommu = NULL;
	struct cache_tag *tag;
	unsigned long flags;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		switch (tag->type) {
		case CACHE_TAG_IOTLB:
		case CACHE_TAG_NESTING_IOTLB:
			cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
			break;
		case CACHE_TAG_DEVTLB:
		case CACHE_TAG_NESTING_DEVTLB:
			cache_tag_flush_devtlb_all(domain, tag);
			break;
		}

		trace_cache_tag_flush_all(tag);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}

/*
 * Invalidate a range of IOVA when new mappings are created in the target
 * domain.
 *
 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
 *   Set, any software updates to remapping structures other than first-
 *   stage mapping requires explicit invalidation of the caches.
 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
 *   write buffer flushing, software must explicitly perform write-buffer
 *   flushing, if cache invalidation is not required.
 */
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
			      unsigned long end)
{
	struct intel_iommu *iommu = NULL;
	unsigned long pages, mask, addr;
	struct cache_tag *tag;
	unsigned long flags;

	addr = calculate_psi_aligned_address(start, end, &pages, &mask);

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;
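		/*
		 * No cache invalidation is required for non-present to
		 * present updates when Caching Mode is not reported or when
		 * first-stage translation is in use; a write buffer flush
		 * is sufficient on hardware that needs it.
		 */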
		if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
			iommu_flush_write_buffer(iommu);
			continue;
		}

		if (tag->type == CACHE_TAG_IOTLB ||
		    tag->type == CACHE_TAG_NESTING_IOTLB)
			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);

		trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}