vmd.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Volume Management Device driver
  4. * Copyright (c) 2015, Intel Corporation.
  5. */
  6. #include <linux/device.h>
  7. #include <linux/interrupt.h>
  8. #include <linux/irq.h>
  9. #include <linux/kernel.h>
  10. #include <linux/module.h>
  11. #include <linux/msi.h>
  12. #include <linux/pci.h>
  13. #include <linux/srcu.h>
  14. #include <linux/rculist.h>
  15. #include <linux/rcupdate.h>
  16. #include <asm/irqdomain.h>
  17. #include <asm/device.h>
  18. #include <asm/msi.h>
  19. #include <asm/msidef.h>
  20. #define VMD_CFGBAR 0
  21. #define VMD_MEMBAR1 2
  22. #define VMD_MEMBAR2 4
  23. #define PCI_REG_VMCAP 0x40
  24. #define BUS_RESTRICT_CAP(vmcap) (vmcap & 0x1)
  25. #define PCI_REG_VMCONFIG 0x44
  26. #define BUS_RESTRICT_CFG(vmcfg) ((vmcfg >> 8) & 0x3)
  27. #define PCI_REG_VMLOCK 0x70
  28. #define MB2_SHADOW_EN(vmlock) (vmlock & 0x2)
  29. #define MB2_SHADOW_OFFSET 0x2000
  30. #define MB2_SHADOW_SIZE 16
  31. enum vmd_features {
  32. /*
  33. * Device may contain registers which hint the physical location of the
  34. * membars, in order to allow proper address translation during
  35. * resource assignment to enable guest virtualization
  36. */
  37. VMD_FEAT_HAS_MEMBAR_SHADOW = (1 << 0),
  38. /*
  39. * Device may provide root port configuration information which limits
  40. * bus numbering
  41. */
  42. VMD_FEAT_HAS_BUS_RESTRICTIONS = (1 << 1),
  43. };
/*
 * Lock for manipulating VMD IRQ lists.
 *
 * One file-scope raw spinlock covers all vectors: it is held while a child
 * IRQ is linked into or unlinked from a vector's irq_list and while a
 * vector's child count is read or modified.
 */
static DEFINE_RAW_SPINLOCK(list_lock);
/**
 * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
 * @node:	list item for parent traversal; linked into the parent's
 *		irq_list by vmd_irq_enable() and unlinked by
 *		vmd_irq_disable().
 * @irq:	back pointer to parent (the VMD vector this IRQ is demuxed
 *		from).
 * @enabled:	true if driver enabled IRQ, i.e. @node is currently on the
 *		parent's list.
 * @virq:	the virtual IRQ value provided to the requesting driver.
 *
 * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
 * a VMD IRQ using this structure.
 */
struct vmd_irq {
	struct list_head node;
	struct vmd_irq_list *irq;
	bool enabled;
	unsigned int virq;
};
/**
 * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
 * @irq_list:	the list of irq's the VMD one demuxes to.
 * @srcu:	SRCU struct for local synchronization; vmd_irq() walks
 *		@irq_list inside an SRCU read-side section and
 *		vmd_msi_free() waits on it before freeing a child.
 * @count:	number of child IRQs assigned to this vector; used to track
 *		sharing. Incremented by vmd_next_irq() when it selects this
 *		vector by load and decremented by vmd_msi_free().
 */
struct vmd_irq_list {
	struct list_head irq_list;
	struct srcu_struct srcu;
	unsigned int count;
};
/*
 * struct vmd_dev - per-endpoint driver state
 *
 * dev:		the VMD PCI endpoint this instance is bound to.
 * cfg_lock:	serializes config-space accesses made through cfgbar.
 * cfgbar:	mapping of VMD_CFGBAR, used to reach child config space.
 * msix_count:	number of MSI-X vectors allocated for the endpoint.
 * irqs:	array of msix_count per-vector child IRQ lists.
 * sysdata:	PCI sysdata handed to the root bus; vmd_from_bus() recovers
 *		the vmd_dev from it.
 * resources:	[0] bus number range, [1] MEMBAR1 window, [2] MEMBAR2
 *		window offered to the child bus.
 * irq_domain:	MSI irq domain created for devices behind the VMD.
 * bus:		root bus of the VMD domain.
 * busn_start:	first bus number of the domain.
 * dma_ops/dma_domain: DMA ops installed for the domain; only present when
 *		CONFIG_X86_DEV_DMA_OPS is set.
 */
struct vmd_dev {
	struct pci_dev *dev;
	spinlock_t cfg_lock;
	char __iomem *cfgbar;
	int msix_count;
	struct vmd_irq_list *irqs;
	struct pci_sysdata sysdata;
	struct resource resources[3];
	struct irq_domain *irq_domain;
	struct pci_bus *bus;
	u8 busn_start;
#ifdef CONFIG_X86_DEV_DMA_OPS
	struct dma_map_ops dma_ops;
	struct dma_domain dma_domain;
#endif
};
  92. static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
  93. {
  94. return container_of(bus->sysdata, struct vmd_dev, sysdata);
  95. }
  96. static inline unsigned int index_from_irqs(struct vmd_dev *vmd,
  97. struct vmd_irq_list *irqs)
  98. {
  99. return irqs - vmd->irqs;
  100. }
  101. /*
  102. * Drivers managing a device in a VMD domain allocate their own IRQs as before,
  103. * but the MSI entry for the hardware it's driving will be programmed with a
  104. * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its
  105. * domain into one of its own, and the VMD driver de-muxes these for the
  106. * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations
  107. * and irq_chip to set this up.
  108. */
  109. static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
  110. {
  111. struct vmd_irq *vmdirq = data->chip_data;
  112. struct vmd_irq_list *irq = vmdirq->irq;
  113. struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);
  114. msg->address_hi = MSI_ADDR_BASE_HI;
  115. msg->address_lo = MSI_ADDR_BASE_LO |
  116. MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq));
  117. msg->data = 0;
  118. }
  119. /*
  120. * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops.
  121. */
  122. static void vmd_irq_enable(struct irq_data *data)
  123. {
  124. struct vmd_irq *vmdirq = data->chip_data;
  125. unsigned long flags;
  126. raw_spin_lock_irqsave(&list_lock, flags);
  127. WARN_ON(vmdirq->enabled);
  128. list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
  129. vmdirq->enabled = true;
  130. raw_spin_unlock_irqrestore(&list_lock, flags);
  131. data->chip->irq_unmask(data);
  132. }
  133. static void vmd_irq_disable(struct irq_data *data)
  134. {
  135. struct vmd_irq *vmdirq = data->chip_data;
  136. unsigned long flags;
  137. data->chip->irq_mask(data);
  138. raw_spin_lock_irqsave(&list_lock, flags);
  139. if (vmdirq->enabled) {
  140. list_del_rcu(&vmdirq->node);
  141. vmdirq->enabled = false;
  142. }
  143. raw_spin_unlock_irqrestore(&list_lock, flags);
  144. }
  145. /*
  146. * XXX: Stubbed until we develop acceptable way to not create conflicts with
  147. * other devices sharing the same vector.
  148. */
  149. static int vmd_irq_set_affinity(struct irq_data *data,
  150. const struct cpumask *dest, bool force)
  151. {
  152. return -EINVAL;
  153. }
/*
 * irq_chip for child IRQs in a VMD domain. Only enable/disable, message
 * composition and (stubbed) affinity are provided here; vmd_irq_enable() and
 * vmd_irq_disable() additionally call the chip's irq_unmask/irq_mask
 * callbacks, which this initializer leaves unset.
 */
static struct irq_chip vmd_msi_controller = {
	.name = "VMD-MSI",
	.irq_enable = vmd_irq_enable,
	.irq_disable = vmd_irq_disable,
	.irq_compose_msi_msg = vmd_compose_msi_msg,
	.irq_set_affinity = vmd_irq_set_affinity,
};
  161. static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
  162. msi_alloc_info_t *arg)
  163. {
  164. return 0;
  165. }
  166. /*
  167. * XXX: We can be even smarter selecting the best IRQ once we solve the
  168. * affinity problem.
  169. */
  170. static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
  171. {
  172. int i, best = 1;
  173. unsigned long flags;
  174. if (vmd->msix_count == 1)
  175. return &vmd->irqs[0];
  176. /*
  177. * White list for fast-interrupt handlers. All others will share the
  178. * "slow" interrupt vector.
  179. */
  180. switch (msi_desc_to_pci_dev(desc)->class) {
  181. case PCI_CLASS_STORAGE_EXPRESS:
  182. break;
  183. default:
  184. return &vmd->irqs[0];
  185. }
  186. raw_spin_lock_irqsave(&list_lock, flags);
  187. for (i = 1; i < vmd->msix_count; i++)
  188. if (vmd->irqs[i].count < vmd->irqs[best].count)
  189. best = i;
  190. vmd->irqs[best].count++;
  191. raw_spin_unlock_irqrestore(&list_lock, flags);
  192. return &vmd->irqs[best];
  193. }
  194. static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
  195. unsigned int virq, irq_hw_number_t hwirq,
  196. msi_alloc_info_t *arg)
  197. {
  198. struct msi_desc *desc = arg->desc;
  199. struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
  200. struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
  201. unsigned int index, vector;
  202. if (!vmdirq)
  203. return -ENOMEM;
  204. INIT_LIST_HEAD(&vmdirq->node);
  205. vmdirq->irq = vmd_next_irq(vmd, desc);
  206. vmdirq->virq = virq;
  207. index = index_from_irqs(vmd, vmdirq->irq);
  208. vector = pci_irq_vector(vmd->dev, index);
  209. irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
  210. handle_untracked_irq, vmd, NULL);
  211. return 0;
  212. }
  213. static void vmd_msi_free(struct irq_domain *domain,
  214. struct msi_domain_info *info, unsigned int virq)
  215. {
  216. struct vmd_irq *vmdirq = irq_get_chip_data(virq);
  217. unsigned long flags;
  218. synchronize_srcu(&vmdirq->irq->srcu);
  219. /* XXX: Potential optimization to rebalance */
  220. raw_spin_lock_irqsave(&list_lock, flags);
  221. vmdirq->irq->count--;
  222. raw_spin_unlock_irqrestore(&list_lock, flags);
  223. kfree(vmdirq);
  224. }
  225. static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
  226. int nvec, msi_alloc_info_t *arg)
  227. {
  228. struct pci_dev *pdev = to_pci_dev(dev);
  229. struct vmd_dev *vmd = vmd_from_bus(pdev->bus);
  230. if (nvec > vmd->msix_count)
  231. return vmd->msix_count;
  232. memset(arg, 0, sizeof(*arg));
  233. return 0;
  234. }
  235. static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
  236. {
  237. arg->desc = desc;
  238. }
/* MSI domain callbacks implemented by this driver. */
static struct msi_domain_ops vmd_msi_domain_ops = {
	.get_hwirq = vmd_get_hwirq,
	.msi_init = vmd_msi_init,
	.msi_free = vmd_msi_free,
	.msi_prepare = vmd_msi_prepare,
	.set_desc = vmd_set_desc,
};
/*
 * Description of the VMD MSI domain passed to pci_msi_create_irq_domain().
 * The USE_DEF flags request the default domain/chip ops for callbacks not
 * set above; vmd_irq_enable()/vmd_irq_disable() depend on the default chip
 * mask/unmask ops being installed this way.
 */
static struct msi_domain_info vmd_msi_domain_info = {
	.flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
		 MSI_FLAG_PCI_MSIX,
	.ops = &vmd_msi_domain_ops,
	.chip = &vmd_msi_controller,
};
  252. #ifdef CONFIG_X86_DEV_DMA_OPS
  253. /*
  254. * VMD replaces the requester ID with its own. DMA mappings for devices in a
  255. * VMD domain need to be mapped for the VMD, not the device requiring
  256. * the mapping.
  257. */
  258. static struct device *to_vmd_dev(struct device *dev)
  259. {
  260. struct pci_dev *pdev = to_pci_dev(dev);
  261. struct vmd_dev *vmd = vmd_from_bus(pdev->bus);
  262. return &vmd->dev->dev;
  263. }
  264. static const struct dma_map_ops *vmd_dma_ops(struct device *dev)
  265. {
  266. return get_dma_ops(to_vmd_dev(dev));
  267. }
  268. static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr,
  269. gfp_t flag, unsigned long attrs)
  270. {
  271. return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag,
  272. attrs);
  273. }
  274. static void vmd_free(struct device *dev, size_t size, void *vaddr,
  275. dma_addr_t addr, unsigned long attrs)
  276. {
  277. return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr,
  278. attrs);
  279. }
  280. static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
  281. void *cpu_addr, dma_addr_t addr, size_t size,
  282. unsigned long attrs)
  283. {
  284. return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr,
  285. size, attrs);
  286. }
  287. static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
  288. void *cpu_addr, dma_addr_t addr, size_t size,
  289. unsigned long attrs)
  290. {
  291. return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr,
  292. addr, size, attrs);
  293. }
  294. static dma_addr_t vmd_map_page(struct device *dev, struct page *page,
  295. unsigned long offset, size_t size,
  296. enum dma_data_direction dir,
  297. unsigned long attrs)
  298. {
  299. return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size,
  300. dir, attrs);
  301. }
  302. static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
  303. enum dma_data_direction dir, unsigned long attrs)
  304. {
  305. vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs);
  306. }
  307. static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents,
  308. enum dma_data_direction dir, unsigned long attrs)
  309. {
  310. return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
  311. }
  312. static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
  313. enum dma_data_direction dir, unsigned long attrs)
  314. {
  315. vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
  316. }
  317. static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
  318. size_t size, enum dma_data_direction dir)
  319. {
  320. vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir);
  321. }
  322. static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr,
  323. size_t size, enum dma_data_direction dir)
  324. {
  325. vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size,
  326. dir);
  327. }
  328. static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
  329. int nents, enum dma_data_direction dir)
  330. {
  331. vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir);
  332. }
  333. static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
  334. int nents, enum dma_data_direction dir)
  335. {
  336. vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir);
  337. }
  338. static int vmd_mapping_error(struct device *dev, dma_addr_t addr)
  339. {
  340. return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr);
  341. }
  342. static int vmd_dma_supported(struct device *dev, u64 mask)
  343. {
  344. return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask);
  345. }
  346. #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
  347. static u64 vmd_get_required_mask(struct device *dev)
  348. {
  349. return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev));
  350. }
  351. #endif
  352. static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
  353. {
  354. struct dma_domain *domain = &vmd->dma_domain;
  355. if (get_dma_ops(&vmd->dev->dev))
  356. del_dma_domain(domain);
  357. }
  358. #define ASSIGN_VMD_DMA_OPS(source, dest, fn) \
  359. do { \
  360. if (source->fn) \
  361. dest->fn = vmd_##fn; \
  362. } while (0)
  363. static void vmd_setup_dma_ops(struct vmd_dev *vmd)
  364. {
  365. const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev);
  366. struct dma_map_ops *dest = &vmd->dma_ops;
  367. struct dma_domain *domain = &vmd->dma_domain;
  368. domain->domain_nr = vmd->sysdata.domain;
  369. domain->dma_ops = dest;
  370. if (!source)
  371. return;
  372. ASSIGN_VMD_DMA_OPS(source, dest, alloc);
  373. ASSIGN_VMD_DMA_OPS(source, dest, free);
  374. ASSIGN_VMD_DMA_OPS(source, dest, mmap);
  375. ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable);
  376. ASSIGN_VMD_DMA_OPS(source, dest, map_page);
  377. ASSIGN_VMD_DMA_OPS(source, dest, unmap_page);
  378. ASSIGN_VMD_DMA_OPS(source, dest, map_sg);
  379. ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg);
  380. ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu);
  381. ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device);
  382. ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu);
  383. ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device);
  384. ASSIGN_VMD_DMA_OPS(source, dest, mapping_error);
  385. ASSIGN_VMD_DMA_OPS(source, dest, dma_supported);
  386. #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
  387. ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask);
  388. #endif
  389. add_dma_domain(domain);
  390. }
  391. #undef ASSIGN_VMD_DMA_OPS
  392. #else
  393. static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {}
  394. static void vmd_setup_dma_ops(struct vmd_dev *vmd) {}
  395. #endif
  396. static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
  397. unsigned int devfn, int reg, int len)
  398. {
  399. char __iomem *addr = vmd->cfgbar +
  400. ((bus->number - vmd->busn_start) << 20) +
  401. (devfn << 12) + reg;
  402. if ((addr - vmd->cfgbar) + len >=
  403. resource_size(&vmd->dev->resource[VMD_CFGBAR]))
  404. return NULL;
  405. return addr;
  406. }
  407. /*
  408. * CPU may deadlock if config space is not serialized on some versions of this
  409. * hardware, so all config space access is done under a spinlock.
  410. */
  411. static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg,
  412. int len, u32 *value)
  413. {
  414. struct vmd_dev *vmd = vmd_from_bus(bus);
  415. char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
  416. unsigned long flags;
  417. int ret = 0;
  418. if (!addr)
  419. return -EFAULT;
  420. spin_lock_irqsave(&vmd->cfg_lock, flags);
  421. switch (len) {
  422. case 1:
  423. *value = readb(addr);
  424. break;
  425. case 2:
  426. *value = readw(addr);
  427. break;
  428. case 4:
  429. *value = readl(addr);
  430. break;
  431. default:
  432. ret = -EINVAL;
  433. break;
  434. }
  435. spin_unlock_irqrestore(&vmd->cfg_lock, flags);
  436. return ret;
  437. }
  438. /*
  439. * VMD h/w converts non-posted config writes to posted memory writes. The
  440. * read-back in this function forces the completion so it returns only after
  441. * the config space was written, as expected.
  442. */
  443. static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
  444. int len, u32 value)
  445. {
  446. struct vmd_dev *vmd = vmd_from_bus(bus);
  447. char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
  448. unsigned long flags;
  449. int ret = 0;
  450. if (!addr)
  451. return -EFAULT;
  452. spin_lock_irqsave(&vmd->cfg_lock, flags);
  453. switch (len) {
  454. case 1:
  455. writeb(value, addr);
  456. readb(addr);
  457. break;
  458. case 2:
  459. writew(value, addr);
  460. readw(addr);
  461. break;
  462. case 4:
  463. writel(value, addr);
  464. readl(addr);
  465. break;
  466. default:
  467. ret = -EINVAL;
  468. break;
  469. }
  470. spin_unlock_irqrestore(&vmd->cfg_lock, flags);
  471. return ret;
  472. }
/* Config-space accessors for buses in a VMD domain, routed through CFGBAR. */
static struct pci_ops vmd_ops = {
	.read = vmd_pci_read,
	.write = vmd_pci_write,
};
  477. static void vmd_attach_resources(struct vmd_dev *vmd)
  478. {
  479. vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
  480. vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2];
  481. }
  482. static void vmd_detach_resources(struct vmd_dev *vmd)
  483. {
  484. vmd->dev->resource[VMD_MEMBAR1].child = NULL;
  485. vmd->dev->resource[VMD_MEMBAR2].child = NULL;
  486. }
  487. /*
  488. * VMD domains start at 0x10000 to not clash with ACPI _SEG domains.
  489. * Per ACPI r6.0, sec 6.5.6, _SEG returns an integer, of which the lower
  490. * 16 bits are the PCI Segment Group (domain) number. Other bits are
  491. * currently reserved.
  492. */
  493. static int vmd_find_free_domain(void)
  494. {
  495. int domain = 0xffff;
  496. struct pci_bus *bus = NULL;
  497. while ((bus = pci_find_next_bus(bus)) != NULL)
  498. domain = max_t(int, domain, pci_domain_nr(bus));
  499. return domain + 1;
  500. }
/*
 * Build and expose the PCI domain behind a VMD endpoint.
 *
 * @vmd:      driver state; dev, cfgbar and the vector lists must already be
 *            set up by the caller.
 * @features: VMD_FEAT_* bits for this device ID.
 *
 * The function works out address offsets and the starting bus number from
 * the optional shadow/restriction registers, describes the bus range and the
 * two memory windows as resources, picks a free domain number, creates the
 * MSI irq domain and the root bus, and finally rescans the new bus.
 *
 * Returns 0 on success. On failure returns -ENODEV (VMLOCK unreadable, no
 * fwnode, no irq domain, or no root bus), -ENOMEM (MEMBAR2 cannot be
 * mapped), or the negative domain number reported by
 * vmd_find_free_domain().
 */
static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
{
	struct pci_sysdata *sd = &vmd->sysdata;
	struct fwnode_handle *fn;
	struct resource *res;
	u32 upper_bits;
	unsigned long flags;
	LIST_HEAD(resources);
	resource_size_t offset[2] = {0};
	resource_size_t membar2_offset = 0x2000;

	/*
	 * Shadow registers may exist in certain VMD device ids which allow
	 * guests to correctly assign host physical addresses to the root ports
	 * and child devices. These registers will either return the host value
	 * or 0, depending on an enable bit in the VMD device.
	 */
	if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
		u32 vmlock;
		int ret;

		/* The usable part of MEMBAR2 starts after the shadow area. */
		membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
		ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
		if (ret || vmlock == ~0)
			return -ENODEV;

		if (MB2_SHADOW_EN(vmlock)) {
			void __iomem *membar2;

			/* Temporary mapping, only to read the two shadows. */
			membar2 = pci_iomap(vmd->dev, VMD_MEMBAR2, 0);
			if (!membar2)
				return -ENOMEM;
			/*
			 * offset[n] = address seen here minus the address
			 * held in the shadow register for that membar.
			 */
			offset[0] = vmd->dev->resource[VMD_MEMBAR1].start -
					(readq(membar2 + MB2_SHADOW_OFFSET) &
					 PCI_BASE_ADDRESS_MEM_MASK);
			offset[1] = vmd->dev->resource[VMD_MEMBAR2].start -
					(readq(membar2 + MB2_SHADOW_OFFSET + 8) &
					 PCI_BASE_ADDRESS_MEM_MASK);
			pci_iounmap(vmd->dev, membar2);
		}
	}

	/*
	 * Certain VMD devices may have a root port configuration option which
	 * limits the bus range to between 0-127 or 128-255
	 */
	if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
		u32 vmcap, vmconfig;

		pci_read_config_dword(vmd->dev, PCI_REG_VMCAP, &vmcap);
		pci_read_config_dword(vmd->dev, PCI_REG_VMCONFIG, &vmconfig);
		if (BUS_RESTRICT_CAP(vmcap) &&
		    (BUS_RESTRICT_CFG(vmconfig) == 0x1))
			vmd->busn_start = 128;
	}

	/* Bus range: one bus per MiB of CFGBAR, starting at busn_start. */
	res = &vmd->dev->resource[VMD_CFGBAR];
	vmd->resources[0] = (struct resource) {
		.name  = "VMD CFGBAR",
		.start = vmd->busn_start,
		.end   = vmd->busn_start + (resource_size(res) >> 20) - 1,
		.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
	};

	/*
	 * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can
	 * put 32-bit resources in the window.
	 *
	 * There's no hardware reason why a 64-bit window *couldn't*
	 * contain a 32-bit resource, but pbus_size_mem() computes the
	 * bridge window size assuming a 64-bit window will contain no
	 * 32-bit resources.  __pci_assign_resource() enforces that
	 * artificial restriction to make sure everything will fit.
	 *
	 * The only way we could use a 64-bit non-prefetchable MEMBAR is
	 * if its address is <4GB so that we can convert it to a 32-bit
	 * resource.  To be visible to the host OS, all VMD endpoints must
	 * be initially configured by platform BIOS, which includes setting
	 * up these resources.  We can assume the device is configured
	 * according to the platform needs.
	 */
	res = &vmd->dev->resource[VMD_MEMBAR1];
	upper_bits = upper_32_bits(res->end);
	flags = res->flags & ~IORESOURCE_SIZEALIGN;
	if (!upper_bits)
		flags &= ~IORESOURCE_MEM_64;
	vmd->resources[1] = (struct resource) {
		.name  = "VMD MEMBAR1",
		.start = res->start,
		.end   = res->end,
		.flags = flags,
		.parent = res,
	};

	/* Same treatment for MEMBAR2, minus the region skipped at its start. */
	res = &vmd->dev->resource[VMD_MEMBAR2];
	upper_bits = upper_32_bits(res->end);
	flags = res->flags & ~IORESOURCE_SIZEALIGN;
	if (!upper_bits)
		flags &= ~IORESOURCE_MEM_64;
	vmd->resources[2] = (struct resource) {
		.name  = "VMD MEMBAR2",
		.start = res->start + membar2_offset,
		.end   = res->end,
		.flags = flags,
		.parent = res,
	};

	sd->vmd_domain = true;
	sd->domain = vmd_find_free_domain();
	if (sd->domain < 0)
		return sd->domain;

	sd->node = pcibus_to_node(vmd->dev->bus);

	fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain);
	if (!fn)
		return -ENODEV;

	vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info,
						    x86_vector_domain);
	if (!vmd->irq_domain) {
		irq_domain_free_fwnode(fn);
		return -ENODEV;
	}

	pci_add_resource(&resources, &vmd->resources[0]);
	pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
	pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]);

	vmd->bus = pci_create_root_bus(&vmd->dev->dev, vmd->busn_start,
				       &vmd_ops, sd, &resources);
	if (!vmd->bus) {
		/* Undo everything acquired above, in reverse order. */
		pci_free_resource_list(&resources);
		irq_domain_remove(vmd->irq_domain);
		irq_domain_free_fwnode(fn);
		return -ENODEV;
	}

	vmd_attach_resources(vmd);
	vmd_setup_dma_ops(vmd);
	dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);
	pci_rescan_bus(vmd->bus);

	/* A missing "domain" symlink only triggers a warning, not a failure. */
	WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj,
			       "domain"), "Can't create symlink to domain\n");
	return 0;
}
  631. static irqreturn_t vmd_irq(int irq, void *data)
  632. {
  633. struct vmd_irq_list *irqs = data;
  634. struct vmd_irq *vmdirq;
  635. int idx;
  636. idx = srcu_read_lock(&irqs->srcu);
  637. list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
  638. generic_handle_irq(vmdirq->virq);
  639. srcu_read_unlock(&irqs->srcu, idx);
  640. return IRQ_HANDLED;
  641. }
  642. static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
  643. {
  644. struct vmd_dev *vmd;
  645. int i, err;
  646. if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
  647. return -ENOMEM;
  648. vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL);
  649. if (!vmd)
  650. return -ENOMEM;
  651. vmd->dev = dev;
  652. err = pcim_enable_device(dev);
  653. if (err < 0)
  654. return err;
  655. vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0);
  656. if (!vmd->cfgbar)
  657. return -ENOMEM;
  658. pci_set_master(dev);
  659. if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) &&
  660. dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32)))
  661. return -ENODEV;
  662. vmd->msix_count = pci_msix_vec_count(dev);
  663. if (vmd->msix_count < 0)
  664. return -ENODEV;
  665. vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count,
  666. PCI_IRQ_MSIX);
  667. if (vmd->msix_count < 0)
  668. return vmd->msix_count;
  669. vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs),
  670. GFP_KERNEL);
  671. if (!vmd->irqs)
  672. return -ENOMEM;
  673. for (i = 0; i < vmd->msix_count; i++) {
  674. err = init_srcu_struct(&vmd->irqs[i].srcu);
  675. if (err)
  676. return err;
  677. INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
  678. err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
  679. vmd_irq, IRQF_NO_THREAD,
  680. "vmd", &vmd->irqs[i]);
  681. if (err)
  682. return err;
  683. }
  684. spin_lock_init(&vmd->cfg_lock);
  685. pci_set_drvdata(dev, vmd);
  686. err = vmd_enable_domain(vmd, (unsigned long) id->driver_data);
  687. if (err)
  688. return err;
  689. dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n",
  690. vmd->sysdata.domain);
  691. return 0;
  692. }
  693. static void vmd_cleanup_srcu(struct vmd_dev *vmd)
  694. {
  695. int i;
  696. for (i = 0; i < vmd->msix_count; i++)
  697. cleanup_srcu_struct(&vmd->irqs[i].srcu);
  698. }
  699. static void vmd_remove(struct pci_dev *dev)
  700. {
  701. struct vmd_dev *vmd = pci_get_drvdata(dev);
  702. struct fwnode_handle *fn = vmd->irq_domain->fwnode;
  703. sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
  704. pci_stop_root_bus(vmd->bus);
  705. pci_remove_root_bus(vmd->bus);
  706. vmd_cleanup_srcu(vmd);
  707. vmd_teardown_dma_ops(vmd);
  708. vmd_detach_resources(vmd);
  709. irq_domain_remove(vmd->irq_domain);
  710. irq_domain_free_fwnode(fn);
  711. }
  712. #ifdef CONFIG_PM_SLEEP
  713. static int vmd_suspend(struct device *dev)
  714. {
  715. struct pci_dev *pdev = to_pci_dev(dev);
  716. struct vmd_dev *vmd = pci_get_drvdata(pdev);
  717. int i;
  718. for (i = 0; i < vmd->msix_count; i++)
  719. devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);
  720. pci_save_state(pdev);
  721. return 0;
  722. }
  723. static int vmd_resume(struct device *dev)
  724. {
  725. struct pci_dev *pdev = to_pci_dev(dev);
  726. struct vmd_dev *vmd = pci_get_drvdata(pdev);
  727. int err, i;
  728. for (i = 0; i < vmd->msix_count; i++) {
  729. err = devm_request_irq(dev, pci_irq_vector(pdev, i),
  730. vmd_irq, IRQF_NO_THREAD,
  731. "vmd", &vmd->irqs[i]);
  732. if (err)
  733. return err;
  734. }
  735. pci_restore_state(pdev);
  736. return 0;
  737. }
  738. #endif
/* PM ops table built from vmd_suspend()/vmd_resume(); used by vmd_drv. */
static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume);
/*
 * Supported VMD endpoints. driver_data carries the VMD_FEAT_* bits that
 * vmd_probe() passes on to vmd_enable_domain(); an entry without
 * driver_data has no optional features.
 */
static const struct pci_device_id vmd_ids[] = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_201D),},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_28C0),
		.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
				VMD_FEAT_HAS_BUS_RESTRICTIONS,},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_9A0B),
		.driver_data = VMD_FEAT_HAS_BUS_RESTRICTIONS,},
	{0,}	/* terminator */
};
MODULE_DEVICE_TABLE(pci, vmd_ids);
/* PCI driver glue: ID table, probe/remove entry points and PM ops. */
static struct pci_driver vmd_drv = {
	.name		= "vmd",
	.id_table	= vmd_ids,
	.probe		= vmd_probe,
	.remove		= vmd_remove,
	.driver		= {
		.pm	= &vmd_dev_pm_ops,
	},
};
/* Generates the module init/exit that register and unregister vmd_drv. */
module_pci_driver(vmd_drv);

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.6");