vfio_pci_rdwr.c 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. /*
  2. * VFIO PCI I/O Port & MMIO access
  3. *
  4. * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
  5. * Author: Alex Williamson <alex.williamson@redhat.com>
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License version 2 as
  9. * published by the Free Software Foundation.
  10. *
  11. * Derived from original vfio:
  12. * Copyright 2010 Cisco Systems, Inc. All rights reserved.
  13. * Author: Tom Lyon, pugs@cisco.com
  14. */
  15. #include <linux/fs.h>
  16. #include <linux/pci.h>
  17. #include <linux/uaccess.h>
  18. #include <linux/io.h>
  19. #include <linux/vfio.h>
  20. #include <linux/vgaarb.h>
  21. #include "vfio_pci_private.h"
  /*
   * Width-specific accessor aliases used throughout this file.
   *
   * Single-byte accesses have no byte order, so they always map to the plain
   * helpers.  For the 16/32/64-bit widths the plain helpers are selected when
   * __LITTLE_ENDIAN is defined and the "be" variants otherwise.
   * NOTE(review): presumably this keeps data moving between the device and
   * the user buffer without any byte swapping on either kind of host --
   * confirm against the kernel I/O accessor documentation.
   */
  #define vfio_ioread8    ioread8
  #define vfio_iowrite8   iowrite8

  #ifndef __LITTLE_ENDIAN

  #define vfio_ioread16   ioread16be
  #define vfio_ioread32   ioread32be
  #define vfio_ioread64   ioread64be

  #define vfio_iowrite16  iowrite16be
  #define vfio_iowrite32  iowrite32be
  #define vfio_iowrite64  iowrite64be

  #else /* __LITTLE_ENDIAN */

  #define vfio_ioread16   ioread16
  #define vfio_ioread32   ioread32
  #define vfio_ioread64   ioread64

  #define vfio_iowrite16  iowrite16
  #define vfio_iowrite32  iowrite32
  #define vfio_iowrite64  iowrite64

  #endif /* __LITTLE_ENDIAN */
  39. /*
  40. * Read or write from an __iomem region (MMIO or I/O port) with an excluded
  41. * range which is inaccessible. The excluded range drops writes and fills
  42. * reads with -1. This is intended for handling MSI-X vector tables and
  43. * leftover space for ROM BARs.
  44. */
  45. static ssize_t do_io_rw(void __iomem *io, char __user *buf,
  46. loff_t off, size_t count, size_t x_start,
  47. size_t x_end, bool iswrite)
  48. {
  49. ssize_t done = 0;
  50. while (count) {
  51. size_t fillable, filled;
  52. if (off < x_start)
  53. fillable = min(count, (size_t)(x_start - off));
  54. else if (off >= x_end)
  55. fillable = count;
  56. else
  57. fillable = 0;
  58. if (fillable >= 4 && !(off % 4)) {
  59. u32 val;
  60. if (iswrite) {
  61. if (copy_from_user(&val, buf, 4))
  62. return -EFAULT;
  63. vfio_iowrite32(val, io + off);
  64. } else {
  65. val = vfio_ioread32(io + off);
  66. if (copy_to_user(buf, &val, 4))
  67. return -EFAULT;
  68. }
  69. filled = 4;
  70. } else if (fillable >= 2 && !(off % 2)) {
  71. u16 val;
  72. if (iswrite) {
  73. if (copy_from_user(&val, buf, 2))
  74. return -EFAULT;
  75. vfio_iowrite16(val, io + off);
  76. } else {
  77. val = vfio_ioread16(io + off);
  78. if (copy_to_user(buf, &val, 2))
  79. return -EFAULT;
  80. }
  81. filled = 2;
  82. } else if (fillable) {
  83. u8 val;
  84. if (iswrite) {
  85. if (copy_from_user(&val, buf, 1))
  86. return -EFAULT;
  87. vfio_iowrite8(val, io + off);
  88. } else {
  89. val = vfio_ioread8(io + off);
  90. if (copy_to_user(buf, &val, 1))
  91. return -EFAULT;
  92. }
  93. filled = 1;
  94. } else {
  95. /* Fill reads with -1, drop writes */
  96. filled = min(count, (size_t)(x_end - off));
  97. if (!iswrite) {
  98. u8 val = 0xFF;
  99. size_t i;
  100. for (i = 0; i < filled; i++)
  101. if (copy_to_user(buf + i, &val, 1))
  102. return -EFAULT;
  103. }
  104. }
  105. count -= filled;
  106. done += filled;
  107. off += filled;
  108. buf += filled;
  109. }
  110. return done;
  111. }
  112. static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar)
  113. {
  114. struct pci_dev *pdev = vdev->pdev;
  115. int ret;
  116. void __iomem *io;
  117. if (vdev->barmap[bar])
  118. return 0;
  119. ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
  120. if (ret)
  121. return ret;
  122. io = pci_iomap(pdev, bar, 0);
  123. if (!io) {
  124. pci_release_selected_regions(pdev, 1 << bar);
  125. return -ENOMEM;
  126. }
  127. vdev->barmap[bar] = io;
  128. return 0;
  129. }
  130. ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
  131. size_t count, loff_t *ppos, bool iswrite)
  132. {
  133. struct pci_dev *pdev = vdev->pdev;
  134. loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
  135. int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
  136. size_t x_start = 0, x_end = 0;
  137. resource_size_t end;
  138. void __iomem *io;
  139. struct resource *res = &vdev->pdev->resource[bar];
  140. ssize_t done;
  141. if (pci_resource_start(pdev, bar))
  142. end = pci_resource_len(pdev, bar);
  143. else if (bar == PCI_ROM_RESOURCE &&
  144. pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
  145. end = 0x20000;
  146. else
  147. return -EINVAL;
  148. if (pos >= end)
  149. return -EINVAL;
  150. count = min(count, (size_t)(end - pos));
  151. if (res->flags & IORESOURCE_MEM) {
  152. down_read(&vdev->memory_lock);
  153. if (!__vfio_pci_memory_enabled(vdev)) {
  154. up_read(&vdev->memory_lock);
  155. return -EIO;
  156. }
  157. }
  158. if (bar == PCI_ROM_RESOURCE) {
  159. /*
  160. * The ROM can fill less space than the BAR, so we start the
  161. * excluded range at the end of the actual ROM. This makes
  162. * filling large ROM BARs much faster.
  163. */
  164. io = pci_map_rom(pdev, &x_start);
  165. if (!io) {
  166. done = -ENOMEM;
  167. goto out;
  168. }
  169. x_end = end;
  170. } else {
  171. int ret = vfio_pci_setup_barmap(vdev, bar);
  172. if (ret) {
  173. done = ret;
  174. goto out;
  175. }
  176. io = vdev->barmap[bar];
  177. }
  178. if (bar == vdev->msix_bar) {
  179. x_start = vdev->msix_offset;
  180. x_end = vdev->msix_offset + vdev->msix_size;
  181. }
  182. done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
  183. if (done >= 0)
  184. *ppos += done;
  185. if (bar == PCI_ROM_RESOURCE)
  186. pci_unmap_rom(pdev, io);
  187. out:
  188. if (res->flags & IORESOURCE_MEM)
  189. up_read(&vdev->memory_lock);
  190. return done;
  191. }
  192. ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
  193. size_t count, loff_t *ppos, bool iswrite)
  194. {
  195. int ret;
  196. loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
  197. void __iomem *iomem = NULL;
  198. unsigned int rsrc;
  199. bool is_ioport;
  200. ssize_t done;
  201. if (!vdev->has_vga)
  202. return -EINVAL;
  203. if (pos > 0xbfffful)
  204. return -EINVAL;
  205. switch ((u32)pos) {
  206. case 0xa0000 ... 0xbffff:
  207. count = min(count, (size_t)(0xc0000 - pos));
  208. iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1);
  209. off = pos - 0xa0000;
  210. rsrc = VGA_RSRC_LEGACY_MEM;
  211. is_ioport = false;
  212. break;
  213. case 0x3b0 ... 0x3bb:
  214. count = min(count, (size_t)(0x3bc - pos));
  215. iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
  216. off = pos - 0x3b0;
  217. rsrc = VGA_RSRC_LEGACY_IO;
  218. is_ioport = true;
  219. break;
  220. case 0x3c0 ... 0x3df:
  221. count = min(count, (size_t)(0x3e0 - pos));
  222. iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
  223. off = pos - 0x3c0;
  224. rsrc = VGA_RSRC_LEGACY_IO;
  225. is_ioport = true;
  226. break;
  227. default:
  228. return -EINVAL;
  229. }
  230. if (!iomem)
  231. return -ENOMEM;
  232. ret = vga_get_interruptible(vdev->pdev, rsrc);
  233. if (ret) {
  234. is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
  235. return ret;
  236. }
  237. done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
  238. vga_put(vdev->pdev, rsrc);
  239. is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
  240. if (done >= 0)
  241. *ppos += done;
  242. return done;
  243. }
  244. static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
  245. {
  246. struct vfio_pci_ioeventfd *ioeventfd = opaque;
  247. switch (ioeventfd->count) {
  248. case 1:
  249. vfio_iowrite8(ioeventfd->data, ioeventfd->addr);
  250. break;
  251. case 2:
  252. vfio_iowrite16(ioeventfd->data, ioeventfd->addr);
  253. break;
  254. case 4:
  255. vfio_iowrite32(ioeventfd->data, ioeventfd->addr);
  256. break;
  257. #ifdef iowrite64
  258. case 8:
  259. vfio_iowrite64(ioeventfd->data, ioeventfd->addr);
  260. break;
  261. #endif
  262. }
  263. return 0;
  264. }
  265. long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
  266. uint64_t data, int count, int fd)
  267. {
  268. struct pci_dev *pdev = vdev->pdev;
  269. loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
  270. int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
  271. struct vfio_pci_ioeventfd *ioeventfd;
  272. /* Only support ioeventfds into BARs */
  273. if (bar > VFIO_PCI_BAR5_REGION_INDEX)
  274. return -EINVAL;
  275. if (pos + count > pci_resource_len(pdev, bar))
  276. return -EINVAL;
  277. /* Disallow ioeventfds working around MSI-X table writes */
  278. if (bar == vdev->msix_bar &&
  279. !(pos + count <= vdev->msix_offset ||
  280. pos >= vdev->msix_offset + vdev->msix_size))
  281. return -EINVAL;
  282. #ifndef iowrite64
  283. if (count == 8)
  284. return -EINVAL;
  285. #endif
  286. ret = vfio_pci_setup_barmap(vdev, bar);
  287. if (ret)
  288. return ret;
  289. mutex_lock(&vdev->ioeventfds_lock);
  290. list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
  291. if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
  292. ioeventfd->data == data && ioeventfd->count == count) {
  293. if (fd == -1) {
  294. vfio_virqfd_disable(&ioeventfd->virqfd);
  295. list_del(&ioeventfd->next);
  296. vdev->ioeventfds_nr--;
  297. kfree(ioeventfd);
  298. ret = 0;
  299. } else
  300. ret = -EEXIST;
  301. goto out_unlock;
  302. }
  303. }
  304. if (fd < 0) {
  305. ret = -ENODEV;
  306. goto out_unlock;
  307. }
  308. if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
  309. ret = -ENOSPC;
  310. goto out_unlock;
  311. }
  312. ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
  313. if (!ioeventfd) {
  314. ret = -ENOMEM;
  315. goto out_unlock;
  316. }
  317. ioeventfd->addr = vdev->barmap[bar] + pos;
  318. ioeventfd->data = data;
  319. ioeventfd->pos = pos;
  320. ioeventfd->bar = bar;
  321. ioeventfd->count = count;
  322. ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
  323. NULL, NULL, &ioeventfd->virqfd, fd);
  324. if (ret) {
  325. kfree(ioeventfd);
  326. goto out_unlock;
  327. }
  328. list_add(&ioeventfd->next, &vdev->ioeventfds_list);
  329. vdev->ioeventfds_nr++;
  330. out_unlock:
  331. mutex_unlock(&vdev->ioeventfds_lock);
  332. return ret;
  333. }