container.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
  4. *
  5. * VFIO container (/dev/vfio/vfio)
  6. */
  7. #include <linux/file.h>
  8. #include <linux/slab.h>
  9. #include <linux/fs.h>
  10. #include <linux/capability.h>
  11. #include <linux/iommu.h>
  12. #include <linux/miscdevice.h>
  13. #include <linux/vfio.h>
  14. #include <uapi/linux/vfio.h>
  15. #include "vfio.h"
  16. struct vfio_container {
  17. struct kref kref;
  18. struct list_head group_list;
  19. struct rw_semaphore group_lock;
  20. struct vfio_iommu_driver *iommu_driver;
  21. void *iommu_data;
  22. bool noiommu;
  23. };
  24. static struct vfio {
  25. struct list_head iommu_drivers_list;
  26. struct mutex iommu_drivers_lock;
  27. } vfio;
  28. static void *vfio_noiommu_open(unsigned long arg)
  29. {
  30. if (arg != VFIO_NOIOMMU_IOMMU)
  31. return ERR_PTR(-EINVAL);
  32. if (!capable(CAP_SYS_RAWIO))
  33. return ERR_PTR(-EPERM);
  34. return NULL;
  35. }
  36. static void vfio_noiommu_release(void *iommu_data)
  37. {
  38. }
  39. static long vfio_noiommu_ioctl(void *iommu_data,
  40. unsigned int cmd, unsigned long arg)
  41. {
  42. if (cmd == VFIO_CHECK_EXTENSION)
  43. return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
  44. return -ENOTTY;
  45. }
  46. static int vfio_noiommu_attach_group(void *iommu_data,
  47. struct iommu_group *iommu_group, enum vfio_group_type type)
  48. {
  49. return 0;
  50. }
  51. static void vfio_noiommu_detach_group(void *iommu_data,
  52. struct iommu_group *iommu_group)
  53. {
  54. }
  55. static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
  56. .name = "vfio-noiommu",
  57. .owner = THIS_MODULE,
  58. .open = vfio_noiommu_open,
  59. .release = vfio_noiommu_release,
  60. .ioctl = vfio_noiommu_ioctl,
  61. .attach_group = vfio_noiommu_attach_group,
  62. .detach_group = vfio_noiommu_detach_group,
  63. };
  64. /*
  65. * Only noiommu containers can use vfio-noiommu and noiommu containers can only
  66. * use vfio-noiommu.
  67. */
  68. static bool vfio_iommu_driver_allowed(struct vfio_container *container,
  69. const struct vfio_iommu_driver *driver)
  70. {
  71. if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
  72. return true;
  73. return container->noiommu == (driver->ops == &vfio_noiommu_ops);
  74. }
  75. /*
  76. * IOMMU driver registration
  77. */
  78. int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
  79. {
  80. struct vfio_iommu_driver *driver, *tmp;
  81. if (WARN_ON(!ops->register_device != !ops->unregister_device))
  82. return -EINVAL;
  83. driver = kzalloc(sizeof(*driver), GFP_KERNEL);
  84. if (!driver)
  85. return -ENOMEM;
  86. driver->ops = ops;
  87. mutex_lock(&vfio.iommu_drivers_lock);
  88. /* Check for duplicates */
  89. list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
  90. if (tmp->ops == ops) {
  91. mutex_unlock(&vfio.iommu_drivers_lock);
  92. kfree(driver);
  93. return -EINVAL;
  94. }
  95. }
  96. list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
  97. mutex_unlock(&vfio.iommu_drivers_lock);
  98. return 0;
  99. }
  100. EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
  101. void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
  102. {
  103. struct vfio_iommu_driver *driver;
  104. mutex_lock(&vfio.iommu_drivers_lock);
  105. list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
  106. if (driver->ops == ops) {
  107. list_del(&driver->vfio_next);
  108. mutex_unlock(&vfio.iommu_drivers_lock);
  109. kfree(driver);
  110. return;
  111. }
  112. }
  113. mutex_unlock(&vfio.iommu_drivers_lock);
  114. }
  115. EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
  116. /*
  117. * Container objects - containers are created when /dev/vfio/vfio is
  118. * opened, but their lifecycle extends until the last user is done, so
  119. * it's freed via kref. Must support container/group/device being
  120. * closed in any order.
  121. */
  122. static void vfio_container_release(struct kref *kref)
  123. {
  124. struct vfio_container *container;
  125. container = container_of(kref, struct vfio_container, kref);
  126. kfree(container);
  127. }
  128. static void vfio_container_get(struct vfio_container *container)
  129. {
  130. kref_get(&container->kref);
  131. }
  132. static void vfio_container_put(struct vfio_container *container)
  133. {
  134. kref_put(&container->kref, vfio_container_release);
  135. }
  136. void vfio_device_container_register(struct vfio_device *device)
  137. {
  138. struct vfio_iommu_driver *iommu_driver =
  139. device->group->container->iommu_driver;
  140. if (iommu_driver && iommu_driver->ops->register_device)
  141. iommu_driver->ops->register_device(
  142. device->group->container->iommu_data, device);
  143. }
  144. void vfio_device_container_unregister(struct vfio_device *device)
  145. {
  146. struct vfio_iommu_driver *iommu_driver =
  147. device->group->container->iommu_driver;
  148. if (iommu_driver && iommu_driver->ops->unregister_device)
  149. iommu_driver->ops->unregister_device(
  150. device->group->container->iommu_data, device);
  151. }
  152. static long
  153. vfio_container_ioctl_check_extension(struct vfio_container *container,
  154. unsigned long arg)
  155. {
  156. struct vfio_iommu_driver *driver;
  157. long ret = 0;
  158. down_read(&container->group_lock);
  159. driver = container->iommu_driver;
  160. switch (arg) {
  161. /* No base extensions yet */
  162. default:
  163. /*
  164. * If no driver is set, poll all registered drivers for
  165. * extensions and return the first positive result. If
  166. * a driver is already set, further queries will be passed
  167. * only to that driver.
  168. */
  169. if (!driver) {
  170. mutex_lock(&vfio.iommu_drivers_lock);
  171. list_for_each_entry(driver, &vfio.iommu_drivers_list,
  172. vfio_next) {
  173. if (!list_empty(&container->group_list) &&
  174. !vfio_iommu_driver_allowed(container,
  175. driver))
  176. continue;
  177. if (!try_module_get(driver->ops->owner))
  178. continue;
  179. ret = driver->ops->ioctl(NULL,
  180. VFIO_CHECK_EXTENSION,
  181. arg);
  182. module_put(driver->ops->owner);
  183. if (ret > 0)
  184. break;
  185. }
  186. mutex_unlock(&vfio.iommu_drivers_lock);
  187. } else
  188. ret = driver->ops->ioctl(container->iommu_data,
  189. VFIO_CHECK_EXTENSION, arg);
  190. }
  191. up_read(&container->group_lock);
  192. return ret;
  193. }
  194. /* hold write lock on container->group_lock */
  195. static int __vfio_container_attach_groups(struct vfio_container *container,
  196. struct vfio_iommu_driver *driver,
  197. void *data)
  198. {
  199. struct vfio_group *group;
  200. int ret = -ENODEV;
  201. list_for_each_entry(group, &container->group_list, container_next) {
  202. ret = driver->ops->attach_group(data, group->iommu_group,
  203. group->type);
  204. if (ret)
  205. goto unwind;
  206. }
  207. return ret;
  208. unwind:
  209. list_for_each_entry_continue_reverse(group, &container->group_list,
  210. container_next) {
  211. driver->ops->detach_group(data, group->iommu_group);
  212. }
  213. return ret;
  214. }
  215. static long vfio_ioctl_set_iommu(struct vfio_container *container,
  216. unsigned long arg)
  217. {
  218. struct vfio_iommu_driver *driver;
  219. long ret = -ENODEV;
  220. down_write(&container->group_lock);
  221. /*
  222. * The container is designed to be an unprivileged interface while
  223. * the group can be assigned to specific users. Therefore, only by
  224. * adding a group to a container does the user get the privilege of
  225. * enabling the iommu, which may allocate finite resources. There
  226. * is no unset_iommu, but by removing all the groups from a container,
  227. * the container is deprivileged and returns to an unset state.
  228. */
  229. if (list_empty(&container->group_list) || container->iommu_driver) {
  230. up_write(&container->group_lock);
  231. return -EINVAL;
  232. }
  233. mutex_lock(&vfio.iommu_drivers_lock);
  234. list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
  235. void *data;
  236. if (!vfio_iommu_driver_allowed(container, driver))
  237. continue;
  238. if (!try_module_get(driver->ops->owner))
  239. continue;
  240. /*
  241. * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
  242. * so test which iommu driver reported support for this
  243. * extension and call open on them. We also pass them the
  244. * magic, allowing a single driver to support multiple
  245. * interfaces if they'd like.
  246. */
  247. if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
  248. module_put(driver->ops->owner);
  249. continue;
  250. }
  251. data = driver->ops->open(arg);
  252. if (IS_ERR(data)) {
  253. ret = PTR_ERR(data);
  254. module_put(driver->ops->owner);
  255. continue;
  256. }
  257. ret = __vfio_container_attach_groups(container, driver, data);
  258. if (ret) {
  259. driver->ops->release(data);
  260. module_put(driver->ops->owner);
  261. continue;
  262. }
  263. container->iommu_driver = driver;
  264. container->iommu_data = data;
  265. break;
  266. }
  267. mutex_unlock(&vfio.iommu_drivers_lock);
  268. up_write(&container->group_lock);
  269. return ret;
  270. }
  271. static long vfio_fops_unl_ioctl(struct file *filep,
  272. unsigned int cmd, unsigned long arg)
  273. {
  274. struct vfio_container *container = filep->private_data;
  275. struct vfio_iommu_driver *driver;
  276. void *data;
  277. long ret = -EINVAL;
  278. if (!container)
  279. return ret;
  280. switch (cmd) {
  281. case VFIO_GET_API_VERSION:
  282. ret = VFIO_API_VERSION;
  283. break;
  284. case VFIO_CHECK_EXTENSION:
  285. ret = vfio_container_ioctl_check_extension(container, arg);
  286. break;
  287. case VFIO_SET_IOMMU:
  288. ret = vfio_ioctl_set_iommu(container, arg);
  289. break;
  290. default:
  291. driver = container->iommu_driver;
  292. data = container->iommu_data;
  293. if (driver) /* passthrough all unrecognized ioctls */
  294. ret = driver->ops->ioctl(data, cmd, arg);
  295. }
  296. return ret;
  297. }
  298. static int vfio_fops_open(struct inode *inode, struct file *filep)
  299. {
  300. struct vfio_container *container;
  301. container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
  302. if (!container)
  303. return -ENOMEM;
  304. INIT_LIST_HEAD(&container->group_list);
  305. init_rwsem(&container->group_lock);
  306. kref_init(&container->kref);
  307. filep->private_data = container;
  308. return 0;
  309. }
  310. static int vfio_fops_release(struct inode *inode, struct file *filep)
  311. {
  312. struct vfio_container *container = filep->private_data;
  313. filep->private_data = NULL;
  314. vfio_container_put(container);
  315. return 0;
  316. }
  317. static const struct file_operations vfio_fops = {
  318. .owner = THIS_MODULE,
  319. .open = vfio_fops_open,
  320. .release = vfio_fops_release,
  321. .unlocked_ioctl = vfio_fops_unl_ioctl,
  322. .compat_ioctl = compat_ptr_ioctl,
  323. };
  324. struct vfio_container *vfio_container_from_file(struct file *file)
  325. {
  326. struct vfio_container *container;
  327. /* Sanity check, is this really our fd? */
  328. if (file->f_op != &vfio_fops)
  329. return NULL;
  330. container = file->private_data;
  331. WARN_ON(!container); /* fget ensures we don't race vfio_release */
  332. return container;
  333. }
  334. static struct miscdevice vfio_dev = {
  335. .minor = VFIO_MINOR,
  336. .name = "vfio",
  337. .fops = &vfio_fops,
  338. .nodename = "vfio/vfio",
  339. .mode = S_IRUGO | S_IWUGO,
  340. };
  341. int vfio_container_attach_group(struct vfio_container *container,
  342. struct vfio_group *group)
  343. {
  344. struct vfio_iommu_driver *driver;
  345. int ret = 0;
  346. lockdep_assert_held(&group->group_lock);
  347. if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
  348. return -EPERM;
  349. down_write(&container->group_lock);
  350. /* Real groups and fake groups cannot mix */
  351. if (!list_empty(&container->group_list) &&
  352. container->noiommu != (group->type == VFIO_NO_IOMMU)) {
  353. ret = -EPERM;
  354. goto out_unlock_container;
  355. }
  356. if (group->type == VFIO_IOMMU) {
  357. ret = iommu_group_claim_dma_owner(group->iommu_group, group);
  358. if (ret)
  359. goto out_unlock_container;
  360. }
  361. driver = container->iommu_driver;
  362. if (driver) {
  363. ret = driver->ops->attach_group(container->iommu_data,
  364. group->iommu_group,
  365. group->type);
  366. if (ret) {
  367. if (group->type == VFIO_IOMMU)
  368. iommu_group_release_dma_owner(
  369. group->iommu_group);
  370. goto out_unlock_container;
  371. }
  372. }
  373. group->container = container;
  374. group->container_users = 1;
  375. container->noiommu = (group->type == VFIO_NO_IOMMU);
  376. list_add(&group->container_next, &container->group_list);
  377. /* Get a reference on the container and mark a user within the group */
  378. vfio_container_get(container);
  379. out_unlock_container:
  380. up_write(&container->group_lock);
  381. return ret;
  382. }
  383. void vfio_group_detach_container(struct vfio_group *group)
  384. {
  385. struct vfio_container *container = group->container;
  386. struct vfio_iommu_driver *driver;
  387. lockdep_assert_held(&group->group_lock);
  388. WARN_ON(group->container_users != 1);
  389. down_write(&container->group_lock);
  390. driver = container->iommu_driver;
  391. if (driver)
  392. driver->ops->detach_group(container->iommu_data,
  393. group->iommu_group);
  394. if (group->type == VFIO_IOMMU)
  395. iommu_group_release_dma_owner(group->iommu_group);
  396. group->container = NULL;
  397. group->container_users = 0;
  398. list_del(&group->container_next);
  399. /* Detaching the last group deprivileges a container, remove iommu */
  400. if (driver && list_empty(&container->group_list)) {
  401. driver->ops->release(container->iommu_data);
  402. module_put(driver->ops->owner);
  403. container->iommu_driver = NULL;
  404. container->iommu_data = NULL;
  405. }
  406. up_write(&container->group_lock);
  407. vfio_container_put(container);
  408. }
  409. int vfio_group_use_container(struct vfio_group *group)
  410. {
  411. lockdep_assert_held(&group->group_lock);
  412. /*
  413. * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
  414. * VFIO_SET_IOMMU hasn't been done yet.
  415. */
  416. if (!group->container->iommu_driver)
  417. return -EINVAL;
  418. if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
  419. return -EPERM;
  420. get_file(group->opened_file);
  421. group->container_users++;
  422. return 0;
  423. }
  424. void vfio_group_unuse_container(struct vfio_group *group)
  425. {
  426. lockdep_assert_held(&group->group_lock);
  427. WARN_ON(group->container_users <= 1);
  428. group->container_users--;
  429. fput(group->opened_file);
  430. }
  431. int vfio_device_container_pin_pages(struct vfio_device *device,
  432. dma_addr_t iova, int npage,
  433. int prot, struct page **pages)
  434. {
  435. struct vfio_container *container = device->group->container;
  436. struct iommu_group *iommu_group = device->group->iommu_group;
  437. struct vfio_iommu_driver *driver = container->iommu_driver;
  438. if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
  439. return -E2BIG;
  440. if (unlikely(!driver || !driver->ops->pin_pages))
  441. return -ENOTTY;
  442. return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
  443. npage, prot, pages);
  444. }
  445. void vfio_device_container_unpin_pages(struct vfio_device *device,
  446. dma_addr_t iova, int npage)
  447. {
  448. struct vfio_container *container = device->group->container;
  449. if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
  450. return;
  451. container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
  452. npage);
  453. }
  454. int vfio_device_container_dma_rw(struct vfio_device *device,
  455. dma_addr_t iova, void *data,
  456. size_t len, bool write)
  457. {
  458. struct vfio_container *container = device->group->container;
  459. struct vfio_iommu_driver *driver = container->iommu_driver;
  460. if (unlikely(!driver || !driver->ops->dma_rw))
  461. return -ENOTTY;
  462. return driver->ops->dma_rw(container->iommu_data, iova, data, len,
  463. write);
  464. }
  465. int __init vfio_container_init(void)
  466. {
  467. int ret;
  468. mutex_init(&vfio.iommu_drivers_lock);
  469. INIT_LIST_HEAD(&vfio.iommu_drivers_list);
  470. ret = misc_register(&vfio_dev);
  471. if (ret) {
  472. pr_err("vfio: misc device register failed\n");
  473. return ret;
  474. }
  475. if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
  476. ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
  477. if (ret)
  478. goto err_misc;
  479. }
  480. return 0;
  481. err_misc:
  482. misc_deregister(&vfio_dev);
  483. return ret;
  484. }
  485. void vfio_container_cleanup(void)
  486. {
  487. if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
  488. vfio_unregister_iommu_driver(&vfio_noiommu_ops);
  489. misc_deregister(&vfio_dev);
  490. mutex_destroy(&vfio.iommu_drivers_lock);
  491. }
  492. MODULE_ALIAS_MISCDEV(VFIO_MINOR);
  493. MODULE_ALIAS("devname:vfio/vfio");