| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607 |
- // SPDX-License-Identifier: GPL-2.0-only
- /*
- * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
- *
- * VFIO container (/dev/vfio/vfio)
- */
- #include <linux/file.h>
- #include <linux/slab.h>
- #include <linux/fs.h>
- #include <linux/capability.h>
- #include <linux/iommu.h>
- #include <linux/miscdevice.h>
- #include <linux/vfio.h>
- #include <uapi/linux/vfio.h>
- #include "vfio.h"
- struct vfio_container {
- struct kref kref;
- struct list_head group_list;
- struct rw_semaphore group_lock;
- struct vfio_iommu_driver *iommu_driver;
- void *iommu_data;
- bool noiommu;
- };
- static struct vfio {
- struct list_head iommu_drivers_list;
- struct mutex iommu_drivers_lock;
- } vfio;
- static void *vfio_noiommu_open(unsigned long arg)
- {
- if (arg != VFIO_NOIOMMU_IOMMU)
- return ERR_PTR(-EINVAL);
- if (!capable(CAP_SYS_RAWIO))
- return ERR_PTR(-EPERM);
- return NULL;
- }
/*
 * release() callback of the no-IOMMU backend.  vfio_noiommu_open() returns
 * no state, so there is nothing to free here.
 */
static void vfio_noiommu_release(void *iommu_data)
{
	/* Intentionally empty. */
}
- static long vfio_noiommu_ioctl(void *iommu_data,
- unsigned int cmd, unsigned long arg)
- {
- if (cmd == VFIO_CHECK_EXTENSION)
- return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
- return -ENOTTY;
- }
- static int vfio_noiommu_attach_group(void *iommu_data,
- struct iommu_group *iommu_group, enum vfio_group_type type)
- {
- return 0;
- }
/*
 * detach_group() callback of the no-IOMMU backend.  The matching attach
 * callback does nothing, so there is nothing to undo.
 */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	/* Intentionally empty. */
}
- static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
- .name = "vfio-noiommu",
- .owner = THIS_MODULE,
- .open = vfio_noiommu_open,
- .release = vfio_noiommu_release,
- .ioctl = vfio_noiommu_ioctl,
- .attach_group = vfio_noiommu_attach_group,
- .detach_group = vfio_noiommu_detach_group,
- };
- /*
- * Only noiommu containers can use vfio-noiommu and noiommu containers can only
- * use vfio-noiommu.
- */
- static bool vfio_iommu_driver_allowed(struct vfio_container *container,
- const struct vfio_iommu_driver *driver)
- {
- if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
- return true;
- return container->noiommu == (driver->ops == &vfio_noiommu_ops);
- }
- /*
- * IOMMU driver registration
- */
- int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
- {
- struct vfio_iommu_driver *driver, *tmp;
- if (WARN_ON(!ops->register_device != !ops->unregister_device))
- return -EINVAL;
- driver = kzalloc(sizeof(*driver), GFP_KERNEL);
- if (!driver)
- return -ENOMEM;
- driver->ops = ops;
- mutex_lock(&vfio.iommu_drivers_lock);
- /* Check for duplicates */
- list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
- if (tmp->ops == ops) {
- mutex_unlock(&vfio.iommu_drivers_lock);
- kfree(driver);
- return -EINVAL;
- }
- }
- list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
- mutex_unlock(&vfio.iommu_drivers_lock);
- return 0;
- }
- EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
- void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
- {
- struct vfio_iommu_driver *driver;
- mutex_lock(&vfio.iommu_drivers_lock);
- list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
- if (driver->ops == ops) {
- list_del(&driver->vfio_next);
- mutex_unlock(&vfio.iommu_drivers_lock);
- kfree(driver);
- return;
- }
- }
- mutex_unlock(&vfio.iommu_drivers_lock);
- }
- EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
- /*
- * Container objects - containers are created when /dev/vfio/vfio is
- * opened, but their lifecycle extends until the last user is done, so
- * it's freed via kref. Must support container/group/device being
- * closed in any order.
- */
- static void vfio_container_release(struct kref *kref)
- {
- struct vfio_container *container;
- container = container_of(kref, struct vfio_container, kref);
- kfree(container);
- }
- static void vfio_container_get(struct vfio_container *container)
- {
- kref_get(&container->kref);
- }
- static void vfio_container_put(struct vfio_container *container)
- {
- kref_put(&container->kref, vfio_container_release);
- }
- void vfio_device_container_register(struct vfio_device *device)
- {
- struct vfio_iommu_driver *iommu_driver =
- device->group->container->iommu_driver;
- if (iommu_driver && iommu_driver->ops->register_device)
- iommu_driver->ops->register_device(
- device->group->container->iommu_data, device);
- }
- void vfio_device_container_unregister(struct vfio_device *device)
- {
- struct vfio_iommu_driver *iommu_driver =
- device->group->container->iommu_driver;
- if (iommu_driver && iommu_driver->ops->unregister_device)
- iommu_driver->ops->unregister_device(
- device->group->container->iommu_data, device);
- }
- static long
- vfio_container_ioctl_check_extension(struct vfio_container *container,
- unsigned long arg)
- {
- struct vfio_iommu_driver *driver;
- long ret = 0;
- down_read(&container->group_lock);
- driver = container->iommu_driver;
- switch (arg) {
- /* No base extensions yet */
- default:
- /*
- * If no driver is set, poll all registered drivers for
- * extensions and return the first positive result. If
- * a driver is already set, further queries will be passed
- * only to that driver.
- */
- if (!driver) {
- mutex_lock(&vfio.iommu_drivers_lock);
- list_for_each_entry(driver, &vfio.iommu_drivers_list,
- vfio_next) {
- if (!list_empty(&container->group_list) &&
- !vfio_iommu_driver_allowed(container,
- driver))
- continue;
- if (!try_module_get(driver->ops->owner))
- continue;
- ret = driver->ops->ioctl(NULL,
- VFIO_CHECK_EXTENSION,
- arg);
- module_put(driver->ops->owner);
- if (ret > 0)
- break;
- }
- mutex_unlock(&vfio.iommu_drivers_lock);
- } else
- ret = driver->ops->ioctl(container->iommu_data,
- VFIO_CHECK_EXTENSION, arg);
- }
- up_read(&container->group_lock);
- return ret;
- }
- /* hold write lock on container->group_lock */
- static int __vfio_container_attach_groups(struct vfio_container *container,
- struct vfio_iommu_driver *driver,
- void *data)
- {
- struct vfio_group *group;
- int ret = -ENODEV;
- list_for_each_entry(group, &container->group_list, container_next) {
- ret = driver->ops->attach_group(data, group->iommu_group,
- group->type);
- if (ret)
- goto unwind;
- }
- return ret;
- unwind:
- list_for_each_entry_continue_reverse(group, &container->group_list,
- container_next) {
- driver->ops->detach_group(data, group->iommu_group);
- }
- return ret;
- }
- static long vfio_ioctl_set_iommu(struct vfio_container *container,
- unsigned long arg)
- {
- struct vfio_iommu_driver *driver;
- long ret = -ENODEV;
- down_write(&container->group_lock);
- /*
- * The container is designed to be an unprivileged interface while
- * the group can be assigned to specific users. Therefore, only by
- * adding a group to a container does the user get the privilege of
- * enabling the iommu, which may allocate finite resources. There
- * is no unset_iommu, but by removing all the groups from a container,
- * the container is deprivileged and returns to an unset state.
- */
- if (list_empty(&container->group_list) || container->iommu_driver) {
- up_write(&container->group_lock);
- return -EINVAL;
- }
- mutex_lock(&vfio.iommu_drivers_lock);
- list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
- void *data;
- if (!vfio_iommu_driver_allowed(container, driver))
- continue;
- if (!try_module_get(driver->ops->owner))
- continue;
- /*
- * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
- * so test which iommu driver reported support for this
- * extension and call open on them. We also pass them the
- * magic, allowing a single driver to support multiple
- * interfaces if they'd like.
- */
- if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
- module_put(driver->ops->owner);
- continue;
- }
- data = driver->ops->open(arg);
- if (IS_ERR(data)) {
- ret = PTR_ERR(data);
- module_put(driver->ops->owner);
- continue;
- }
- ret = __vfio_container_attach_groups(container, driver, data);
- if (ret) {
- driver->ops->release(data);
- module_put(driver->ops->owner);
- continue;
- }
- container->iommu_driver = driver;
- container->iommu_data = data;
- break;
- }
- mutex_unlock(&vfio.iommu_drivers_lock);
- up_write(&container->group_lock);
- return ret;
- }
- static long vfio_fops_unl_ioctl(struct file *filep,
- unsigned int cmd, unsigned long arg)
- {
- struct vfio_container *container = filep->private_data;
- struct vfio_iommu_driver *driver;
- void *data;
- long ret = -EINVAL;
- if (!container)
- return ret;
- switch (cmd) {
- case VFIO_GET_API_VERSION:
- ret = VFIO_API_VERSION;
- break;
- case VFIO_CHECK_EXTENSION:
- ret = vfio_container_ioctl_check_extension(container, arg);
- break;
- case VFIO_SET_IOMMU:
- ret = vfio_ioctl_set_iommu(container, arg);
- break;
- default:
- driver = container->iommu_driver;
- data = container->iommu_data;
- if (driver) /* passthrough all unrecognized ioctls */
- ret = driver->ops->ioctl(data, cmd, arg);
- }
- return ret;
- }
- static int vfio_fops_open(struct inode *inode, struct file *filep)
- {
- struct vfio_container *container;
- container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
- if (!container)
- return -ENOMEM;
- INIT_LIST_HEAD(&container->group_list);
- init_rwsem(&container->group_lock);
- kref_init(&container->kref);
- filep->private_data = container;
- return 0;
- }
- static int vfio_fops_release(struct inode *inode, struct file *filep)
- {
- struct vfio_container *container = filep->private_data;
- filep->private_data = NULL;
- vfio_container_put(container);
- return 0;
- }
- static const struct file_operations vfio_fops = {
- .owner = THIS_MODULE,
- .open = vfio_fops_open,
- .release = vfio_fops_release,
- .unlocked_ioctl = vfio_fops_unl_ioctl,
- .compat_ioctl = compat_ptr_ioctl,
- };
- struct vfio_container *vfio_container_from_file(struct file *file)
- {
- struct vfio_container *container;
- /* Sanity check, is this really our fd? */
- if (file->f_op != &vfio_fops)
- return NULL;
- container = file->private_data;
- WARN_ON(!container); /* fget ensures we don't race vfio_release */
- return container;
- }
- static struct miscdevice vfio_dev = {
- .minor = VFIO_MINOR,
- .name = "vfio",
- .fops = &vfio_fops,
- .nodename = "vfio/vfio",
- .mode = S_IRUGO | S_IWUGO,
- };
- int vfio_container_attach_group(struct vfio_container *container,
- struct vfio_group *group)
- {
- struct vfio_iommu_driver *driver;
- int ret = 0;
- lockdep_assert_held(&group->group_lock);
- if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
- return -EPERM;
- down_write(&container->group_lock);
- /* Real groups and fake groups cannot mix */
- if (!list_empty(&container->group_list) &&
- container->noiommu != (group->type == VFIO_NO_IOMMU)) {
- ret = -EPERM;
- goto out_unlock_container;
- }
- if (group->type == VFIO_IOMMU) {
- ret = iommu_group_claim_dma_owner(group->iommu_group, group);
- if (ret)
- goto out_unlock_container;
- }
- driver = container->iommu_driver;
- if (driver) {
- ret = driver->ops->attach_group(container->iommu_data,
- group->iommu_group,
- group->type);
- if (ret) {
- if (group->type == VFIO_IOMMU)
- iommu_group_release_dma_owner(
- group->iommu_group);
- goto out_unlock_container;
- }
- }
- group->container = container;
- group->container_users = 1;
- container->noiommu = (group->type == VFIO_NO_IOMMU);
- list_add(&group->container_next, &container->group_list);
- /* Get a reference on the container and mark a user within the group */
- vfio_container_get(container);
- out_unlock_container:
- up_write(&container->group_lock);
- return ret;
- }
- void vfio_group_detach_container(struct vfio_group *group)
- {
- struct vfio_container *container = group->container;
- struct vfio_iommu_driver *driver;
- lockdep_assert_held(&group->group_lock);
- WARN_ON(group->container_users != 1);
- down_write(&container->group_lock);
- driver = container->iommu_driver;
- if (driver)
- driver->ops->detach_group(container->iommu_data,
- group->iommu_group);
- if (group->type == VFIO_IOMMU)
- iommu_group_release_dma_owner(group->iommu_group);
- group->container = NULL;
- group->container_users = 0;
- list_del(&group->container_next);
- /* Detaching the last group deprivileges a container, remove iommu */
- if (driver && list_empty(&container->group_list)) {
- driver->ops->release(container->iommu_data);
- module_put(driver->ops->owner);
- container->iommu_driver = NULL;
- container->iommu_data = NULL;
- }
- up_write(&container->group_lock);
- vfio_container_put(container);
- }
- int vfio_group_use_container(struct vfio_group *group)
- {
- lockdep_assert_held(&group->group_lock);
- /*
- * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
- * VFIO_SET_IOMMU hasn't been done yet.
- */
- if (!group->container->iommu_driver)
- return -EINVAL;
- if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
- return -EPERM;
- get_file(group->opened_file);
- group->container_users++;
- return 0;
- }
- void vfio_group_unuse_container(struct vfio_group *group)
- {
- lockdep_assert_held(&group->group_lock);
- WARN_ON(group->container_users <= 1);
- group->container_users--;
- fput(group->opened_file);
- }
- int vfio_device_container_pin_pages(struct vfio_device *device,
- dma_addr_t iova, int npage,
- int prot, struct page **pages)
- {
- struct vfio_container *container = device->group->container;
- struct iommu_group *iommu_group = device->group->iommu_group;
- struct vfio_iommu_driver *driver = container->iommu_driver;
- if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
- return -E2BIG;
- if (unlikely(!driver || !driver->ops->pin_pages))
- return -ENOTTY;
- return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
- npage, prot, pages);
- }
- void vfio_device_container_unpin_pages(struct vfio_device *device,
- dma_addr_t iova, int npage)
- {
- struct vfio_container *container = device->group->container;
- if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
- return;
- container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
- npage);
- }
- int vfio_device_container_dma_rw(struct vfio_device *device,
- dma_addr_t iova, void *data,
- size_t len, bool write)
- {
- struct vfio_container *container = device->group->container;
- struct vfio_iommu_driver *driver = container->iommu_driver;
- if (unlikely(!driver || !driver->ops->dma_rw))
- return -ENOTTY;
- return driver->ops->dma_rw(container->iommu_data, iova, data, len,
- write);
- }
- int __init vfio_container_init(void)
- {
- int ret;
- mutex_init(&vfio.iommu_drivers_lock);
- INIT_LIST_HEAD(&vfio.iommu_drivers_list);
- ret = misc_register(&vfio_dev);
- if (ret) {
- pr_err("vfio: misc device register failed\n");
- return ret;
- }
- if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
- ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
- if (ret)
- goto err_misc;
- }
- return 0;
- err_misc:
- misc_deregister(&vfio_dev);
- return ret;
- }
- void vfio_container_cleanup(void)
- {
- if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
- vfio_unregister_iommu_driver(&vfio_noiommu_ops);
- misc_deregister(&vfio_dev);
- mutex_destroy(&vfio.iommu_drivers_lock);
- }
- MODULE_ALIAS_MISCDEV(VFIO_MINOR);
- MODULE_ALIAS("devname:vfio/vfio");
|