diff options
Diffstat (limited to 'drivers/vfio')
-rw-r--r-- | drivers/vfio/mdev/Makefile | 2 | ||||
-rw-r--r-- | drivers/vfio/mdev/mdev_core.c | 52 | ||||
-rw-r--r-- | drivers/vfio/mdev/mdev_driver.c | 10 | ||||
-rw-r--r-- | drivers/vfio/mdev/mdev_private.h | 6 | ||||
-rw-r--r-- | drivers/vfio/mdev/mdev_sysfs.c | 37 | ||||
-rw-r--r-- | drivers/vfio/mdev/vfio_mdev.c | 152 | ||||
-rw-r--r-- | drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 15 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.c | 236 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.h | 52 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/main.c | 135 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 6 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_core.c | 36 | ||||
-rw-r--r-- | drivers/vfio/vfio.c | 346 |
13 files changed, 380 insertions, 705 deletions
diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile index ff9ecd802125..7c236ba1b90e 100644 --- a/drivers/vfio/mdev/Makefile +++ b/drivers/vfio/mdev/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o vfio_mdev.o +mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o obj-$(CONFIG_VFIO_MDEV) += mdev.o diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index b314101237fe..b8b9e7911e55 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -89,17 +89,10 @@ void mdev_release_parent(struct kref *kref) static void mdev_device_remove_common(struct mdev_device *mdev) { struct mdev_parent *parent = mdev->type->parent; - int ret; mdev_remove_sysfs_files(mdev); device_del(&mdev->dev); lockdep_assert_held(&parent->unreg_sem); - if (parent->ops->remove) { - ret = parent->ops->remove(mdev); - if (ret) - dev_err(&mdev->dev, "Remove failed: err=%d\n", ret); - } - /* Balances with device_initialize() */ put_device(&mdev->dev); } @@ -116,12 +109,12 @@ static int mdev_device_remove_cb(struct device *dev, void *data) /* * mdev_register_device : Register a device * @dev: device structure representing parent device. - * @ops: Parent device operation structure to be registered. + * @mdev_driver: Device driver to bind to the newly created mdev * * Add device to list of registered parent devices. * Returns a negative value on error, otherwise 0. */ -int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) +int mdev_register_device(struct device *dev, struct mdev_driver *mdev_driver) { int ret; struct mdev_parent *parent; @@ -129,9 +122,7 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) char *envp[] = { env_string, NULL }; /* check for mandatory ops */ - if (!ops || !ops->supported_type_groups) - return -EINVAL; - if (!ops->device_driver && (!ops->create || !ops->remove)) + if (!mdev_driver->supported_type_groups) return -EINVAL; dev = get_device(dev); @@ -158,7 +149,7 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) init_rwsem(&parent->unreg_sem); parent->dev = dev; - parent->ops = ops; + parent->mdev_driver = mdev_driver; if (!mdev_bus_compat_class) { mdev_bus_compat_class = class_compat_register("mdev_bus"); @@ -256,7 +247,7 @@ int mdev_device_create(struct mdev_type *type, const guid_t *uuid) int ret; struct mdev_device *mdev, *tmp; struct mdev_parent *parent = type->parent; - struct mdev_driver *drv = parent->ops->device_driver; + struct mdev_driver *drv = parent->mdev_driver; mutex_lock(&mdev_list_lock); @@ -278,7 +269,7 @@ int mdev_device_create(struct mdev_type *type, const guid_t *uuid) mdev->dev.parent = parent->dev; mdev->dev.bus = &mdev_bus_type; mdev->dev.release = mdev_device_release; - mdev->dev.groups = parent->ops->mdev_attr_groups; + mdev->dev.groups = mdev_device_groups; mdev->type = type; /* Pairs with the put in mdev_device_release() */ kobject_get(&type->kobj); @@ -297,18 +288,10 @@ int mdev_device_create(struct mdev_type *type, const guid_t *uuid) goto out_put_device; } - if (parent->ops->create) { - ret = parent->ops->create(mdev); - if (ret) - goto out_unlock; - } - ret = device_add(&mdev->dev); if (ret) - goto out_remove; + goto out_unlock; - if (!drv) - drv = &vfio_mdev_driver; ret = device_driver_attach(&drv->driver, &mdev->dev); if (ret) goto out_del; @@ -325,9 +308,6 @@ int mdev_device_create(struct mdev_type *type, const guid_t *uuid) out_del: device_del(&mdev->dev); -out_remove: - if (parent->ops->remove) - parent->ops->remove(mdev); out_unlock: up_read(&parent->unreg_sem); out_put_device: @@ -370,28 +350,14 @@ int mdev_device_remove(struct mdev_device *mdev) static int __init mdev_init(void) { - int rc; - - rc = mdev_bus_register(); - if (rc) - return rc; - rc = mdev_register_driver(&vfio_mdev_driver); - if (rc) - goto err_bus; - return 0; -err_bus: - mdev_bus_unregister(); - return rc; + return bus_register(&mdev_bus_type); } static void __exit mdev_exit(void) { - mdev_unregister_driver(&vfio_mdev_driver); - if (mdev_bus_compat_class) class_compat_unregister(mdev_bus_compat_class); - - mdev_bus_unregister(); + bus_unregister(&mdev_bus_type); } subsys_initcall(mdev_init) diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c index 7927ed4f1711..9c2af59809e2 100644 --- a/drivers/vfio/mdev/mdev_driver.c +++ b/drivers/vfio/mdev/mdev_driver.c @@ -74,13 +74,3 @@ void mdev_unregister_driver(struct mdev_driver *drv) driver_unregister(&drv->driver); } EXPORT_SYMBOL(mdev_unregister_driver); - -int mdev_bus_register(void) -{ - return bus_register(&mdev_bus_type); -} - -void mdev_bus_unregister(void) -{ - bus_unregister(&mdev_bus_type); -} diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index afbad7b0a14a..7c9fc79f3d83 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -15,7 +15,7 @@ void mdev_bus_unregister(void); struct mdev_parent { struct device *dev; - const struct mdev_parent_ops *ops; + struct mdev_driver *mdev_driver; struct kref ref; struct list_head next; struct kset *mdev_types_kset; @@ -32,13 +32,13 @@ struct mdev_type { unsigned int type_group_id; }; +extern const struct attribute_group *mdev_device_groups[]; + #define to_mdev_type_attr(_attr) \ container_of(_attr, struct mdev_type_attribute, attr) #define to_mdev_type(_kobj) \ container_of(_kobj, struct mdev_type, kobj) -extern struct mdev_driver vfio_mdev_driver; - int parent_create_sysfs_files(struct mdev_parent *parent); void parent_remove_sysfs_files(struct mdev_parent *parent); diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index f5cf1931c54e..0ccfeb3dda24 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -97,7 +97,7 @@ static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent, { struct mdev_type *type; struct attribute_group *group = - parent->ops->supported_type_groups[type_group_id]; + parent->mdev_driver->supported_type_groups[type_group_id]; int ret; if (!group->name) { @@ -154,7 +154,7 @@ attr_create_failed: static void remove_mdev_supported_type(struct mdev_type *type) { struct attribute_group *group = - type->parent->ops->supported_type_groups[type->type_group_id]; + type->parent->mdev_driver->supported_type_groups[type->type_group_id]; sysfs_remove_files(&type->kobj, (const struct attribute **)group->attrs); @@ -168,7 +168,7 @@ static int add_mdev_supported_type_groups(struct mdev_parent *parent) { int i; - for (i = 0; parent->ops->supported_type_groups[i]; i++) { + for (i = 0; parent->mdev_driver->supported_type_groups[i]; i++) { struct mdev_type *type; type = add_mdev_supported_type(parent, i); @@ -197,7 +197,6 @@ void parent_remove_sysfs_files(struct mdev_parent *parent) remove_mdev_supported_type(type); } - sysfs_remove_groups(&parent->dev->kobj, parent->ops->dev_attr_groups); kset_unregister(parent->mdev_types_kset); } @@ -213,17 +212,10 @@ int parent_create_sysfs_files(struct mdev_parent *parent) INIT_LIST_HEAD(&parent->type_list); - ret = sysfs_create_groups(&parent->dev->kobj, - parent->ops->dev_attr_groups); - if (ret) - goto create_err; - ret = add_mdev_supported_type_groups(parent); if (ret) - sysfs_remove_groups(&parent->dev->kobj, - parent->ops->dev_attr_groups); - else - return ret; + goto create_err; + return 0; create_err: kset_unregister(parent->mdev_types_kset); @@ -252,11 +244,20 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_WO(remove); -static const struct attribute *mdev_device_attrs[] = { +static struct attribute *mdev_device_attrs[] = { &dev_attr_remove.attr, NULL, }; +static const struct attribute_group mdev_device_group = { + .attrs = mdev_device_attrs, +}; + +const struct attribute_group *mdev_device_groups[] = { + &mdev_device_group, + NULL +}; + int mdev_create_sysfs_files(struct mdev_device *mdev) { struct mdev_type *type = mdev->type; @@ -270,15 +271,8 @@ int mdev_create_sysfs_files(struct mdev_device *mdev) ret = sysfs_create_link(kobj, &type->kobj, "mdev_type"); if (ret) goto type_link_failed; - - ret = sysfs_create_files(kobj, mdev_device_attrs); - if (ret) - goto create_files_failed; - return ret; -create_files_failed: - sysfs_remove_link(kobj, "mdev_type"); type_link_failed: sysfs_remove_link(mdev->type->devices_kobj, dev_name(&mdev->dev)); return ret; @@ -288,7 +282,6 @@ void mdev_remove_sysfs_files(struct mdev_device *mdev) { struct kobject *kobj = &mdev->dev.kobj; - sysfs_remove_files(kobj, mdev_device_attrs); sysfs_remove_link(kobj, "mdev_type"); sysfs_remove_link(mdev->type->devices_kobj, dev_name(&mdev->dev)); } diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c deleted file mode 100644 index a90e24b0c851..000000000000 --- a/drivers/vfio/mdev/vfio_mdev.c +++ /dev/null @@ -1,152 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * VFIO based driver for Mediated device - * - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * Author: Neo Jia <cjia@nvidia.com> - * Kirti Wankhede <kwankhede@nvidia.com> - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/device.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/vfio.h> -#include <linux/mdev.h> - -#include "mdev_private.h" - -static int vfio_mdev_open_device(struct vfio_device *core_vdev) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (unlikely(!parent->ops->open_device)) - return 0; - - return parent->ops->open_device(mdev); -} - -static void vfio_mdev_close_device(struct vfio_device *core_vdev) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (likely(parent->ops->close_device)) - parent->ops->close_device(mdev); -} - -static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev, - unsigned int cmd, unsigned long arg) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (unlikely(!parent->ops->ioctl)) - return 0; - - return parent->ops->ioctl(mdev, cmd, arg); -} - -static ssize_t vfio_mdev_read(struct vfio_device *core_vdev, char __user *buf, - size_t count, loff_t *ppos) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (unlikely(!parent->ops->read)) - return -EINVAL; - - return parent->ops->read(mdev, buf, count, ppos); -} - -static ssize_t vfio_mdev_write(struct vfio_device *core_vdev, - const char __user *buf, size_t count, - loff_t *ppos) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (unlikely(!parent->ops->write)) - return -EINVAL; - - return parent->ops->write(mdev, buf, count, ppos); -} - -static int vfio_mdev_mmap(struct vfio_device *core_vdev, - struct vm_area_struct *vma) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (unlikely(!parent->ops->mmap)) - return -EINVAL; - - return parent->ops->mmap(mdev, vma); -} - -static void vfio_mdev_request(struct vfio_device *core_vdev, unsigned int count) -{ - struct mdev_device *mdev = to_mdev_device(core_vdev->dev); - struct mdev_parent *parent = mdev->type->parent; - - if (parent->ops->request) - parent->ops->request(mdev, count); - else if (count == 0) - dev_notice(mdev_dev(mdev), - "No mdev vendor driver request callback support, blocked until released by user\n"); -} - -static const struct vfio_device_ops vfio_mdev_dev_ops = { - .name = "vfio-mdev", - .open_device = vfio_mdev_open_device, - .close_device = vfio_mdev_close_device, - .ioctl = vfio_mdev_unlocked_ioctl, - .read = vfio_mdev_read, - .write = vfio_mdev_write, - .mmap = vfio_mdev_mmap, - .request = vfio_mdev_request, -}; - -static int vfio_mdev_probe(struct mdev_device *mdev) -{ - struct vfio_device *vdev; - int ret; - - vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); - if (!vdev) - return -ENOMEM; - - vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops); - ret = vfio_register_emulated_iommu_dev(vdev); - if (ret) - goto out_uninit; - - dev_set_drvdata(&mdev->dev, vdev); - return 0; - -out_uninit: - vfio_uninit_group_dev(vdev); - kfree(vdev); - return ret; -} - -static void vfio_mdev_remove(struct mdev_device *mdev) -{ - struct vfio_device *vdev = dev_get_drvdata(&mdev->dev); - - vfio_unregister_group_dev(vdev); - vfio_uninit_group_dev(vdev); - kfree(vdev); -} - -struct mdev_driver vfio_mdev_driver = { - .driver = { - .name = "vfio_mdev", - .owner = THIS_MODULE, - .mod_name = KBUILD_MODNAME, - }, - .probe = vfio_mdev_probe, - .remove = vfio_mdev_remove, -}; diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 767b5d47631a..e92376837b29 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -337,6 +337,14 @@ static int vf_qm_cache_wb(struct hisi_qm *qm) return 0; } +static struct hisi_acc_vf_core_device *hssi_acc_drvdata(struct pci_dev *pdev) +{ + struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev); + + return container_of(core_device, struct hisi_acc_vf_core_device, + core_device); +} + static void vf_qm_fun_reset(struct hisi_acc_vf_core_device *hisi_acc_vdev, struct hisi_qm *qm) { @@ -962,7 +970,7 @@ hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev, static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev) { - struct hisi_acc_vf_core_device *hisi_acc_vdev = dev_get_drvdata(&pdev->dev); + struct hisi_acc_vf_core_device *hisi_acc_vdev = hssi_acc_drvdata(pdev); if (hisi_acc_vdev->core_device.vdev.migration_flags != VFIO_MIGRATION_STOP_COPY) @@ -1274,11 +1282,10 @@ static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device &hisi_acc_vfio_pci_ops); } + dev_set_drvdata(&pdev->dev, &hisi_acc_vdev->core_device); ret = vfio_pci_core_register_device(&hisi_acc_vdev->core_device); if (ret) goto out_free; - - dev_set_drvdata(&pdev->dev, hisi_acc_vdev); return 0; out_free: @@ -1289,7 +1296,7 @@ out_free: static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev) { - struct hisi_acc_vf_core_device *hisi_acc_vdev = dev_get_drvdata(&pdev->dev); + struct hisi_acc_vf_core_device *hisi_acc_vdev = hssi_acc_drvdata(pdev); vfio_pci_core_unregister_device(&hisi_acc_vdev->core_device); vfio_pci_core_uninit_device(&hisi_acc_vdev->core_device); diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 5c9f9218cc1d..9b9f33ca270a 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -5,89 +5,157 @@ #include "cmd.h" -int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod) +static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id, + u16 *vhca_id); + +int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) { - struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {}; u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {}; - int ret; - if (!mdev) + lockdep_assert_held(&mvdev->state_mutex); + if (mvdev->mdev_detach) return -ENOTCONN; MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA); - MLX5_SET(suspend_vhca_in, in, vhca_id, vhca_id); + MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id); MLX5_SET(suspend_vhca_in, in, op_mod, op_mod); - ret = mlx5_cmd_exec_inout(mdev, suspend_vhca, in, out); - mlx5_vf_put_core_dev(mdev); - return ret; + return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out); } -int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod) +int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) { - struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {}; u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {}; - int ret; - if (!mdev) + lockdep_assert_held(&mvdev->state_mutex); + if (mvdev->mdev_detach) return -ENOTCONN; MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA); - MLX5_SET(resume_vhca_in, in, vhca_id, vhca_id); + MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id); MLX5_SET(resume_vhca_in, in, op_mod, op_mod); - ret = mlx5_cmd_exec_inout(mdev, resume_vhca, in, out); - mlx5_vf_put_core_dev(mdev); - return ret; + return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out); } -int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id, +int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, size_t *state_size) { - struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {}; int ret; - if (!mdev) + lockdep_assert_held(&mvdev->state_mutex); + if (mvdev->mdev_detach) return -ENOTCONN; MLX5_SET(query_vhca_migration_state_in, in, opcode, MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE); - MLX5_SET(query_vhca_migration_state_in, in, vhca_id, vhca_id); + MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id); MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0); - ret = mlx5_cmd_exec_inout(mdev, query_vhca_migration_state, in, out); + ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in, + out); if (ret) - goto end; + return ret; *state_size = MLX5_GET(query_vhca_migration_state_out, out, required_umem_size); + return 0; +} + +static int mlx5fv_vf_event(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct mlx5vf_pci_core_device *mvdev = + container_of(nb, struct mlx5vf_pci_core_device, nb); + + mutex_lock(&mvdev->state_mutex); + switch (event) { + case MLX5_PF_NOTIFY_ENABLE_VF: + mvdev->mdev_detach = false; + break; + case MLX5_PF_NOTIFY_DISABLE_VF: + mlx5vf_disable_fds(mvdev); + mvdev->mdev_detach = true; + break; + default: + break; + } + mlx5vf_state_mutex_unlock(mvdev); + return 0; +} + +void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev) +{ + if (!mvdev->migrate_cap) + return; + + mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id, + &mvdev->nb); + destroy_workqueue(mvdev->cb_wq); +} + +void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev) +{ + struct pci_dev *pdev = mvdev->core_device.pdev; + int ret; + + if (!pdev->is_virtfn) + return; + + mvdev->mdev = mlx5_vf_get_core_dev(pdev); + if (!mvdev->mdev) + return; + + if (!MLX5_CAP_GEN(mvdev->mdev, migration)) + goto end; + + mvdev->vf_id = pci_iov_vf_id(pdev); + if (mvdev->vf_id < 0) + goto end; + + if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1, + &mvdev->vhca_id)) + goto end; + + mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0); + if (!mvdev->cb_wq) + goto end; + + mutex_init(&mvdev->state_mutex); + spin_lock_init(&mvdev->reset_lock); + mvdev->nb.notifier_call = mlx5fv_vf_event; + ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id, + &mvdev->nb); + if (ret) { + destroy_workqueue(mvdev->cb_wq); + goto end; + } + + mvdev->migrate_cap = 1; + mvdev->core_device.vdev.migration_flags = + VFIO_MIGRATION_STOP_COPY | + VFIO_MIGRATION_P2P; end: - mlx5_vf_put_core_dev(mdev); - return ret; + mlx5_vf_put_core_dev(mvdev->mdev); } -int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id) +static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id, + u16 *vhca_id) { - struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; int out_size; void *out; int ret; - if (!mdev) - return -ENOTCONN; - out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); out = kzalloc(out_size, GFP_KERNEL); - if (!out) { - ret = -ENOMEM; - goto end; - } + if (!out) + return -ENOMEM; MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, other_function, 1); @@ -105,8 +173,6 @@ int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id) err_exec: kfree(out); -end: - mlx5_vf_put_core_dev(mdev); return ret; } @@ -151,21 +217,68 @@ static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn, return err; } -int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id, +void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) +{ + struct mlx5vf_async_data *async_data = container_of(_work, + struct mlx5vf_async_data, work); + struct mlx5_vf_migration_file *migf = container_of(async_data, + struct mlx5_vf_migration_file, async_data); + struct mlx5_core_dev *mdev = migf->mvdev->mdev; + + mutex_lock(&migf->lock); + if (async_data->status) { + migf->is_err = true; + wake_up_interruptible(&migf->poll_wait); + } + mutex_unlock(&migf->lock); + + mlx5_core_destroy_mkey(mdev, async_data->mkey); + dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); + mlx5_core_dealloc_pd(mdev, async_data->pdn); + kvfree(async_data->out); + fput(migf->filp); +} + +static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5vf_async_data *async_data = container_of(context, + struct mlx5vf_async_data, cb_work); + struct mlx5_vf_migration_file *migf = container_of(async_data, + struct mlx5_vf_migration_file, async_data); + + if (!status) { + WRITE_ONCE(migf->total_length, + MLX5_GET(save_vhca_state_out, async_data->out, + actual_image_size)); + wake_up_interruptible(&migf->poll_wait); + } + + /* + * The error and the cleanup flows can't run from an + * interrupt context + */ + async_data->status = status; + queue_work(migf->mvdev->cb_wq, &async_data->work); +} + +int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf) { - struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); - u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {}; + u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out); u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; + struct mlx5vf_async_data *async_data; + struct mlx5_core_dev *mdev; u32 pdn, mkey; int err; - if (!mdev) + lockdep_assert_held(&mvdev->state_mutex); + if (mvdev->mdev_detach) return -ENOTCONN; + mdev = mvdev->mdev; err = mlx5_core_alloc_pd(mdev, &pdn); if (err) - goto end; + return err; err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); @@ -179,45 +292,54 @@ int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id, MLX5_SET(save_vhca_state_in, in, opcode, MLX5_CMD_OP_SAVE_VHCA_STATE); MLX5_SET(save_vhca_state_in, in, op_mod, 0); - MLX5_SET(save_vhca_state_in, in, vhca_id, vhca_id); + MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id); MLX5_SET(save_vhca_state_in, in, mkey, mkey); MLX5_SET(save_vhca_state_in, in, size, migf->total_length); - err = mlx5_cmd_exec_inout(mdev, save_vhca_state, in, out); + async_data = &migf->async_data; + async_data->out = kvzalloc(out_size, GFP_KERNEL); + if (!async_data->out) { + err = -ENOMEM; + goto err_out; + } + + /* no data exists till the callback comes back */ + migf->total_length = 0; + get_file(migf->filp); + async_data->mkey = mkey; + async_data->pdn = pdn; + err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in), + async_data->out, + out_size, mlx5vf_save_callback, + &async_data->cb_work); if (err) goto err_exec; - migf->total_length = - MLX5_GET(save_vhca_state_out, out, actual_image_size); - - mlx5_core_destroy_mkey(mdev, mkey); - mlx5_core_dealloc_pd(mdev, pdn); - dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); - mlx5_vf_put_core_dev(mdev); - return 0; err_exec: + fput(migf->filp); + kvfree(async_data->out); +err_out: mlx5_core_destroy_mkey(mdev, mkey); err_create_mkey: dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); err_dma_map: mlx5_core_dealloc_pd(mdev, pdn); -end: - mlx5_vf_put_core_dev(mdev); return err; } -int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id, +int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf) { - struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); + struct mlx5_core_dev *mdev; u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {}; u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; u32 pdn, mkey; int err; - if (!mdev) + lockdep_assert_held(&mvdev->state_mutex); + if (mvdev->mdev_detach) return -ENOTCONN; mutex_lock(&migf->lock); @@ -226,6 +348,7 @@ int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id, goto end; } + mdev = mvdev->mdev; err = mlx5_core_alloc_pd(mdev, &pdn); if (err) goto end; @@ -241,7 +364,7 @@ int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id, MLX5_SET(load_vhca_state_in, in, opcode, MLX5_CMD_OP_LOAD_VHCA_STATE); MLX5_SET(load_vhca_state_in, in, op_mod, 0); - MLX5_SET(load_vhca_state_in, in, vhca_id, vhca_id); + MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id); MLX5_SET(load_vhca_state_in, in, mkey, mkey); MLX5_SET(load_vhca_state_in, in, size, migf->total_length); @@ -253,7 +376,6 @@ err_mkey: err_reg: mlx5_core_dealloc_pd(mdev, pdn); end: - mlx5_vf_put_core_dev(mdev); mutex_unlock(&migf->lock); return err; } diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 1392a11a9cc0..6c3112fdd8b1 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -7,12 +7,23 @@ #define MLX5_VFIO_CMD_H #include <linux/kernel.h> +#include <linux/vfio_pci_core.h> #include <linux/mlx5/driver.h> +struct mlx5vf_async_data { + struct mlx5_async_work cb_work; + struct work_struct work; + int status; + u32 pdn; + u32 mkey; + void *out; +}; + struct mlx5_vf_migration_file { struct file *filp; struct mutex lock; - bool disabled; + u8 disabled:1; + u8 is_err:1; struct sg_append_table table; size_t total_length; @@ -22,15 +33,42 @@ struct mlx5_vf_migration_file { struct scatterlist *last_offset_sg; unsigned int sg_last_entry; unsigned long last_offset; + struct mlx5vf_pci_core_device *mvdev; + wait_queue_head_t poll_wait; + struct mlx5_async_ctx async_ctx; + struct mlx5vf_async_data async_data; +}; + +struct mlx5vf_pci_core_device { + struct vfio_pci_core_device core_device; + int vf_id; + u16 vhca_id; + u8 migrate_cap:1; + u8 deferred_reset:1; + u8 mdev_detach:1; + /* protect migration state */ + struct mutex state_mutex; + enum vfio_device_mig_state mig_state; + /* protect the reset_done flow */ + spinlock_t reset_lock; + struct mlx5_vf_migration_file *resuming_migf; + struct mlx5_vf_migration_file *saving_migf; + struct workqueue_struct *cb_wq; + struct notifier_block nb; + struct mlx5_core_dev *mdev; }; -int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod); -int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod); -int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id, +int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); +int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); +int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, size_t *state_size); -int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id); -int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id, +void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev); +void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev); +int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf); -int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id, +int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf); +void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); +void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); +void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); #endif /* MLX5_VFIO_CMD_H */ diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index bbec5d288fee..dd1009b5ff9c 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -17,7 +17,6 @@ #include <linux/uaccess.h> #include <linux/vfio.h> #include <linux/sched/mm.h> -#include <linux/vfio_pci_core.h> #include <linux/anon_inodes.h> #include "cmd.h" @@ -25,19 +24,13 @@ /* Arbitrary to prevent userspace from consuming endless memory */ #define MAX_MIGRATION_SIZE (512*1024*1024) -struct mlx5vf_pci_core_device { - struct vfio_pci_core_device core_device; - u16 vhca_id; - u8 migrate_cap:1; - u8 deferred_reset:1; - /* protect migration state */ - struct mutex state_mutex; - enum vfio_device_mig_state mig_state; - /* protect the reset_done flow */ - spinlock_t reset_lock; - struct mlx5_vf_migration_file *resuming_migf; - struct mlx5_vf_migration_file *saving_migf; -}; +static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev) +{ + struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev); + + return container_of(core_device, struct mlx5vf_pci_core_device, + core_device); +} static struct page * mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf, @@ -149,12 +142,22 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, return -ESPIPE; pos = &filp->f_pos; + if (!(filp->f_flags & O_NONBLOCK)) { + if (wait_event_interruptible(migf->poll_wait, + READ_ONCE(migf->total_length) || migf->is_err)) + return -ERESTARTSYS; + } + mutex_lock(&migf->lock); + if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(migf->total_length)) { + done = -EAGAIN; + goto out_unlock; + } if (*pos > migf->total_length) { done = -EINVAL; goto out_unlock; } - if (migf->disabled) { + if (migf->disabled || migf->is_err) { done = -ENODEV; goto out_unlock; } @@ -194,9 +197,28 @@ out_unlock: return done; } +static __poll_t mlx5vf_save_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct mlx5_vf_migration_file *migf = filp->private_data; + __poll_t pollflags = 0; + + poll_wait(filp, &migf->poll_wait, wait); + + mutex_lock(&migf->lock); + if (migf->disabled || migf->is_err) + pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + else if (READ_ONCE(migf->total_length)) + pollflags = EPOLLIN | EPOLLRDNORM; + mutex_unlock(&migf->lock); + + return pollflags; +} + static const struct file_operations mlx5vf_save_fops = { .owner = THIS_MODULE, .read = mlx5vf_save_read, + .poll = mlx5vf_save_poll, .release = mlx5vf_release_file, .llseek = no_llseek, }; @@ -222,9 +244,11 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); - - ret = mlx5vf_cmd_query_vhca_migration_state( - mvdev->core_device.pdev, mvdev->vhca_id, &migf->total_length); + init_waitqueue_head(&migf->poll_wait); + mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx); + INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb); + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, + &migf->total_length); if (ret) goto out_free; @@ -233,8 +257,8 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) if (ret) goto out_free; - ret = mlx5vf_cmd_save_vhca_state(mvdev->core_device.pdev, - mvdev->vhca_id, migf); + migf->mvdev = mvdev; + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf); if (ret) goto out_free; return migf; @@ -339,7 +363,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) return migf; } -static void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) +void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) { if (mvdev->resuming_migf) { mlx5vf_disable_fd(mvdev->resuming_migf); @@ -347,6 +371,8 @@ static void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) mvdev->resuming_migf = NULL; } if (mvdev->saving_migf) { + mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx); + cancel_work_sync(&mvdev->saving_migf->async_data.work); mlx5vf_disable_fd(mvdev->saving_migf); fput(mvdev->saving_migf->filp); mvdev->saving_migf = NULL; @@ -361,8 +387,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, int ret; if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) { - ret = mlx5vf_cmd_suspend_vhca( - mvdev->core_device.pdev, mvdev->vhca_id, + ret = mlx5vf_cmd_suspend_vhca(mvdev, MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER); if (ret) return ERR_PTR(ret); @@ -370,8 +395,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, } if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) { - ret = mlx5vf_cmd_resume_vhca( - mvdev->core_device.pdev, mvdev->vhca_id, + ret = mlx5vf_cmd_resume_vhca(mvdev, MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER); if (ret) return ERR_PTR(ret); @@ -379,8 +403,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, } if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) { - ret = mlx5vf_cmd_suspend_vhca( - mvdev->core_device.pdev, mvdev->vhca_id, + ret = mlx5vf_cmd_suspend_vhca(mvdev, MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR); if (ret) return ERR_PTR(ret); @@ -388,8 +411,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, } if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) { - ret = mlx5vf_cmd_resume_vhca( - mvdev->core_device.pdev, mvdev->vhca_id, + ret = mlx5vf_cmd_resume_vhca(mvdev, MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR); if (ret) return ERR_PTR(ret); @@ -424,8 +446,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, } if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { - ret = mlx5vf_cmd_load_vhca_state(mvdev->core_device.pdev, - mvdev->vhca_id, + ret = mlx5vf_cmd_load_vhca_state(mvdev, mvdev->resuming_migf); if (ret) return ERR_PTR(ret); @@ -444,7 +465,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, * This function is called in all state_mutex unlock cases to * handle a 'deferred_reset' if exists. */ -static void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev) +void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev) { again: spin_lock(&mvdev->reset_lock); @@ -505,7 +526,7 @@ static int mlx5vf_pci_get_device_state(struct vfio_device *vdev, static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev) { - struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev); + struct mlx5vf_pci_core_device *mvdev = mlx5vf_drvdata(pdev); if (!mvdev->migrate_cap) return; @@ -532,34 +553,16 @@ static int mlx5vf_pci_open_device(struct vfio_device *core_vdev) struct mlx5vf_pci_core_device *mvdev = container_of( core_vdev, struct mlx5vf_pci_core_device, core_device.vdev); struct vfio_pci_core_device *vdev = &mvdev->core_device; - int vf_id; int ret; ret = vfio_pci_core_enable(vdev); if (ret) return ret; - if (!mvdev->migrate_cap) { - vfio_pci_core_finish_enable(vdev); - return 0; - } - - vf_id = pci_iov_vf_id(vdev->pdev); - if (vf_id < 0) { - ret = vf_id; - goto out_disable; - } - - ret = mlx5vf_cmd_get_vhca_id(vdev->pdev, vf_id + 1, &mvdev->vhca_id); - if (ret) - goto out_disable; - - mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; + if (mvdev->migrate_cap) + mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; vfio_pci_core_finish_enable(vdev); return 0; -out_disable: - vfio_pci_core_disable(vdev); - return ret; } static void mlx5vf_pci_close_device(struct vfio_device *core_vdev) @@ -596,32 +599,15 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev, if (!mvdev) return -ENOMEM; vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops); - - if (pdev->is_virtfn) { - struct mlx5_core_dev *mdev = - mlx5_vf_get_core_dev(pdev); - - if (mdev) { - if (MLX5_CAP_GEN(mdev, migration)) { - mvdev->migrate_cap = 1; - mvdev->core_device.vdev.migration_flags = - VFIO_MIGRATION_STOP_COPY | - VFIO_MIGRATION_P2P; - mutex_init(&mvdev->state_mutex); - spin_lock_init(&mvdev->reset_lock); - } - mlx5_vf_put_core_dev(mdev); - } - } - + mlx5vf_cmd_set_migratable(mvdev); + dev_set_drvdata(&pdev->dev, &mvdev->core_device); ret = vfio_pci_core_register_device(&mvdev->core_device); if (ret) goto out_free; - - dev_set_drvdata(&pdev->dev, mvdev); return 0; out_free: + mlx5vf_cmd_remove_migratable(mvdev); vfio_pci_core_uninit_device(&mvdev->core_device); kfree(mvdev); return ret; @@ -629,9 +615,10 @@ out_free: static void mlx5vf_pci_remove(struct pci_dev *pdev) { - struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev); + struct mlx5vf_pci_core_device *mvdev = mlx5vf_drvdata(pdev); vfio_pci_core_unregister_device(&mvdev->core_device); + mlx5vf_cmd_remove_migratable(mvdev); vfio_pci_core_uninit_device(&mvdev->core_device); kfree(mvdev); } diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 58839206d1ca..4d1a97415a27 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -151,10 +151,10 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; vfio_pci_core_init_device(vdev, pdev, &vfio_pci_ops); + dev_set_drvdata(&pdev->dev, vdev); ret = vfio_pci_core_register_device(vdev); if (ret) goto out_free; - dev_set_drvdata(&pdev->dev, vdev); return 0; out_free: @@ -174,10 +174,12 @@ static void vfio_pci_remove(struct pci_dev *pdev) static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn) { + struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev); + if (!enable_sriov) return -ENOENT; - return vfio_pci_core_sriov_configure(pdev, nr_virtfn); + return vfio_pci_core_sriov_configure(vdev, nr_virtfn); } static const struct pci_device_id vfio_pci_table[] = { diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 06b6f3594a13..100ab98c7ff0 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1821,6 +1821,10 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) struct pci_dev *pdev = vdev->pdev; int ret; + /* Drivers must set the vfio_pci_core_device to their drvdata */ + if (WARN_ON(vdev != dev_get_drvdata(&vdev->pdev->dev))) + return -EINVAL; + if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; @@ -1890,9 +1894,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_register_device); void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) { - struct pci_dev *pdev = vdev->pdev; - - vfio_pci_core_sriov_configure(pdev, 0); + vfio_pci_core_sriov_configure(vdev, 0); vfio_unregister_group_dev(&vdev->vdev); @@ -1907,14 +1909,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_unregister_device); pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { - struct vfio_pci_core_device *vdev; - struct vfio_device *device; - - device = vfio_device_get_from_dev(&pdev->dev); - if (device == NULL) - return PCI_ERS_RESULT_DISCONNECT; - - vdev = container_of(device, struct vfio_pci_core_device, vdev); + struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev); mutex_lock(&vdev->igate); @@ -1923,26 +1918,18 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, mutex_unlock(&vdev->igate); - vfio_device_put(device); - return PCI_ERS_RESULT_CAN_RECOVER; } EXPORT_SYMBOL_GPL(vfio_pci_core_aer_err_detected); -int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) +int vfio_pci_core_sriov_configure(struct vfio_pci_core_device *vdev, + int nr_virtfn) { - struct vfio_pci_core_device *vdev; - struct vfio_device *device; + struct pci_dev *pdev = vdev->pdev; int ret = 0; device_lock_assert(&pdev->dev); - device = vfio_device_get_from_dev(&pdev->dev); - if (!device) - return -ENODEV; - - vdev = container_of(device, struct vfio_pci_core_device, vdev); - if (nr_virtfn) { mutex_lock(&vfio_pci_sriov_pfs_mutex); /* @@ -1960,8 +1947,7 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) ret = pci_enable_sriov(pdev, nr_virtfn); if (ret) goto out_del; - ret = nr_virtfn; - goto out_put; + return nr_virtfn; } pci_disable_sriov(pdev); @@ -1971,8 +1957,6 @@ out_del: list_del_init(&vdev->sriov_pfs_item); out_unlock: mutex_unlock(&vfio_pci_sriov_pfs_mutex); -out_put: - vfio_device_put(device); return ret; } EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure); diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 0c766384cee0..cc0d337f7b13 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -444,31 +444,15 @@ static void vfio_group_get(struct vfio_group *group) refcount_inc(&group->users); } -static struct vfio_group *vfio_group_get_from_dev(struct device *dev) -{ - struct iommu_group *iommu_group; - struct vfio_group *group; - - iommu_group = iommu_group_get(dev); - if (!iommu_group) - return NULL; - - group = vfio_group_get_from_iommu(iommu_group); - iommu_group_put(iommu_group); - - return group; -} - /* * Device objects - create, release, get, put, search */ /* Device reference always implies a group reference */ -void vfio_device_put(struct vfio_device *device) +static void vfio_device_put(struct vfio_device *device) { if (refcount_dec_and_test(&device->refcount)) complete(&device->comp); } -EXPORT_SYMBOL_GPL(vfio_device_put); static bool vfio_device_try_get(struct vfio_device *device) { @@ -547,11 +531,11 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) iommu_group = iommu_group_get(dev); #ifdef CONFIG_VFIO_NOIOMMU - if (!iommu_group && noiommu && !iommu_present(dev->bus)) { + if (!iommu_group && noiommu) { /* * With noiommu enabled, create an IOMMU group for devices that - * don't already have one and don't have an iommu_ops on their - * bus. Taint the kernel because we're about to give a DMA + * don't already have one, implying no IOMMU hardware/driver + * exists. Taint the kernel because we're about to give a DMA * capable device to a user without IOMMU protection. */ group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); @@ -633,29 +617,6 @@ int vfio_register_emulated_iommu_dev(struct vfio_device *device) } EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); -/* - * Get a reference to the vfio_device for a device. Even if the - * caller thinks they own the device, they could be racing with a - * release call path, so we can't trust drvdata for the shortcut. - * Go the long way around, from the iommu_group to the vfio_group - * to the vfio_device. - */ -struct vfio_device *vfio_device_get_from_dev(struct device *dev) -{ - struct vfio_group *group; - struct vfio_device *device; - - group = vfio_group_get_from_dev(dev); - if (!group) - return NULL; - - device = vfio_group_get_device(group, dev); - vfio_group_put(group); - - return device; -} -EXPORT_SYMBOL_GPL(vfio_device_get_from_dev); - static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, char *buf) { @@ -1115,6 +1076,12 @@ static int vfio_group_add_container_user(struct vfio_group *group) static const struct file_operations vfio_device_fops; +/* true if the vfio_device has open_device() called but not close_device() */ +static bool vfio_assert_device_open(struct vfio_device *device) +{ + return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); +} + static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) { struct vfio_device *device; @@ -1329,8 +1296,10 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) struct vfio_device *device = filep->private_data; mutex_lock(&device->dev_set->lock); - if (!--device->open_count && device->ops->close_device) + vfio_assert_device_open(device); + if (device->open_count == 1 && device->ops->close_device) device->ops->close_device(device); + device->open_count--; mutex_unlock(&device->dev_set->lock); module_put(device->dev->driver->owner); @@ -1732,44 +1701,6 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep) } EXPORT_SYMBOL_GPL(vfio_group_get_external_user); -/* - * External user API, exported by symbols to be linked dynamically. - * The external user passes in a device pointer - * to verify that: - * - A VFIO group is assiciated with the device; - * - IOMMU is set for the group. - * If both checks passed, vfio_group_get_external_user_from_dev() - * increments the container user counter to prevent the VFIO group - * from disposal before external user exits and returns the pointer - * to the VFIO group. - * - * When the external user finishes using the VFIO group, it calls - * vfio_group_put_external_user() to release the VFIO group and - * decrement the container user counter. - * - * @dev [in] : device - * Return error PTR or pointer to VFIO group. - */ - -struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev) -{ - struct vfio_group *group; - int ret; - - group = vfio_group_get_from_dev(dev); - if (!group) - return ERR_PTR(-ENODEV); - - ret = vfio_group_add_container_user(group); - if (ret) { - vfio_group_put(group); - return ERR_PTR(ret); - } - - return group; -} -EXPORT_SYMBOL_GPL(vfio_group_get_external_user_from_dev); - void vfio_group_put_external_user(struct vfio_group *group) { vfio_group_try_dissolve_container(group); @@ -1919,7 +1850,7 @@ EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); /* * Pin a set of guest PFNs and return their associated host PFNs for local * domain only. - * @dev [in] : device + * @device [in] : device * @user_pfn [in]: array of user/guest PFNs to be pinned. * @npage [in] : count of elements in user_pfn array. This count should not * be greater VFIO_PIN_PAGES_MAX_ENTRIES. @@ -1927,32 +1858,23 @@ EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); * @phys_pfn[out]: array of host PFNs * Return error or number of pages pinned. */ -int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, - int prot, unsigned long *phys_pfn) +int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, + int npage, int prot, unsigned long *phys_pfn) { struct vfio_container *container; - struct vfio_group *group; + struct vfio_group *group = device->group; struct vfio_iommu_driver *driver; int ret; - if (!dev || !user_pfn || !phys_pfn || !npage) + if (!user_pfn || !phys_pfn || !npage || + !vfio_assert_device_open(device)) return -EINVAL; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) return -E2BIG; - group = vfio_group_get_from_dev(dev); - if (!group) - return -ENODEV; - - if (group->dev_counter > 1) { - ret = -EINVAL; - goto err_pin_pages; - } - - ret = vfio_group_add_container_user(group); - if (ret) - goto err_pin_pages; + if (group->dev_counter > 1) + return -EINVAL; container = group->container; driver = container->iommu_driver; @@ -1963,45 +1885,33 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, else ret = -ENOTTY; - vfio_group_try_dissolve_container(group); - -err_pin_pages: - vfio_group_put(group); return ret; } EXPORT_SYMBOL(vfio_pin_pages); /* * Unpin set of host PFNs for local domain only. - * @dev [in] : device + * @device [in] : device * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest * PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES. * @npage [in] : count of elements in user_pfn array. This count should not * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. * Return error or number of pages unpinned. */ -int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage) +int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, + int npage) { struct vfio_container *container; - struct vfio_group *group; struct vfio_iommu_driver *driver; int ret; - if (!dev || !user_pfn || !npage) + if (!user_pfn || !npage || !vfio_assert_device_open(device)) return -EINVAL; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) return -E2BIG; - group = vfio_group_get_from_dev(dev); - if (!group) - return -ENODEV; - - ret = vfio_group_add_container_user(group); - if (ret) - goto err_unpin_pages; - - container = group->container; + container = device->group->container; driver = container->iommu_driver; if (likely(driver && driver->ops->unpin_pages)) ret = driver->ops->unpin_pages(container->iommu_data, user_pfn, @@ -2009,110 +1919,11 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage) else ret = -ENOTTY; - vfio_group_try_dissolve_container(group); - -err_unpin_pages: - vfio_group_put(group); return ret; } EXPORT_SYMBOL(vfio_unpin_pages); /* - * Pin a set of guest IOVA PFNs and return their associated host PFNs for a - * VFIO group. - * - * The caller needs to call vfio_group_get_external_user() or - * vfio_group_get_external_user_from_dev() prior to calling this interface, - * so as to prevent the VFIO group from disposal in the middle of the call. - * But it can keep the reference to the VFIO group for several calls into - * this interface. - * After finishing using of the VFIO group, the caller needs to release the - * VFIO group by calling vfio_group_put_external_user(). - * - * @group [in] : VFIO group - * @user_iova_pfn [in] : array of user/guest IOVA PFNs to be pinned. - * @npage [in] : count of elements in user_iova_pfn array. - * This count should not be greater - * VFIO_PIN_PAGES_MAX_ENTRIES. - * @prot [in] : protection flags - * @phys_pfn [out] : array of host PFNs - * Return error or number of pages pinned. - */ -int vfio_group_pin_pages(struct vfio_group *group, - unsigned long *user_iova_pfn, int npage, - int prot, unsigned long *phys_pfn) -{ - struct vfio_container *container; - struct vfio_iommu_driver *driver; - int ret; - - if (!group || !user_iova_pfn || !phys_pfn || !npage) - return -EINVAL; - - if (group->dev_counter > 1) - return -EINVAL; - - if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) - return -E2BIG; - - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->pin_pages)) - ret = driver->ops->pin_pages(container->iommu_data, - group->iommu_group, user_iova_pfn, - npage, prot, phys_pfn); - else - ret = -ENOTTY; - - return ret; -} -EXPORT_SYMBOL(vfio_group_pin_pages); - -/* - * Unpin a set of guest IOVA PFNs for a VFIO group. - * - * The caller needs to call vfio_group_get_external_user() or - * vfio_group_get_external_user_from_dev() prior to calling this interface, - * so as to prevent the VFIO group from disposal in the middle of the call. - * But it can keep the reference to the VFIO group for several calls into - * this interface. - * After finishing using of the VFIO group, the caller needs to release the - * VFIO group by calling vfio_group_put_external_user(). - * - * @group [in] : vfio group - * @user_iova_pfn [in] : array of user/guest IOVA PFNs to be unpinned. - * @npage [in] : count of elements in user_iova_pfn array. - * This count should not be greater than - * VFIO_PIN_PAGES_MAX_ENTRIES. - * Return error or number of pages unpinned. - */ -int vfio_group_unpin_pages(struct vfio_group *group, - unsigned long *user_iova_pfn, int npage) -{ - struct vfio_container *container; - struct vfio_iommu_driver *driver; - int ret; - - if (!group || !user_iova_pfn || !npage) - return -EINVAL; - - if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) - return -E2BIG; - - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->unpin_pages)) - ret = driver->ops->unpin_pages(container->iommu_data, - user_iova_pfn, npage); - else - ret = -ENOTTY; - - return ret; -} -EXPORT_SYMBOL(vfio_group_unpin_pages); - - -/* * This interface allows the CPUs to perform some sort of virtual DMA on * behalf of the device. * @@ -2122,32 +1933,24 @@ EXPORT_SYMBOL(vfio_group_unpin_pages); * As the read/write of user space memory is conducted via the CPUs and is * not a real device DMA, it is not necessary to pin the user space memory. * - * The caller needs to call vfio_group_get_external_user() or - * vfio_group_get_external_user_from_dev() prior to calling this interface, - * so as to prevent the VFIO group from disposal in the middle of the call. - * But it can keep the reference to the VFIO group for several calls into - * this interface. - * After finishing using of the VFIO group, the caller needs to release the - * VFIO group by calling vfio_group_put_external_user(). - * - * @group [in] : VFIO group + * @device [in] : VFIO device * @user_iova [in] : base IOVA of a user space buffer * @data [in] : pointer to kernel buffer * @len [in] : kernel buffer length * @write : indicate read or write * Return error code on failure or 0 on success. */ -int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, - void *data, size_t len, bool write) +int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, + size_t len, bool write) { struct vfio_container *container; struct vfio_iommu_driver *driver; int ret = 0; - if (!group || !data || len <= 0) + if (!data || len <= 0 || !vfio_assert_device_open(device)) return -EINVAL; - container = group->container; + container = device->group->container; driver = container->iommu_driver; if (likely(driver && driver->ops->dma_rw)) @@ -2155,7 +1958,6 @@ int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, user_iova, data, len, write); else ret = -ENOTTY; - return ret; } EXPORT_SYMBOL(vfio_dma_rw); @@ -2168,10 +1970,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group, struct vfio_iommu_driver *driver; int ret; - ret = vfio_group_add_container_user(group); - if (ret) - return -EINVAL; - container = group->container; driver = container->iommu_driver; if (likely(driver && driver->ops->register_notifier)) @@ -2179,9 +1977,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group, events, nb); else ret = -ENOTTY; - - vfio_group_try_dissolve_container(group); - return ret; } @@ -2192,10 +1987,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group, struct vfio_iommu_driver *driver; int ret; - ret = vfio_group_add_container_user(group); - if (ret) - return -EINVAL; - container = group->container; driver = container->iommu_driver; if (likely(driver && driver->ops->unregister_notifier)) @@ -2203,9 +1994,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group, nb); else ret = -ENOTTY; - - vfio_group_try_dissolve_container(group); - return ret; } @@ -2234,10 +2022,6 @@ static int vfio_register_group_notifier(struct vfio_group *group, if (*events) return -EINVAL; - ret = vfio_group_add_container_user(group); - if (ret) - return -EINVAL; - ret = blocking_notifier_chain_register(&group->notifier, nb); /* @@ -2247,41 +2031,20 @@ static int vfio_register_group_notifier(struct vfio_group *group, if (!ret && set_kvm && group->kvm) blocking_notifier_call_chain(&group->notifier, VFIO_GROUP_NOTIFY_SET_KVM, group->kvm); - - vfio_group_try_dissolve_container(group); - return ret; } -static int vfio_unregister_group_notifier(struct vfio_group *group, - struct notifier_block *nb) +int vfio_register_notifier(struct vfio_device *device, + enum vfio_notify_type type, unsigned long *events, + struct notifier_block *nb) { + struct vfio_group *group = device->group; int ret; - ret = vfio_group_add_container_user(group); - if (ret) - return -EINVAL; - - ret = blocking_notifier_chain_unregister(&group->notifier, nb); - - vfio_group_try_dissolve_container(group); - - return ret; -} - -int vfio_register_notifier(struct device *dev, enum vfio_notify_type type, - unsigned long *events, struct notifier_block *nb) -{ - struct vfio_group *group; - int ret; - - if (!dev || !nb || !events || (*events == 0)) + if (!nb || !events || (*events == 0) || + !vfio_assert_device_open(device)) return -EINVAL; - group = vfio_group_get_from_dev(dev); - if (!group) - return -ENODEV; - switch (type) { case VFIO_IOMMU_NOTIFY: ret = vfio_register_iommu_notifier(group, events, nb); @@ -2292,59 +2055,34 @@ int vfio_register_notifier(struct device *dev, enum vfio_notify_type type, default: ret = -EINVAL; } - - vfio_group_put(group); return ret; } EXPORT_SYMBOL(vfio_register_notifier); -int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type, +int vfio_unregister_notifier(struct vfio_device *device, + enum vfio_notify_type type, struct notifier_block *nb) { - struct vfio_group *group; + struct vfio_group *group = device->group; int ret; - if (!dev || !nb) + if (!nb || !vfio_assert_device_open(device)) return -EINVAL; - group = vfio_group_get_from_dev(dev); - if (!group) - return -ENODEV; - switch (type) { case VFIO_IOMMU_NOTIFY: ret = vfio_unregister_iommu_notifier(group, nb); break; case VFIO_GROUP_NOTIFY: - ret = vfio_unregister_group_notifier(group, nb); + ret = blocking_notifier_chain_unregister(&group->notifier, nb); break; default: ret = -EINVAL; } - - vfio_group_put(group); return ret; } EXPORT_SYMBOL(vfio_unregister_notifier); -struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group) -{ - struct vfio_container *container; - struct vfio_iommu_driver *driver; - - if (!group) - return ERR_PTR(-EINVAL); - - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->group_iommu_domain)) - return driver->ops->group_iommu_domain(container->iommu_data, - group->iommu_group); - - return ERR_PTR(-ENOTTY); -} -EXPORT_SYMBOL_GPL(vfio_group_iommu_domain); - /* * Module/class support */ |