From 16b8fe4caf499ae8e12d2ab1b1324497e36a7b83 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 13 Nov 2020 18:52:02 +0100 Subject: vfio/pci: Move dummy_resources_list init in vfio_pci_probe() In case an error occurs in vfio_pci_enable() before the call to vfio_pci_probe_mmaps(), vfio_pci_disable() will try to iterate on an uninitialized list and cause a kernel panic. Lets move to the initialization to vfio_pci_probe() to fix the issue. Signed-off-by: Eric Auger Fixes: 05f0c03fbac1 ("vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive") CC: Stable # v4.7+ Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e6190173482c..47ebc5c49ca4 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -161,8 +161,6 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev) int i; struct vfio_pci_dummy_resource *dummy_res; - INIT_LIST_HEAD(&vdev->dummy_resources_list); - for (i = 0; i < PCI_STD_NUM_BARS; i++) { int bar = i + PCI_STD_RESOURCES; @@ -1966,6 +1964,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mutex_init(&vdev->igate); spin_lock_init(&vdev->irqlock); mutex_init(&vdev->ioeventfds_lock); + INIT_LIST_HEAD(&vdev->dummy_resources_list); INIT_LIST_HEAD(&vdev->ioeventfds_list); mutex_init(&vdev->vma_lock); INIT_LIST_HEAD(&vdev->vma_list); -- cgit v1.2.3 From 7b06a56d468b756ad6bb43ac21b11e474ebc54a0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Nov 2020 12:34:58 -0400 Subject: vfio-pci: Use io_remap_pfn_range() for PCI IO memory commit f8f6ae5d077a ("mm: always have io_remap_pfn_range() set pgprot_decrypted()") allows drivers using mmap to put PCI memory mapped BAR space into userspace to work correctly on AMD SME systems that default to all memory encrypted. Since vfio_pci_mmap_fault() is working with PCI memory mapped BAR space it should be calling io_remap_pfn_range() otherwise it will not work on SME systems. Fixes: 11c4cd07ba11 ("vfio-pci: Fault mmaps to enable vma tracking") Signed-off-by: Jason Gunthorpe Acked-by: Peter Xu Tested-by: Tom Lendacky Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 47ebc5c49ca4..706de3ef94bb 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1633,8 +1633,8 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) mutex_unlock(&vdev->vma_lock); - if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, vma->vm_page_prot)) + if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot)) ret = VM_FAULT_SIGBUS; up_out: -- cgit v1.2.3 From d22f9a6c92de96304c81792942ae7c306f08ac77 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Sun, 22 Nov 2020 18:39:50 +1100 Subject: vfio/pci/nvlink2: Do not attempt NPU2 setup on POWER8NVL NPU We execute certain NPU2 setup code (such as mapping an LPID to a device in NPU2) unconditionally if an Nvlink bridge is detected. However this cannot succeed on POWER8NVL machines as the init helpers return an error other than ENODEV which means the device is there is and setup failed so vfio_pci_enable() fails and pass through is not possible. This changes the two NPU2 related init helpers to return -ENODEV if there is no "memory-region" device tree property as this is the distinction between NPU and NPU2. Tested on - POWER9 pvr=004e1201, Ubuntu 19.04 host, Ubuntu 18.04 vm, NVIDIA GV100 10de:1db1 driver 418.39 - POWER8 pvr=004c0100, RHEL 7.6 host, Ubuntu 16.10 vm, NVIDIA P100 10de:15f9 driver 396.47 Fixes: 7f92891778df ("vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] subdriver") Cc: stable@vger.kernel.org # 5.0 Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_nvlink2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c index 65c61710c0e9..9adcf6a8f888 100644 --- a/drivers/vfio/pci/vfio_pci_nvlink2.c +++ b/drivers/vfio/pci/vfio_pci_nvlink2.c @@ -231,7 +231,7 @@ int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev) return -EINVAL; if (of_property_read_u32(npu_node, "memory-region", &mem_phandle)) - return -EINVAL; + return -ENODEV; mem_node = of_find_node_by_phandle(mem_phandle); if (!mem_node) @@ -393,7 +393,7 @@ int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev) int ret; struct vfio_pci_npu2_data *data; struct device_node *nvlink_dn; - u32 nvlink_index = 0; + u32 nvlink_index = 0, mem_phandle = 0; struct pci_dev *npdev = vdev->pdev; struct device_node *npu_node = pci_device_to_OF_node(npdev); struct pci_controller *hose = pci_bus_to_host(npdev->bus); @@ -408,6 +408,9 @@ int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev) if (!pnv_pci_get_gpu_dev(vdev->pdev)) return -ENODEV; + if (of_property_read_u32(npu_node, "memory-region", &mem_phandle)) + return -ENODEV; + /* * NPU2 normally has 8 ATSD registers (for concurrency) and 6 links * so we can allocate one register per link, using nvlink index as -- cgit v1.2.3 From a15ac665b9e9c90b1557499f2a46c1e89d29154a Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 3 Dec 2020 22:35:11 +0100 Subject: vfio-mdev: Wire in a request handler for mdev parent While performing some destructive tests with vfio-ccw, where the paths to a device are forcible removed and thus the device itself is unreachable, it is rather easy to end up in an endless loop in vfio_del_group_dev() due to the lack of a request callback for the associated device. In this example, one MDEV (77c) is used by a guest, while another (77b) is not. The symptom is that the iommu is detached from the mdev for 77b, but not 77c, until that guest is shutdown: [ 238.794867] vfio_ccw 0.0.077b: MDEV: Unregistering [ 238.794996] vfio_mdev 11f2d2bc-4083-431d-a023-eff72715c4f0: Removing from iommu group 2 [ 238.795001] vfio_mdev 11f2d2bc-4083-431d-a023-eff72715c4f0: MDEV: detaching iommu [ 238.795036] vfio_ccw 0.0.077c: MDEV: Unregistering ...silence... Let's wire in the request call back to the mdev device, so that a device being physically removed from the host can be (gracefully?) handled by the parent device at the time the device is removed. Add a message when registering the device if a driver doesn't provide this callback, so a clue is given that this same loop may be encountered in a similar situation, and a message when this occurs instead of the awkward silence noted above. Signed-off-by: Eric Farman Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/mdev/mdev_core.c | 4 ++++ drivers/vfio/mdev/vfio_mdev.c | 13 +++++++++++++ include/linux/mdev.h | 4 ++++ 3 files changed, 21 insertions(+) diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index b558d4cfd082..6de97d25a3f8 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -154,6 +154,10 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) if (!dev) return -EINVAL; + /* Not mandatory, but its absence could be a problem */ + if (!ops->request) + dev_info(dev, "Driver cannot be asked to release device\n"); + mutex_lock(&parent_list_lock); /* Check for duplicate */ diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index 30964a4e0a28..b52eea128549 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -98,6 +98,18 @@ static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma) return parent->ops->mmap(mdev, vma); } +static void vfio_mdev_request(void *device_data, unsigned int count) +{ + struct mdev_device *mdev = device_data; + struct mdev_parent *parent = mdev->parent; + + if (parent->ops->request) + parent->ops->request(mdev, count); + else if (count == 0) + dev_notice(mdev_dev(mdev), + "No mdev vendor driver request callback support, blocked until released by user\n"); +} + static const struct vfio_device_ops vfio_mdev_dev_ops = { .name = "vfio-mdev", .open = vfio_mdev_open, @@ -106,6 +118,7 @@ static const struct vfio_device_ops vfio_mdev_dev_ops = { .read = vfio_mdev_read, .write = vfio_mdev_write, .mmap = vfio_mdev_mmap, + .request = vfio_mdev_request, }; static int vfio_mdev_probe(struct device *dev) diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 0ce30ca78db0..9004375c462e 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -72,6 +72,9 @@ struct device *mdev_get_iommu_device(struct device *dev); * @mmap: mmap callback * @mdev: mediated device structure * @vma: vma structure + * @request: request callback to release device + * @mdev: mediated device structure + * @count: request sequence number * Parent device that support mediated device should be registered with mdev * module with mdev_parent_ops structure. **/ @@ -92,6 +95,7 @@ struct mdev_parent_ops { long (*ioctl)(struct mdev_device *mdev, unsigned int cmd, unsigned long arg); int (*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma); + void (*request)(struct mdev_device *mdev, unsigned int count); }; /* interface for exporting mdev supported type attributes */ -- cgit v1.2.3 From bccce80bbd44ab50bbec761a51c6293c1ce47e34 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 3 Dec 2020 22:35:12 +0100 Subject: vfio-ccw: Wire in the request callback The device is being unplugged, so pass the request to userspace to ask for a graceful cleanup. This should free up the thread that would otherwise loop waiting for the device to be fully released. Signed-off-by: Eric Farman Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_ops.c | 26 ++++++++++++++++++++++++++ drivers/s390/cio/vfio_ccw_private.h | 4 ++++ include/uapi/linux/vfio.h | 1 + 3 files changed, 31 insertions(+) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 8b3ed5b45277..68106be4ba7a 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -394,6 +394,7 @@ static int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info) switch (info->index) { case VFIO_CCW_IO_IRQ_INDEX: case VFIO_CCW_CRW_IRQ_INDEX: + case VFIO_CCW_REQ_IRQ_INDEX: info->count = 1; info->flags = VFIO_IRQ_INFO_EVENTFD; break; @@ -424,6 +425,9 @@ static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev, case VFIO_CCW_CRW_IRQ_INDEX: ctx = &private->crw_trigger; break; + case VFIO_CCW_REQ_IRQ_INDEX: + ctx = &private->req_trigger; + break; default: return -EINVAL; } @@ -607,6 +611,27 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, } } +/* Request removal of the device*/ +static void vfio_ccw_mdev_request(struct mdev_device *mdev, unsigned int count) +{ + struct vfio_ccw_private *private = dev_get_drvdata(mdev_parent_dev(mdev)); + + if (!private) + return; + + if (private->req_trigger) { + if (!(count % 10)) + dev_notice_ratelimited(mdev_dev(private->mdev), + "Relaying device request to user (#%u)\n", + count); + + eventfd_signal(private->req_trigger, 1); + } else if (count == 0) { + dev_notice(mdev_dev(private->mdev), + "No device request channel registered, blocked until released by user\n"); + } +} + static const struct mdev_parent_ops vfio_ccw_mdev_ops = { .owner = THIS_MODULE, .supported_type_groups = mdev_type_groups, @@ -617,6 +642,7 @@ static const struct mdev_parent_ops vfio_ccw_mdev_ops = { .read = vfio_ccw_mdev_read, .write = vfio_ccw_mdev_write, .ioctl = vfio_ccw_mdev_ioctl, + .request = vfio_ccw_mdev_request, }; int vfio_ccw_mdev_reg(struct subchannel *sch) diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 8723156b29ea..b2c762eb42b9 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -84,7 +84,10 @@ struct vfio_ccw_crw { * @irb: irb info received from interrupt * @scsw: scsw info * @io_trigger: eventfd ctx for signaling userspace I/O results + * @crw_trigger: eventfd ctx for signaling userspace CRW information + * @req_trigger: eventfd ctx for signaling userspace to return device * @io_work: work for deferral process of I/O handling + * @crw_work: work for deferral process of CRW handling */ struct vfio_ccw_private { struct subchannel *sch; @@ -108,6 +111,7 @@ struct vfio_ccw_private { struct eventfd_ctx *io_trigger; struct eventfd_ctx *crw_trigger; + struct eventfd_ctx *req_trigger; struct work_struct io_work; struct work_struct crw_work; } __aligned(8); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 2f313a238a8f..d1812777139f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -820,6 +820,7 @@ enum { enum { VFIO_CCW_IO_IRQ_INDEX, VFIO_CCW_CRW_IRQ_INDEX, + VFIO_CCW_REQ_IRQ_INDEX, VFIO_CCW_NUM_IRQS }; -- cgit v1.2.3 From bdfae1c9a913930eae5ea506733aa7c285e12a06 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Dec 2020 09:44:44 +0800 Subject: vfio/type1: Add vfio_group_iommu_domain() Add the API for getting the domain from a vfio group. This could be used by the physical device drivers which rely on the vfio/mdev framework for mediated device user level access. The typical use case like below: unsigned int pasid; struct vfio_group *vfio_group; struct iommu_domain *iommu_domain; struct device *dev = mdev_dev(mdev); struct device *iommu_device = mdev_get_iommu_device(dev); if (!iommu_device || !iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) return -EINVAL; vfio_group = vfio_group_get_external_user_from_dev(dev); if (IS_ERR_OR_NULL(vfio_group)) return -EFAULT; iommu_domain = vfio_group_iommu_domain(vfio_group); if (IS_ERR_OR_NULL(iommu_domain)) { vfio_group_put_external_user(vfio_group); return -EFAULT; } pasid = iommu_aux_get_pasid(iommu_domain, iommu_device); if (pasid < 0) { vfio_group_put_external_user(vfio_group); return -EFAULT; } /* Program device context with pasid value. */ ... Signed-off-by: Lu Baolu Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 18 ++++++++++++++++++ drivers/vfio/vfio_iommu_type1.c | 24 ++++++++++++++++++++++++ include/linux/vfio.h | 4 ++++ 3 files changed, 46 insertions(+) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 2151bc7f87ab..4ad8a35667a7 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -2331,6 +2331,24 @@ int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type, } EXPORT_SYMBOL(vfio_unregister_notifier); +struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group) +{ + struct vfio_container *container; + struct vfio_iommu_driver *driver; + + if (!group) + return ERR_PTR(-EINVAL); + + container = group->container; + driver = container->iommu_driver; + if (likely(driver && driver->ops->group_iommu_domain)) + return driver->ops->group_iommu_domain(container->iommu_data, + group->iommu_group); + + return ERR_PTR(-ENOTTY); +} +EXPORT_SYMBOL_GPL(vfio_group_iommu_domain); + /** * Module/class support */ diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 67e827638995..0b4dedaa9128 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2980,6 +2980,29 @@ static int vfio_iommu_type1_dma_rw(void *iommu_data, dma_addr_t user_iova, return ret; } +static struct iommu_domain * +vfio_iommu_type1_group_iommu_domain(void *iommu_data, + struct iommu_group *iommu_group) +{ + struct iommu_domain *domain = ERR_PTR(-ENODEV); + struct vfio_iommu *iommu = iommu_data; + struct vfio_domain *d; + + if (!iommu || !iommu_group) + return ERR_PTR(-EINVAL); + + mutex_lock(&iommu->lock); + list_for_each_entry(d, &iommu->domain_list, next) { + if (find_iommu_group(d, iommu_group)) { + domain = d->domain; + break; + } + } + mutex_unlock(&iommu->lock); + + return domain; +} + static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { .name = "vfio-iommu-type1", .owner = THIS_MODULE, @@ -2993,6 +3016,7 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { .register_notifier = vfio_iommu_type1_register_notifier, .unregister_notifier = vfio_iommu_type1_unregister_notifier, .dma_rw = vfio_iommu_type1_dma_rw, + .group_iommu_domain = vfio_iommu_type1_group_iommu_domain, }; static int __init vfio_iommu_type1_init(void) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 38d3c6a8dc7e..f45940b38a02 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -90,6 +90,8 @@ struct vfio_iommu_driver_ops { struct notifier_block *nb); int (*dma_rw)(void *iommu_data, dma_addr_t user_iova, void *data, size_t count, bool write); + struct iommu_domain *(*group_iommu_domain)(void *iommu_data, + struct iommu_group *group); }; extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); @@ -126,6 +128,8 @@ extern int vfio_group_unpin_pages(struct vfio_group *group, extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, void *data, size_t len, bool write); +extern struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group); + /* each type has independent events */ enum vfio_notify_type { VFIO_IOMMU_NOTIFY = 0, -- cgit v1.2.3