From 7039d11b3e4af77cf5ac1f689ae395b2a183bd25 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 29 Apr 2020 15:36:42 +0200 Subject: iommu/vt-d: Wire up iommu_ops->def_domain_type The Intel VT-d driver already has a matching function to determine the default domain type for a device. Wire it up in intel_iommu_ops. Signed-off-by: Joerg Roedel Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20200429133712.31431-5-joro@8bytes.org Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ef0a5246700e..b9f905a55dda 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -6209,6 +6209,7 @@ const struct iommu_ops intel_iommu_ops = { .dev_enable_feat = intel_iommu_dev_enable_feat, .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, + .def_domain_type = device_def_domain_type, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; -- cgit v1.2.3 From e5d1841f18b2401c8b449c024817cd243e363934 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 29 Apr 2020 15:36:54 +0200 Subject: iommu/vt-d: Convert to probe/release_device() call-backs Convert the Intel IOMMU driver to use the probe_device() and release_device() call-backs of iommu_ops, so that the iommu core code does the group and sysfs setup. Signed-off-by: Joerg Roedel Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20200429133712.31431-17-joro@8bytes.org Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 67 ++++----------------------------------------- 1 file changed, 6 insertions(+), 61 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b9f905a55dda..b906727f5b85 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5781,78 +5781,27 @@ static bool intel_iommu_capable(enum iommu_cap cap) return false; } -static int intel_iommu_add_device(struct device *dev) +static struct iommu_device *intel_iommu_probe_device(struct device *dev) { - struct dmar_domain *dmar_domain; - struct iommu_domain *domain; struct intel_iommu *iommu; - struct iommu_group *group; u8 bus, devfn; - int ret; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) - return -ENODEV; - - iommu_device_link(&iommu->iommu, dev); + return ERR_PTR(-ENODEV); if (translation_pre_enabled(iommu)) dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO; - group = iommu_group_get_for_dev(dev); - - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto unlink; - } - - iommu_group_put(group); - - domain = iommu_get_domain_for_dev(dev); - dmar_domain = to_dmar_domain(domain); - if (domain->type == IOMMU_DOMAIN_DMA) { - if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { - ret = iommu_request_dm_for_dev(dev); - if (ret) { - dmar_remove_one_dev_info(dev); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - domain_add_dev_info(si_domain, dev); - dev_info(dev, - "Device uses a private identity domain.\n"); - } - } - } else { - if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) { - ret = iommu_request_dma_domain_for_dev(dev); - if (ret) { - dmar_remove_one_dev_info(dev); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - if (!get_private_domain_for_dev(dev)) { - dev_warn(dev, - "Failed to get a private domain.\n"); - ret = -ENOMEM; - goto unlink; - } - - dev_info(dev, - "Device uses a private dma domain.\n"); - } - } - } - if (device_needs_bounce(dev)) { dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n"); set_dma_ops(dev, &bounce_dma_ops); } - return 0; - -unlink: - iommu_device_unlink(&iommu->iommu, dev); - return ret; + return &iommu->iommu; } -static void intel_iommu_remove_device(struct device *dev) +static void intel_iommu_release_device(struct device *dev) { struct intel_iommu *iommu; u8 bus, devfn; @@ -5863,10 +5812,6 @@ static void intel_iommu_remove_device(struct device *dev) dmar_remove_one_dev_info(dev); - iommu_group_remove_device(dev); - - iommu_device_unlink(&iommu->iommu, dev); - if (device_needs_bounce(dev)) set_dma_ops(dev, NULL); } @@ -6198,8 +6143,8 @@ const struct iommu_ops intel_iommu_ops = { .map = intel_iommu_map, .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, - .add_device = intel_iommu_add_device, - .remove_device = intel_iommu_remove_device, + .probe_device = intel_iommu_probe_device, + .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .apply_resv_region = intel_iommu_apply_resv_region, -- cgit v1.2.3 From 327d5b2fee91c404a3956c324193892cf2cc9528 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 6 May 2020 09:59:45 +0800 Subject: iommu/vt-d: Allow 32bit devices to uses DMA domain Currently, if a 32bit device initially uses an identity domain, Intel IOMMU driver will convert it forcibly to a DMA one if its address capability is not enough for the whole system memory. The motivation was to overcome the overhead caused by possible bounced buffer. Unfortunately, this improvement has led to many problems. For example, some 32bit devices are required to use an identity domain, forcing them to use DMA domain will cause the device not to work anymore. On the other hand, the VMD sub-devices share a domain but each sub-device might have different address capability. Forcing a VMD sub-device to use DMA domain blindly will impact the operation of other sub-devices without any notification. Further more, PCI aliased devices (PCI bridge and all devices beneath it, VMD devices and various devices quirked with pci_add_dma_alias()) must use the same domain. Forcing one device to switch to DMA domain during runtime will cause in-fligh DMAs for other devices to abort or target to other memory which might cause undefind system behavior. With the last private domain usage in iommu_need_mapping() removed, all private domain helpers are also cleaned in this patch. Otherwise, the compiler will complain that some functions are defined but not used. Signed-off-by: Lu Baolu Tested-by: Daniel Drake Reviewed-by: Jon Derrick Reviewed-by: Jerry Snitselaar Cc: Daniel Drake Cc: Derrick Jonathan Cc: Jerry Snitselaar Link: https://lore.kernel.org/r/20200506015947.28662-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 291 +------------------------------------------- 1 file changed, 1 insertion(+), 290 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 34e08fa2ce3a..16ba7add0f72 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -355,11 +355,6 @@ static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); -static void domain_context_clear(struct intel_iommu *iommu, - struct device *dev); -static int domain_detach_iommu(struct dmar_domain *domain, - struct intel_iommu *iommu); -static bool device_is_rmrr_locked(struct device *dev); static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev); static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, @@ -1930,65 +1925,6 @@ static inline int guestwidth_to_adjustwidth(int gaw) return agaw; } -static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, - int guest_width) -{ - int adjust_width, agaw; - unsigned long sagaw; - int ret; - - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - - if (!intel_iommu_strict) { - ret = init_iova_flush_queue(&domain->iovad, - iommu_flush_iova, iova_entry_free); - if (ret) - pr_info("iova flush queue initialization failed\n"); - } - - domain_reserve_special_ranges(domain); - - /* calculate AGAW */ - if (guest_width > cap_mgaw(iommu->cap)) - guest_width = cap_mgaw(iommu->cap); - domain->gaw = guest_width; - adjust_width = guestwidth_to_adjustwidth(guest_width); - agaw = width_to_agaw(adjust_width); - sagaw = cap_sagaw(iommu->cap); - if (!test_bit(agaw, &sagaw)) { - /* hardware doesn't support it, choose a bigger one */ - pr_debug("Hardware doesn't support agaw %d\n", agaw); - agaw = find_next_bit(&sagaw, 5, agaw); - if (agaw >= 5) - return -ENODEV; - } - domain->agaw = agaw; - - if (ecap_coherent(iommu->ecap)) - domain->iommu_coherency = 1; - else - domain->iommu_coherency = 0; - - if (ecap_sc_support(iommu->ecap)) - domain->iommu_snooping = 1; - else - domain->iommu_snooping = 0; - - if (intel_iommu_superpage) - domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); - else - domain->iommu_superpage = 0; - - domain->nid = iommu->node; - - /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); - if (!domain->pgd) - return -ENOMEM; - __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); - return 0; -} - static void domain_exit(struct dmar_domain *domain) { @@ -2704,94 +2640,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, return domain; } -static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) -{ - *(u16 *)opaque = alias; - return 0; -} - -static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) -{ - struct device_domain_info *info; - struct dmar_domain *domain = NULL; - struct intel_iommu *iommu; - u16 dma_alias; - unsigned long flags; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return NULL; - - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); - - spin_lock_irqsave(&device_domain_lock, flags); - info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus), - PCI_BUS_NUM(dma_alias), - dma_alias & 0xff); - if (info) { - iommu = info->iommu; - domain = info->domain; - } - spin_unlock_irqrestore(&device_domain_lock, flags); - - /* DMA alias already has a domain, use it */ - if (info) - goto out; - } - - /* Allocate and initialize new domain for the device */ - domain = alloc_domain(0); - if (!domain) - return NULL; - if (domain_init(domain, iommu, gaw)) { - domain_exit(domain); - return NULL; - } - -out: - return domain; -} - -static struct dmar_domain *set_domain_for_dev(struct device *dev, - struct dmar_domain *domain) -{ - struct intel_iommu *iommu; - struct dmar_domain *tmp; - u16 req_id, dma_alias; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return NULL; - - req_id = ((u16)bus << 8) | devfn; - - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); - - /* register PCI DMA alias device */ - if (req_id != dma_alias) { - tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias), - dma_alias & 0xff, NULL, domain); - - if (!tmp || tmp != domain) - return tmp; - } - } - - tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain); - if (!tmp || tmp != domain) - return tmp; - - return domain; -} - static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long long start, unsigned long long end) @@ -2817,45 +2665,6 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, DMA_PTE_READ|DMA_PTE_WRITE); } -static int domain_prepare_identity_map(struct device *dev, - struct dmar_domain *domain, - unsigned long long start, - unsigned long long end) -{ - /* For _hardware_ passthrough, don't bother. But for software - passthrough, we do it anyway -- it may indicate a memory - range which is reserved in E820, so which didn't get set - up to start with in si_domain */ - if (domain == si_domain && hw_pass_through) { - dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n", - start, end); - return 0; - } - - dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end); - - if (end < start) { - WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); - return -EIO; - } - - if (end >> agaw_to_width(domain->agaw)) { - WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - agaw_to_width(domain->agaw), - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); - return -EIO; - } - - return iommu_domain_identity_map(domain, start, end); -} - static int md_domain_init(struct dmar_domain *domain, int guest_width); static int __init si_domain_init(int hw) @@ -3531,98 +3340,16 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -static struct dmar_domain *get_private_domain_for_dev(struct device *dev) -{ - struct dmar_domain *domain, *tmp; - struct dmar_rmrr_unit *rmrr; - struct device *i_dev; - int i, ret; - - /* Device shouldn't be attached by any domains. */ - domain = find_domain(dev); - if (domain) - return NULL; - - domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - goto out; - - /* We have a new domain - setup possible RMRRs for the device */ - rcu_read_lock(); - for_each_rmrr_units(rmrr) { - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, i_dev) { - if (i_dev != dev) - continue; - - ret = domain_prepare_identity_map(dev, domain, - rmrr->base_address, - rmrr->end_address); - if (ret) - dev_err(dev, "Mapping reserved region failed\n"); - } - } - rcu_read_unlock(); - - tmp = set_domain_for_dev(dev, domain); - if (!tmp || domain != tmp) { - domain_exit(domain); - domain = tmp; - } - -out: - if (!domain) - dev_err(dev, "Allocating domain failed\n"); - else - domain->domain.type = IOMMU_DOMAIN_DMA; - - return domain; -} - /* Check if the dev needs to go through non-identity map and unmap process.*/ static bool iommu_need_mapping(struct device *dev) { - int ret; - if (iommu_dummy(dev)) return false; if (unlikely(attach_deferred(dev))) do_deferred_attach(dev); - ret = identity_mapping(dev); - if (ret) { - u64 dma_mask = *dev->dma_mask; - - if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask) - dma_mask = dev->coherent_dma_mask; - - if (dma_mask >= dma_direct_get_required_mask(dev)) - return false; - - /* - * 32 bit DMA is removed from si_domain and fall back to - * non-identity mapping. - */ - dmar_remove_one_dev_info(dev); - ret = iommu_request_dma_domain_for_dev(dev); - if (ret) { - struct iommu_domain *domain; - struct dmar_domain *dmar_domain; - - domain = iommu_get_domain_for_dev(dev); - if (domain) { - dmar_domain = to_dmar_domain(domain); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - } - dmar_remove_one_dev_info(dev); - get_private_domain_for_dev(dev); - } - - dev_info(dev, "32bit DMA uses non-identity mapping\n"); - } - - return true; + return !identity_mapping(dev); } static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, @@ -5186,16 +4913,6 @@ int __init intel_iommu_init(void) } up_write(&dmar_global_lock); -#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB) - /* - * If the system has no untrusted device or the user has decided - * to disable the bounce page mechanisms, we don't need swiotlb. - * Mark this and the pre-allocated bounce pages will be released - * later. - */ - if (!has_untrusted_dev() || intel_no_bounce) - swiotlb = 0; -#endif dma_ops = &intel_dma_ops; init_iommu_pm_ops(); @@ -5296,12 +5013,6 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) domain_detach_iommu(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); - /* free the private domain */ - if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && - list_empty(&domain->devices)) - domain_exit(info->domain); - free_devinfo_mem(info); } -- cgit v1.2.3 From 14b3526d5909f01e1d1baa05f50952788bb7418e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 6 May 2020 09:59:46 +0800 Subject: iommu/vt-d: Allow PCI sub-hierarchy to use DMA domain Before commit fa954e6831789 ("iommu/vt-d: Delegate the dma domain to upper layer"), Intel IOMMU started off with all devices in the identity domain, and took them out later if it found they couldn't access all of memory. This required devices behind a PCI bridge to use a DMA domain at the beginning because all PCI devices behind the bridge use the same source-id in their transactions and the domain couldn't be changed at run-time. Intel IOMMU driver is now aligned with the default domain framework, there's no need to keep this requirement anymore. Signed-off-by: Lu Baolu Tested-by: Daniel Drake Reviewed-by: Jon Derrick Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20200506015947.28662-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 16ba7add0f72..af309e8fa6f5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2857,31 +2857,6 @@ static int device_def_domain_type(struct device *dev) if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) return IOMMU_DOMAIN_IDENTITY; - - /* - * We want to start off with all devices in the 1:1 domain, and - * take them out later if we find they can't access all of memory. - * - * However, we can't do this for PCI devices behind bridges, - * because all PCI devices behind the same bridge will end up - * with the same source-id on their transactions. - * - * Practically speaking, we can't change things around for these - * devices at run-time, because we can't be sure there'll be no - * DMA transactions in flight for any of their siblings. - * - * So PCI devices (unless they're on the root bus) as well as - * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of - * the 1:1 domain, just in _case_ one of their siblings turns out - * not to be able to map all of memory. - */ - if (!pci_is_pcie(pdev)) { - if (!pci_is_root_bus(pdev->bus)) - return IOMMU_DOMAIN_DMA; - if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) - return IOMMU_DOMAIN_DMA; - } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) - return IOMMU_DOMAIN_DMA; } return 0; -- cgit v1.2.3 From 6fc7020cf298aaec343df423746b44d99c6efaa5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 6 May 2020 09:59:47 +0800 Subject: iommu/vt-d: Apply per-device dma_ops Current Intel IOMMU driver sets the system level dma_ops. This causes each dma API to go through the IOMMU driver even the devices are using identity mapped domains. This sets per-device dma_ops only if a device is using a DMA domain. Otherwise, use the default system level dma_ops for direct dma. Signed-off-by: Lu Baolu Tested-by: Daniel Drake Reviewed-by: Jon Derrick Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20200506015947.28662-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 82 ++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 56 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index af309e8fa6f5..29d3940847d3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2720,17 +2720,6 @@ static int __init si_domain_init(int hw) return 0; } -static int identity_mapping(struct device *dev) -{ - struct device_domain_info *info; - - info = dev->archdata.iommu; - if (info) - return (info->domain == si_domain); - - return 0; -} - static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) { struct dmar_domain *ndomain; @@ -3315,18 +3304,6 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -/* Check if the dev needs to go through non-identity map and unmap process.*/ -static bool iommu_need_mapping(struct device *dev) -{ - if (iommu_dummy(dev)) - return false; - - if (unlikely(attach_deferred(dev))) - do_deferred_attach(dev); - - return !identity_mapping(dev); -} - static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir, u64 dma_mask) { @@ -3340,6 +3317,9 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); + domain = find_domain(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3391,20 +3371,15 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - return __intel_map_single(dev, page_to_phys(page) + offset, - size, dir, *dev->dma_mask); - return dma_direct_map_page(dev, page, offset, size, dir, attrs); + return __intel_map_single(dev, page_to_phys(page) + offset, + size, dir, *dev->dma_mask); } static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - return __intel_map_single(dev, phys_addr, size, dir, - *dev->dma_mask); - return dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3455,17 +3430,13 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - intel_unmap(dev, dev_addr, size); - else - dma_direct_unmap_page(dev, dev_addr, size, dir, attrs); + intel_unmap(dev, dev_addr, size); } static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - intel_unmap(dev, dev_addr, size); + intel_unmap(dev, dev_addr, size); } static void *intel_alloc_coherent(struct device *dev, size_t size, @@ -3475,8 +3446,8 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, struct page *page = NULL; int order; - if (!iommu_need_mapping(dev)) - return dma_direct_alloc(dev, size, dma_handle, flags, attrs); + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); size = PAGE_ALIGN(size); order = get_order(size); @@ -3511,9 +3482,6 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, int order; struct page *page = virt_to_page(vaddr); - if (!iommu_need_mapping(dev)) - return dma_direct_free(dev, size, vaddr, dma_handle, attrs); - size = PAGE_ALIGN(size); order = get_order(size); @@ -3531,9 +3499,6 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *sg; int i; - if (!iommu_need_mapping(dev)) - return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs); - for_each_sg(sglist, sg, nelems, i) { nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg)); } @@ -3557,8 +3522,9 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); - if (!iommu_need_mapping(dev)) - return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); + + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); domain = find_domain(dev); if (!domain) @@ -3605,8 +3571,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele static u64 intel_get_required_mask(struct device *dev) { - if (!iommu_need_mapping(dev)) - return dma_direct_get_required_mask(dev); return DMA_BIT_MASK(32); } @@ -4888,8 +4852,6 @@ int __init intel_iommu_init(void) } up_write(&dmar_global_lock); - dma_ops = &intel_dma_ops; - init_iommu_pm_ops(); down_read(&dmar_global_lock); @@ -5479,11 +5441,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) if (translation_pre_enabled(iommu)) dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO; - if (device_needs_bounce(dev)) { - dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n"); - set_dma_ops(dev, &bounce_dma_ops); - } - return &iommu->iommu; } @@ -5498,7 +5455,19 @@ static void intel_iommu_release_device(struct device *dev) dmar_remove_one_dev_info(dev); + set_dma_ops(dev, NULL); +} + +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(dev); if (device_needs_bounce(dev)) + set_dma_ops(dev, &bounce_dma_ops); + else if (domain && domain->type == IOMMU_DOMAIN_DMA) + set_dma_ops(dev, &intel_dma_ops); + else set_dma_ops(dev, NULL); } @@ -5830,6 +5799,7 @@ const struct iommu_ops intel_iommu_ops = { .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, -- cgit v1.2.3 From 8627892af6cb80f80acab066e26851d7b5fb3ca6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 7 May 2020 19:18:02 +0300 Subject: iommu/vt-d: Unify format of the printed messages Unify format of the printed messages, i.e. replace printk(LEVEL ... ) with pr_level(...). Signed-off-by: Andy Shevchenko Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20200507161804.13275-1-andriy.shevchenko@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 29d3940847d3..2ff8d69ce4f8 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -475,8 +475,7 @@ static int __init intel_iommu_setup(char *str) pr_info("Intel-IOMMU: scalable mode supported\n"); intel_iommu_sm = 1; } else if (!strncmp(str, "tboot_noforce", 13)) { - printk(KERN_INFO - "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); + pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); intel_iommu_tboot_noforce = 1; } else if (!strncmp(str, "nobounce", 8)) { pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n"); -- cgit v1.2.3 From 3db9983e4327f773c490de2a8c66d6000561d88a Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Sat, 16 May 2020 14:20:44 +0800 Subject: iommu/vt-d: Move domain helper to header Move domain helper to header to be used by SVA code. Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Reviewed-by: Eric Auger Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20200516062101.29541-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 6 ------ include/linux/intel-iommu.h | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2ff8d69ce4f8..8027f21073eb 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -441,12 +441,6 @@ static void init_translation_status(struct intel_iommu *iommu) iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED; } -/* Convert generic 'struct iommu_domain to private struct dmar_domain */ -static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct dmar_domain, domain); -} - static int __init intel_iommu_setup(char *str) { if (!str) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 980234ae0312..ed7171d2ae1f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -595,6 +595,12 @@ static inline void __iommu_flush_cache( clflush_cache_range(addr, size); } +/* Convert generic struct iommu_domain to private struct dmar_domain */ +static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct dmar_domain, domain); +} + /* * 0: readable * 1: writable -- cgit v1.2.3 From b0d1f8741b812352fe0e5f3b2381427085f23e19 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Sat, 16 May 2020 14:20:46 +0800 Subject: iommu/vt-d: Add nested translation helper function Nested translation mode is supported in VT-d 3.0 Spec.CH 3.8. With PASID granular translation type set to 0x11b, translation result from the first level(FL) also subject to a second level(SL) page table translation. This mode is used for SVA virtualization, where FL performs guest virtual to guest physical translation and SL performs guest physical to host physical translation. This patch adds a helper function for setting up nested translation where second level comes from a domain and first level comes from a guest PGD. Signed-off-by: Jacob Pan Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20200516062101.29541-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 25 ------- drivers/iommu/intel-pasid.c | 174 +++++++++++++++++++++++++++++++++++++++++++- drivers/iommu/intel-pasid.h | 10 +++ include/linux/intel-iommu.h | 20 +++++ include/uapi/linux/iommu.h | 5 ++ 5 files changed, 206 insertions(+), 28 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 8027f21073eb..7e85c09eec71 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -296,31 +296,6 @@ static inline void context_clear_entry(struct context_entry *context) static struct dmar_domain *si_domain; static int hw_pass_through = 1; -/* si_domain contains mulitple devices */ -#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0) - -/* - * This is a DMA domain allocated through the iommu domain allocation - * interface. But one or more devices belonging to this domain have - * been chosen to use a private domain. We should avoid to use the - * map/unmap/iova_to_phys APIs on it. - */ -#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1) - -/* - * When VT-d works in the scalable mode, it allows DMA translation to - * happen through either first level or second level page table. This - * bit marks that the DMA translation for the domain goes through the - * first level page table, otherwise, it goes through the second level. - */ -#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2) - -/* - * Domain represents a virtual machine which demands iommu nested - * translation mode support. - */ -#define DOMAIN_FLAG_NESTING_MODE BIT(3) - #define for_each_domain_iommu(idx, domain) \ for (idx = 0; idx < g_num_of_iommus; idx++) \ if (domain->iommu_refcnt[idx]) diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index d9cea3011b58..c7fa1b79eaf7 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -359,6 +359,16 @@ pasid_set_flpm(struct pasid_entry *pe, u64 value) pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); } +/* + * Setup the Extended Access Flag Enable (EAFE) field (Bit 135) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_eafe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7); +} + static void pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, int pasid) @@ -492,7 +502,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); /* Setup Present and PASID Granular Transfer Type: */ - pasid_set_translation_type(pte, 1); + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY); pasid_set_present(pte); pasid_flush_caches(iommu, pte, pasid, did); @@ -561,7 +571,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, pasid_set_domain_id(pte, did); pasid_set_slptr(pte, pgd_val); pasid_set_address_width(pte, agaw); - pasid_set_translation_type(pte, 2); + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY); pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); @@ -595,7 +605,7 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, pasid_clear_entry(pte); pasid_set_domain_id(pte, did); pasid_set_address_width(pte, iommu->agaw); - pasid_set_translation_type(pte, 4); + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT); pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); @@ -609,3 +619,161 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, return 0; } + +static int +intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte, + struct iommu_gpasid_bind_data_vtd *pasid_data) +{ + /* + * Not all guest PASID table entry fields are passed down during bind, + * here we only set up the ones that are dependent on guest settings. + * Execution related bits such as NXE, SMEP are not supported. + * Other fields, such as snoop related, are set based on host needs + * regardless of guest settings. + */ + if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) { + if (!ecap_srs(iommu->ecap)) { + pr_err_ratelimited("No supervisor request support on %s\n", + iommu->name); + return -EINVAL; + } + pasid_set_sre(pte); + } + + if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) { + if (!ecap_eafs(iommu->ecap)) { + pr_err_ratelimited("No extended access flag support on %s\n", + iommu->name); + return -EINVAL; + } + pasid_set_eafe(pte); + } + + /* + * Memory type is only applicable to devices inside processor coherent + * domain. Will add MTS support once coherent devices are available. + */ + if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) { + pr_warn_ratelimited("No memory type support %s\n", + iommu->name); + return -EINVAL; + } + + return 0; +} + +/** + * intel_pasid_setup_nested() - Set up PASID entry for nested translation. + * This could be used for guest shared virtual address. In this case, the + * first level page tables are used for GVA-GPA translation in the guest, + * second level page tables are used for GPA-HPA translation. + * + * @iommu: IOMMU which the device belong to + * @dev: Device to be set up for translation + * @gpgd: FLPTPTR: First Level Page translation pointer in GPA + * @pasid: PASID to be programmed in the device PASID table + * @pasid_data: Additional PASID info from the guest bind request + * @domain: Domain info for setting up second level page tables + * @addr_width: Address width of the first level (guest) + */ +int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, + pgd_t *gpgd, int pasid, + struct iommu_gpasid_bind_data_vtd *pasid_data, + struct dmar_domain *domain, int addr_width) +{ + struct pasid_entry *pte; + struct dma_pte *pgd; + int ret = 0; + u64 pgd_val; + int agaw; + u16 did; + + if (!ecap_nest(iommu->ecap)) { + pr_err_ratelimited("IOMMU: %s: No nested translation support\n", + iommu->name); + return -EINVAL; + } + + if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) { + pr_err_ratelimited("Domain is not in nesting mode, %x\n", + domain->flags); + return -EINVAL; + } + + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte)) + return -EINVAL; + + /* + * Caller must ensure PASID entry is not in use, i.e. not bind the + * same PASID to the same device twice. + */ + if (pasid_pte_is_present(pte)) + return -EBUSY; + + pasid_clear_entry(pte); + + /* Sanity checking performed by caller to make sure address + * width matching in two dimensions: + * 1. CPU vs. IOMMU + * 2. Guest vs. Host. + */ + switch (addr_width) { +#ifdef CONFIG_X86 + case ADDR_WIDTH_5LEVEL: + if (!cpu_feature_enabled(X86_FEATURE_LA57) || + !cap_5lp_support(iommu->cap)) { + dev_err_ratelimited(dev, + "5-level paging not supported\n"); + return -EINVAL; + } + + pasid_set_flpm(pte, 1); + break; +#endif + case ADDR_WIDTH_4LEVEL: + pasid_set_flpm(pte, 0); + break; + default: + dev_err_ratelimited(dev, "Invalid guest address width %d\n", + addr_width); + return -EINVAL; + } + + /* First level PGD is in GPA, must be supported by the second level */ + if ((unsigned long long)gpgd > domain->max_addr) { + dev_err_ratelimited(dev, + "Guest PGD %llx not supported, max %llx\n", + (unsigned long long)gpgd, domain->max_addr); + return -EINVAL; + } + pasid_set_flptr(pte, (u64)gpgd); + + ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data); + if (ret) + return ret; + + /* Setup the second level based on the given domain */ + pgd = domain->pgd; + + agaw = iommu_skip_agaw(domain, iommu, &pgd); + if (agaw < 0) { + dev_err_ratelimited(dev, "Invalid domain page table\n"); + return -EINVAL; + } + pgd_val = virt_to_phys(pgd); + pasid_set_slptr(pte, pgd_val); + pasid_set_fault_enable(pte); + + did = domain->iommu_did[iommu->seq_id]; + pasid_set_domain_id(pte, did); + + pasid_set_address_width(pte, agaw); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED); + pasid_set_present(pte); + pasid_flush_caches(iommu, pte, pasid, did); + + return ret; +} diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 92de6df24ccb..ccd50c2ae75c 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -36,6 +36,7 @@ * to vmalloc or even module mappings. */ #define PASID_FLAG_SUPERVISOR_MODE BIT(0) +#define PASID_FLAG_NESTED BIT(1) /* * The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first- @@ -51,6 +52,11 @@ struct pasid_entry { u64 val[8]; }; +#define PASID_ENTRY_PGTT_FL_ONLY (1) +#define PASID_ENTRY_PGTT_SL_ONLY (2) +#define PASID_ENTRY_PGTT_NESTED (3) +#define PASID_ENTRY_PGTT_PT (4) + /* The representative of a PASID table */ struct pasid_table { void *table; /* pasid table pointer */ @@ -99,6 +105,10 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, int intel_pasid_setup_pass_through(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, int pasid); +int intel_pasid_setup_nested(struct intel_iommu *iommu, + struct device *dev, pgd_t *pgd, int pasid, + struct iommu_gpasid_bind_data_vtd *pasid_data, + struct dmar_domain *domain, int addr_width); void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, int pasid); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index ed7171d2ae1f..e0d1fed7cbe4 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -42,6 +42,9 @@ #define DMA_FL_PTE_PRESENT BIT_ULL(0) #define DMA_FL_PTE_XD BIT_ULL(63) +#define ADDR_WIDTH_5LEVEL (57) +#define ADDR_WIDTH_4LEVEL (48) + #define CONTEXT_TT_MULTI_LEVEL 0 #define CONTEXT_TT_DEV_IOTLB 1 #define CONTEXT_TT_PASS_THROUGH 2 @@ -480,6 +483,23 @@ struct context_entry { u64 hi; }; +/* si_domain contains mulitple devices */ +#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0) + +/* + * When VT-d works in the scalable mode, it allows DMA translation to + * happen through either first level or second level page table. This + * bit marks that the DMA translation for the domain goes through the + * first level page table, otherwise, it goes through the second level. + */ +#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) + +/* + * Domain represents a virtual machine which demands iommu nested + * translation mode support. + */ +#define DOMAIN_FLAG_NESTING_MODE BIT(2) + struct dmar_domain { int nid; /* node id */ diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 4ad3496e5c43..e907b7091a46 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -285,6 +285,11 @@ struct iommu_gpasid_bind_data_vtd { __u32 emt; }; +#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \ + IOMMU_SVA_VTD_GPASID_EMTE | \ + IOMMU_SVA_VTD_GPASID_PCD | \ + IOMMU_SVA_VTD_GPASID_PWT) + /** * struct iommu_gpasid_bind_data - Information about device and guest PASID binding * @version: Version of this data structure -- cgit v1.2.3 From 56722a4398a306585ca3ed39ff54fc907af98618 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Sat, 16 May 2020 14:20:47 +0800 Subject: iommu/vt-d: Add bind guest PASID support When supporting guest SVA with emulated IOMMU, the guest PASID table is shadowed in VMM. Updates to guest vIOMMU PASID table will result in PASID cache flush which will be passed down to the host as bind guest PASID calls. For the SL page tables, it will be harvested from device's default domain (request w/o PASID), or aux domain in case of mediated device. .-------------. .---------------------------. | vIOMMU | | Guest process CR3, FL only| | | '---------------------------' .----------------/ | PASID Entry |--- PASID cache flush - '-------------' | | | V | | CR3 in GPA '-------------' Guest ------| Shadow |--------------------------|-------- v v v Host .-------------. .----------------------. | pIOMMU | | Bind FL for GVA-GPA | | | '----------------------' .----------------/ | | PASID Entry | V (Nested xlate) '----------------\.------------------------------. | | |SL for GPA-HPA, default domain| | | '------------------------------' '-------------' Where: - FL = First level/stage one page tables - SL = Second level/stage two page tables Signed-off-by: Jacob Pan Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20200516062101.29541-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 + drivers/iommu/intel-svm.c | 200 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 6 +- include/linux/intel-svm.h | 12 +++ 4 files changed, 221 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7e85c09eec71..f42c548f8421 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5780,6 +5780,10 @@ const struct iommu_ops intel_iommu_ops = { .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, +#ifdef CONFIG_INTEL_IOMMU_SVM + .sva_bind_gpasid = intel_svm_bind_gpasid, + .sva_unbind_gpasid = intel_svm_unbind_gpasid, +#endif }; static void quirk_iommu_igfx(struct pci_dev *dev) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 2998418f0a38..7d3405c5a198 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -226,6 +226,206 @@ static LIST_HEAD(global_svm_list); list_for_each_entry((sdev), &(svm)->devs, list) \ if ((d) != (sdev)->dev) {} else +int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, + struct iommu_gpasid_bind_data *data) +{ + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct dmar_domain *dmar_domain; + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = 0; + + if (WARN_ON(!iommu) || !data) + return -EINVAL; + + if (data->version != IOMMU_GPASID_BIND_VERSION_1 || + data->format != IOMMU_PASID_FORMAT_INTEL_VTD) + return -EINVAL; + + if (!dev_is_pci(dev)) + return -ENOTSUPP; + + /* VT-d supports devices with full 20 bit PASIDs only */ + if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) + return -EINVAL; + + /* + * We only check host PASID range, we have no knowledge to check + * guest PASID range. + */ + if (data->hpasid <= 0 || data->hpasid >= PASID_MAX) + return -EINVAL; + + dmar_domain = to_dmar_domain(domain); + + mutex_lock(&pasid_mutex); + svm = ioasid_find(NULL, data->hpasid, NULL); + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } + + if (svm) { + /* + * If we found svm for the PASID, there must be at + * least one device bond, otherwise svm should be freed. + */ + if (WARN_ON(list_empty(&svm->devs))) { + ret = -EINVAL; + goto out; + } + + for_each_svm_dev(sdev, svm, dev) { + /* + * For devices with aux domains, we should allow + * multiple bind calls with the same PASID and pdev. + */ + if (iommu_dev_feature_enabled(dev, + IOMMU_DEV_FEAT_AUX)) { + sdev->users++; + } else { + dev_warn_ratelimited(dev, + "Already bound with PASID %u\n", + svm->pasid); + ret = -EBUSY; + } + goto out; + } + } else { + /* We come here when PASID has never been bond to a device. */ + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) { + ret = -ENOMEM; + goto out; + } + /* REVISIT: upper layer/VFIO can track host process that bind + * the PASID. ioasid_set = mm might be sufficient for vfio to + * check pasid VMM ownership. We can drop the following line + * once VFIO and IOASID set check is in place. + */ + svm->mm = get_task_mm(current); + svm->pasid = data->hpasid; + if (data->flags & IOMMU_SVA_GPASID_VAL) { + svm->gpasid = data->gpasid; + svm->flags |= SVM_FLAG_GUEST_PASID; + } + ioasid_set_data(data->hpasid, svm); + INIT_LIST_HEAD_RCU(&svm->devs); + mmput(svm->mm); + } + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) { + ret = -ENOMEM; + goto out; + } + sdev->dev = dev; + + /* Only count users if device has aux domains */ + if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) + sdev->users = 1; + + /* Set up device context entry for PASID if not enabled already */ + ret = intel_iommu_enable_pasid(iommu, sdev->dev); + if (ret) { + dev_err_ratelimited(dev, "Failed to enable PASID capability\n"); + kfree(sdev); + goto out; + } + + /* + * PASID table is per device for better security. Therefore, for + * each bind of a new device even with an existing PASID, we need to + * call the nested mode setup function here. + */ + spin_lock(&iommu->lock); + ret = intel_pasid_setup_nested(iommu, dev, (pgd_t *)data->gpgd, + data->hpasid, &data->vtd, dmar_domain, + data->addr_width); + spin_unlock(&iommu->lock); + if (ret) { + dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n", + data->hpasid, ret); + /* + * PASID entry should be in cleared state if nested mode + * set up failed. So we only need to clear IOASID tracking + * data such that free call will succeed. + */ + kfree(sdev); + goto out; + } + + svm->flags |= SVM_FLAG_GUEST_MODE; + + init_rcu_head(&sdev->rcu); + list_add_rcu(&sdev->list, &svm->devs); + out: + if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { + ioasid_set_data(data->hpasid, NULL); + kfree(svm); + } + + mutex_unlock(&pasid_mutex); + return ret; +} + +int intel_svm_unbind_gpasid(struct device *dev, int pasid) +{ + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = -EINVAL; + + if (WARN_ON(!iommu)) + return -EINVAL; + + mutex_lock(&pasid_mutex); + svm = ioasid_find(NULL, pasid, NULL); + if (!svm) { + ret = -EINVAL; + goto out; + } + + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } + + for_each_svm_dev(sdev, svm, dev) { + ret = 0; + if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) + sdev->users--; + if (!sdev->users) { + list_del_rcu(&sdev->list); + intel_pasid_tear_down_entry(iommu, dev, svm->pasid); + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); + /* TODO: Drain in flight PRQ for the PASID since it + * may get reused soon, we don't want to + * confuse with its previous life. + * intel_svm_drain_prq(dev, pasid); + */ + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + /* + * We do not free the IOASID here in that + * IOMMU driver did not allocate it. + * Unlike native SVM, IOASID for guest use was + * allocated prior to the bind call. + * In any case, if the free call comes before + * the unbind, IOMMU driver will get notified + * and perform cleanup. + */ + ioasid_set_data(pasid, NULL); + kfree(svm); + } + } + break; + } +out: + mutex_unlock(&pasid_mutex); + return ret; +} + int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e0d1fed7cbe4..3dfd426dfb03 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -698,7 +698,9 @@ struct dmar_domain *find_domain(struct device *dev); extern void intel_svm_check(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); - +int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, + struct iommu_gpasid_bind_data *data); +int intel_svm_unbind_gpasid(struct device *dev, int pasid); struct svm_dev_ops; struct intel_svm_dev { @@ -715,9 +717,11 @@ struct intel_svm_dev { struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; + struct intel_iommu *iommu; int flags; int pasid; + int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; struct list_head list; }; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index d7c403d0dd27..1b47ca46373e 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -44,6 +44,18 @@ struct svm_dev_ops { * do such IOTLB flushes automatically. */ #define SVM_FLAG_SUPERVISOR_MODE (1<<1) +/* + * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest + * processes. Compared to the host bind, the primary differences are: + * 1. mm life cycle management + * 2. fault reporting + */ +#define SVM_FLAG_GUEST_MODE (1<<2) +/* + * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, + * which requires guest and host PASID translation at both directions. + */ +#define SVM_FLAG_GUEST_PASID (1<<3) #ifdef CONFIG_INTEL_IOMMU_SVM -- cgit v1.2.3 From 6ee1b77ba3ac0a79fc6f3273f3b27b13240a355e Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Sat, 16 May 2020 14:20:49 +0800 Subject: iommu/vt-d: Add svm/sva invalidate function When Shared Virtual Address (SVA) is enabled for a guest OS via vIOMMU, we need to provide invalidation support at IOMMU API and driver level. This patch adds Intel VT-d specific function to implement iommu passdown invalidate API for shared virtual address. The use case is for supporting caching structure invalidation of assigned SVM capable devices. Emulated IOMMU exposes queue invalidation capability and passes down all descriptors from the guest to the physical IOMMU. The assumption is that guest to host device ID mapping should be resolved prior to calling IOMMU driver. Based on the device handle, host IOMMU driver can replace certain fields before submit to the invalidation queue. Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20200516062101.29541-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 171 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f42c548f8421..627bb5093317 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5242,6 +5242,176 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain, aux_domain_remove_dev(to_dmar_domain(domain), dev); } +/* + * 2D array for converting and sanitizing IOMMU generic TLB granularity to + * VT-d granularity. Invalidation is typically included in the unmap operation + * as a result of DMA or VFIO unmap. However, for assigned devices guest + * owns the first level page tables. Invalidations of translation caches in the + * guest are trapped and passed down to the host. + * + * vIOMMU in the guest will only expose first level page tables, therefore + * we do not support IOTLB granularity for request without PASID (second level). + * + * For example, to find the VT-d granularity encoding for IOTLB + * type and page selective granularity within PASID: + * X: indexed by iommu cache type + * Y: indexed by enum iommu_inv_granularity + * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR] + */ + +const static int +inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = { + /* + * PASID based IOTLB invalidation: PASID selective (per PASID), + * page selective (address granularity) + */ + {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID}, + /* PASID based dev TLBs */ + {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL}, + /* PASID cache */ + {-EINVAL, -EINVAL, -EINVAL} +}; + +static inline int to_vtd_granularity(int type, int granu) +{ + return inv_type_granu_table[type][granu]; +} + +static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules) +{ + u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT; + + /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc. + * IOMMU cache invalidate API passes granu_size in bytes, and number of + * granu size in contiguous memory. + */ + return order_base_2(nr_pages); +} + +#ifdef CONFIG_INTEL_IOMMU_SVM +static int +intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, + struct iommu_cache_invalidate_info *inv_info) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags; + int cache_type; + u8 bus, devfn; + u16 did, sid; + int ret = 0; + u64 size = 0; + + if (!inv_info || !dmar_domain || + inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1) + return -EINVAL; + + if (!dev || !dev_is_pci(dev)) + return -ENODEV; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE)) + return -EINVAL; + + spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&iommu->lock); + info = dev->archdata.iommu; + if (!info) { + ret = -EINVAL; + goto out_unlock; + } + did = dmar_domain->iommu_did[iommu->seq_id]; + sid = PCI_DEVID(bus, devfn); + + /* Size is only valid in address selective invalidation */ + if (inv_info->granularity != IOMMU_INV_GRANU_PASID) + size = to_vtd_size(inv_info->addr_info.granule_size, + inv_info->addr_info.nb_granules); + + for_each_set_bit(cache_type, + (unsigned long *)&inv_info->cache, + IOMMU_CACHE_INV_TYPE_NR) { + int granu = 0; + u64 pasid = 0; + + granu = to_vtd_granularity(cache_type, inv_info->granularity); + if (granu == -EINVAL) { + pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n", + cache_type, inv_info->granularity); + break; + } + + /* + * PASID is stored in different locations based on the + * granularity. + */ + if (inv_info->granularity == IOMMU_INV_GRANU_PASID && + (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID)) + pasid = inv_info->pasid_info.pasid; + else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && + (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID)) + pasid = inv_info->addr_info.pasid; + + switch (BIT(cache_type)) { + case IOMMU_CACHE_INV_TYPE_IOTLB: + if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && + size && + (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) { + pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n", + inv_info->addr_info.addr, size); + ret = -ERANGE; + goto out_unlock; + } + + /* + * If granu is PASID-selective, address is ignored. + * We use npages = -1 to indicate that. + */ + qi_flush_piotlb(iommu, did, pasid, + mm_to_dma_pfn(inv_info->addr_info.addr), + (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size, + inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF); + + /* + * Always flush device IOTLB if ATS is enabled. vIOMMU + * in the guest may assume IOTLB flush is inclusive, + * which is more efficient. + */ + if (info->ats_enabled) + qi_flush_dev_iotlb_pasid(iommu, sid, + info->pfsid, pasid, + info->ats_qdep, + inv_info->addr_info.addr, + size, granu); + break; + case IOMMU_CACHE_INV_TYPE_DEV_IOTLB: + if (info->ats_enabled) + qi_flush_dev_iotlb_pasid(iommu, sid, + info->pfsid, pasid, + info->ats_qdep, + inv_info->addr_info.addr, + size, granu); + else + pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n"); + break; + default: + dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n", + cache_type); + ret = -EINVAL; + } + } +out_unlock: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + + return ret; +} +#endif + static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot, gfp_t gfp) @@ -5781,6 +5951,7 @@ const struct iommu_ops intel_iommu_ops = { .def_domain_type = device_def_domain_type, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, #ifdef CONFIG_INTEL_IOMMU_SVM + .cache_invalidate = intel_iommu_sva_invalidate, .sva_bind_gpasid = intel_svm_bind_gpasid, .sva_unbind_gpasid = intel_svm_unbind_gpasid, #endif -- cgit v1.2.3 From 3375303e82877552f3b2b42309e8233fe715fd9f Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Sat, 16 May 2020 14:20:51 +0800 Subject: iommu/vt-d: Add custom allocator for IOASID When VT-d driver runs in the guest, PASID allocation must be performed via virtual command interface. This patch registers a custom IOASID allocator which takes precedence over the default XArray based allocator. The resulting IOASID allocation will always come from the host. This ensures that PASID namespace is system- wide. Virtual command registers are used in the guest only, to prevent vmexit cost, we cache the capability and store it during initialization. Signed-off-by: Liu, Yi L Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20200516062101.29541-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 1 + drivers/iommu/intel-iommu.c | 85 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/intel-iommu.h | 7 ++++ 3 files changed, 93 insertions(+) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 34ee8f28555f..66af08ad10fb 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -963,6 +963,7 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) warn_invalid_dmar(phys_addr, " returns all ones"); goto unmap; } + iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG); /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 627bb5093317..80d0bd561bdd 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1726,6 +1726,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); } + if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap)) + ioasid_unregister_allocator(&iommu->pasid_allocator); + #endif } @@ -3038,6 +3041,85 @@ out_unmap: return ret; } +#ifdef CONFIG_INTEL_IOMMU_SVM +static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data) +{ + struct intel_iommu *iommu = data; + ioasid_t ioasid; + + if (!iommu) + return INVALID_IOASID; + /* + * VT-d virtual command interface always uses the full 20 bit + * PASID range. Host can partition guest PASID range based on + * policies but it is out of guest's control. + */ + if (min < PASID_MIN || max > intel_pasid_max_id) + return INVALID_IOASID; + + if (vcmd_alloc_pasid(iommu, &ioasid)) + return INVALID_IOASID; + + return ioasid; +} + +static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data) +{ + struct intel_iommu *iommu = data; + + if (!iommu) + return; + /* + * Sanity check the ioasid owner is done at upper layer, e.g. VFIO + * We can only free the PASID when all the devices are unbound. + */ + if (ioasid_find(NULL, ioasid, NULL)) { + pr_alert("Cannot free active IOASID %d\n", ioasid); + return; + } + vcmd_free_pasid(iommu, ioasid); +} + +static void register_pasid_allocator(struct intel_iommu *iommu) +{ + /* + * If we are running in the host, no need for custom allocator + * in that PASIDs are allocated from the host system-wide. + */ + if (!cap_caching_mode(iommu->cap)) + return; + + if (!sm_supported(iommu)) { + pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n"); + return; + } + + /* + * Register a custom PASID allocator if we are running in a guest, + * guest PASID must be obtained via virtual command interface. + * There can be multiple vIOMMUs in each guest but only one allocator + * is active. All vIOMMU allocators will eventually be calling the same + * host allocator. + */ + if (!ecap_vcs(iommu->ecap) || !vccap_pasid(iommu->vccap)) + return; + + pr_info("Register custom PASID allocator\n"); + iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc; + iommu->pasid_allocator.free = intel_vcmd_ioasid_free; + iommu->pasid_allocator.pdata = (void *)iommu; + if (ioasid_register_allocator(&iommu->pasid_allocator)) { + pr_warn("Custom PASID allocator failed, scalable mode disabled\n"); + /* + * Disable scalable mode on this IOMMU if there + * is no custom allocator. Mixing SM capable vIOMMU + * and non-SM vIOMMU are not supported. + */ + intel_iommu_sm = 0; + } +} +#endif + static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; @@ -3155,6 +3237,9 @@ static int __init init_dmars(void) */ for_each_active_iommu(iommu, drhd) { iommu_flush_write_buffer(iommu); +#ifdef CONFIG_INTEL_IOMMU_SVM + register_pasid_allocator(iommu); +#endif iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index addb310b4ded..e14124f74b3a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -195,6 +196,9 @@ #define ecap_max_handle_mask(e) ((e >> 20) & 0xf) #define ecap_sc_support(e) ((e >> 7) & 0x1) /* Snooping Control */ +/* Virtual command interface capability */ +#define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ + /* IOTLB_REG */ #define DMA_TLB_FLUSH_GRANU_OFFSET 60 #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) @@ -288,6 +292,7 @@ /* PRS_REG */ #define DMA_PRS_PPR ((u32)1) +#define DMA_VCS_PAS ((u64)1) #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ do { \ @@ -555,6 +560,7 @@ struct intel_iommu { u64 reg_size; /* size of hw register set */ u64 cap; u64 ecap; + u64 vccap; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ raw_spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ @@ -575,6 +581,7 @@ struct intel_iommu { #ifdef CONFIG_INTEL_IOMMU_SVM struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ + struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */ #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ -- cgit v1.2.3 From e85bb99b79ca5ad2681612a7bb22f94cc2c71866 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 16 May 2020 14:20:52 +0800 Subject: iommu/vt-d: Add get_domain_info() helper Add a get_domain_info() helper to retrieve the valid per-device iommu private data. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20200516062101.29541-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 40 +++++++++++++++++++++++++++------------- drivers/iommu/intel-pasid.c | 12 ++++++------ drivers/iommu/intel-svm.c | 2 +- include/linux/intel-iommu.h | 1 + 4 files changed, 35 insertions(+), 20 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 80d0bd561bdd..a13b723ca38d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -365,6 +365,21 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) +struct device_domain_info *get_domain_info(struct device *dev) +{ + struct device_domain_info *info; + + if (!dev) + return NULL; + + info = dev->archdata.iommu; + if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO || + info == DEFER_DEVICE_DOMAIN_INFO)) + return NULL; + + return info; +} + DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); @@ -2429,7 +2444,7 @@ struct dmar_domain *find_domain(struct device *dev) dev = &pci_real_dma_dev(to_pci_dev(dev))->dev; /* No lock here, assumes no domain exit in normal case */ - info = dev->archdata.iommu; + info = get_domain_info(dev); if (likely(info)) return info->domain; @@ -5012,9 +5027,8 @@ static void dmar_remove_one_dev_info(struct device *dev) unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; - if (info && info != DEFER_DEVICE_DOMAIN_INFO - && info != DUMMY_DEVICE_DOMAIN_INFO) + info = get_domain_info(dev); + if (info) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); } @@ -5104,7 +5118,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) static inline bool is_aux_domain(struct device *dev, struct iommu_domain *domain) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); return info && info->auxd_enabled && domain->type == IOMMU_DOMAIN_UNMANAGED; @@ -5113,7 +5127,7 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain) static void auxiliary_link_device(struct dmar_domain *domain, struct device *dev) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) @@ -5126,7 +5140,7 @@ static void auxiliary_link_device(struct dmar_domain *domain, static void auxiliary_unlink_device(struct dmar_domain *domain, struct device *dev) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) @@ -5214,7 +5228,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, return; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); iommu = info->iommu; auxiliary_unlink_device(domain, dev); @@ -5404,7 +5418,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, spin_lock_irqsave(&device_domain_lock, flags); spin_lock(&iommu->lock); - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info) { ret = -EINVAL; goto out_unlock; @@ -5768,7 +5782,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) spin_lock(&iommu->lock); ret = -EINVAL; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->pasid_supported) goto out; @@ -5864,7 +5878,7 @@ static int intel_iommu_enable_auxd(struct device *dev) return -ENODEV; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); info->auxd_enabled = 1; spin_unlock_irqrestore(&device_domain_lock, flags); @@ -5877,7 +5891,7 @@ static int intel_iommu_disable_auxd(struct device *dev) unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!WARN_ON(!info)) info->auxd_enabled = 0; spin_unlock_irqrestore(&device_domain_lock, flags); @@ -5954,7 +5968,7 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) static bool intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); if (feat == IOMMU_DEV_FEAT_AUX) return scalable_mode_support() && info && info->auxd_enabled; diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index ea8f4ef4e295..c46a068142b9 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -151,7 +151,7 @@ int intel_pasid_alloc_table(struct device *dev) int size; might_sleep(); - info = dev->archdata.iommu; + info = get_domain_info(dev); if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL; @@ -198,7 +198,7 @@ void intel_pasid_free_table(struct device *dev) struct pasid_entry *table; int i, max_pde; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !dev_is_pci(dev) || !info->pasid_table) return; @@ -224,7 +224,7 @@ struct pasid_table *intel_pasid_get_table(struct device *dev) { struct device_domain_info *info; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info) return NULL; @@ -235,7 +235,7 @@ int intel_pasid_get_dev_max_id(struct device *dev) { struct device_domain_info *info; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->pasid_table) return 0; @@ -256,7 +256,7 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) return NULL; dir = pasid_table->table; - info = dev->archdata.iommu; + info = get_domain_info(dev); dir_index = pasid >> PASID_PDE_SHIFT; index = pasid & PASID_PTE_MASK; @@ -462,7 +462,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, struct device_domain_info *info; u16 sid, qdep, pfsid; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->ats_enabled) return; diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 7d3405c5a198..75a1ba4439f7 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -503,7 +503,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ goto out; } - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->pasid_supported) { kfree(sdev); goto out; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e14124f74b3a..caa179e806fc 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -714,6 +714,7 @@ int for_each_device_domain(int (*fn)(struct device_domain_info *info, void iommu_flush_write_buffer(struct intel_iommu *iommu); int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); struct dmar_domain *find_domain(struct device *dev); +struct device_domain_info *get_domain_info(struct device *dev); #ifdef CONFIG_INTEL_IOMMU_SVM extern void intel_svm_check(struct intel_iommu *iommu); -- cgit v1.2.3 From 76fdd6c59532630559a2c63e8645a7033f9623c4 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Sat, 16 May 2020 14:20:53 +0800 Subject: iommu/vt-d: Report SVA feature with generic flag Query Shared Virtual Address/Memory capability is a generic feature. SVA feature check is the required first step before calling iommu_sva_bind_device(). VT-d checks SVA feature enabling at per IOMMU level during this step, SVA bind device will check and enable PCI ATS, PRS, and PASID capabilities at device level. This patch reports Intel SVM as SVA feature such that generic code (e.g. Uacce [1]) can use it. [1] https://lkml.org/lkml/2020/1/15/604 Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20200516062101.29541-11-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a13b723ca38d..ed7de7420b3c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5944,6 +5944,14 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) return !!siov_find_pci_dvsec(to_pci_dev(dev)); } + if (feat == IOMMU_DEV_FEAT_SVA) { + struct device_domain_info *info = get_domain_info(dev); + + return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) && + info->pasid_supported && info->pri_supported && + info->ats_supported; + } + return false; } @@ -5953,6 +5961,16 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (feat == IOMMU_DEV_FEAT_AUX) return intel_iommu_enable_auxd(dev); + if (feat == IOMMU_DEV_FEAT_SVA) { + struct device_domain_info *info = get_domain_info(dev); + + if (!info) + return -EINVAL; + + if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) + return 0; + } + return -ENODEV; } -- cgit v1.2.3 From 064a57d7ddfc46ada02b477b91c478001b03bfa3 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Sat, 16 May 2020 14:20:54 +0800 Subject: iommu/vt-d: Replace intel SVM APIs with generic SVA APIs This patch is an initial step to replace Intel SVM code with the following IOMMU SVA ops: intel_svm_bind_mm() => iommu_sva_bind_device() intel_svm_unbind_mm() => iommu_sva_unbind_device() intel_svm_is_pasid_valid() => iommu_sva_get_pasid() The features below will continue to work but are not included in this patch in that they are handled mostly within the IOMMU subsystem. - IO page fault - mmu notifier Consolidation of the above will come after merging generic IOMMU sva code[1]. There should not be any changes needed for SVA users such as accelerator device drivers during this time. [1] http://jpbrucker.net/sva/ Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20200516062101.29541-12-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 3 ++ drivers/iommu/intel-svm.c | 124 ++++++++++++++++++++++++-------------------- include/linux/intel-iommu.h | 6 +++ include/linux/intel-svm.h | 86 ------------------------------ 4 files changed, 78 insertions(+), 141 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ed7de7420b3c..7d28ef2e6fe2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -6071,6 +6071,9 @@ const struct iommu_ops intel_iommu_ops = { .cache_invalidate = intel_iommu_sva_invalidate, .sva_bind_gpasid = intel_svm_bind_gpasid, .sva_unbind_gpasid = intel_svm_unbind_gpasid, + .sva_bind = intel_svm_bind, + .sva_unbind = intel_svm_unbind, + .sva_get_pasid = intel_svm_get_pasid, #endif }; diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 75a1ba4439f7..8b66bf45477e 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -426,13 +426,15 @@ out: return ret; } -int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) +/* Caller must hold pasid_mutex, mm reference */ +static int +intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops, + struct mm_struct *mm, struct intel_svm_dev **sd) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct device_domain_info *info; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; - struct mm_struct *mm = NULL; int pasid_max; int ret; @@ -449,16 +451,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } else pasid_max = 1 << 20; + /* Bind supervisor PASID shuld have mm = NULL */ if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap)) + if (!ecap_srs(iommu->ecap) || mm) { + pr_err("Supervisor PASID with user provided mm.\n"); return -EINVAL; - } else if (pasid) { - mm = get_task_mm(current); - BUG_ON(!mm); + } } - mutex_lock(&pasid_mutex); - if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { + if (!(flags & SVM_FLAG_PRIVATE_PASID)) { struct intel_svm *t; list_for_each_entry(t, &global_svm_list, list) { @@ -496,9 +497,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ sdev->dev = dev; ret = intel_iommu_enable_pasid(iommu, dev); - if (ret || !pasid) { - /* If they don't actually want to assign a PASID, this is - * just an enabling check/preparation. */ + if (ret) { kfree(sdev); goto out; } @@ -597,18 +596,17 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } } list_add_rcu(&sdev->list, &svm->devs); - - success: - *pasid = svm->pasid; +success: + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + if (sd) + *sd = sdev; ret = 0; out: - mutex_unlock(&pasid_mutex); - if (mm) - mmput(mm); return ret; } -EXPORT_SYMBOL_GPL(intel_svm_bind_mm); +/* Caller must hold pasid_mutex */ int intel_svm_unbind_mm(struct device *dev, int pasid) { struct intel_svm_dev *sdev; @@ -616,7 +614,6 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) struct intel_svm *svm; int ret = -EINVAL; - mutex_lock(&pasid_mutex); iommu = intel_svm_device_to_iommu(dev); if (!iommu) goto out; @@ -662,45 +659,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) break; } out: - mutex_unlock(&pasid_mutex); return ret; } -EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); - -int intel_svm_is_pasid_valid(struct device *dev, int pasid) -{ - struct intel_iommu *iommu; - struct intel_svm *svm; - int ret = -EINVAL; - - mutex_lock(&pasid_mutex); - iommu = intel_svm_device_to_iommu(dev); - if (!iommu) - goto out; - - svm = ioasid_find(NULL, pasid, NULL); - if (!svm) - goto out; - - if (IS_ERR(svm)) { - ret = PTR_ERR(svm); - goto out; - } - /* init_mm is used in this case */ - if (!svm->mm) - ret = 1; - else if (atomic_read(&svm->mm->mm_users) > 0) - ret = 1; - else - ret = 0; - - out: - mutex_unlock(&pasid_mutex); - - return ret; -} -EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid); /* Page request queue descriptor */ struct page_req_dsc { @@ -894,3 +855,56 @@ static irqreturn_t prq_event_thread(int irq, void *d) return IRQ_RETVAL(handled); } + +#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) +struct iommu_sva * +intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +{ + struct iommu_sva *sva = ERR_PTR(-EINVAL); + struct intel_svm_dev *sdev = NULL; + int flags = 0; + int ret; + + /* + * TODO: Consolidate with generic iommu-sva bind after it is merged. + * It will require shared SVM data structures, i.e. combine io_mm + * and intel_svm etc. + */ + if (drvdata) + flags = *(int *)drvdata; + mutex_lock(&pasid_mutex); + ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev); + if (ret) + sva = ERR_PTR(ret); + else if (sdev) + sva = &sdev->sva; + else + WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + + mutex_unlock(&pasid_mutex); + + return sva; +} + +void intel_svm_unbind(struct iommu_sva *sva) +{ + struct intel_svm_dev *sdev; + + mutex_lock(&pasid_mutex); + sdev = to_intel_svm_dev(sva); + intel_svm_unbind_mm(sdev->dev, sdev->pasid); + mutex_unlock(&pasid_mutex); +} + +int intel_svm_get_pasid(struct iommu_sva *sva) +{ + struct intel_svm_dev *sdev; + int pasid; + + mutex_lock(&pasid_mutex); + sdev = to_intel_svm_dev(sva); + pasid = sdev->pasid; + mutex_unlock(&pasid_mutex); + + return pasid; +} diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index caa179e806fc..42245e1e1b48 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -723,6 +723,10 @@ extern int intel_svm_finish_prq(struct intel_iommu *iommu); int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, struct iommu_gpasid_bind_data *data); int intel_svm_unbind_gpasid(struct device *dev, int pasid); +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, + void *drvdata); +void intel_svm_unbind(struct iommu_sva *handle); +int intel_svm_get_pasid(struct iommu_sva *handle); struct svm_dev_ops; struct intel_svm_dev { @@ -730,6 +734,8 @@ struct intel_svm_dev { struct rcu_head rcu; struct device *dev; struct svm_dev_ops *ops; + struct iommu_sva sva; + int pasid; int users; u16 did; u16 dev_iotlb:1; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 1b47ca46373e..c9e7e601950d 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -21,7 +21,6 @@ struct svm_dev_ops { #define SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) - /* * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main" * PASID for the current process. Even if a PASID already exists, a new one @@ -57,89 +56,4 @@ struct svm_dev_ops { */ #define SVM_FLAG_GUEST_PASID (1<<3) -#ifdef CONFIG_INTEL_IOMMU_SVM - -/** - * intel_svm_bind_mm() - Bind the current process to a PASID - * @dev: Device to be granted access - * @pasid: Address for allocated PASID - * @flags: Flags. Later for requesting supervisor mode, etc. - * @ops: Callbacks to device driver - * - * This function attempts to enable PASID support for the given device. - * If the @pasid argument is non-%NULL, a PASID is allocated for access - * to the MM of the current process. - * - * By using a %NULL value for the @pasid argument, this function can - * be used to simply validate that PASID support is available for the - * given device — i.e. that it is behind an IOMMU which has the - * requisite support, and is enabled. - * - * Page faults are handled transparently by the IOMMU code, and there - * should be no need for the device driver to be involved. If a page - * fault cannot be handled (i.e. is an invalid address rather than - * just needs paging in), then the page request will be completed by - * the core IOMMU code with appropriate status, and the device itself - * can then report the resulting fault to its driver via whatever - * mechanism is appropriate. - * - * Multiple calls from the same process may result in the same PASID - * being re-used. A reference count is kept. - */ -extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, - struct svm_dev_ops *ops); - -/** - * intel_svm_unbind_mm() - Unbind a specified PASID - * @dev: Device for which PASID was allocated - * @pasid: PASID value to be unbound - * - * This function allows a PASID to be retired when the device no - * longer requires access to the address space of a given process. - * - * If the use count for the PASID in question reaches zero, the - * PASID is revoked and may no longer be used by hardware. - * - * Device drivers are required to ensure that no access (including - * page requests) is currently outstanding for the PASID in question, - * before calling this function. - */ -extern int intel_svm_unbind_mm(struct device *dev, int pasid); - -/** - * intel_svm_is_pasid_valid() - check if pasid is valid - * @dev: Device for which PASID was allocated - * @pasid: PASID value to be checked - * - * This function checks if the specified pasid is still valid. A - * valid pasid means the backing mm is still having a valid user. - * For kernel callers init_mm is always valid. for other mm, if mm->mm_users - * is non-zero, it is valid. - * - * returns -EINVAL if invalid pasid, 0 if pasid ref count is invalid - * 1 if pasid is valid. - */ -extern int intel_svm_is_pasid_valid(struct device *dev, int pasid); - -#else /* CONFIG_INTEL_IOMMU_SVM */ - -static inline int intel_svm_bind_mm(struct device *dev, int *pasid, - int flags, struct svm_dev_ops *ops) -{ - return -ENOSYS; -} - -static inline int intel_svm_unbind_mm(struct device *dev, int pasid) -{ - BUG(); -} - -static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid) -{ - return -EINVAL; -} -#endif /* CONFIG_INTEL_IOMMU_SVM */ - -#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL)) - #endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 37e91bd4b39922b31ca4e6c4eabb0d7140b14e74 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 16 May 2020 14:20:57 +0800 Subject: iommu/vt-d: Disable non-recoverable fault processing before unbind When a PASID is used for SVA by the device, it's possible that the PASID entry is cleared before the device flushes all ongoing DMA requests. The IOMMU should tolerate and ignore the non-recoverable faults caused by the untranslated requests from this device. For example, when an exception happens, the process terminates before the device driver stops DMA and call IOMMU driver to unbind PASID. The flow of process exist is as follows: do_exit() { exit_mm() { mm_put(); exit_mmap() { intel_invalidate_range() //mmu notifier tlb_finish_mmu() mmu_notifier_release(mm) { intel_iommu_release() { [2] intel_iommu_teardown_pasid(); intel_iommu_flush_tlbs(); } } unmap_vmas(); free_pgtables(); }; } exit_files(tsk) { close_files() { dsa_close(); [1] dsa_stop_dma(); intel_svm_unbind_pasid(); } } } Care must be taken on VT-d to avoid unrecoverable faults between the time window of [1] and [2]. [Process exist flow was contributed by Jacob Pan.] Intel VT-d provides such function through the FPD bit of the PASID entry. This sets FPD bit when PASID entry is changing from present to nonpresent in the mm notifier and will clear it when the pasid is unbound. Signed-off-by: Lu Baolu Reviewed-by: Jacob Pan Link: https://lore.kernel.org/r/20200516062101.29541-15-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 ++-- drivers/iommu/intel-pasid.c | 26 +++++++++++++++++++++----- drivers/iommu/intel-pasid.h | 4 +++- drivers/iommu/intel-svm.c | 9 ++++++--- 4 files changed, 32 insertions(+), 11 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7d28ef2e6fe2..3c5cc3424e90 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5005,7 +5005,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) if (info->dev) { if (dev_is_pci(info->dev) && sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, info->dev, - PASID_RID2PASID); + PASID_RID2PASID, false); iommu_disable_dev_iotlb(info); domain_context_clear(iommu, info->dev); @@ -5234,7 +5234,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, auxiliary_unlink_device(domain, dev); spin_lock(&iommu->lock); - intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid); + intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false); domain_detach_iommu(domain, iommu); spin_unlock(&iommu->lock); diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 45e9b5b291bc..25d749830500 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -292,7 +292,20 @@ static inline void pasid_clear_entry(struct pasid_entry *pe) WRITE_ONCE(pe->val[7], 0); } -static void intel_pasid_clear_entry(struct device *dev, int pasid) +static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe) +{ + WRITE_ONCE(pe->val[0], PASID_PTE_FPD); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0); +} + +static void +intel_pasid_clear_entry(struct device *dev, int pasid, bool fault_ignore) { struct pasid_entry *pe; @@ -300,7 +313,10 @@ static void intel_pasid_clear_entry(struct device *dev, int pasid) if (WARN_ON(!pe)) return; - pasid_clear_entry(pe); + if (fault_ignore && pasid_pte_is_present(pe)) + pasid_clear_entry_with_fpd(pe); + else + pasid_clear_entry(pe); } static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) @@ -473,8 +489,8 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); } -void intel_pasid_tear_down_entry(struct intel_iommu *iommu, - struct device *dev, int pasid) +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, + int pasid, bool fault_ignore) { struct pasid_entry *pte; u16 did; @@ -484,7 +500,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, return; did = pasid_get_domain_id(pte); - intel_pasid_clear_entry(dev, pasid); + intel_pasid_clear_entry(dev, pasid, fault_ignore); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte)); diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index a41b09b3ffde..c5318d40e0fa 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -15,6 +15,7 @@ #define PASID_MAX 0x100000 #define PASID_PTE_MASK 0x3F #define PASID_PTE_PRESENT 1 +#define PASID_PTE_FPD 2 #define PDE_PFN_MASK PAGE_MASK #define PASID_PDE_SHIFT 6 #define MAX_NR_PASID_BITS 20 @@ -120,7 +121,8 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct iommu_gpasid_bind_data_vtd *pasid_data, struct dmar_domain *domain, int addr_width); void intel_pasid_tear_down_entry(struct intel_iommu *iommu, - struct device *dev, int pasid); + struct device *dev, int pasid, + bool fault_ignore); int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid); void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid); #endif /* __INTEL_PASID_H */ diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 5133b2d4428f..960a3610e852 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -207,7 +207,8 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { - intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid); + intel_pasid_tear_down_entry(svm->iommu, sdev->dev, + svm->pasid, true); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); } rcu_read_unlock(); @@ -396,7 +397,8 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid) sdev->users--; if (!sdev->users) { list_del_rcu(&sdev->list); - intel_pasid_tear_down_entry(iommu, dev, svm->pasid); + intel_pasid_tear_down_entry(iommu, dev, + svm->pasid, false); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); /* TODO: Drain in flight PRQ for the PASID since it * may get reused soon, we don't want to @@ -639,7 +641,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) * to use. We have a *shared* PASID table, because it's * large and has to be physically contiguous. So it's * hard to be as defensive as we might like. */ - intel_pasid_tear_down_entry(iommu, dev, svm->pasid); + intel_pasid_tear_down_entry(iommu, dev, + svm->pasid, false); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); kfree_rcu(sdev, rcu); -- cgit v1.2.3 From e70b081c6f376471d7a9fee69e12e8f05ac2925d Mon Sep 17 00:00:00 2001 From: Tom Murphy Date: Sat, 16 May 2020 14:21:01 +0800 Subject: iommu/vt-d: Remove IOVA handling code from the non-dma_ops path There's no need for the non-dma_ops path to keep track of IOVAs. The whole point of the non-dma_ops path is that it allows the IOVAs to be handled separately. The IOVA handling code removed in this patch is pointless. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20200516062101.29541-19-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 95 +++++++++++++++------------------------------ 1 file changed, 32 insertions(+), 63 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 3c5cc3424e90..f75d7d9c231f 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1892,11 +1892,6 @@ static int dmar_init_reserved_ranges(void) return 0; } -static void domain_reserve_special_ranges(struct dmar_domain *domain) -{ - copy_reserved_iova(&reserved_iova_list, &domain->iovad); -} - static inline int guestwidth_to_adjustwidth(int gaw) { int agaw; @@ -1918,7 +1913,8 @@ static void domain_exit(struct dmar_domain *domain) domain_remove_dev_info(domain); /* destroy iovas */ - put_iova_domain(&domain->iovad); + if (domain->domain.type == IOMMU_DOMAIN_DMA) + put_iova_domain(&domain->iovad); if (domain->pgd) { struct page *freelist; @@ -2627,19 +2623,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, } static int iommu_domain_identity_map(struct dmar_domain *domain, - unsigned long long start, - unsigned long long end) + unsigned long first_vpfn, + unsigned long last_vpfn) { - unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; - unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; - - if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), - dma_to_mm_pfn(last_vpfn))) { - pr_err("Reserving iova failed\n"); - return -ENOMEM; - } - - pr_debug("Mapping reserved region %llx-%llx\n", start, end); /* * RMRR range might have overlap with physical memory range, * clear it first @@ -2677,7 +2663,8 @@ static int __init si_domain_init(int hw) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { ret = iommu_domain_identity_map(si_domain, - PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); + mm_to_dma_pfn(start_pfn), + mm_to_dma_pfn(end_pfn)); if (ret) return ret; } @@ -4547,58 +4534,37 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { struct memory_notify *mhp = v; - unsigned long long start, end; - unsigned long start_vpfn, last_vpfn; + unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn); + unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn + + mhp->nr_pages - 1); switch (val) { case MEM_GOING_ONLINE: - start = mhp->start_pfn << PAGE_SHIFT; - end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; - if (iommu_domain_identity_map(si_domain, start, end)) { - pr_warn("Failed to build identity map for [%llx-%llx]\n", - start, end); + if (iommu_domain_identity_map(si_domain, + start_vpfn, last_vpfn)) { + pr_warn("Failed to build identity map for [%lx-%lx]\n", + start_vpfn, last_vpfn); return NOTIFY_BAD; } break; case MEM_OFFLINE: case MEM_CANCEL_ONLINE: - start_vpfn = mm_to_dma_pfn(mhp->start_pfn); - last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); - while (start_vpfn <= last_vpfn) { - struct iova *iova; + { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; struct page *freelist; - iova = find_iova(&si_domain->iovad, start_vpfn); - if (iova == NULL) { - pr_debug("Failed get IOVA for PFN %lx\n", - start_vpfn); - break; - } - - iova = split_and_remove_iova(&si_domain->iovad, iova, - start_vpfn, last_vpfn); - if (iova == NULL) { - pr_warn("Failed to split IOVA PFN [%lx-%lx]\n", - start_vpfn, last_vpfn); - return NOTIFY_BAD; - } - - freelist = domain_unmap(si_domain, iova->pfn_lo, - iova->pfn_hi); + freelist = domain_unmap(si_domain, + start_vpfn, last_vpfn); rcu_read_lock(); for_each_active_iommu(iommu, drhd) iommu_flush_iotlb_psi(iommu, si_domain, - iova->pfn_lo, iova_size(iova), + start_vpfn, mhp->nr_pages, !freelist, 0); rcu_read_unlock(); dma_free_pagelist(freelist); - - start_vpfn = iova->pfn_hi + 1; - free_iova_mem(iova); } break; } @@ -4626,8 +4592,9 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) for (did = 0; did < cap_ndoms(iommu->cap); did++) { domain = get_iommu_domain(iommu, (u16)did); - if (!domain) + if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA) continue; + free_cpu_cached_iovas(cpu, &domain->iovad); } } @@ -5037,9 +5004,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - domain_reserve_special_ranges(domain); - /* calculate AGAW */ domain->gaw = guest_width; adjust_width = guestwidth_to_adjustwidth(guest_width); @@ -5058,11 +5022,21 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) return 0; } +static void intel_init_iova_domain(struct dmar_domain *dmar_domain) +{ + init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); + copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad); + + if (!intel_iommu_strict && + init_iova_flush_queue(&dmar_domain->iovad, + iommu_flush_iova, iova_entry_free)) + pr_info("iova flush queue initialization failed\n"); +} + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; struct iommu_domain *domain; - int ret; switch (type) { case IOMMU_DOMAIN_DMA: @@ -5079,13 +5053,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) return NULL; } - if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) { - ret = init_iova_flush_queue(&dmar_domain->iovad, - iommu_flush_iova, - iova_entry_free); - if (ret) - pr_info("iova flush queue initialization failed\n"); - } + if (type == IOMMU_DOMAIN_DMA) + intel_init_iova_domain(dmar_domain); domain_update_iommu_cap(dmar_domain); -- cgit v1.2.3 From 7809c4d5805b1d331acfe0047dd9695691d428d4 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 21 May 2020 17:50:30 -0400 Subject: iommu/vt-d: fix a GCC warning The commit 6ee1b77ba3ac ("iommu/vt-d: Add svm/sva invalidate function") introduced a GCC warning, drivers/iommu/intel-iommu.c:5330:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static int ^~~~~ Fixes: 6ee1b77ba3ac0 ("iommu/vt-d: Add svm/sva invalidate function") Signed-off-by: Qian Cai Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20200521215030.16938-1-cai@lca.pw Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f75d7d9c231f..ff5a30a94679 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5327,7 +5327,7 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain, * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR] */ -const static int +static const int inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = { /* * PASID based IOTLB invalidation: PASID selective (per PASID), -- cgit v1.2.3 From da656a042568ffbc30881c43a832277f275eea4a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 20 May 2020 17:22:03 +0200 Subject: iommu/vt-d: Use pci_ats_supported() The pci_ats_supported() helper checks if a device supports ATS and is allowed to use it. By checking the ATS capability it also integrates the pci_ats_disabled() check from pci_ats_init(). Simplify the vt-d checks. Signed-off-by: Jean-Philippe Brucker Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20200520152201.3309416-5-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 34e08fa2ce3a..5dea3042820b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1454,8 +1454,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) info->pri_enabled = 1; #endif - if (!pdev->untrusted && info->ats_supported && - pci_ats_page_aligned(pdev) && + if (info->ats_supported && pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; domain_update_iotlb(info->domain); @@ -2611,10 +2610,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); - if (!pdev->untrusted && - !pci_ats_disabled() && - ecap_dev_iotlb_support(iommu->ecap) && - pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) && + if (ecap_dev_iotlb_support(iommu->ecap) && + pci_ats_supported(pdev) && dmar_find_matched_atsr_unit(pdev)) info->ats_supported = 1; -- cgit v1.2.3 From 8038bdb8553313ad53bfcffcf8294dd0ab44618f Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 27 May 2020 10:56:15 -0600 Subject: iommu/vt-d: Only clear real DMA device's context entries Domain context mapping can encounter issues with sub-devices of a real DMA device. A sub-device cannot have a valid context entry due to it potentially aliasing another device's 16-bit ID. It's expected that sub-devices of the real DMA device uses the real DMA device's requester when context mapping. This is an issue when a sub-device is removed where the context entry is cleared for all aliases. Other sub-devices are still valid, resulting in those sub-devices being stranded without valid context entries. The correct approach is to use the real DMA device when programming the context entries. The insertion path is correct because device_to_iommu() will return the bus and devfn of the real DMA device. The removal path needs to only operate on the real DMA device, otherwise the entire context entry would be cleared for all sub-devices of the real DMA device. This patch also adds a helper to determine if a struct device is a sub-device of a real DMA device. Fixes: 2b0140c69637e ("iommu/vt-d: Use pci_real_dma_dev() for mapping") Cc: stable@vger.kernel.org # v5.6+ Signed-off-by: Jon Derrick Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20200527165617.297470-2-jonathan.derrick@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ff5a30a94679..1ff45b2d03ab 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2500,6 +2500,12 @@ static int domain_setup_first_level(struct intel_iommu *iommu, flags); } +static bool dev_is_real_dma_subdevice(struct device *dev) +{ + return dev && dev_is_pci(dev) && + pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); +} + static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, int bus, int devfn, struct device *dev, @@ -4975,7 +4981,8 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) PASID_RID2PASID, false); iommu_disable_dev_iotlb(info); - domain_context_clear(iommu, info->dev); + if (!dev_is_real_dma_subdevice(info->dev)) + domain_context_clear(iommu, info->dev); intel_pasid_free_table(info->dev); } -- cgit v1.2.3 From 4fda230ecddc2573ed88632e98b69b0b9b68c0ad Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 27 May 2020 10:56:16 -0600 Subject: iommu/vt-d: Allocate domain info for real DMA sub-devices Sub-devices of a real DMA device might exist on a separate segment than the real DMA device and its IOMMU. These devices should still have a valid device_domain_info, but the current dma alias model won't allocate info for the subdevice. This patch adds a segment member to struct device_domain_info and uses the sub-device's BDF so that these sub-devices won't alias to other devices. Fixes: 2b0140c69637e ("iommu/vt-d: Use pci_real_dma_dev() for mapping") Cc: stable@vger.kernel.org # v5.6+ Signed-off-by: Jon Derrick Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20200527165617.297470-3-jonathan.derrick@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 19 +++++++++++++++---- include/linux/intel-iommu.h | 1 + 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1ff45b2d03ab..6d39b9bd89a6 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2463,7 +2463,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) struct device_domain_info *info; list_for_each_entry(info, &device_domain_list, global) - if (info->iommu->segment == segment && info->bus == bus && + if (info->segment == segment && info->bus == bus && info->devfn == devfn) return info; @@ -2520,8 +2520,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (!info) return NULL; - info->bus = bus; - info->devfn = devfn; + if (!dev_is_real_dma_subdevice(dev)) { + info->bus = bus; + info->devfn = devfn; + info->segment = iommu->segment; + } else { + struct pci_dev *pdev = to_pci_dev(dev); + + info->bus = pdev->bus->number; + info->devfn = pdev->devfn; + info->segment = pci_domain_nr(pdev->bus); + } + info->ats_supported = info->pasid_supported = info->pri_supported = 0; info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; info->ats_qdep = 0; @@ -2561,7 +2571,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (!found) { struct device_domain_info *info2; - info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); + info2 = dmar_search_domain_by_dev_info(info->segment, info->bus, + info->devfn); if (info2) { found = info2->domain; info2->dev = dev; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 21633cee6331..4100bd224f5c 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -609,6 +609,7 @@ struct device_domain_info { struct list_head auxiliary_domains; /* auxiliary domains * attached to this device */ + u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ u16 pfsid; /* SRIOV physical function source ID */ -- cgit v1.2.3 From bba9cc2cf82840bd3c9b3f4f7edac2dc8329c241 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 27 May 2020 10:56:17 -0600 Subject: iommu/vt-d: Remove real DMA lookup in find_domain By removing the real DMA indirection in find_domain(), we can allow sub-devices of a real DMA device to have their own valid device_domain_info. The dmar lookup and context entry removal paths have been fixed to account for sub-devices. Fixes: 2b0140c69637 ("iommu/vt-d: Use pci_real_dma_dev() for mapping") Signed-off-by: Jon Derrick Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20200527165617.297470-4-jonathan.derrick@intel.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207575 Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6d39b9bd89a6..5767882aa80f 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2436,9 +2436,6 @@ struct dmar_domain *find_domain(struct device *dev) if (unlikely(attach_deferred(dev) || iommu_dummy(dev))) return NULL; - if (dev_is_pci(dev)) - dev = &pci_real_dma_dev(to_pci_dev(dev))->dev; - /* No lock here, assumes no domain exit in normal case */ info = get_domain_info(dev); if (likely(info)) -- cgit v1.2.3