diff options
author | Linus Torvalds | 2020-06-08 11:42:23 -0700 |
---|---|---|
committer | Linus Torvalds | 2020-06-08 11:42:23 -0700 |
commit | 4e3a16ee9148e966678bbc713579235422271a63 (patch) | |
tree | 360e31fbfae5e1a6c385ff01919f61b32d74ed17 /drivers/iommu/intel-iommu.c | |
parent | 9413b9a690ec8aeaedea74bb875079d36f295304 (diff) | |
parent | 431275afdc7155415254aef4bd3816a1b8a2ead0 (diff) |
Merge tag 'iommu-updates-v5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel:
"A big part of this is a change in how devices get connected to IOMMUs
in the core code. It contains the change from the old add_device() /
remove_device() to the new probe_device() / release_device()
call-backs.
As a result functionality that was previously in the IOMMU drivers has
been moved to the IOMMU core code, including IOMMU group allocation
for each device. The reason for this change was to get more robust
allocation of default domains for the iommu groups.
A couple of fixes were necessary after this was merged into the IOMMU
tree, but there are no known bugs left. The last fix is applied on-top
of the merge commit for the topic branches.
Other than that change, we have:
- Removal of the driver private domain handling in the Intel VT-d
driver. This was fragile code and I am glad it is gone now.
- More Intel VT-d updates from Lu Baolu:
- Nested Shared Virtual Addressing (SVA) support to the Intel VT-d
driver
- Replacement of the Intel SVM interfaces to the common IOMMU SVA
API
- SVA Page Request draining support
- ARM-SMMU Updates from Will:
- Avoid mapping reserved MMIO space on SMMUv3, so that it can be
claimed by the PMU driver
- Use xarray to manage ASIDs on SMMUv3
- Reword confusing shutdown message
- DT compatible string updates
- Allow implementations to override the default domain type
- A new IOMMU driver for the Allwinner Sun50i platform
- Support for ATS gets disabled for untrusted devices (like
Thunderbolt devices). This includes a PCI patch, acked by Bjorn.
- Some cleanups to the AMD IOMMU driver to make more use of IOMMU
core features.
- Unification of some printk formats in the Intel and AMD IOMMU
drivers and in the IOVA code.
- Updates for DT bindings
- A number of smaller fixes and cleanups.
* tag 'iommu-updates-v5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (109 commits)
iommu: Check for deferred attach in iommu_group_do_dma_attach()
iommu/amd: Remove redundant devid checks
iommu/amd: Store dev_data as device iommu private data
iommu/amd: Merge private header files
iommu/amd: Remove PD_DMA_OPS_MASK
iommu/amd: Consolidate domain allocation/freeing
iommu/amd: Free page-table in protection_domain_free()
iommu/amd: Allocate page-table in protection_domain_init()
iommu/amd: Let free_pagetable() not rely on domain->pt_root
iommu/amd: Unexport get_dev_data()
iommu/vt-d: Fix compile warning
iommu/vt-d: Remove real DMA lookup in find_domain
iommu/vt-d: Allocate domain info for real DMA sub-devices
iommu/vt-d: Only clear real DMA device's context entries
iommu: Remove iommu_sva_ops::mm_exit()
uacce: Remove mm_exit() op
iommu/sun50i: Constify sun50i_iommu_ops
iommu/hyper-v: Constify hyperv_ir_domain_ops
iommu/vt-d: Use pci_ats_supported()
iommu/arm-smmu-v3: Use pci_ats_supported()
...
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r-- | drivers/iommu/intel-iommu.c | 952 |
1 files changed, 399 insertions, 553 deletions
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0182cff2c7ac..648a785e078a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -296,31 +296,6 @@ static inline void context_clear_entry(struct context_entry *context) static struct dmar_domain *si_domain; static int hw_pass_through = 1; -/* si_domain contains mulitple devices */ -#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0) - -/* - * This is a DMA domain allocated through the iommu domain allocation - * interface. But one or more devices belonging to this domain have - * been chosen to use a private domain. We should avoid to use the - * map/unmap/iova_to_phys APIs on it. - */ -#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1) - -/* - * When VT-d works in the scalable mode, it allows DMA translation to - * happen through either first level or second level page table. This - * bit marks that the DMA translation for the domain goes through the - * first level page table, otherwise, it goes through the second level. - */ -#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2) - -/* - * Domain represents a virtual machine which demands iommu nested - * translation mode support. - */ -#define DOMAIN_FLAG_NESTING_MODE BIT(3) - #define for_each_domain_iommu(idx, domain) \ for (idx = 0; idx < g_num_of_iommus; idx++) \ if (domain->iommu_refcnt[idx]) @@ -355,11 +330,6 @@ static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); -static void domain_context_clear(struct intel_iommu *iommu, - struct device *dev); -static int domain_detach_iommu(struct dmar_domain *domain, - struct intel_iommu *iommu); -static bool device_is_rmrr_locked(struct device *dev); static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev); static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, @@ -395,6 +365,21 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) +struct device_domain_info *get_domain_info(struct device *dev) +{ + struct device_domain_info *info; + + if (!dev) + return NULL; + + info = dev->archdata.iommu; + if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO || + info == DEFER_DEVICE_DOMAIN_INFO)) + return NULL; + + return info; +} + DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); @@ -446,12 +431,6 @@ static void init_translation_status(struct intel_iommu *iommu) iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED; } -/* Convert generic 'struct iommu_domain to private struct dmar_domain */ -static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct dmar_domain, domain); -} - static int __init intel_iommu_setup(char *str) { if (!str) @@ -480,8 +459,7 @@ static int __init intel_iommu_setup(char *str) pr_info("Intel-IOMMU: scalable mode supported\n"); intel_iommu_sm = 1; } else if (!strncmp(str, "tboot_noforce", 13)) { - printk(KERN_INFO - "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); + pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); intel_iommu_tboot_noforce = 1; } else if (!strncmp(str, "nobounce", 8)) { pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n"); @@ -1454,8 +1432,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) info->pri_enabled = 1; #endif - if (!pdev->untrusted && info->ats_supported && - pci_ats_page_aligned(pdev) && + if (info->ats_supported && pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; domain_update_iotlb(info->domain); @@ -1763,6 +1740,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); } + if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap)) + ioasid_unregister_allocator(&iommu->pasid_allocator); + #endif } @@ -1911,11 +1891,6 @@ static int dmar_init_reserved_ranges(void) return 0; } -static void domain_reserve_special_ranges(struct dmar_domain *domain) -{ - copy_reserved_iova(&reserved_iova_list, &domain->iovad); -} - static inline int guestwidth_to_adjustwidth(int gaw) { int agaw; @@ -1930,65 +1905,6 @@ static inline int guestwidth_to_adjustwidth(int gaw) return agaw; } -static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, - int guest_width) -{ - int adjust_width, agaw; - unsigned long sagaw; - int ret; - - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - - if (!intel_iommu_strict) { - ret = init_iova_flush_queue(&domain->iovad, - iommu_flush_iova, iova_entry_free); - if (ret) - pr_info("iova flush queue initialization failed\n"); - } - - domain_reserve_special_ranges(domain); - - /* calculate AGAW */ - if (guest_width > cap_mgaw(iommu->cap)) - guest_width = cap_mgaw(iommu->cap); - domain->gaw = guest_width; - adjust_width = guestwidth_to_adjustwidth(guest_width); - agaw = width_to_agaw(adjust_width); - sagaw = cap_sagaw(iommu->cap); - if (!test_bit(agaw, &sagaw)) { - /* hardware doesn't support it, choose a bigger one */ - pr_debug("Hardware doesn't support agaw %d\n", agaw); - agaw = find_next_bit(&sagaw, 5, agaw); - if (agaw >= 5) - return -ENODEV; - } - domain->agaw = agaw; - - if (ecap_coherent(iommu->ecap)) - domain->iommu_coherency = 1; - else - domain->iommu_coherency = 0; - - if (ecap_sc_support(iommu->ecap)) - domain->iommu_snooping = 1; - else - domain->iommu_snooping = 0; - - if (intel_iommu_superpage) - domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); - else - domain->iommu_superpage = 0; - - domain->nid = iommu->node; - - /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); - if (!domain->pgd) - return -ENOMEM; - __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); - return 0; -} - static void domain_exit(struct dmar_domain *domain) { @@ -1996,7 +1912,8 @@ static void domain_exit(struct dmar_domain *domain) domain_remove_dev_info(domain); /* destroy iovas */ - put_iova_domain(&domain->iovad); + if (domain->domain.type == IOMMU_DOMAIN_DMA) + put_iova_domain(&domain->iovad); if (domain->pgd) { struct page *freelist; @@ -2518,11 +2435,8 @@ struct dmar_domain *find_domain(struct device *dev) if (unlikely(attach_deferred(dev) || iommu_dummy(dev))) return NULL; - if (dev_is_pci(dev)) - dev = &pci_real_dma_dev(to_pci_dev(dev))->dev; - /* No lock here, assumes no domain exit in normal case */ - info = dev->archdata.iommu; + info = get_domain_info(dev); if (likely(info)) return info->domain; @@ -2545,7 +2459,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) struct device_domain_info *info; list_for_each_entry(info, &device_domain_list, global) - if (info->iommu->segment == segment && info->bus == bus && + if (info->segment == segment && info->bus == bus && info->devfn == devfn) return info; @@ -2582,6 +2496,12 @@ static int domain_setup_first_level(struct intel_iommu *iommu, flags); } +static bool dev_is_real_dma_subdevice(struct device *dev) +{ + return dev && dev_is_pci(dev) && + pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); +} + static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, int bus, int devfn, struct device *dev, @@ -2596,8 +2516,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (!info) return NULL; - info->bus = bus; - info->devfn = devfn; + if (!dev_is_real_dma_subdevice(dev)) { + info->bus = bus; + info->devfn = devfn; + info->segment = iommu->segment; + } else { + struct pci_dev *pdev = to_pci_dev(dev); + + info->bus = pdev->bus->number; + info->devfn = pdev->devfn; + info->segment = pci_domain_nr(pdev->bus); + } + info->ats_supported = info->pasid_supported = info->pri_supported = 0; info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; info->ats_qdep = 0; @@ -2611,10 +2541,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); - if (!pdev->untrusted && - !pci_ats_disabled() && - ecap_dev_iotlb_support(iommu->ecap) && - pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) && + if (ecap_dev_iotlb_support(iommu->ecap) && + pci_ats_supported(pdev) && dmar_find_matched_atsr_unit(pdev)) info->ats_supported = 1; @@ -2637,7 +2565,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (!found) { struct device_domain_info *info2; - info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); + info2 = dmar_search_domain_by_dev_info(info->segment, info->bus, + info->devfn); if (info2) { found = info2->domain; info2->dev = dev; @@ -2704,108 +2633,10 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, return domain; } -static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) -{ - *(u16 *)opaque = alias; - return 0; -} - -static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) -{ - struct device_domain_info *info; - struct dmar_domain *domain = NULL; - struct intel_iommu *iommu; - u16 dma_alias; - unsigned long flags; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return NULL; - - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); - - spin_lock_irqsave(&device_domain_lock, flags); - info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus), - PCI_BUS_NUM(dma_alias), - dma_alias & 0xff); - if (info) { - iommu = info->iommu; - domain = info->domain; - } - spin_unlock_irqrestore(&device_domain_lock, flags); - - /* DMA alias already has a domain, use it */ - if (info) - goto out; - } - - /* Allocate and initialize new domain for the device */ - domain = alloc_domain(0); - if (!domain) - return NULL; - if (domain_init(domain, iommu, gaw)) { - domain_exit(domain); - return NULL; - } - -out: - return domain; -} - -static struct dmar_domain *set_domain_for_dev(struct device *dev, - struct dmar_domain *domain) -{ - struct intel_iommu *iommu; - struct dmar_domain *tmp; - u16 req_id, dma_alias; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return NULL; - - req_id = ((u16)bus << 8) | devfn; - - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); - - /* register PCI DMA alias device */ - if (req_id != dma_alias) { - tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias), - dma_alias & 0xff, NULL, domain); - - if (!tmp || tmp != domain) - return tmp; - } - } - - tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain); - if (!tmp || tmp != domain) - return tmp; - - return domain; -} - static int iommu_domain_identity_map(struct dmar_domain *domain, - unsigned long long start, - unsigned long long end) + unsigned long first_vpfn, + unsigned long last_vpfn) { - unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; - unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; - - if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), - dma_to_mm_pfn(last_vpfn))) { - pr_err("Reserving iova failed\n"); - return -ENOMEM; - } - - pr_debug("Mapping reserved region %llx-%llx\n", start, end); /* * RMRR range might have overlap with physical memory range, * clear it first @@ -2817,45 +2648,6 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, DMA_PTE_READ|DMA_PTE_WRITE); } -static int domain_prepare_identity_map(struct device *dev, - struct dmar_domain *domain, - unsigned long long start, - unsigned long long end) -{ - /* For _hardware_ passthrough, don't bother. But for software - passthrough, we do it anyway -- it may indicate a memory - range which is reserved in E820, so which didn't get set - up to start with in si_domain */ - if (domain == si_domain && hw_pass_through) { - dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n", - start, end); - return 0; - } - - dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end); - - if (end < start) { - WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); - return -EIO; - } - - if (end >> agaw_to_width(domain->agaw)) { - WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - agaw_to_width(domain->agaw), - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); - return -EIO; - } - - return iommu_domain_identity_map(domain, start, end); -} - static int md_domain_init(struct dmar_domain *domain, int guest_width); static int __init si_domain_init(int hw) @@ -2882,7 +2674,8 @@ static int __init si_domain_init(int hw) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { ret = iommu_domain_identity_map(si_domain, - PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); + mm_to_dma_pfn(start_pfn), + mm_to_dma_pfn(end_pfn)); if (ret) return ret; } @@ -2911,17 +2704,6 @@ static int __init si_domain_init(int hw) return 0; } -static int identity_mapping(struct device *dev) -{ - struct device_domain_info *info; - - info = dev->archdata.iommu; - if (info) - return (info->domain == si_domain); - - return 0; -} - static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) { struct dmar_domain *ndomain; @@ -3048,31 +2830,6 @@ static int device_def_domain_type(struct device *dev) if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) return IOMMU_DOMAIN_IDENTITY; - - /* - * We want to start off with all devices in the 1:1 domain, and - * take them out later if we find they can't access all of memory. - * - * However, we can't do this for PCI devices behind bridges, - * because all PCI devices behind the same bridge will end up - * with the same source-id on their transactions. - * - * Practically speaking, we can't change things around for these - * devices at run-time, because we can't be sure there'll be no - * DMA transactions in flight for any of their siblings. - * - * So PCI devices (unless they're on the root bus) as well as - * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of - * the 1:1 domain, just in _case_ one of their siblings turns out - * not to be able to map all of memory. - */ - if (!pci_is_pcie(pdev)) { - if (!pci_is_root_bus(pdev->bus)) - return IOMMU_DOMAIN_DMA; - if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) - return IOMMU_DOMAIN_DMA; - } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) - return IOMMU_DOMAIN_DMA; } return 0; @@ -3297,6 +3054,85 @@ out_unmap: return ret; } +#ifdef CONFIG_INTEL_IOMMU_SVM +static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data) +{ + struct intel_iommu *iommu = data; + ioasid_t ioasid; + + if (!iommu) + return INVALID_IOASID; + /* + * VT-d virtual command interface always uses the full 20 bit + * PASID range. Host can partition guest PASID range based on + * policies but it is out of guest's control. + */ + if (min < PASID_MIN || max > intel_pasid_max_id) + return INVALID_IOASID; + + if (vcmd_alloc_pasid(iommu, &ioasid)) + return INVALID_IOASID; + + return ioasid; +} + +static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data) +{ + struct intel_iommu *iommu = data; + + if (!iommu) + return; + /* + * Sanity check the ioasid owner is done at upper layer, e.g. VFIO + * We can only free the PASID when all the devices are unbound. + */ + if (ioasid_find(NULL, ioasid, NULL)) { + pr_alert("Cannot free active IOASID %d\n", ioasid); + return; + } + vcmd_free_pasid(iommu, ioasid); +} + +static void register_pasid_allocator(struct intel_iommu *iommu) +{ + /* + * If we are running in the host, no need for custom allocator + * in that PASIDs are allocated from the host system-wide. + */ + if (!cap_caching_mode(iommu->cap)) + return; + + if (!sm_supported(iommu)) { + pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n"); + return; + } + + /* + * Register a custom PASID allocator if we are running in a guest, + * guest PASID must be obtained via virtual command interface. + * There can be multiple vIOMMUs in each guest but only one allocator + * is active. All vIOMMU allocators will eventually be calling the same + * host allocator. + */ + if (!ecap_vcs(iommu->ecap) || !vccap_pasid(iommu->vccap)) + return; + + pr_info("Register custom PASID allocator\n"); + iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc; + iommu->pasid_allocator.free = intel_vcmd_ioasid_free; + iommu->pasid_allocator.pdata = (void *)iommu; + if (ioasid_register_allocator(&iommu->pasid_allocator)) { + pr_warn("Custom PASID allocator failed, scalable mode disabled\n"); + /* + * Disable scalable mode on this IOMMU if there + * is no custom allocator. Mixing SM capable vIOMMU + * and non-SM vIOMMU are not supported. + */ + intel_iommu_sm = 0; + } +} +#endif + static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; @@ -3414,6 +3250,9 @@ static int __init init_dmars(void) */ for_each_active_iommu(iommu, drhd) { iommu_flush_write_buffer(iommu); +#ifdef CONFIG_INTEL_IOMMU_SVM + register_pasid_allocator(iommu); +#endif iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); @@ -3531,100 +3370,6 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -static struct dmar_domain *get_private_domain_for_dev(struct device *dev) -{ - struct dmar_domain *domain, *tmp; - struct dmar_rmrr_unit *rmrr; - struct device *i_dev; - int i, ret; - - /* Device shouldn't be attached by any domains. */ - domain = find_domain(dev); - if (domain) - return NULL; - - domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - goto out; - - /* We have a new domain - setup possible RMRRs for the device */ - rcu_read_lock(); - for_each_rmrr_units(rmrr) { - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, i_dev) { - if (i_dev != dev) - continue; - - ret = domain_prepare_identity_map(dev, domain, - rmrr->base_address, - rmrr->end_address); - if (ret) - dev_err(dev, "Mapping reserved region failed\n"); - } - } - rcu_read_unlock(); - - tmp = set_domain_for_dev(dev, domain); - if (!tmp || domain != tmp) { - domain_exit(domain); - domain = tmp; - } - -out: - if (!domain) - dev_err(dev, "Allocating domain failed\n"); - else - domain->domain.type = IOMMU_DOMAIN_DMA; - - return domain; -} - -/* Check if the dev needs to go through non-identity map and unmap process.*/ -static bool iommu_need_mapping(struct device *dev) -{ - int ret; - - if (iommu_dummy(dev)) - return false; - - if (unlikely(attach_deferred(dev))) - do_deferred_attach(dev); - - ret = identity_mapping(dev); - if (ret) { - u64 dma_mask = *dev->dma_mask; - - if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask) - dma_mask = dev->coherent_dma_mask; - - if (dma_mask >= dma_direct_get_required_mask(dev)) - return false; - - /* - * 32 bit DMA is removed from si_domain and fall back to - * non-identity mapping. - */ - dmar_remove_one_dev_info(dev); - ret = iommu_request_dma_domain_for_dev(dev); - if (ret) { - struct iommu_domain *domain; - struct dmar_domain *dmar_domain; - - domain = iommu_get_domain_for_dev(dev); - if (domain) { - dmar_domain = to_dmar_domain(domain); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - } - dmar_remove_one_dev_info(dev); - get_private_domain_for_dev(dev); - } - - dev_info(dev, "32bit DMA uses non-identity mapping\n"); - } - - return true; -} - static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir, u64 dma_mask) { @@ -3638,6 +3383,9 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); + domain = find_domain(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3689,20 +3437,15 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - return __intel_map_single(dev, page_to_phys(page) + offset, - size, dir, *dev->dma_mask); - return dma_direct_map_page(dev, page, offset, size, dir, attrs); + return __intel_map_single(dev, page_to_phys(page) + offset, + size, dir, *dev->dma_mask); } static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - return __intel_map_single(dev, phys_addr, size, dir, - *dev->dma_mask); - return dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3753,17 +3496,13 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - intel_unmap(dev, dev_addr, size); - else - dma_direct_unmap_page(dev, dev_addr, size, dir, attrs); + intel_unmap(dev, dev_addr, size); } static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - intel_unmap(dev, dev_addr, size); + intel_unmap(dev, dev_addr, size); } static void *intel_alloc_coherent(struct device *dev, size_t size, @@ -3773,8 +3512,8 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, struct page *page = NULL; int order; - if (!iommu_need_mapping(dev)) - return dma_direct_alloc(dev, size, dma_handle, flags, attrs); + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); size = PAGE_ALIGN(size); order = get_order(size); @@ -3809,9 +3548,6 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, int order; struct page *page = virt_to_page(vaddr); - if (!iommu_need_mapping(dev)) - return dma_direct_free(dev, size, vaddr, dma_handle, attrs); - size = PAGE_ALIGN(size); order = get_order(size); @@ -3829,9 +3565,6 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *sg; int i; - if (!iommu_need_mapping(dev)) - return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs); - for_each_sg(sglist, sg, nelems, i) { nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg)); } @@ -3855,8 +3588,9 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); - if (!iommu_need_mapping(dev)) - return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); + + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); domain = find_domain(dev); if (!domain) @@ -3903,8 +3637,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele static u64 intel_get_required_mask(struct device *dev) { - if (!iommu_need_mapping(dev)) - return dma_direct_get_required_mask(dev); return DMA_BIT_MASK(32); } @@ -4813,58 +4545,37 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { struct memory_notify *mhp = v; - unsigned long long start, end; - unsigned long start_vpfn, last_vpfn; + unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn); + unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn + + mhp->nr_pages - 1); switch (val) { case MEM_GOING_ONLINE: - start = mhp->start_pfn << PAGE_SHIFT; - end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; - if (iommu_domain_identity_map(si_domain, start, end)) { - pr_warn("Failed to build identity map for [%llx-%llx]\n", - start, end); + if (iommu_domain_identity_map(si_domain, + start_vpfn, last_vpfn)) { + pr_warn("Failed to build identity map for [%lx-%lx]\n", + start_vpfn, last_vpfn); return NOTIFY_BAD; } break; case MEM_OFFLINE: case MEM_CANCEL_ONLINE: - start_vpfn = mm_to_dma_pfn(mhp->start_pfn); - last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); - while (start_vpfn <= last_vpfn) { - struct iova *iova; + { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; struct page *freelist; - iova = find_iova(&si_domain->iovad, start_vpfn); - if (iova == NULL) { - pr_debug("Failed get IOVA for PFN %lx\n", - start_vpfn); - break; - } - - iova = split_and_remove_iova(&si_domain->iovad, iova, - start_vpfn, last_vpfn); - if (iova == NULL) { - pr_warn("Failed to split IOVA PFN [%lx-%lx]\n", - start_vpfn, last_vpfn); - return NOTIFY_BAD; - } - - freelist = domain_unmap(si_domain, iova->pfn_lo, - iova->pfn_hi); + freelist = domain_unmap(si_domain, + start_vpfn, last_vpfn); rcu_read_lock(); for_each_active_iommu(iommu, drhd) iommu_flush_iotlb_psi(iommu, si_domain, - iova->pfn_lo, iova_size(iova), + start_vpfn, mhp->nr_pages, !freelist, 0); rcu_read_unlock(); dma_free_pagelist(freelist); - - start_vpfn = iova->pfn_hi + 1; - free_iova_mem(iova); } break; } @@ -4892,8 +4603,9 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) for (did = 0; did < cap_ndoms(iommu->cap); did++) { domain = get_iommu_domain(iommu, (u16)did); - if (!domain) + if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA) continue; + free_cpu_cached_iovas(cpu, &domain->iovad); } } @@ -5186,18 +4898,6 @@ int __init intel_iommu_init(void) } up_write(&dmar_global_lock); -#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB) - /* - * If the system has no untrusted device or the user has decided - * to disable the bounce page mechanisms, we don't need swiotlb. - * Mark this and the pre-allocated bounce pages will be released - * later. - */ - if (!has_untrusted_dev() || intel_no_bounce) - swiotlb = 0; -#endif - dma_ops = &intel_dma_ops; - init_iommu_pm_ops(); down_read(&dmar_global_lock); @@ -5283,10 +4983,11 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) if (info->dev) { if (dev_is_pci(info->dev) && sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, info->dev, - PASID_RID2PASID); + PASID_RID2PASID, false); iommu_disable_dev_iotlb(info); - domain_context_clear(iommu, info->dev); + if (!dev_is_real_dma_subdevice(info->dev)) + domain_context_clear(iommu, info->dev); intel_pasid_free_table(info->dev); } @@ -5296,12 +4997,6 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) domain_detach_iommu(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); - /* free the private domain */ - if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && - list_empty(&domain->devices)) - domain_exit(info->domain); - free_devinfo_mem(info); } @@ -5311,9 +5006,8 @@ static void dmar_remove_one_dev_info(struct device *dev) unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; - if (info && info != DEFER_DEVICE_DOMAIN_INFO - && info != DUMMY_DEVICE_DOMAIN_INFO) + info = get_domain_info(dev); + if (info) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); } @@ -5322,9 +5016,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - domain_reserve_special_ranges(domain); - /* calculate AGAW */ domain->gaw = guest_width; adjust_width = guestwidth_to_adjustwidth(guest_width); @@ -5343,11 +5034,21 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) return 0; } +static void intel_init_iova_domain(struct dmar_domain *dmar_domain) +{ + init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); + copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad); + + if (!intel_iommu_strict && + init_iova_flush_queue(&dmar_domain->iovad, + iommu_flush_iova, iova_entry_free)) + pr_info("iova flush queue initialization failed\n"); +} + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; struct iommu_domain *domain; - int ret; switch (type) { case IOMMU_DOMAIN_DMA: @@ -5364,13 +5065,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) return NULL; } - if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) { - ret = init_iova_flush_queue(&dmar_domain->iovad, - iommu_flush_iova, - iova_entry_free); - if (ret) - pr_info("iova flush queue initialization failed\n"); - } + if (type == IOMMU_DOMAIN_DMA) + intel_init_iova_domain(dmar_domain); domain_update_iommu_cap(dmar_domain); @@ -5403,7 +5099,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) static inline bool is_aux_domain(struct device *dev, struct iommu_domain *domain) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); return info && info->auxd_enabled && domain->type == IOMMU_DOMAIN_UNMANAGED; @@ -5412,7 +5108,7 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain) static void auxiliary_link_device(struct dmar_domain *domain, struct device *dev) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) @@ -5425,7 +5121,7 @@ static void auxiliary_link_device(struct dmar_domain *domain, static void auxiliary_unlink_device(struct dmar_domain *domain, struct device *dev) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) @@ -5513,13 +5209,13 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, return; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); iommu = info->iommu; auxiliary_unlink_device(domain, dev); spin_lock(&iommu->lock); - intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid); + intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false); domain_detach_iommu(domain, iommu); spin_unlock(&iommu->lock); @@ -5626,6 +5322,176 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain, aux_domain_remove_dev(to_dmar_domain(domain), dev); } +/* + * 2D array for converting and sanitizing IOMMU generic TLB granularity to + * VT-d granularity. Invalidation is typically included in the unmap operation + * as a result of DMA or VFIO unmap. However, for assigned devices guest + * owns the first level page tables. Invalidations of translation caches in the + * guest are trapped and passed down to the host. + * + * vIOMMU in the guest will only expose first level page tables, therefore + * we do not support IOTLB granularity for request without PASID (second level). + * + * For example, to find the VT-d granularity encoding for IOTLB + * type and page selective granularity within PASID: + * X: indexed by iommu cache type + * Y: indexed by enum iommu_inv_granularity + * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR] + */ + +static const int +inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = { + /* + * PASID based IOTLB invalidation: PASID selective (per PASID), + * page selective (address granularity) + */ + {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID}, + /* PASID based dev TLBs */ + {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL}, + /* PASID cache */ + {-EINVAL, -EINVAL, -EINVAL} +}; + +static inline int to_vtd_granularity(int type, int granu) +{ + return inv_type_granu_table[type][granu]; +} + +static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules) +{ + u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT; + + /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc. + * IOMMU cache invalidate API passes granu_size in bytes, and number of + * granu size in contiguous memory. + */ + return order_base_2(nr_pages); +} + +#ifdef CONFIG_INTEL_IOMMU_SVM +static int +intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, + struct iommu_cache_invalidate_info *inv_info) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags; + int cache_type; + u8 bus, devfn; + u16 did, sid; + int ret = 0; + u64 size = 0; + + if (!inv_info || !dmar_domain || + inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1) + return -EINVAL; + + if (!dev || !dev_is_pci(dev)) + return -ENODEV; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE)) + return -EINVAL; + + spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&iommu->lock); + info = get_domain_info(dev); + if (!info) { + ret = -EINVAL; + goto out_unlock; + } + did = dmar_domain->iommu_did[iommu->seq_id]; + sid = PCI_DEVID(bus, devfn); + + /* Size is only valid in address selective invalidation */ + if (inv_info->granularity != IOMMU_INV_GRANU_PASID) + size = to_vtd_size(inv_info->addr_info.granule_size, + inv_info->addr_info.nb_granules); + + for_each_set_bit(cache_type, + (unsigned long *)&inv_info->cache, + IOMMU_CACHE_INV_TYPE_NR) { + int granu = 0; + u64 pasid = 0; + + granu = to_vtd_granularity(cache_type, inv_info->granularity); + if (granu == -EINVAL) { + pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n", + cache_type, inv_info->granularity); + break; + } + + /* + * PASID is stored in different locations based on the + * granularity. + */ + if (inv_info->granularity == IOMMU_INV_GRANU_PASID && + (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID)) + pasid = inv_info->pasid_info.pasid; + else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && + (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID)) + pasid = inv_info->addr_info.pasid; + + switch (BIT(cache_type)) { + case IOMMU_CACHE_INV_TYPE_IOTLB: + if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && + size && + (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) { + pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n", + inv_info->addr_info.addr, size); + ret = -ERANGE; + goto out_unlock; + } + + /* + * If granu is PASID-selective, address is ignored. + * We use npages = -1 to indicate that. + */ + qi_flush_piotlb(iommu, did, pasid, + mm_to_dma_pfn(inv_info->addr_info.addr), + (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size, + inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF); + + /* + * Always flush device IOTLB if ATS is enabled. vIOMMU + * in the guest may assume IOTLB flush is inclusive, + * which is more efficient. + */ + if (info->ats_enabled) + qi_flush_dev_iotlb_pasid(iommu, sid, + info->pfsid, pasid, + info->ats_qdep, + inv_info->addr_info.addr, + size, granu); + break; + case IOMMU_CACHE_INV_TYPE_DEV_IOTLB: + if (info->ats_enabled) + qi_flush_dev_iotlb_pasid(iommu, sid, + info->pfsid, pasid, + info->ats_qdep, + inv_info->addr_info.addr, + size, granu); + else + pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n"); + break; + default: + dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n", + cache_type); + ret = -EINVAL; + } + } +out_unlock: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + + return ret; +} +#endif + static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot, gfp_t gfp) @@ -5781,78 +5647,22 @@ static bool intel_iommu_capable(enum iommu_cap cap) return false; } -static int intel_iommu_add_device(struct device *dev) +static struct iommu_device *intel_iommu_probe_device(struct device *dev) { - struct dmar_domain *dmar_domain; - struct iommu_domain *domain; struct intel_iommu *iommu; - struct iommu_group *group; u8 bus, devfn; - int ret; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) - return -ENODEV; - - iommu_device_link(&iommu->iommu, dev); + return ERR_PTR(-ENODEV); if (translation_pre_enabled(iommu)) dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO; - group = iommu_group_get_for_dev(dev); - - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto unlink; - } - - iommu_group_put(group); - - domain = iommu_get_domain_for_dev(dev); - dmar_domain = to_dmar_domain(domain); - if (domain->type == IOMMU_DOMAIN_DMA) { - if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { - ret = iommu_request_dm_for_dev(dev); - if (ret) { - dmar_remove_one_dev_info(dev); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - domain_add_dev_info(si_domain, dev); - dev_info(dev, - "Device uses a private identity domain.\n"); - } - } - } else { - if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) { - ret = iommu_request_dma_domain_for_dev(dev); - if (ret) { - dmar_remove_one_dev_info(dev); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - if (!get_private_domain_for_dev(dev)) { - dev_warn(dev, - "Failed to get a private domain.\n"); - ret = -ENOMEM; - goto unlink; - } - - dev_info(dev, - "Device uses a private dma domain.\n"); - } - } - } - - if (device_needs_bounce(dev)) { - dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n"); - set_dma_ops(dev, &bounce_dma_ops); - } - - return 0; - -unlink: - iommu_device_unlink(&iommu->iommu, dev); - return ret; + return &iommu->iommu; } -static void intel_iommu_remove_device(struct device *dev) +static void intel_iommu_release_device(struct device *dev) { struct intel_iommu *iommu; u8 bus, devfn; @@ -5863,11 +5673,19 @@ static void intel_iommu_remove_device(struct device *dev) dmar_remove_one_dev_info(dev); - iommu_group_remove_device(dev); + set_dma_ops(dev, NULL); +} - iommu_device_unlink(&iommu->iommu, dev); +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct iommu_domain *domain; + domain = iommu_get_domain_for_dev(dev); if (device_needs_bounce(dev)) + set_dma_ops(dev, &bounce_dma_ops); + else if (domain && domain->type == IOMMU_DOMAIN_DMA) + set_dma_ops(dev, &intel_dma_ops); + else set_dma_ops(dev, NULL); } @@ -5945,7 +5763,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) spin_lock(&iommu->lock); ret = -EINVAL; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->pasid_supported) goto out; @@ -6041,7 +5859,7 @@ static int intel_iommu_enable_auxd(struct device *dev) return -ENODEV; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); info->auxd_enabled = 1; spin_unlock_irqrestore(&device_domain_lock, flags); @@ -6054,7 +5872,7 @@ static int intel_iommu_disable_auxd(struct device *dev) unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!WARN_ON(!info)) info->auxd_enabled = 0; spin_unlock_irqrestore(&device_domain_lock, flags); @@ -6107,6 +5925,14 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) return !!siov_find_pci_dvsec(to_pci_dev(dev)); } + if (feat == IOMMU_DEV_FEAT_SVA) { + struct device_domain_info *info = get_domain_info(dev); + + return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) && + info->pasid_supported && info->pri_supported && + info->ats_supported; + } + return false; } @@ -6116,6 +5942,16 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (feat == IOMMU_DEV_FEAT_AUX) return intel_iommu_enable_auxd(dev); + if (feat == IOMMU_DEV_FEAT_SVA) { + struct device_domain_info *info = get_domain_info(dev); + + if (!info) + return -EINVAL; + + if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) + return 0; + } + return -ENODEV; } @@ -6131,7 +5967,7 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) static bool intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); if (feat == IOMMU_DEV_FEAT_AUX) return scalable_mode_support() && info && info->auxd_enabled; @@ -6198,8 +6034,9 @@ const struct iommu_ops intel_iommu_ops = { .map = intel_iommu_map, .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, - .add_device = intel_iommu_add_device, - .remove_device = intel_iommu_remove_device, + .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, + .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .apply_resv_region = intel_iommu_apply_resv_region, @@ -6209,7 +6046,16 @@ const struct iommu_ops intel_iommu_ops = { .dev_enable_feat = intel_iommu_dev_enable_feat, .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, + .def_domain_type = device_def_domain_type, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, +#ifdef CONFIG_INTEL_IOMMU_SVM + .cache_invalidate = intel_iommu_sva_invalidate, + .sva_bind_gpasid = intel_svm_bind_gpasid, + .sva_unbind_gpasid = intel_svm_unbind_gpasid, + .sva_bind = intel_svm_bind, + .sva_unbind = intel_svm_unbind, + .sva_get_pasid = intel_svm_get_pasid, +#endif }; static void quirk_iommu_igfx(struct pci_dev *dev) |