diff options
author | Jason Gunthorpe | 2022-04-11 12:16:06 -0300 |
---|---|---|
committer | Joerg Roedel | 2022-04-28 17:24:57 +0200 |
commit | 71cfafda9c9bd9812cdb62ddb94daf65a1af12c1 (patch) | |
tree | f1bc4377a68f66b6da05adc92e2dabd9edb3c796 /drivers/vfio | |
parent | 6043257b1de069fbb5a2a52d7211c0275bc8c0e0 (diff) |
vfio: Move the Intel no-snoop control off of IOMMU_CACHE
IOMMU_CACHE means "normal DMA to this iommu_domain's IOVA should be cache
coherent" and is used by the DMA API. The definition allows for special
non-coherent DMA to exist - ie processing of the no-snoop flag in PCIe
TLPs - so long as this behavior is opt-in by the device driver.
The flag is mainly used by the DMA API to synchronize the IOMMU setting
with the expected cache behavior of the DMA master. eg based on
dev_is_dma_coherent() in some case.
For Intel IOMMU IOMMU_CACHE was redefined to mean 'force all DMA to be
cache coherent' which has the practical effect of causing the IOMMU to
ignore the no-snoop bit in a PCIe TLP.
x86 platforms are always IOMMU_CACHE, so Intel should ignore this flag.
Instead use the new domain op enforce_cache_coherency() which causes every
IOPTE created in the domain to have the no-snoop blocking behavior.
Reconfigure VFIO to always use IOMMU_CACHE and call
enforce_cache_coherency() to operate the special Intel behavior.
Remove the IOMMU_CACHE test from Intel IOMMU.
Ultimately VFIO plumbs the result of enforce_cache_coherency() back into
the x86 platform code through kvm_arch_register_noncoherent_dma() which
controls if the WBINVD instruction is available in the guest. No other
archs implement kvm_arch_register_noncoherent_dma() nor are there any
other known consumers of VFIO_DMA_CC_IOMMU that might be affected by the
user visible result change on non-x86 archs.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/2-v3-2cf356649677+a32-intel_no_snoop_jgg@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Diffstat (limited to 'drivers/vfio')
-rw-r--r-- | drivers/vfio/vfio_iommu_type1.c | 30 |
1 files changed, 19 insertions, 11 deletions
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 9394aa9444c1..c13b9290e357 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -84,8 +84,8 @@ struct vfio_domain { struct iommu_domain *domain; struct list_head next; struct list_head group_list; - int prot; /* IOMMU_CACHE */ - bool fgsp; /* Fine-grained super pages */ + bool fgsp : 1; /* Fine-grained super pages */ + bool enforce_cache_coherency : 1; }; struct vfio_dma { @@ -1461,7 +1461,7 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, - npage << PAGE_SHIFT, prot | d->prot); + npage << PAGE_SHIFT, prot | IOMMU_CACHE); if (ret) goto unwind; @@ -1771,7 +1771,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, } ret = iommu_map(domain->domain, iova, phys, - size, dma->prot | domain->prot); + size, dma->prot | IOMMU_CACHE); if (ret) { if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, @@ -1859,7 +1859,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain) return; ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2, - IOMMU_READ | IOMMU_WRITE | domain->prot); + IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE); if (!ret) { size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE); @@ -2267,8 +2267,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_detach; } - if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) - domain->prot |= IOMMU_CACHE; + /* + * If the IOMMU can block non-coherent operations (ie PCIe TLPs with + * no-snoop set) then VFIO always turns this feature on because on Intel + * platforms it optimizes KVM to disable wbinvd emulation. + */ + if (domain->domain->ops->enforce_cache_coherency) + domain->enforce_cache_coherency = + domain->domain->ops->enforce_cache_coherency( + domain->domain); /* * Try to match an existing compatible domain. We don't want to @@ -2279,7 +2286,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, */ list_for_each_entry(d, &iommu->domain_list, next) { if (d->domain->ops == domain->domain->ops && - d->prot == domain->prot) { + d->enforce_cache_coherency == + domain->enforce_cache_coherency) { iommu_detach_group(domain->domain, group->iommu_group); if (!iommu_attach_group(d->domain, group->iommu_group)) { @@ -2611,14 +2619,14 @@ static void vfio_iommu_type1_release(void *iommu_data) kfree(iommu); } -static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) +static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu) { struct vfio_domain *domain; int ret = 1; mutex_lock(&iommu->lock); list_for_each_entry(domain, &iommu->domain_list, next) { - if (!(domain->prot & IOMMU_CACHE)) { + if (!(domain->enforce_cache_coherency)) { ret = 0; break; } @@ -2641,7 +2649,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, case VFIO_DMA_CC_IOMMU: if (!iommu) return 0; - return vfio_domains_have_iommu_cache(iommu); + return vfio_domains_have_enforce_cache_coherency(iommu); default: return 0; } |