aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoerg Roedel2021-08-02 17:00:28 +0200
committerJoerg Roedel2021-08-02 17:00:28 +0200
commit1d65b90847219e4101dc09ecc1c8178b1d2331b8 (patch)
tree4d2872a267cba56f487ed429c4c3a2b7d1839837
parenta270be1b3fdfb6940dd692c859fdf9a7407047be (diff)
parent75cc1018a9e1e57d4ae43a101fc08a070894d439 (diff)
Merge remote-tracking branch 'korg/core' into x86/amd
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt12
-rw-r--r--drivers/iommu/Kconfig41
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h6
-rw-r--r--drivers/iommu/amd/init.c7
-rw-r--r--drivers/iommu/amd/io_pgtable.c3
-rw-r--r--drivers/iommu/amd/iommu.c6
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c3
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c21
-rw-r--r--drivers/iommu/intel/iommu.c139
-rw-r--r--drivers/iommu/io-pgtable-arm-v7s.c50
-rw-r--r--drivers/iommu/io-pgtable-arm.c223
-rw-r--r--drivers/iommu/iommu.c146
-rw-r--r--include/linux/io-pgtable.h8
-rw-r--r--include/linux/iommu.h11
14 files changed, 437 insertions, 239 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bdb22006f713..90b525cf0ec2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -290,10 +290,7 @@
amd_iommu= [HW,X86-64]
Pass parameters to the AMD IOMMU driver in the system.
Possible values are:
- fullflush - enable flushing of IO/TLB entries when
- they are unmapped. Otherwise they are
- flushed before they will be reused, which
- is a lot of faster
+ fullflush - Deprecated, equivalent to iommu.strict=1
off - do not initialize any AMD IOMMU found in
the system
force_isolation - Force device isolation for all
@@ -1944,9 +1941,7 @@
this case, gfx device will use physical address for
DMA.
strict [Default Off]
- With this option on every unmap_single operation will
- result in a hardware IOTLB flush operation as opposed
- to batching them for performance.
+ Deprecated, equivalent to iommu.strict=1.
sp_off [Default Off]
By default, super page will be supported if Intel IOMMU
has the capability. With this option, super page will
@@ -2047,9 +2042,10 @@
throughput at the cost of reduced device isolation.
Will fall back to strict mode if not supported by
the relevant IOMMU driver.
- 1 - Strict mode (default).
+ 1 - Strict mode.
DMA unmap operations invalidate IOMMU hardware TLBs
synchronously.
+ unset - Use value of CONFIG_IOMMU_DEFAULT_{LAZY,STRICT}.
Note: on x86, the default behaviour depends on the
equivalent driver-specific parameters, but a strict
mode explicitly specified by either method takes
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 07b7c25cbed8..c84da8205be7 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -90,6 +90,47 @@ config IOMMU_DEFAULT_PASSTHROUGH
If unsure, say N here.
+choice
+ prompt "IOMMU default DMA IOTLB invalidation mode"
+ depends on IOMMU_DMA
+
+ default IOMMU_DEFAULT_LAZY if (AMD_IOMMU || INTEL_IOMMU)
+ default IOMMU_DEFAULT_STRICT
+ help
+ This option allows an IOMMU DMA IOTLB invalidation mode to be
+ chosen at build time, to override the default mode of each ARCH,
+ removing the need to pass in kernel parameters through command line.
+ It is still possible to provide common boot params to override this
+ config.
+
+ If unsure, keep the default.
+
+config IOMMU_DEFAULT_STRICT
+ bool "strict"
+ help
+ For every IOMMU DMA unmap operation, the flush operation of IOTLB and
+ the free operation of IOVA are guaranteed to be done in the unmap
+ function.
+
+config IOMMU_DEFAULT_LAZY
+ bool "lazy"
+ help
+ Support lazy mode, where for every IOMMU DMA unmap operation, the
+ flush operation of IOTLB and the free operation of IOVA are deferred.
+ They are only guaranteed to be done before the related IOVA will be
+ reused.
+
+ The isolation provided in this mode is not as secure as STRICT mode,
+ such that a vulnerable time window may be created between the DMA
+ unmap and the mappings cached in the IOMMU IOTLB or device TLB
+ finally being invalidated, where the device could still access the
+ memory which has already been unmapped by the device driver.
+ However this mode may provide better performance in high throughput
+ scenarios, and is still considerably more secure than passthrough
+ mode or no IOMMU.
+
+endchoice
+
config OF_IOMMU
def_bool y
depends on OF && IOMMU_API
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 94c1a7a9876d..8dbe61e2b3c1 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -779,12 +779,6 @@ extern u16 amd_iommu_last_bdf;
/* allocation bitmap for domain ids */
extern unsigned long *amd_iommu_pd_alloc_bitmap;
-/*
- * If true, the addresses will be flushed on unmap time, not when
- * they are reused
- */
-extern bool amd_iommu_unmap_flush;
-
/* Smallest max PASID supported by any IOMMU in the system */
extern u32 amd_iommu_max_pasid;
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 1c7ae7d3c55d..239556c1f698 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -161,7 +161,6 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
to handle */
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
-bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
@@ -3103,8 +3102,10 @@ static int __init parse_amd_iommu_intr(char *str)
static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
- if (strncmp(str, "fullflush", 9) == 0)
- amd_iommu_unmap_flush = true;
+ if (strncmp(str, "fullflush", 9) == 0) {
+ pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
+ iommu_set_dma_strict();
+ }
if (strncmp(str, "force_enable", 12) == 0)
amd_iommu_force_enable = true;
if (strncmp(str, "off", 3) == 0)
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index bb0ee5c9fde7..182c93a43efd 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -493,9 +493,6 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
unsigned long offset_mask, pte_pgsize;
u64 *pte, __pte;
- if (pgtable->mode == PAGE_MODE_NONE)
- return iova;
-
pte = fetch_pte(pgtable, iova, &pte_pgsize);
if (!pte || !IOMMU_PTE_PRESENT(*pte))
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index fb5c40715d10..7dedbea9c67f 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1818,12 +1818,6 @@ void amd_iommu_domain_update(struct protection_domain *domain)
static void __init amd_iommu_init_dma_ops(void)
{
swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
-
- if (amd_iommu_unmap_flush)
- pr_info("IO/TLB flush on unmap enabled\n");
- else
- pr_info("Lazy IO/TLB flushing enabled\n");
- iommu_set_dma_strict(amd_iommu_unmap_flush);
}
int __init amd_iommu_init_api(void)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 235f9bdaeaf2..6346f21726f4 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2488,9 +2488,6 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
- if (domain->type == IOMMU_DOMAIN_IDENTITY)
- return iova;
-
if (!ops)
return 0;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index f22dbeb1e510..ac21170fa208 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1198,8 +1198,9 @@ rpm_put:
return ret;
}
-static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1209,14 +1210,15 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
return -ENODEV;
arm_smmu_rpm_get(smmu);
- ret = ops->map(ops, iova, paddr, size, prot, gfp);
+ ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
arm_smmu_rpm_put(smmu);
return ret;
}
-static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *iotlb_gather)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1226,7 +1228,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return 0;
arm_smmu_rpm_get(smmu);
- ret = ops->unmap(ops, iova, size, gather);
+ ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
arm_smmu_rpm_put(smmu);
return ret;
@@ -1320,9 +1322,6 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
- if (domain->type == IOMMU_DOMAIN_IDENTITY)
- return iova;
-
if (!ops)
return 0;
@@ -1582,8 +1581,8 @@ static struct iommu_ops arm_smmu_ops = {
.domain_alloc = arm_smmu_domain_alloc,
.domain_free = arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
- .map = arm_smmu_map,
- .unmap = arm_smmu_unmap,
+ .map_pages = arm_smmu_map_pages,
+ .unmap_pages = arm_smmu_unmap_pages,
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dd22fc7d5176..c12cc955389a 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -85,24 +85,6 @@
#define LEVEL_STRIDE (9)
#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
-/*
- * This bitmap is used to advertise the page sizes our hardware support
- * to the IOMMU core, which will then use this information to split
- * physically contiguous memory regions it is mapping into page sizes
- * that we support.
- *
- * Traditionally the IOMMU core just handed us the mappings directly,
- * after making sure the size is an order of a 4KiB page and that the
- * mapping has natural alignment.
- *
- * To retain this behavior, we currently advertise that we support
- * all page sizes that are an order of 4KiB.
- *
- * If at some point we'd like to utilize the IOMMU core's new behavior,
- * we could change this to advertise the real page sizes we support.
- */
-#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
-
static inline int agaw_to_level(int agaw)
{
return agaw + 2;
@@ -361,7 +343,6 @@ int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
static int dmar_map_gfx = 1;
-static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int iommu_skip_te_disable;
@@ -454,8 +435,8 @@ static int __init intel_iommu_setup(char *str)
pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
iommu_dma_forcedac = true;
} else if (!strncmp(str, "strict", 6)) {
- pr_info("Disable batched IOTLB flush\n");
- intel_iommu_strict = 1;
+ pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
+ iommu_set_dma_strict();
} else if (!strncmp(str, "sp_off", 6)) {
pr_info("Disable supported super page\n");
intel_iommu_superpage = 0;
@@ -736,6 +717,23 @@ static int domain_update_device_node(struct dmar_domain *domain)
static void domain_update_iotlb(struct dmar_domain *domain);
+/* Return the super pagesize bitmap if supported. */
+static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
+{
+ unsigned long bitmap = 0;
+
+ /*
+ * 1-level super page supports page size of 2MiB, 2-level super page
+ * supports page size of both 2MiB and 1GiB.
+ */
+ if (domain->iommu_superpage == 1)
+ bitmap |= SZ_2M;
+ else if (domain->iommu_superpage == 2)
+ bitmap |= SZ_2M | SZ_1G;
+
+ return bitmap;
+}
+
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
@@ -762,6 +760,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
else
domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
+ domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
domain_update_iotlb(domain);
}
@@ -2334,9 +2333,9 @@ static int
__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long phys_pfn, unsigned long nr_pages, int prot)
{
+ struct dma_pte *first_pte = NULL, *pte = NULL;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
- struct dma_pte *pte = NULL;
phys_addr_t pteval;
u64 attr;
@@ -2369,6 +2368,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
if (!pte)
return -ENOMEM;
+ first_pte = pte;
+
/* It is large page*/
if (largepage_lvl > 1) {
unsigned long end_pfn;
@@ -2416,14 +2417,14 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
* recalculate 'pte' and switch back to smaller pages for the
* end of the mapping, if the trailing size is not enough to
* use another superpage (i.e. nr_pages < lvl_pages).
- *
- * We leave clflush for the leaf pte changes to iotlb_sync_map()
- * callback.
*/
pte++;
if (!nr_pages || first_pte_in_page(pte) ||
- (largepage_lvl > 1 && nr_pages < lvl_pages))
+ (largepage_lvl > 1 && nr_pages < lvl_pages)) {
+ domain_flush_cache(domain, first_pte,
+ (void *)pte - (void *)first_pte);
pte = NULL;
+ }
}
return 0;
@@ -4393,9 +4394,9 @@ int __init intel_iommu_init(void)
* is likely to be much lower than the overhead of synchronizing
* the virtual and physical IOMMU page-tables.
*/
- if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
- pr_warn("IOMMU batching is disabled due to virtualization");
- intel_iommu_strict = 1;
+ if (cap_caching_mode(iommu->cap)) {
+ pr_info_once("IOMMU batching disallowed due to virtualization\n");
+ iommu_set_dma_strict();
}
iommu_device_sysfs_add(&iommu->iommu, NULL,
intel_iommu_groups,
@@ -4404,7 +4405,6 @@ int __init intel_iommu_init(void)
}
up_read(&dmar_global_lock);
- iommu_set_dma_strict(intel_iommu_strict);
bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
if (si_domain && !hw_pass_through)
register_memory_notifier(&intel_iommu_memory_nb);
@@ -5067,6 +5067,28 @@ static int intel_iommu_map(struct iommu_domain *domain,
hpa >> VTD_PAGE_SHIFT, size, prot);
}
+static int intel_iommu_map_pages(struct iommu_domain *domain,
+ unsigned long iova, phys_addr_t paddr,
+ size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
+{
+ unsigned long pgshift = __ffs(pgsize);
+ size_t size = pgcount << pgshift;
+ int ret;
+
+ if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(iova | paddr, pgsize))
+ return -EINVAL;
+
+ ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
+ if (!ret && mapped)
+ *mapped = size;
+
+ return ret;
+}
+
static size_t intel_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *gather)
@@ -5096,6 +5118,17 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
return size;
}
+static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
+ unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
+{
+ unsigned long pgshift = __ffs(pgsize);
+ size_t size = pgcount << pgshift;
+
+ return intel_iommu_unmap(domain, iova, size, gather);
+}
+
static void intel_iommu_tlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
@@ -5532,39 +5565,6 @@ static bool risky_device(struct pci_dev *pdev)
return false;
}
-static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn,
- unsigned long clf_pages)
-{
- struct dma_pte *first_pte = NULL, *pte = NULL;
- unsigned long lvl_pages = 0;
- int level = 0;
-
- while (clf_pages > 0) {
- if (!pte) {
- level = 0;
- pte = pfn_to_dma_pte(domain, clf_pfn, &level);
- if (WARN_ON(!pte))
- return;
- first_pte = pte;
- lvl_pages = lvl_to_nr_pages(level);
- }
-
- if (WARN_ON(!lvl_pages || clf_pages < lvl_pages))
- return;
-
- clf_pages -= lvl_pages;
- clf_pfn += lvl_pages;
- pte++;
-
- if (!clf_pages || first_pte_in_page(pte) ||
- (level > 1 && clf_pages < lvl_pages)) {
- domain_flush_cache(domain, first_pte,
- (void *)pte - (void *)first_pte);
- pte = NULL;
- }
- }
-}
-
static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
@@ -5574,9 +5574,6 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
struct intel_iommu *iommu;
int iommu_id;
- if (!dmar_domain->iommu_coherency)
- clflush_sync_map(dmar_domain, pfn, pages);
-
for_each_domain_iommu(iommu_id, dmar_domain) {
iommu = g_iommus[iommu_id];
__mapping_notify_one(iommu, dmar_domain, pfn, pages);
@@ -5593,9 +5590,9 @@ const struct iommu_ops intel_iommu_ops = {
.aux_attach_dev = intel_iommu_aux_attach_device,
.aux_detach_dev = intel_iommu_aux_detach_device,
.aux_get_pasid = intel_iommu_aux_get_pasid,
- .map = intel_iommu_map,
+ .map_pages = intel_iommu_map_pages,
+ .unmap_pages = intel_iommu_unmap_pages,
.iotlb_sync_map = intel_iommu_iotlb_sync_map,
- .unmap = intel_iommu_unmap,
.flush_iotlb_all = intel_flush_iotlb_all,
.iotlb_sync = intel_iommu_tlb_sync,
.iova_to_phys = intel_iommu_iova_to_phys,
@@ -5611,7 +5608,7 @@ const struct iommu_ops intel_iommu_ops = {
.dev_disable_feat = intel_iommu_dev_disable_feat,
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
- .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
+ .pgsize_bitmap = SZ_4K,
#ifdef CONFIG_INTEL_IOMMU_SVM
.cache_invalidate = intel_iommu_sva_invalidate,
.sva_bind_gpasid = intel_svm_bind_gpasid,
@@ -5714,8 +5711,8 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
} else if (dmar_map_gfx) {
/* we have to ensure the gfx device is idle before we flush */
pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
- intel_iommu_strict = 1;
- }
+ iommu_set_dma_strict();
+ }
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index d4004bcf333a..5db90d7ce2ec 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -519,11 +519,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
}
-static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
- int ret;
+ int ret = -EINVAL;
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
paddr >= (1ULL << data->iop.cfg.oas)))
@@ -533,7 +534,17 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
- ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd, gfp);
+ while (pgcount--) {
+ ret = __arm_v7s_map(data, iova, paddr, pgsize, prot, 1, data->pgd,
+ gfp);
+ if (ret)
+ break;
+
+ iova += pgsize;
+ paddr += pgsize;
+ if (mapped)
+ *mapped += pgsize;
+ }
/*
* Synchronise all PTE updates for the new mapping before there's
* a chance for anything to kick off a table walk for the new iova.
@@ -543,6 +554,12 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
return ret;
}
+static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ return arm_v7s_map_pages(ops, iova, paddr, size, 1, prot, gfp, NULL);
+}
+
static void arm_v7s_free_pgtable(struct io_pgtable *iop)
{
struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
@@ -710,15 +727,32 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
}
-static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_v7s_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
{
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ size_t unmapped = 0, ret;
if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
return 0;
- return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
+ while (pgcount--) {
+ ret = __arm_v7s_unmap(data, gather, iova, pgsize, 1, data->pgd);
+ if (!ret)
+ break;
+
+ unmapped += pgsize;
+ iova += pgsize;
+ }
+
+ return unmapped;
+}
+
+static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t size, struct iommu_iotlb_gather *gather)
+{
+ return arm_v7s_unmap_pages(ops, iova, size, 1, gather);
}
static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
@@ -780,7 +814,9 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
data->iop.ops = (struct io_pgtable_ops) {
.map = arm_v7s_map,
+ .map_pages = arm_v7s_map_pages,
.unmap = arm_v7s_unmap,
+ .unmap_pages = arm_v7s_unmap_pages,
.iova_to_phys = arm_v7s_iova_to_phys,
};
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58e79b5..053df4048a29 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -46,6 +46,9 @@
#define ARM_LPAE_PGD_SIZE(d) \
(sizeof(arm_lpae_iopte) << (d)->pgd_bits)
+#define ARM_LPAE_PTES_PER_TABLE(d) \
+ (ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte)))
+
/*
* Calculate the index at level l used to map virtual address a using the
* pagetable in d.
@@ -232,70 +235,77 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
free_pages((unsigned long)pages, get_order(size));
}
-static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep,
+static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
struct io_pgtable_cfg *cfg)
{
dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
- sizeof(*ptep), DMA_TO_DEVICE);
+ sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
}
-static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
- struct io_pgtable_cfg *cfg)
+static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg)
{
- *ptep = pte;
+
+ *ptep = 0;
if (!cfg->coherent_walk)
- __arm_lpae_sync_pte(ptep, cfg);
+ __arm_lpae_sync_pte(ptep, 1, cfg);
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t size, int lvl,
- arm_lpae_iopte *ptep);
+ unsigned long iova, size_t size, size_t pgcount,
+ int lvl, arm_lpae_iopte *ptep);
static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
phys_addr_t paddr, arm_lpae_iopte prot,
- int lvl, arm_lpae_iopte *ptep)
+ int lvl, int num_entries, arm_lpae_iopte *ptep)
{
arm_lpae_iopte pte = prot;
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
+ size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+ int i;
if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
pte |= ARM_LPAE_PTE_TYPE_PAGE;
else
pte |= ARM_LPAE_PTE_TYPE_BLOCK;
- pte |= paddr_to_iopte(paddr, data);
+ for (i = 0; i < num_entries; i++)
+ ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);
- __arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
+ if (!cfg->coherent_walk)
+ __arm_lpae_sync_pte(ptep, num_entries, cfg);
}
static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
unsigned long iova, phys_addr_t paddr,
- arm_lpae_iopte prot, int lvl,
+ arm_lpae_iopte prot, int lvl, int num_entries,
arm_lpae_iopte *ptep)
{
- arm_lpae_iopte pte = *ptep;
-
- if (iopte_leaf(pte, lvl, data->iop.fmt)) {
- /* We require an unmap first */
- WARN_ON(!selftest_running);
- return -EEXIST;
- } else if (iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE) {
- /*
- * We need to unmap and free the old table before
- * overwriting it with a block entry.
- */
- arm_lpae_iopte *tblp;
- size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-
- tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
- if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
- WARN_ON(1);
- return -EINVAL;
+ int i;
+
+ for (i = 0; i < num_entries; i++)
+ if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
+ /* We require an unmap first */
+ WARN_ON(!selftest_running);
+ return -EEXIST;
+ } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
+ /*
+ * We need to unmap and free the old table before
+ * overwriting it with a block entry.
+ */
+ arm_lpae_iopte *tblp;
+ size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+
+ tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
+ if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
+ lvl, tblp) != sz) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
}
- }
- __arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
+ __arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
return 0;
}
@@ -323,7 +333,7 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
return old;
/* Even if it's not ours, there's no point waiting; just kick it */
- __arm_lpae_sync_pte(ptep, cfg);
+ __arm_lpae_sync_pte(ptep, 1, cfg);
if (old == curr)
WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);
@@ -331,20 +341,30 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
}
static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
- phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
- int lvl, arm_lpae_iopte *ptep, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t pgcount,
+ arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
+ gfp_t gfp, size_t *mapped)
{
arm_lpae_iopte *cptep, pte;
size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
size_t tblsz = ARM_LPAE_GRANULE(data);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
+ int ret = 0, num_entries, max_entries, map_idx_start;
/* Find our entry at the current level */
- ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+ map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+ ptep += map_idx_start;
/* If we can install a leaf entry at this level, then do so */
- if (size == block_size)
- return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
+ if (size == block_size) {
+ max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
+ num_entries = min_t(int, pgcount, max_entries);
+ ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
+ if (!ret && mapped)
+ *mapped += num_entries * size;
+
+ return ret;
+ }
/* We can't allocate tables at the final level */
if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
@@ -361,7 +381,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
if (pte)
__arm_lpae_free_pages(cptep, tblsz, cfg);
} else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
- __arm_lpae_sync_pte(ptep, cfg);
+ __arm_lpae_sync_pte(ptep, 1, cfg);
}
if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
@@ -373,7 +393,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
}
/* Rinse, repeat */
- return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
+ return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
+ cptep, gfp, mapped);
}
static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
@@ -440,8 +461,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
return pte;
}
-static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
- phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
+static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
@@ -450,7 +472,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
arm_lpae_iopte prot;
long iaext = (s64)iova >> cfg->ias;
- if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+ if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
return -EINVAL;
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
@@ -463,7 +485,8 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
return 0;
prot = arm_lpae_prot_to_pte(data, iommu_prot);
- ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, gfp);
+ ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
+ ptep, gfp, mapped);
/*
* Synchronise all PTE updates for the new mapping before there's
* a chance for anything to kick off a table walk for the new iova.
@@ -473,6 +496,13 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
return ret;
}
+static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
+{
+ return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp,
+ NULL);
+}
+
static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *ptep)
{
@@ -516,14 +546,15 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size,
arm_lpae_iopte blk_pte, int lvl,
- arm_lpae_iopte *ptep)
+ arm_lpae_iopte *ptep, size_t pgcount)
{
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte pte, *tablep;
phys_addr_t blk_paddr;
size_t tablesz = ARM_LPAE_GRANULE(data);
size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
- int i, unmap_idx = -1;
+ int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
+ int i, unmap_idx_start = -1, num_entries = 0, max_entries;
if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
return 0;
@@ -532,18 +563,21 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
if (!tablep)
return 0; /* Bytes unmapped */
- if (size == split_sz)
- unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
+ if (size == split_sz) {
+ unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+ max_entries = ptes_per_table - unmap_idx_start;
+ num_entries = min_t(int, pgcount, max_entries);
+ }
blk_paddr = iopte_to_paddr(blk_pte, data);
pte = iopte_prot(blk_pte);
- for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
+ for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
/* Unmap! */
- if (i == unmap_idx)
+ if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
continue;
- __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
+ __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
}
pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
@@ -558,76 +592,92 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
return 0;
tablep = iopte_deref(pte, data);
- } else if (unmap_idx >= 0) {
- io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
- return size;
+ } else if (unmap_idx_start >= 0) {
+ for (i = 0; i < num_entries; i++)
+ io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);
+
+ return num_entries * size;
}
- return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
+ return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t size, int lvl,
- arm_lpae_iopte *ptep)
+ unsigned long iova, size_t size, size_t pgcount,
+ int lvl, arm_lpae_iopte *ptep)
{
arm_lpae_iopte pte;
struct io_pgtable *iop = &data->iop;
+ int i = 0, num_entries, max_entries, unmap_idx_start;
/* Something went horribly wrong and we ran out of page table */
if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
return 0;
- ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+ unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+ ptep += unmap_idx_start;
pte = READ_ONCE(*ptep);
if (WARN_ON(!pte))
return 0;
/* If the size matches this level, we're in the right place */
if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
- __arm_lpae_set_pte(ptep, 0, &iop->cfg);
-
- if (!iopte_leaf(pte, lvl, iop->fmt)) {
- /* Also flush any partial walks */
- io_pgtable_tlb_flush_walk(iop, iova, size,
- ARM_LPAE_GRANULE(data));
- ptep = iopte_deref(pte, data);
- __arm_lpae_free_pgtable(data, lvl + 1, ptep);
- } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
- /*
- * Order the PTE update against queueing the IOVA, to
- * guarantee that a flush callback from a different CPU
- * has observed it before the TLBIALL can be issued.
- */
- smp_wmb();
- } else {
- io_pgtable_tlb_add_page(iop, gather, iova, size);
+ max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
+ num_entries = min_t(int, pgcount, max_entries);
+
+ while (i < num_entries) {
+ pte = READ_ONCE(*ptep);
+ if (WARN_ON(!pte))
+ break;
+
+ __arm_lpae_clear_pte(ptep, &iop->cfg);
+
+ if (!iopte_leaf(pte, lvl, iop->fmt)) {
+ /* Also flush any partial walks */
+ io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
+ ARM_LPAE_GRANULE(data));
+ __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
+ } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+ /*
+ * Order the PTE update against queueing the IOVA, to
+ * guarantee that a flush callback from a different CPU
+ * has observed it before the TLBIALL can be issued.
+ */
+ smp_wmb();
+ } else {
+ io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
+ }
+
+ ptep++;
+ i++;
}
- return size;
+ return i * size;
} else if (iopte_leaf(pte, lvl, iop->fmt)) {
/*
* Insert a table at the next level to map the old region,
* minus the part we want to unmap
*/
return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
- lvl + 1, ptep);
+ lvl + 1, ptep, pgcount);
}
/* Keep on walkin' */
ptep = iopte_deref(pte, data);
- return __arm_lpae_unmap(data, gather, iova, size, lvl + 1, ptep);
+ return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep);
}
-static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte *ptep = data->pgd;
long iaext = (s64)iova >> cfg->ias;
- if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+ if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
return 0;
if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
@@ -635,7 +685,14 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
if (WARN_ON(iaext))
return 0;
- return __arm_lpae_unmap(data, gather, iova, size, data->start_level, ptep);
+ return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount,
+ data->start_level, ptep);
+}
+
+static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t size, struct iommu_iotlb_gather *gather)
+{
+ return arm_lpae_unmap_pages(ops, iova, size, 1, gather);
}
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
@@ -750,7 +807,9 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
data->iop.ops = (struct io_pgtable_ops) {
.map = arm_lpae_map,
+ .map_pages = arm_lpae_map_pages,
.unmap = arm_lpae_unmap,
+ .unmap_pages = arm_lpae_unmap_pages,
.iova_to_phys = arm_lpae_iova_to_phys,
};
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5419c4b9f27a..f2cda9950bd5 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -8,6 +8,7 @@
#include <linux/device.h>
#include <linux/kernel.h>
+#include <linux/bits.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/init.h>
@@ -29,7 +30,7 @@ static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
static unsigned int iommu_def_domain_type __read_mostly;
-static bool iommu_dma_strict __read_mostly = true;
+static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_STRICT);
static u32 iommu_cmd_line __read_mostly;
struct iommu_group {
@@ -138,6 +139,11 @@ static int __init iommu_subsys_init(void)
(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
"(set via kernel command line)" : "");
+ pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
+ iommu_dma_strict ? "strict" : "lazy",
+ (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
+ "(set via kernel command line)" : "");
+
return 0;
}
subsys_initcall(iommu_subsys_init);
@@ -344,10 +350,9 @@ static int __init iommu_dma_setup(char *str)
}
early_param("iommu.strict", iommu_dma_setup);
-void iommu_set_dma_strict(bool strict)
+void iommu_set_dma_strict(void)
{
- if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
- iommu_dma_strict = strict;
+ iommu_dma_strict = true;
}
bool iommu_get_dma_strict(struct iommu_domain *domain)
@@ -2367,45 +2372,94 @@ EXPORT_SYMBOL_GPL(iommu_detach_group);
phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
- if (unlikely(domain->ops->iova_to_phys == NULL))
+ if (domain->type == IOMMU_DOMAIN_IDENTITY)
+ return iova;
+
+ if (domain->type == IOMMU_DOMAIN_BLOCKED)
return 0;
return domain->ops->iova_to_phys(domain, iova);
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
-static size_t iommu_pgsize(struct iommu_domain *domain,
- unsigned long addr_merge, size_t size)
+static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, size_t *count)
{
- unsigned int pgsize_idx;
- size_t pgsize;
+ unsigned int pgsize_idx, pgsize_idx_next;
+ unsigned long pgsizes;
+ size_t offset, pgsize, pgsize_next;
+ unsigned long addr_merge = paddr | iova;
- /* Max page size that still fits into 'size' */
- pgsize_idx = __fls(size);
+ /* Page sizes supported by the hardware and small enough for @size */
+ pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
- /* need to consider alignment requirements ? */
- if (likely(addr_merge)) {
- /* Max page size allowed by address */
- unsigned int align_pgsize_idx = __ffs(addr_merge);
- pgsize_idx = min(pgsize_idx, align_pgsize_idx);
- }
+ /* Constrain the page sizes further based on the maximum alignment */
+ if (likely(addr_merge))
+ pgsizes &= GENMASK(__ffs(addr_merge), 0);
- /* build a mask of acceptable page sizes */
- pgsize = (1UL << (pgsize_idx + 1)) - 1;
+ /* Make sure we have at least one suitable page size */
+ BUG_ON(!pgsizes);
- /* throw away page sizes not supported by the hardware */
- pgsize &= domain->pgsize_bitmap;
+ /* Pick the biggest page size remaining */
+ pgsize_idx = __fls(pgsizes);
+ pgsize = BIT(pgsize_idx);
+ if (!count)
+ return pgsize;
+
+ /* Find the next biggest support page size, if it exists */
+ pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+ if (!pgsizes)
+ goto out_set_count;
+
+ pgsize_idx_next = __ffs(pgsizes);
+ pgsize_next = BIT(pgsize_idx_next);
+
+ /*
+ * There's no point trying a bigger page size unless the virtual
+ * and physical addresses are similarly offset within the larger page.
+ */
+ if ((iova ^ paddr) & (pgsize_next - 1))
+ goto out_set_count;
- /* make sure we're still sane */
- BUG_ON(!pgsize);
+ /* Calculate the offset to the next page size alignment boundary */
+ offset = pgsize_next - (addr_merge & (pgsize_next - 1));
- /* pick the biggest page */
- pgsize_idx = __fls(pgsize);
- pgsize = 1UL << pgsize_idx;
+ /*
+ * If size is big enough to accommodate the larger page, reduce
+ * the number of smaller pages.
+ */
+ if (offset + pgsize_next <= size)
+ size = offset;
+out_set_count:
+ *count = size >> pgsize_idx;
return pgsize;
}
+static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot,
+ gfp_t gfp, size_t *mapped)
+{
+ const struct iommu_ops *ops = domain->ops;
+ size_t pgsize, count;
+ int ret;
+
+ pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
+
+ pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
+ iova, &paddr, pgsize, count);
+
+ if (ops->map_pages) {
+ ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
+ gfp, mapped);
+ } else {
+ ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+ *mapped = ret ? 0 : pgsize;
+ }
+
+ return ret;
+}
+
static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
@@ -2416,7 +2470,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t orig_paddr = paddr;
int ret = 0;
- if (unlikely(ops->map == NULL ||
+ if (unlikely(!(ops->map || ops->map_pages) ||
domain->pgsize_bitmap == 0UL))
return -ENODEV;
@@ -2440,18 +2494,21 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
while (size) {
- size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
+ size_t mapped = 0;
- pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
- iova, &paddr, pgsize);
- ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+ ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
+ &mapped);
+ /*
+ * Some pages may have been mapped, even if an error occurred,
+ * so we should account for those so they can be unmapped.
+ */
+ size -= mapped;
if (ret)
break;
- iova += pgsize;
- paddr += pgsize;
- size -= pgsize;
+ iova += mapped;
+ paddr += mapped;
}
/* unroll mapping in case something went wrong */
@@ -2491,6 +2548,19 @@ int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
}
EXPORT_SYMBOL_GPL(iommu_map_atomic);
+static size_t __iommu_unmap_pages(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ struct iommu_iotlb_gather *iotlb_gather)
+{
+ const struct iommu_ops *ops = domain->ops;
+ size_t pgsize, count;
+
+ pgsize = iommu_pgsize(domain, iova, iova, size, &count);
+ return ops->unmap_pages ?
+ ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
+ ops->unmap(domain, iova, pgsize, iotlb_gather);
+}
+
static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2500,7 +2570,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long orig_iova = iova;
unsigned int min_pagesz;
- if (unlikely(ops->unmap == NULL ||
+ if (unlikely(!(ops->unmap || ops->unmap_pages) ||
domain->pgsize_bitmap == 0UL))
return 0;
@@ -2528,9 +2598,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
* or we hit an area that isn't mapped.
*/
while (unmapped < size) {
- size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
-
- unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
+ unmapped_page = __iommu_unmap_pages(domain, iova,
+ size - unmapped,
+ iotlb_gather);
if (!unmapped_page)
break;
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 4d40dfa75b55..c43f3b899d2a 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -143,7 +143,9 @@ struct io_pgtable_cfg {
* struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
*
* @map: Map a physically contiguous memory region.
+ * @map_pages: Map a physically contiguous range of pages of the same size.
* @unmap: Unmap a physically contiguous memory region.
+ * @unmap_pages: Unmap a range of virtually contiguous pages of the same size.
* @iova_to_phys: Translate iova to physical address.
*
* These functions map directly onto the iommu_ops member functions with
@@ -152,8 +154,14 @@ struct io_pgtable_cfg {
struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+ int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
+ size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
};
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 979a5ceeea55..e552ecfefcf7 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -180,7 +180,10 @@ struct iommu_iotlb_gather {
* @attach_dev: attach device to an iommu domain
* @detach_dev: detach device from an iommu domain
* @map: map a physically contiguous memory region to an iommu domain
+ * @map_pages: map a physically contiguous set of pages of the same size to
+ * an iommu domain.
* @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @unmap_pages: unmap a number of pages of the same size from an iommu domain
* @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
* @iotlb_sync_map: Sync mappings created recently using @map to the hardware
* @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -229,8 +232,14 @@ struct iommu_ops {
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+ int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t pgsize, size_t pgcount,
+ int prot, gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
size_t size, struct iommu_iotlb_gather *iotlb_gather);
+ size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
+ size_t pgsize, size_t pgcount,
+ struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
size_t size);
@@ -476,7 +485,7 @@ int iommu_enable_nesting(struct iommu_domain *domain);
int iommu_set_pgtable_quirks(struct iommu_domain *domain,
unsigned long quirks);
-void iommu_set_dma_strict(bool val);
+void iommu_set_dma_strict(void);
bool iommu_get_dma_strict(struct iommu_domain *domain);
extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev,