From 822242e649ea9f1d9b5c43735445fff26209b215 Mon Sep 17 00:00:00 2001 From: Ke Liu Date: Wed, 8 Jun 2022 02:16:55 +0000 Subject: iommu: Directly use ida_alloc()/free() Use ida_alloc()/ida_free() instead of deprecated ida_simple_get()/ida_simple_remove(). Signed-off-by: Ke Liu Reviewed-by: Jason Gunthorpe Signed-off-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20220608021655.1538087-1-liuke94@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 847ad47a2dfd..cdc86c39954e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -600,7 +600,7 @@ static void iommu_group_release(struct kobject *kobj) if (group->iommu_data_release) group->iommu_data_release(group->iommu_data); - ida_simple_remove(&iommu_group_ida, group->id); + ida_free(&iommu_group_ida, group->id); if (group->default_domain) iommu_domain_free(group->default_domain); @@ -641,7 +641,7 @@ struct iommu_group *iommu_group_alloc(void) INIT_LIST_HEAD(&group->devices); INIT_LIST_HEAD(&group->entry); - ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL); + ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); if (ret < 0) { kfree(group); return ERR_PTR(ret); @@ -651,7 +651,7 @@ struct iommu_group *iommu_group_alloc(void) ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, NULL, "%d", group->id); if (ret) { - ida_simple_remove(&iommu_group_ida, group->id); + ida_free(&iommu_group_ida, group->id); kobject_put(&group->kobj); return ERR_PTR(ret); } -- cgit v1.2.3 From 4bf7fda4dce22214c70c49960b1b6438e6260b67 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 9 Jun 2022 16:12:10 +0100 Subject: iommu/dma: Add config for PCI SAC address trick For devices stuck behind a conventional PCI bus, saving extra cycles at 33MHz is probably fairly significant. However since native PCI Express is now the norm for high-performance devices, the optimisation to always prefer 32-bit addresses for the sake of avoiding DAC is starting to look rather anachronistic. Technically 32-bit addresses do have shorter TLPs on PCIe, but unless the device is saturating its link bandwidth with small transfers it seems unlikely that the difference is appreciable. What definitely is appreciable, however, is that the IOVA allocator doesn't behave all that well once the 32-bit space starts getting full. As DMA working sets get bigger, this optimisation increasingly backfires and adds considerable overhead to the dma_map path for use-cases like high-bandwidth networking. We've increasingly bandaged the allocator in attempts to mitigate this, but it remains fundamentally at odds with other valid requirements to try as hard as possible to satisfy a request within the given limit; what we really need is to just avoid this odd notion of a speculative allocation when it isn't beneficial anyway. Unfortunately that's where things get awkward... Having been present on x86 for 15 years or so now, it turns out there are systems which fail to properly define the upper limit of usable IOVA space for certain devices and this trick was the only thing letting them work OK. I had a similar ulterior motive for a couple of early arm64 systems when originally adding it to iommu-dma, but those really should be fixed with proper firmware bindings by now. 
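Stepping back to the ida_alloc() conversion in the first patch above: the modern API folds the common "any ID, no bound" case into ida_alloc(), with ida_alloc_range()/ida_alloc_max() for bounded allocations. A minimal usage sketch (illustrative only, with made-up example_* names, not code from the patch):

	#include <linux/idr.h>

	static DEFINE_IDA(example_ida);

	int example_create(void)
	{
		/* Equivalent of the old ida_simple_get(&ida, 0, 0, GFP_KERNEL) */
		int id = ida_alloc(&example_ida, GFP_KERNEL);

		if (id < 0)
			return id;	/* negative errno on failure */
		return id;
	}

	void example_destroy(int id)
	{
		ida_free(&example_ida, id);	/* replaces ida_simple_remove() */
	}

One conversion gotcha worth noting: ida_simple_get() took an exclusive upper bound, whereas ida_alloc_range() takes an inclusive max, so bounded callers need their limit adjusted by one.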
Let's be brave and default it to off in the hope that CI systems and developers will find and fix those bugs, but expect that desktop-focused distro configs are likely to want to turn it back on for maximum compatibility. Signed-off-by: Robin Murphy Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Link: https://lore.kernel.org/r/3f06994f9f370f9d35b2630ab75171ecd2065621.1654782107.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 26 ++++++++++++++++++++++++++ drivers/iommu/dma-iommu.c | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c79a0df090c0..5a225b48dd00 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -144,6 +144,32 @@ config IOMMU_DMA select IRQ_MSI_IOMMU select NEED_SG_DMA_LENGTH +config IOMMU_DMA_PCI_SAC + bool "Enable 64-bit legacy PCI optimisation by default" + depends on IOMMU_DMA + help + Enable by default an IOMMU optimisation for 64-bit legacy PCI devices, + wherein the DMA API layer will always first try to allocate a 32-bit + DMA address suitable for a single address cycle, before falling back + to allocating from the device's full usable address range. If your + system has 64-bit legacy PCI devices in 32-bit slots where using dual + address cycles reduces DMA throughput significantly, this may be + beneficial to overall performance. + + If you have a modern PCI Express based system, this feature mostly just + represents extra overhead in the allocation path for no practical + benefit, and it should usually be preferable to say "n" here. + + However, beware that this feature has also historically papered over + bugs where the IOMMU address width and/or device DMA mask is not set + correctly. If device DMA problems and IOMMU faults start occurring + after disabling this option, it is almost certainly indicative of a + latent driver or firmware/BIOS bug, which would previously have only + manifested with several gigabytes worth of concurrent DMA mappings. + + If this option is not set, the feature can still be re-enabled at + boot time with the "iommu.forcedac=0" command-line argument. + # Shared Virtual Addressing config IOMMU_SVA bool diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f90251572a5d..9f9d9ba7f376 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -67,7 +67,7 @@ struct iommu_dma_cookie { }; static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); -bool iommu_dma_forcedac __read_mostly; +bool iommu_dma_forcedac __read_mostly = !IS_ENABLED(CONFIG_IOMMU_DMA_PCI_SAC); static int __init iommu_dma_forcedac_setup(char *str) { -- cgit v1.2.3 From ac9a5d522bb80be50ea84965699e1c8257d745ce Mon Sep 17 00:00:00 2001 From: Yunfei Wang Date: Mon, 30 May 2022 20:07:45 +0800 Subject: iommu/dma: Fix race condition during iova_domain initialization When many devices share the same iova domain, iommu_dma_init_domain() may be called at the same time. The checking of iovad->start_pfn will all get false in iommu_dma_init_domain() and both enter init_iova_domain() to do iovad initialization. Fix this by protecting init_iova_domain() with iommu_dma_cookie->mutex. 
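A minimal sketch of the serialised-init pattern the fix applies (illustrative, with hypothetical example_* names; the real code keys off iovad->start_pfn rather than a flag, as the diff below shows):

	#include <linux/mutex.h>

	struct example_cookie {
		struct mutex mutex;	/* mutex_init() at cookie allocation */
		bool initialised;
	};

	static int example_expensive_init(void) { return 0; }	/* stand-in */

	static int example_init_once(struct example_cookie *cookie)
	{
		int ret = 0;

		mutex_lock(&cookie->mutex);
		if (cookie->initialised)	/* another caller won the race */
			goto out_unlock;

		ret = example_expensive_init();	/* hypothetical one-time setup */
		if (!ret)
			cookie->initialised = true;
	out_unlock:
		mutex_unlock(&cookie->mutex);
		return ret;
	}

Both the check and the initialisation sit under the same mutex, so concurrent callers serialise instead of racing into init_iova_domain() twice.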
Exception backtrace:
rb_insert_color(param1=0xFFFFFF80CD2BDB40, param3=1) + 64
init_iova_domain() + 180
iommu_setup_dma_ops() + 260
arch_setup_dma_ops() + 132
of_dma_configure_id() + 468
platform_dma_configure() + 32
really_probe() + 1168
driver_probe_device() + 268
__device_attach_driver() + 524
__device_attach() + 524
bus_probe_device() + 64
deferred_probe_work_func() + 260
process_one_work() + 580
worker_thread() + 1076
kthread() + 332
ret_from_fork() + 16

Signed-off-by: Ning Li
Signed-off-by: Yunfei Wang
Acked-by: Robin Murphy
Reviewed-by: Miles Chen
Link: https://lore.kernel.org/r/20220530120748.31733-1-yf.wang@mediatek.com
Signed-off-by: Joerg Roedel
---
 drivers/iommu/dma-iommu.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9f9d9ba7f376..1910f4f1612b 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -64,6 +64,7 @@ struct iommu_dma_cookie {
 	/* Domain for flush queue callback; NULL if flush queue not in use */
 	struct iommu_domain *fq_domain;
+	struct mutex mutex;
 };
 
 static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
@@ -310,6 +311,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
 	if (!domain->iova_cookie)
 		return -ENOMEM;
 
+	mutex_init(&domain->iova_cookie->mutex);
 	return 0;
 }
 
@@ -560,26 +562,33 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	}
 
 	/* start_pfn is always nonzero for an already-initialised domain */
+	mutex_lock(&cookie->mutex);
 	if (iovad->start_pfn) {
 		if (1UL << order != iovad->granule ||
 		    base_pfn != iovad->start_pfn) {
 			pr_warn("Incompatible range for DMA domain\n");
-			return -EFAULT;
+			ret = -EFAULT;
+			goto done_unlock;
 		}
-		return 0;
+		ret = 0;
+		goto done_unlock;
 	}
 
 	init_iova_domain(iovad, 1UL << order, base_pfn);
 	ret = iova_domain_init_rcaches(iovad);
 	if (ret)
-		return ret;
+		goto done_unlock;
 
 	/* If the FQ fails we can simply fall back to strict mode */
 	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
 		domain->type = IOMMU_DOMAIN_DMA;
 
-	return iova_reserve_iommu_regions(dev, domain);
+	ret = iova_reserve_iommu_regions(dev, domain);
+
+done_unlock:
+	mutex_unlock(&cookie->mutex);
+	return ret;
 }
 
 /**
-- cgit v1.2.3

From d034dbbb9b2ac32aa06c104701fdf81ebdfea9a2 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno
Date: Thu, 16 Jun 2022 13:08:26 +0200
Subject: dt-bindings: iommu: mediatek: Add mediatek,infracfg phandle

Add property "mediatek,infracfg" to let the mtk_iommu driver retrieve
a phandle to the infracfg syscon instead of performing a per-soc
compatible lookup in the entire devicetree and set it as a required
property for MT2712 and MT8173.
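The driver-side counterpart of this binding (spelled out in full in the next patch) reduces to a phandle lookup with a legacy fallback. A sketch, with a hypothetical example_get_infracfg() wrapper:

	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/mfd/syscon.h>
	#include <linux/regmap.h>

	static struct regmap *example_get_infracfg(struct device *dev)
	{
		struct regmap *infracfg;

		/* Preferred: follow the "mediatek,infracfg" phandle */
		infracfg = syscon_regmap_lookup_by_phandle(dev->of_node,
							   "mediatek,infracfg");
		if (!IS_ERR(infracfg))
			return infracfg;

		/* Legacy DTs without the phandle: per-SoC compatible lookup */
		return syscon_regmap_lookup_by_compatible("mediatek,mt8173-infracfg");
	}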
Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220616110830.26037-2-angelogioacchino.delregno@collabora.com Signed-off-by: Joerg Roedel --- .../devicetree/bindings/iommu/mediatek,iommu.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index 2ae3bbad7f1a..fee0241b5098 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -101,6 +101,10 @@ properties: items: - const: bclk + mediatek,infracfg: + $ref: /schemas/types.yaml#/definitions/phandle + description: The phandle to the mediatek infracfg syscon + mediatek,larbs: $ref: /schemas/types.yaml#/definitions/phandle-array minItems: 1 @@ -167,6 +171,18 @@ allOf: required: - power-domains + - if: + properties: + compatible: + contains: + enum: + - mediatek,mt2712-m4u + - mediatek,mt8173-m4u + + then: + required: + - mediatek,infracfg + - if: # The IOMMUs don't have larbs. not: properties: @@ -191,6 +207,7 @@ examples: interrupts = ; clocks = <&infracfg CLK_INFRA_M4U>; clock-names = "bclk"; + mediatek,infracfg = <&infracfg>; mediatek,larbs = <&larb0>, <&larb1>, <&larb2>, <&larb3>, <&larb4>, <&larb5>; #iommu-cells = <1>; -- cgit v1.2.3 From 7d748ffdee0dc244df3def1b2ec108bf095c0f9e Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 16 Jun 2022 13:08:27 +0200 Subject: iommu/mediatek: Lookup phandle to retrieve syscon to infracfg This driver will get support for more SoCs and the list of infracfg compatibles is expected to grow: in order to prevent getting this situation out of control and see a long list of compatible strings, add support to retrieve a handle to infracfg's regmap through a new "mediatek,infracfg" phandle. In order to keep retrocompatibility with older devicetrees, the old way is kept in place. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Miles Chen Reviewed-by: Yong Wu Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220616110830.26037-3-angelogioacchino.delregno@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index bb9dd92c9898..90685946fcbe 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1140,22 +1140,32 @@ static int mtk_iommu_probe(struct platform_device *pdev) data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN); if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) { - switch (data->plat_data->m4u_plat) { - case M4U_MT2712: - p = "mediatek,mt2712-infracfg"; - break; - case M4U_MT8173: - p = "mediatek,mt8173-infracfg"; - break; - default: - p = NULL; + infracfg = syscon_regmap_lookup_by_phandle(dev->of_node, "mediatek,infracfg"); + if (IS_ERR(infracfg)) { + /* + * Legacy devicetrees will not specify a phandle to + * mediatek,infracfg: in that case, we use the older + * way to retrieve a syscon to infra. + * + * This is for retrocompatibility purposes only, hence + * no more compatibles shall be added to this. 
+ */ + switch (data->plat_data->m4u_plat) { + case M4U_MT2712: + p = "mediatek,mt2712-infracfg"; + break; + case M4U_MT8173: + p = "mediatek,mt8173-infracfg"; + break; + default: + p = NULL; + } + + infracfg = syscon_regmap_lookup_by_compatible(p); + if (IS_ERR(infracfg)) + return PTR_ERR(infracfg); } - infracfg = syscon_regmap_lookup_by_compatible(p); - - if (IS_ERR(infracfg)) - return PTR_ERR(infracfg); - ret = regmap_read(infracfg, REG_INFRA_MISC, &val); if (ret) return ret; -- cgit v1.2.3 From 21fd9be431adb880e55cf7b6b918e0696c0be643 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 16 Jun 2022 13:08:30 +0200 Subject: iommu/mediatek: Cleanup pericfg lookup flow Since only the INFRA type IOMMU needs to modify register(s) in the pericfg iospace, it's safe to drop the pericfg_comp_str NULL check; also, directly assign the regmap handle to data->pericfg instead of to the infracfg variable to improve code readability. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220616110830.26037-6-angelogioacchino.delregno@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 90685946fcbe..b2ae84046249 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1217,15 +1217,13 @@ static int mtk_iommu_probe(struct platform_device *pdev) dev_err(dev, "mm dts parse fail(%d).", ret); goto out_runtime_disable; } - } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) && - data->plat_data->pericfg_comp_str) { - infracfg = syscon_regmap_lookup_by_compatible(data->plat_data->pericfg_comp_str); - if (IS_ERR(infracfg)) { - ret = PTR_ERR(infracfg); + } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) { + p = data->plat_data->pericfg_comp_str; + data->pericfg = syscon_regmap_lookup_by_compatible(p); + if (IS_ERR(data->pericfg)) { + ret = PTR_ERR(data->pericfg); goto out_runtime_disable; } - - data->pericfg = infracfg; } platform_set_drvdata(pdev, data); -- cgit v1.2.3 From 0d10fe7591178713695d7d8d77284b2fc2e6629f Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 25 May 2022 16:54:16 +0200 Subject: iommu/amd: Use try_cmpxchg64 in alloc_pte and free_clear_pte Use try_cmpxchg64 instead of cmpxchg64 (*ptr, old, new) != old in alloc_pte and free_clear_pte. cmpxchg returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, remove racy explicit assignment to pteval when cmpxchg fails, this is what try_cmpxchg does implicitly from *pte in an atomic way. Signed-off-by: Uros Bizjak Cc: Joerg Roedel Cc: Suravee Suthikulpanit Cc: Will Deacon Link: https://lore.kernel.org/r/20220525145416.10816-1-ubizjak@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/io_pgtable.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 6608d1717574..7d4b61e5db47 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -258,7 +258,7 @@ static u64 *alloc_pte(struct protection_domain *domain, __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); /* pte could have been changed somewhere. 
*/ - if (cmpxchg64(pte, __pte, __npte) != __pte) + if (!try_cmpxchg64(pte, &__pte, __npte)) free_page((unsigned long)page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; @@ -341,10 +341,8 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist) u64 *pt; int mode; - while (cmpxchg64(pte, pteval, 0) != pteval) { + while (!try_cmpxchg64(pte, &pteval, 0)) pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); - pteval = *pte; - } if (!IOMMU_PTE_PRESENT(pteval)) return; -- cgit v1.2.3 From 3b7e2482f9a3f2e99628048b945c9c6cc53bd38e Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:36 +0100 Subject: iommu: Introduce a callback to struct iommu_resv_region A callback is introduced to struct iommu_resv_region to free memory allocations associated with the reserved region. This will be useful when we introduce support for IORT RMR based reserved regions. Reviewed-by: Christoph Hellwig Tested-by: Steven Price Tested-by: Laurentiu Tudor Tested-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-2-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 16 +++++++++++----- include/linux/iommu.h | 2 ++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index cdc86c39954e..f46431ac49e1 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2590,16 +2590,22 @@ void iommu_put_resv_regions(struct device *dev, struct list_head *list) * @list: reserved region list for device * * IOMMU drivers can use this to implement their .put_resv_regions() callback - * for simple reservations. Memory allocated for each reserved region will be - * freed. If an IOMMU driver allocates additional resources per region, it is - * going to have to implement a custom callback. + * for simple reservations. If a per region callback is provided that will be + * used to free all memory allocations associated with the reserved region or + * else just free up the memory for the regions. If an IOMMU driver allocates + * additional resources per region, it is going to have to implement a custom + * callback. */ void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list) { struct iommu_resv_region *entry, *next; - list_for_each_entry_safe(entry, next, list, list) - kfree(entry); + list_for_each_entry_safe(entry, next, list, list) { + if (entry->free) + entry->free(dev, entry); + else + kfree(entry); + } } EXPORT_SYMBOL(generic_iommu_put_resv_regions); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5e1afe169549..b22ffa6bc4a9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -135,6 +135,7 @@ enum iommu_resv_type { * @length: Length of the region in bytes * @prot: IOMMU Protection flags (READ/WRITE/...) 
* @type: Type of the reserved region + * @free: Callback to free associated memory allocations */ struct iommu_resv_region { struct list_head list; @@ -142,6 +143,7 @@ struct iommu_resv_region { size_t length; int prot; enum iommu_resv_type type; + void (*free)(struct device *dev, struct iommu_resv_region *region); }; /** -- cgit v1.2.3 From 8778b1d48117055c710a01498f65fa730160fdfa Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:37 +0100 Subject: ACPI/IORT: Make iort_iommu_msi_get_resv_regions() return void At present iort_iommu_msi_get_resv_regions() returns the number of MSI reserved regions on success and there are no users for this. The reserved region list will get populated anyway for platforms that require the HW MSI region reservation. Hence, change the function to return void instead. Reviewed-by: Christoph Hellwig Tested-by: Steven Price Tested-by: Laurentiu Tudor Reviewed-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-3-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/iort.c | 25 +++++++++---------------- include/linux/acpi_iort.h | 6 +++--- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index f2f8f05662de..213f61cae176 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -811,22 +811,19 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev) * @dev: Device from iommu_get_resv_regions() * @head: Reserved region list from iommu_get_resv_regions() * - * Returns: Number of msi reserved regions on success (0 if platform - * doesn't require the reservation or no associated msi regions), - * appropriate error value otherwise. The ITS interrupt translation - * spaces (ITS_base + SZ_64K, SZ_64K) associated with the device - * are the msi reserved regions. + * The ITS interrupt translation spaces (ITS_base + SZ_64K, SZ_64K) + * associated with the device are the HW MSI reserved regions. */ -int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) +void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct acpi_iort_its_group *its; struct acpi_iort_node *iommu_node, *its_node = NULL; - int i, resv = 0; + int i; iommu_node = iort_get_msi_resv_iommu(dev); if (!iommu_node) - return 0; + return; /* * Current logic to reserve ITS regions relies on HW topologies @@ -846,7 +843,7 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) } if (!its_node) - return 0; + return; /* Move to ITS specific data */ its = (struct acpi_iort_its_group *)its_node->node_data; @@ -860,14 +857,10 @@ int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) region = iommu_alloc_resv_region(base + SZ_64K, SZ_64K, prot, IOMMU_RESV_MSI); - if (region) { + if (region) list_add_tail(®ion->list, head); - resv++; - } } } - - return (resv == its->its_count) ? 
resv : -ENODEV; } static inline bool iort_iommu_driver_enabled(u8 type) @@ -1034,8 +1027,8 @@ int iort_iommu_configure_id(struct device *dev, const u32 *id_in) } #else -int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) -{ return 0; } +void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) +{ } int iort_iommu_configure_id(struct device *dev, const u32 *input_id) { return -ENODEV; } #endif diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index f1f0842a2cb2..a8198b83753d 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -36,7 +36,7 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ int iort_dma_get_ranges(struct device *dev, u64 *size); int iort_iommu_configure_id(struct device *dev, const u32 *id_in); -int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); +void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else static inline void acpi_iort_init(void) { } @@ -52,8 +52,8 @@ static inline int iort_dma_get_ranges(struct device *dev, u64 *size) static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in) { return -ENODEV; } static inline -int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) -{ return 0; } +void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) +{ } static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void) { return PHYS_ADDR_MAX; } -- cgit v1.2.3 From 55be25b8b5e4e2fd680cfb073b84a74a79c002fa Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:38 +0100 Subject: ACPI/IORT: Provide a generic helper to retrieve reserve regions Currently IORT provides a helper to retrieve HW MSI reserve regions. Change this to a generic helper to retrieve any IORT related reserve regions. This will be useful when we add support for RMR nodes in subsequent patches. [Lorenzo: For ACPI IORT] Reviewed-by: Lorenzo Pieralisi Reviewed-by: Christoph Hellwig Tested-by: Steven Price Tested-by: Laurentiu Tudor Tested-by: Hanjun Guo Reviewed-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-4-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/iort.c | 22 +++++++++++++++------- drivers/iommu/dma-iommu.c | 2 +- include/linux/acpi_iort.h | 4 ++-- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 213f61cae176..cd5d1d7823cb 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -806,15 +806,13 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev) return NULL; } -/** - * iort_iommu_msi_get_resv_regions - Reserved region driver helper - * @dev: Device from iommu_get_resv_regions() - * @head: Reserved region list from iommu_get_resv_regions() - * +/* + * Retrieve platform specific HW MSI reserve regions. * The ITS interrupt translation spaces (ITS_base + SZ_64K, SZ_64K) * associated with the device are the HW MSI reserved regions. 
*/ -void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) +static void iort_iommu_msi_get_resv_regions(struct device *dev, + struct list_head *head) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct acpi_iort_its_group *its; @@ -863,6 +861,16 @@ void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) } } +/** + * iort_iommu_get_resv_regions - Generic helper to retrieve reserved regions. + * @dev: Device from iommu_get_resv_regions() + * @head: Reserved region list from iommu_get_resv_regions() + */ +void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head) +{ + iort_iommu_msi_get_resv_regions(dev, head); +} + static inline bool iort_iommu_driver_enabled(u8 type) { switch (type) { @@ -1027,7 +1035,7 @@ int iort_iommu_configure_id(struct device *dev, const u32 *id_in) } #else -void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) +void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head) { } int iort_iommu_configure_id(struct device *dev, const u32 *input_id) { return -ENODEV; } diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1910f4f1612b..458fb6738223 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -387,7 +387,7 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode)) - iort_iommu_msi_get_resv_regions(dev, list); + iort_iommu_get_resv_regions(dev, list); } EXPORT_SYMBOL(iommu_dma_get_resv_regions); diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index a8198b83753d..e5d2de9caf7f 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -36,7 +36,7 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ int iort_dma_get_ranges(struct device *dev, u64 *size); int iort_iommu_configure_id(struct device *dev, const u32 *id_in); -void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); +void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head); phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else static inline void acpi_iort_init(void) { } @@ -52,7 +52,7 @@ static inline int iort_dma_get_ranges(struct device *dev, u64 *size) static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in) { return -ENODEV; } static inline -void iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) +void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head) { } static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void) -- cgit v1.2.3 From 491cf4a6735a957cd0733365f8d17e0f4308f5a4 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:39 +0100 Subject: ACPI/IORT: Add support to retrieve IORT RMR reserved regions Parse through the IORT RMR nodes and populate the reserve region list corresponding to a given IOMMU and device(optional). Also, go through the ID mappings of the RMR node and retrieve all the SIDs associated with it. 
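Two pieces introduced earlier in the series snap together here: the reserved region's new ->free hook and the RMR data that embeds it. Freeing must recover the container first, because the SID array is a side allocation that a bare kfree() of the region would leak. A condensed sketch of the shape (mirroring the patch's own iort_rmr_free(), shown in the diff below):

	static void example_rmr_free(struct device *dev,
				     struct iommu_resv_region *region)
	{
		struct iommu_iort_rmr_data *rmr_data =
			container_of(region, struct iommu_iort_rmr_data, rr);

		kfree(rmr_data->sids);
		kfree(rmr_data);
	}

With region->free set to this at allocation time, generic_iommu_put_resv_regions() invokes it instead of its default kfree().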
Reviewed-by: Lorenzo Pieralisi Tested-by: Steven Price Tested-by: Laurentiu Tudor Tested-by: Hanjun Guo Reviewed-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-5-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/iort.c | 291 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/iommu.h | 8 ++ 2 files changed, 299 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index cd5d1d7823cb..b6273af316c6 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -788,6 +788,294 @@ void acpi_configure_pmsi_domain(struct device *dev) } #ifdef CONFIG_IOMMU_API +static void iort_rmr_free(struct device *dev, + struct iommu_resv_region *region) +{ + struct iommu_iort_rmr_data *rmr_data; + + rmr_data = container_of(region, struct iommu_iort_rmr_data, rr); + kfree(rmr_data->sids); + kfree(rmr_data); +} + +static struct iommu_iort_rmr_data *iort_rmr_alloc( + struct acpi_iort_rmr_desc *rmr_desc, + int prot, enum iommu_resv_type type, + u32 *sids, u32 num_sids) +{ + struct iommu_iort_rmr_data *rmr_data; + struct iommu_resv_region *region; + u32 *sids_copy; + u64 addr = rmr_desc->base_address, size = rmr_desc->length; + + rmr_data = kmalloc(sizeof(*rmr_data), GFP_KERNEL); + if (!rmr_data) + return NULL; + + /* Create a copy of SIDs array to associate with this rmr_data */ + sids_copy = kmemdup(sids, num_sids * sizeof(*sids), GFP_KERNEL); + if (!sids_copy) { + kfree(rmr_data); + return NULL; + } + rmr_data->sids = sids_copy; + rmr_data->num_sids = num_sids; + + if (!IS_ALIGNED(addr, SZ_64K) || !IS_ALIGNED(size, SZ_64K)) { + /* PAGE align base addr and size */ + addr &= PAGE_MASK; + size = PAGE_ALIGN(size + offset_in_page(rmr_desc->base_address)); + + pr_err(FW_BUG "RMR descriptor[0x%llx - 0x%llx] not aligned to 64K, continue with [0x%llx - 0x%llx]\n", + rmr_desc->base_address, + rmr_desc->base_address + rmr_desc->length - 1, + addr, addr + size - 1); + } + + region = &rmr_data->rr; + INIT_LIST_HEAD(®ion->list); + region->start = addr; + region->length = size; + region->prot = prot; + region->type = type; + region->free = iort_rmr_free; + + return rmr_data; +} + +static void iort_rmr_desc_check_overlap(struct acpi_iort_rmr_desc *desc, + u32 count) +{ + int i, j; + + for (i = 0; i < count; i++) { + u64 end, start = desc[i].base_address, length = desc[i].length; + + if (!length) { + pr_err(FW_BUG "RMR descriptor[0x%llx] with zero length, continue anyway\n", + start); + continue; + } + + end = start + length - 1; + + /* Check for address overlap */ + for (j = i + 1; j < count; j++) { + u64 e_start = desc[j].base_address; + u64 e_end = e_start + desc[j].length - 1; + + if (start <= e_end && end >= e_start) + pr_err(FW_BUG "RMR descriptor[0x%llx - 0x%llx] overlaps, continue anyway\n", + start, end); + } + } +} + +/* + * Please note, we will keep the already allocated RMR reserve + * regions in case of a memory allocation failure. 
+ */ +static void iort_get_rmrs(struct acpi_iort_node *node, + struct acpi_iort_node *smmu, + u32 *sids, u32 num_sids, + struct list_head *head) +{ + struct acpi_iort_rmr *rmr = (struct acpi_iort_rmr *)node->node_data; + struct acpi_iort_rmr_desc *rmr_desc; + int i; + + rmr_desc = ACPI_ADD_PTR(struct acpi_iort_rmr_desc, node, + rmr->rmr_offset); + + iort_rmr_desc_check_overlap(rmr_desc, rmr->rmr_count); + + for (i = 0; i < rmr->rmr_count; i++, rmr_desc++) { + struct iommu_iort_rmr_data *rmr_data; + enum iommu_resv_type type; + int prot = IOMMU_READ | IOMMU_WRITE; + + if (rmr->flags & ACPI_IORT_RMR_REMAP_PERMITTED) + type = IOMMU_RESV_DIRECT_RELAXABLE; + else + type = IOMMU_RESV_DIRECT; + + if (rmr->flags & ACPI_IORT_RMR_ACCESS_PRIVILEGE) + prot |= IOMMU_PRIV; + + /* Attributes 0x00 - 0x03 represents device memory */ + if (ACPI_IORT_RMR_ACCESS_ATTRIBUTES(rmr->flags) <= + ACPI_IORT_RMR_ATTR_DEVICE_GRE) + prot |= IOMMU_MMIO; + else if (ACPI_IORT_RMR_ACCESS_ATTRIBUTES(rmr->flags) == + ACPI_IORT_RMR_ATTR_NORMAL_IWB_OWB) + prot |= IOMMU_CACHE; + + rmr_data = iort_rmr_alloc(rmr_desc, prot, type, + sids, num_sids); + if (!rmr_data) + return; + + list_add_tail(&rmr_data->rr.list, head); + } +} + +static u32 *iort_rmr_alloc_sids(u32 *sids, u32 count, u32 id_start, + u32 new_count) +{ + u32 *new_sids; + u32 total_count = count + new_count; + int i; + + new_sids = krealloc_array(sids, count + new_count, + sizeof(*new_sids), GFP_KERNEL); + if (!new_sids) + return NULL; + + for (i = count; i < total_count; i++) + new_sids[i] = id_start++; + + return new_sids; +} + +static bool iort_rmr_has_dev(struct device *dev, u32 id_start, + u32 id_count) +{ + int i; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + /* + * Make sure the kernel has preserved the boot firmware PCIe + * configuration. This is required to ensure that the RMR PCIe + * StreamIDs are still valid (Refer: ARM DEN 0049E.d Section 3.1.1.5). + */ + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_host_bridge *host = pci_find_host_bridge(pdev->bus); + + if (!host->preserve_config) + return false; + } + + for (i = 0; i < fwspec->num_ids; i++) { + if (fwspec->ids[i] >= id_start && + fwspec->ids[i] <= id_start + id_count) + return true; + } + + return false; +} + +static void iort_node_get_rmr_info(struct acpi_iort_node *node, + struct acpi_iort_node *iommu, + struct device *dev, struct list_head *head) +{ + struct acpi_iort_node *smmu = NULL; + struct acpi_iort_rmr *rmr; + struct acpi_iort_id_mapping *map; + u32 *sids = NULL; + u32 num_sids = 0; + int i; + + if (!node->mapping_offset || !node->mapping_count) { + pr_err(FW_BUG "Invalid ID mapping, skipping RMR node %p\n", + node); + return; + } + + rmr = (struct acpi_iort_rmr *)node->node_data; + if (!rmr->rmr_offset || !rmr->rmr_count) + return; + + map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node, + node->mapping_offset); + + /* + * Go through the ID mappings and see if we have a match for SMMU + * and dev(if !NULL). If found, get the sids for the Node. + * Please note, id_count is equal to the number of IDs in the + * range minus one. 
+ */ + for (i = 0; i < node->mapping_count; i++, map++) { + struct acpi_iort_node *parent; + + if (!map->id_count) + continue; + + parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table, + map->output_reference); + if (parent != iommu) + continue; + + /* If dev is valid, check RMR node corresponds to the dev SID */ + if (dev && !iort_rmr_has_dev(dev, map->output_base, + map->id_count)) + continue; + + /* Retrieve SIDs associated with the Node. */ + sids = iort_rmr_alloc_sids(sids, num_sids, map->output_base, + map->id_count + 1); + if (!sids) + return; + + num_sids += map->id_count + 1; + } + + if (!sids) + return; + + iort_get_rmrs(node, smmu, sids, num_sids, head); + kfree(sids); +} + +static void iort_find_rmrs(struct acpi_iort_node *iommu, struct device *dev, + struct list_head *head) +{ + struct acpi_table_iort *iort; + struct acpi_iort_node *iort_node, *iort_end; + int i; + + /* Only supports ARM DEN 0049E.d onwards */ + if (iort_table->revision < 5) + return; + + iort = (struct acpi_table_iort *)iort_table; + + iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort, + iort->node_offset); + iort_end = ACPI_ADD_PTR(struct acpi_iort_node, iort, + iort_table->length); + + for (i = 0; i < iort->node_count; i++) { + if (WARN_TAINT(iort_node >= iort_end, TAINT_FIRMWARE_WORKAROUND, + "IORT node pointer overflows, bad table!\n")) + return; + + if (iort_node->type == ACPI_IORT_NODE_RMR) + iort_node_get_rmr_info(iort_node, iommu, dev, head); + + iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort_node, + iort_node->length); + } +} + +/* + * Populate the RMR list associated with a given IOMMU and dev(if provided). + * If dev is NULL, the function populates all the RMRs associated with the + * given IOMMU. + */ +static void iort_iommu_rmr_get_resv_regions(struct fwnode_handle *iommu_fwnode, + struct device *dev, + struct list_head *head) +{ + struct acpi_iort_node *iommu; + + iommu = iort_get_iort_node(iommu_fwnode); + if (!iommu) + return; + + iort_find_rmrs(iommu, dev, head); +} + static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev) { struct acpi_iort_node *iommu; @@ -868,7 +1156,10 @@ static void iort_iommu_msi_get_resv_regions(struct device *dev, */ void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head) { + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + iort_iommu_msi_get_resv_regions(dev, head); + iort_iommu_rmr_get_resv_regions(fwspec->iommu_fwnode, dev, head); } static inline bool iort_iommu_driver_enabled(u8 type) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b22ffa6bc4a9..e6abd998dbe7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -146,6 +146,14 @@ struct iommu_resv_region { void (*free)(struct device *dev, struct iommu_resv_region *region); }; +struct iommu_iort_rmr_data { + struct iommu_resv_region rr; + + /* Stream IDs associated with IORT RMR entry */ + const u32 *sids; + u32 num_sids; +}; + /** * enum iommu_dev_features - Per device IOMMU features * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses -- cgit v1.2.3 From e302eea8f49717253ac64fd45b7cc719e87fa010 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:40 +0100 Subject: ACPI/IORT: Add a helper to retrieve RMR info directly This will provide a way for SMMU drivers to retrieve StreamIDs associated with IORT RMR nodes and use that to set bypass settings for those IDs. 
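On the consumer side, an SMMU driver uses the pair of helpers like this (the same pattern the SMMUv3 and SMMU patches later in this series adopt; sketch only):

	struct list_head rmr_list;
	struct iommu_resv_region *e;

	INIT_LIST_HEAD(&rmr_list);
	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);

	list_for_each_entry(e, &rmr_list, list) {
		struct iommu_iort_rmr_data *rmr =
			container_of(e, struct iommu_iort_rmr_data, rr);
		int i;

		for (i = 0; i < rmr->num_sids; i++) {
			/* ... program bypass for rmr->sids[i] ... */
		}
	}

	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);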
Tested-by: Steven Price
Tested-by: Laurentiu Tudor
Tested-by: Hanjun Guo
Reviewed-by: Hanjun Guo
Signed-off-by: Shameer Kolothum
Acked-by: Robin Murphy
Link: https://lore.kernel.org/r/20220615101044.1972-6-shameerali.kolothum.thodi@huawei.com
Signed-off-by: Joerg Roedel
---
 drivers/acpi/arm64/iort.c | 28 ++++++++++++++++++++++++++
 include/linux/acpi_iort.h |  8 ++++++++
 2 files changed, 36 insertions(+)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index b6273af316c6..cd1349d3544e 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1394,6 +1394,34 @@ int iort_dma_get_ranges(struct device *dev, u64 *size)
 	return nc_dma_get_range(dev, size);
 }
 
+/**
+ * iort_get_rmr_sids - Retrieve IORT RMR node reserved regions with
+ *                     associated StreamIDs information.
+ * @iommu_fwnode: fwnode associated with IOMMU
+ * @head: Reserved region list
+ */
+void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode,
+		       struct list_head *head)
+{
+	iort_iommu_rmr_get_resv_regions(iommu_fwnode, NULL, head);
+}
+EXPORT_SYMBOL_GPL(iort_get_rmr_sids);
+
+/**
+ * iort_put_rmr_sids - Free memory allocated for RMR reserved regions.
+ * @iommu_fwnode: fwnode associated with IOMMU
+ * @head: Reserved region list
+ */
+void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode,
+		       struct list_head *head)
+{
+	struct iommu_resv_region *entry, *next;
+
+	list_for_each_entry_safe(entry, next, head, list)
+		entry->free(NULL, entry);
+}
+EXPORT_SYMBOL_GPL(iort_put_rmr_sids);
+
 static void __init acpi_iort_register_irq(int hwirq, const char *name,
 					  int trigger,
 					  struct resource *res)

diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index e5d2de9caf7f..b43be0987b19 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -33,6 +33,10 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
 					  enum irq_domain_bus_token bus_token);
 void acpi_configure_pmsi_domain(struct device *dev);
 int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
+void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode,
+		       struct list_head *head);
+void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode,
+		       struct list_head *head);
 /* IOMMU interface */
 int iort_dma_get_ranges(struct device *dev, u64 *size);
 int iort_iommu_configure_id(struct device *dev, const u32 *id_in);
@@ -46,6 +50,10 @@ static inline struct irq_domain *iort_get_device_domain(
 	struct device *dev, u32 id, enum irq_domain_bus_token bus_token)
 { return NULL; }
 static inline void acpi_configure_pmsi_domain(struct device *dev) { }
+static inline
+void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *head) { }
+static inline
+void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *head) { }
 /* IOMMU interface */
 static inline int iort_dma_get_ranges(struct device *dev, u64 *size)
 { return -ENODEV; }
-- cgit v1.2.3

From 04e2afd1a71c98e8ad84e5e6bf8352c3de6e6eaf Mon Sep 17 00:00:00 2001
From: Shameer Kolothum
Date: Wed, 15 Jun 2022 11:10:41 +0100
Subject: iommu/arm-smmu-v3: Introduce strtab init helper

Introduce a helper to check the SID range and to init the l2 strtab
entries (bypass). This will be useful when we have to initialize the
l2 strtab with bypass for RMR SIDs.
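For context on what "l2 strtab" means here: with a two-level stream table, the StreamID is split at STRTAB_SPLIT (8 in this driver), so each L1 descriptor covers a lazily allocated chunk of 256 STEs. Roughly (a sketch, assuming the driver's constants):

	u32 l1_idx = sid >> STRTAB_SPLIT;		/* which L1 descriptor  */
	u32 l2_idx = sid & ((1 << STRTAB_SPLIT) - 1);	/* STE within the chunk */

arm_smmu_init_l2_strtab() allocates the 1 << STRTAB_SPLIT entries behind l1_idx on first use, which is why the new helper must run before any STE for that SID can be written.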
Tested-by: Hanjun Guo Acked-by: Will Deacon Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-7-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 88817a3376ef..17d4f3432df2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2537,6 +2537,19 @@ static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid) return sid < limit; } +static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid) +{ + /* Check the SIDs are in range of the SMMU and our stream table */ + if (!arm_smmu_sid_in_range(smmu, sid)) + return -ERANGE; + + /* Ensure l2 strtab is initialised */ + if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) + return arm_smmu_init_l2_strtab(smmu, sid); + + return 0; +} + static int arm_smmu_insert_master(struct arm_smmu_device *smmu, struct arm_smmu_master *master) { @@ -2560,20 +2573,9 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, new_stream->id = sid; new_stream->master = master; - /* - * Check the SIDs are in range of the SMMU and our stream table - */ - if (!arm_smmu_sid_in_range(smmu, sid)) { - ret = -ERANGE; + ret = arm_smmu_init_sid_strtab(smmu, sid); + if (ret) break; - } - - /* Ensure l2 strtab is initialised */ - if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { - ret = arm_smmu_init_l2_strtab(smmu, sid); - if (ret) - break; - } /* Insert into SID tree */ new_node = &(smmu->streams.rb_node); -- cgit v1.2.3 From 6c998abb1ea54b3d83420f68ce382354a30f2238 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:42 +0100 Subject: iommu/arm-smmu-v3: Refactor arm_smmu_init_bypass_stes() to force bypass By default, disable_bypass flag is set and any dev without an iommu domain installs STE with CFG_ABORT during arm_smmu_init_bypass_stes(). Introduce a "force" flag and move the STE update logic to arm_smmu_init_bypass_stes() so that we can force it to install CFG_BYPASS STE for specific SIDs. This will be useful in a follow-up patch to install bypass for IORT RMR SIDs. 
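Two details of the resulting bypass STE are worth spelling out (a sketch of the words the new code writes; the field macros are the driver's own):

	strtab[0] = cpu_to_le64(STRTAB_STE_0_V |
				FIELD_PREP(STRTAB_STE_0_CFG, cfg));
	/* cfg is STRTAB_STE_0_CFG_ABORT when disable_bypass && !force,
	 * so unmatched streams still fault by default; "force" wins only
	 * for streams that must keep flowing, such as RMR SIDs. */

	strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
					   STRTAB_STE_1_SHCFG_INCOMING));
	/* "incoming" keeps each transaction's own shareability attributes
	 * instead of overriding them for bypassed traffic. */

	strtab[2] = 0;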
Tested-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-8-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 17d4f3432df2..09723861a08a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1380,12 +1380,21 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); } -static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent) +static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force) { unsigned int i; + u64 val = STRTAB_STE_0_V; + + if (disable_bypass && !force) + val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT); + else + val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); for (i = 0; i < nent; ++i) { - arm_smmu_write_strtab_ent(NULL, -1, strtab); + strtab[0] = cpu_to_le64(val); + strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, + STRTAB_STE_1_SHCFG_INCOMING)); + strtab[2] = 0; strtab += STRTAB_STE_DWORDS; } } @@ -1413,7 +1422,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return -ENOMEM; } - arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT); + arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false); arm_smmu_write_strtab_l1_desc(strtab, desc); return 0; } @@ -3051,7 +3060,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); cfg->strtab_base_cfg = reg; - arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents); + arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false); return 0; } -- cgit v1.2.3 From 9bdbdaa3c44ea97c8e97e51dd503a7061bd676a0 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 15 Jun 2022 11:10:43 +0100 Subject: iommu/arm-smmu-v3: Get associated RMR info and install bypass STE Check if there is any RMR info associated with the devices behind the SMMUv3 and if any, install bypass STEs for them. This is to keep any ongoing traffic associated with these devices alive when we enable/reset SMMUv3 during probe(). 
Tested-by: Hanjun Guo Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-9-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 33 +++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 09723861a08a..448e7b7ce0f2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3754,6 +3754,36 @@ static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start, return devm_ioremap_resource(dev, &res); } +static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) +{ + struct list_head rmr_list; + struct iommu_resv_region *e; + + INIT_LIST_HEAD(&rmr_list); + iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); + + list_for_each_entry(e, &rmr_list, list) { + __le64 *step; + struct iommu_iort_rmr_data *rmr; + int ret, i; + + rmr = container_of(e, struct iommu_iort_rmr_data, rr); + for (i = 0; i < rmr->num_sids; i++) { + ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]); + if (ret) { + dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n", + rmr->sids[i]); + continue; + } + + step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]); + arm_smmu_init_bypass_stes(step, 1, true); + } + } + + iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -3837,6 +3867,9 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Record our private device structure */ platform_set_drvdata(pdev, smmu); + /* Check for RMRs and install bypass STEs if any */ + arm_smmu_rmr_install_bypass_ste(smmu); + /* Reset the device */ ret = arm_smmu_device_reset(smmu, bypass); if (ret) -- cgit v1.2.3 From 0bec05574d13ed7e8643733fe5dccbd0c86604d2 Mon Sep 17 00:00:00 2001 From: Jon Nettleton Date: Wed, 15 Jun 2022 11:10:44 +0100 Subject: iommu/arm-smmu: Get associated RMR info and install bypass SMR Check if there is any RMR info associated with the devices behind the SMMU and if any, install bypass SMRs for them. This is to keep any ongoing traffic associated with these devices alive when we enable/reset SMMU during probe(). Signed-off-by: Jon Nettleton Signed-off-by: Steven Price Tested-by: Steven Price Tested-by: Laurentiu Tudor Signed-off-by: Shameer Kolothum Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220615101044.1972-10-shameerali.kolothum.thodi@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 52 +++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 2ed3594f384e..7ac4907235c3 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -2071,6 +2071,54 @@ err_reset_platform_ops: __maybe_unused; return err; } +static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu) +{ + struct list_head rmr_list; + struct iommu_resv_region *e; + int idx, cnt = 0; + u32 reg; + + INIT_LIST_HEAD(&rmr_list); + iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); + + /* + * Rather than trying to look at existing mappings that + * are setup by the firmware and then invalidate the ones + * that do no have matching RMR entries, just disable the + * SMMU until it gets enabled again in the reset routine. 
+ */ + reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0); + reg |= ARM_SMMU_sCR0_CLIENTPD; + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg); + + list_for_each_entry(e, &rmr_list, list) { + struct iommu_iort_rmr_data *rmr; + int i; + + rmr = container_of(e, struct iommu_iort_rmr_data, rr); + for (i = 0; i < rmr->num_sids; i++) { + idx = arm_smmu_find_sme(smmu, rmr->sids[i], ~0); + if (idx < 0) + continue; + + if (smmu->s2crs[idx].count == 0) { + smmu->smrs[idx].id = rmr->sids[i]; + smmu->smrs[idx].mask = 0; + smmu->smrs[idx].valid = true; + } + smmu->s2crs[idx].count++; + smmu->s2crs[idx].type = S2CR_TYPE_BYPASS; + smmu->s2crs[idx].privcfg = S2CR_PRIVCFG_DEFAULT; + + cnt++; + } + } + + dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt, + cnt == 1 ? "" : "s"); + iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); +} + static int arm_smmu_device_probe(struct platform_device *pdev) { struct resource *res; @@ -2191,6 +2239,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, smmu); + + /* Check for RMRs and install bypass SMRs if any */ + arm_smmu_rmr_install_bypass_smr(smmu); + arm_smmu_device_reset(smmu); arm_smmu_test_smr_masks(smmu); -- cgit v1.2.3 From e63cfb5faac57056eeed22d78ee1c8f7a4ddbef2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 21 Jun 2022 16:14:25 +0100 Subject: iommu: Use dev_iommu_ops() for probe_finalize The ->probe_finalize hook only runs after ->probe_device succeeds, so we can move that over to the new dev_iommu_ops() as well. Reviewed-by: Lu Baolu Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/5fe4b0ce22f676f435d332f2b2828dc7ef848a19.1655822151.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f46431ac49e1..862c3f61b7de 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -272,7 +272,7 @@ err_free: int iommu_probe_device(struct device *dev) { - const struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops; struct iommu_group *group; int ret; @@ -313,6 +313,7 @@ int iommu_probe_device(struct device *dev) mutex_unlock(&group->mutex); iommu_group_put(group); + ops = dev_iommu_ops(dev); if (ops->probe_finalize) ops->probe_finalize(dev); -- cgit v1.2.3 From b321a2fba2734cbac799034081e20dcdb321ef4f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 21 Jun 2022 16:14:26 +0100 Subject: iommu: Make .release_device optional Many drivers do nothing meaningful for .release_device, and it's neatly abstracted to just two callsites in the core code, so let's make it optional to implement. 
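Together with the previous patch, the core's calling convention becomes: fetch per-device ops only after ->probe_device has succeeded, and guard optional hooks at the callsite. In sketch form:

	const struct iommu_ops *ops = dev_iommu_ops(dev);

	/* Only valid once ->probe_device succeeded for this device */
	if (ops->probe_finalize)
		ops->probe_finalize(dev);

	/* ... and on teardown, .release_device is now optional too: */
	if (ops->release_device)
		ops->release_device(dev);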
Signed-off-by: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/bda9d3eb4527eac8f6544a15067e2529cca54a2e.1655822151.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu_domain.c | 5 ----- drivers/iommu/iommu.c | 6 ++++-- drivers/iommu/msm_iommu.c | 5 ----- drivers/iommu/sun50i-iommu.c | 3 --- drivers/iommu/tegra-gart.c | 5 ----- drivers/iommu/tegra-smmu.c | 3 --- 6 files changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 94b4589dc67c..011f9ab7f743 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -447,15 +447,10 @@ static struct iommu_device *fsl_pamu_probe_device(struct device *dev) return &pamu_iommu; } -static void fsl_pamu_release_device(struct device *dev) -{ -} - static const struct iommu_ops fsl_pamu_ops = { .capable = fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, .probe_device = fsl_pamu_probe_device, - .release_device = fsl_pamu_release_device, .device_group = fsl_pamu_device_group, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = fsl_pamu_attach_device, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 862c3f61b7de..0aa141646bdf 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -259,7 +259,8 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list return 0; out_release: - ops->release_device(dev); + if (ops->release_device) + ops->release_device(dev); out_module_put: module_put(ops->owner); @@ -337,7 +338,8 @@ void iommu_release_device(struct device *dev) iommu_device_unlink(dev->iommu->iommu_dev, dev); ops = dev_iommu_ops(dev); - ops->release_device(dev); + if (ops->release_device) + ops->release_device(dev); iommu_group_remove_device(dev); module_put(ops->owner); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index f09aedfdd462..428919a474c1 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -394,10 +394,6 @@ static struct iommu_device *msm_iommu_probe_device(struct device *dev) return &iommu->iommu; } -static void msm_iommu_release_device(struct device *dev) -{ -} - static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { int ret = 0; @@ -677,7 +673,6 @@ fail: static struct iommu_ops msm_iommu_ops = { .domain_alloc = msm_iommu_domain_alloc, .probe_device = msm_iommu_probe_device, - .release_device = msm_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index c54ab477b8fd..a84c63518773 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -738,8 +738,6 @@ static struct iommu_device *sun50i_iommu_probe_device(struct device *dev) return &iommu->iommu; } -static void sun50i_iommu_release_device(struct device *dev) {} - static struct iommu_group *sun50i_iommu_device_group(struct device *dev) { struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev); @@ -764,7 +762,6 @@ static const struct iommu_ops sun50i_iommu_ops = { .domain_alloc = sun50i_iommu_domain_alloc, .of_xlate = sun50i_iommu_of_xlate, .probe_device = sun50i_iommu_probe_device, - .release_device = sun50i_iommu_release_device, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sun50i_iommu_attach_device, .detach_dev = sun50i_iommu_detach_device, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c 
index a6700a40a6f8..e5ca3cf1a949 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -246,10 +246,6 @@ static struct iommu_device *gart_iommu_probe_device(struct device *dev) return &gart_handle->iommu; } -static void gart_iommu_release_device(struct device *dev) -{ -} - static int gart_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { @@ -273,7 +269,6 @@ static void gart_iommu_sync(struct iommu_domain *domain, static const struct iommu_ops gart_iommu_ops = { .domain_alloc = gart_iommu_domain_alloc, .probe_device = gart_iommu_probe_device, - .release_device = gart_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = GART_IOMMU_PGSIZES, .of_xlate = gart_iommu_of_xlate, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 1fea68e551f1..2a8de975fe63 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -864,8 +864,6 @@ static struct iommu_device *tegra_smmu_probe_device(struct device *dev) return &smmu->iommu; } -static void tegra_smmu_release_device(struct device *dev) {} - static const struct tegra_smmu_group_soc * tegra_smmu_find_group(struct tegra_smmu *smmu, unsigned int swgroup) { @@ -966,7 +964,6 @@ static int tegra_smmu_of_xlate(struct device *dev, static const struct iommu_ops tegra_smmu_ops = { .domain_alloc = tegra_smmu_domain_alloc, .probe_device = tegra_smmu_probe_device, - .release_device = tegra_smmu_release_device, .device_group = tegra_smmu_device_group, .of_xlate = tegra_smmu_of_xlate, .pgsize_bitmap = SZ_4K, -- cgit v1.2.3 From 4d26ba671e3deed010311345e4426e6e11eaaf4c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 21 Jun 2022 16:14:27 +0100 Subject: iommu: Clean up release_device checks Since .release_device is now called through per-device ops, any call which gets as far as a driver definitely *is* for that driver, for a device which has successfully passed .probe_device, so all the checks to that effect are now redundant and can be removed. In the same vein we can also skip freeing fwspecs which are now managed by core code. 
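After this cleanup, a typical .release_device can assume it is only ever invoked for a device the same driver successfully probed, so the body shrinks to the driver-private teardown. A representative sketch (hypothetical example_* names; the apple-dart and SMMU hunks below show the real conversions):

	static void example_release_device(struct device *dev)
	{
		struct example_cfg *cfg = dev_iommu_priv_get(dev);

		/* No fwspec/ops sanity checks needed any more, and the
		 * core now frees the fwspec itself. */
		kfree(cfg);
	}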
Signed-off-by: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/02671dbfad7a3343fc25a44222350efcb455fe3c.1655822151.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 3 --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +------- drivers/iommu/arm/arm-smmu/arm-smmu.c | 14 +++----------- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 11 ----------- drivers/iommu/exynos-iommu.c | 3 --- drivers/iommu/mtk_iommu.c | 5 ----- drivers/iommu/mtk_iommu_v1.c | 5 ----- drivers/iommu/sprd-iommu.c | 11 ----------- drivers/iommu/virtio-iommu.c | 8 +------- 9 files changed, 5 insertions(+), 63 deletions(-) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 8af0242a90d9..e87d3cf54ed6 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -564,9 +564,6 @@ static void apple_dart_release_device(struct device *dev) { struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); - if (!cfg) - return; - dev_iommu_priv_set(dev, NULL); kfree(cfg); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 448e7b7ce0f2..d9c1623ec1a9 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2702,20 +2702,14 @@ err_free_master: static void arm_smmu_release_device(struct device *dev) { - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct arm_smmu_master *master; - - if (!fwspec || fwspec->ops != &arm_smmu_ops) - return; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); - master = dev_iommu_priv_get(dev); if (WARN_ON(arm_smmu_master_sva_enabled(master))) iopf_queue_remove_device(master->smmu->evtq.iopf, dev); arm_smmu_detach_dev(master); arm_smmu_disable_pasid(master); arm_smmu_remove_master(master); kfree(master); - iommu_fwspec_free(dev); } static struct iommu_group *arm_smmu_device_group(struct device *dev) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 7ac4907235c3..588929bed1bc 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1432,27 +1432,19 @@ out_free: static void arm_smmu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct arm_smmu_master_cfg *cfg; - struct arm_smmu_device *smmu; + struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); int ret; - if (!fwspec || fwspec->ops != &arm_smmu_ops) - return; - - cfg = dev_iommu_priv_get(dev); - smmu = cfg->smmu; - - ret = arm_smmu_rpm_get(smmu); + ret = arm_smmu_rpm_get(cfg->smmu); if (ret < 0) return; arm_smmu_master_free_smes(cfg, fwspec); - arm_smmu_rpm_put(smmu); + arm_smmu_rpm_put(cfg->smmu); dev_iommu_priv_set(dev, NULL); kfree(cfg); - iommu_fwspec_free(dev); } static void arm_smmu_probe_finalize(struct device *dev) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 4c077c38fbd6..4a922c7b69ee 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -532,16 +532,6 @@ static struct iommu_device *qcom_iommu_probe_device(struct device *dev) return &qcom_iommu->iommu; } -static void qcom_iommu_release_device(struct device *dev) -{ - struct qcom_iommu_dev *qcom_iommu = to_iommu(dev); - - if (!qcom_iommu) - return; - - iommu_fwspec_free(dev); -} - static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) { struct qcom_iommu_dev *qcom_iommu; @@ -591,7 +581,6 @@ static const struct 
iommu_ops qcom_iommu_ops = { .capable = qcom_iommu_capable, .domain_alloc = qcom_iommu_domain_alloc, .probe_device = qcom_iommu_probe_device, - .release_device = qcom_iommu_release_device, .device_group = generic_device_group, .of_xlate = qcom_iommu_of_xlate, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 71f2018e23fe..1d6808d6e190 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1251,9 +1251,6 @@ static void exynos_iommu_release_device(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); struct sysmmu_drvdata *data; - if (!has_sysmmu(dev)) - return; - if (owner->domain) { struct iommu_group *group = iommu_group_get(dev); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index bb9dd92c9898..5c3d9366c25c 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -819,17 +819,12 @@ static void mtk_iommu_release_device(struct device *dev) struct device *larbdev; unsigned int larbid; - if (!fwspec || fwspec->ops != &mtk_iommu_ops) - return; - data = dev_iommu_priv_get(dev); if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); larbdev = data->larb_imu[larbid].dev; device_link_remove(dev, larbdev); } - - iommu_fwspec_free(dev); } static int mtk_iommu_get_group_id(struct device *dev, const struct mtk_iommu_plat_data *plat_data) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index e1cb51b9866c..128c7a3f1778 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -532,15 +532,10 @@ static void mtk_iommu_v1_release_device(struct device *dev) struct device *larbdev; unsigned int larbid; - if (!fwspec || fwspec->ops != &mtk_iommu_v1_ops) - return; - data = dev_iommu_priv_get(dev); larbid = mt2701_m4u_to_larb(fwspec->ids[0]); larbdev = data->larb_imu[larbid].dev; device_link_remove(dev, larbdev); - - iommu_fwspec_free(dev); } static int mtk_iommu_v1_hw_init(const struct mtk_iommu_v1_data *data) diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index bd409bab6286..511959c8a14d 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -383,16 +383,6 @@ static struct iommu_device *sprd_iommu_probe_device(struct device *dev) return &sdev->iommu; } -static void sprd_iommu_release_device(struct device *dev) -{ - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - - if (!fwspec || fwspec->ops != &sprd_iommu_ops) - return; - - iommu_fwspec_free(dev); -} - static struct iommu_group *sprd_iommu_device_group(struct device *dev) { struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev); @@ -417,7 +407,6 @@ static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) static const struct iommu_ops sprd_iommu_ops = { .domain_alloc = sprd_iommu_domain_alloc, .probe_device = sprd_iommu_probe_device, - .release_device = sprd_iommu_release_device, .device_group = sprd_iommu_device_group, .of_xlate = sprd_iommu_of_xlate, .pgsize_bitmap = ~0UL << SPRD_IOMMU_PAGE_SHIFT, diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 25be4b822aa0..55337796a5f8 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -981,13 +981,7 @@ static void viommu_probe_finalize(struct device *dev) static void viommu_release_device(struct device *dev) { - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct viommu_endpoint *vdev; - - if (!fwspec || 
fwspec->ops != &viommu_ops) - return; - - vdev = dev_iommu_priv_get(dev); + struct viommu_endpoint *vdev = dev_iommu_priv_get(dev); generic_iommu_put_resv_regions(dev, &vdev->resv_regions); kfree(vdev); -- cgit v1.2.3 From 32977242f80e343e2247f77e53fe20e8f7c23938 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 23 Jun 2022 11:36:29 +0200 Subject: iommu/exynos: Make driver independent of the system page size PAGE_{SIZE,SHIFT} macros depend on the configured kernel's page size, but the driver expects values calculated as for 4KB pages. Fix this. Reported-by: Robin Murphy Signed-off-by: Marek Szyprowski Reviewed-by: Sam Protsenko Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220623093629.32178-1-m.szyprowski@samsung.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 71f2018e23fe..9c060505a46e 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -340,8 +340,7 @@ static void __sysmmu_set_ptbase(struct sysmmu_drvdata *data, phys_addr_t pgd) if (MMU_MAJ_VER(data->version) < 5) writel(pgd, data->sfrbase + REG_PT_BASE_ADDR); else - writel(pgd >> PAGE_SHIFT, - data->sfrbase + REG_V5_PT_BASE_PFN); + writel(pgd / SZ_4K, data->sfrbase + REG_V5_PT_BASE_PFN); __sysmmu_tlb_invalidate(data); } @@ -551,7 +550,7 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, * 64KB page can be one of 16 consecutive sets. */ if (MMU_MAJ_VER(data->version) == 2) - num_inv = min_t(unsigned int, size / PAGE_SIZE, 64); + num_inv = min_t(unsigned int, size / SZ_4K, 64); if (sysmmu_block(data)) { __sysmmu_tlb_invalidate_entry(data, iova, num_inv); -- cgit v1.2.3 From b4c9bf178ace26c52c9cac36f265ba95132a221d Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Sun, 3 Jul 2022 19:44:50 +0800 Subject: iommu/iova: change IOVA_MAG_SIZE to 127 to save memory kmalloc rounds the request size up to a power of 2, and the current iova_magazine size is 1032 (1024 + 8) bytes, so each allocated instance gets 2048 bytes from kmalloc, wasting around 1KB per magazine. Change IOVA_MAG_SIZE from 128 to 127 to make the size of 'iova_magazine' exactly 1024 bytes, so that no memory is wasted. Signed-off-by: Feng Tang Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220703114450.15184-1-feng.tang@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index db77aa675145..e44f565c5319 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -614,7 +614,12 @@ EXPORT_SYMBOL_GPL(reserve_iova); * dynamic size tuning described in the paper. */ -#define IOVA_MAG_SIZE 128 +/* + * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to + * assure size of 'iova_magazine' to be 1024 bytes, so that no memory + * will be wasted. + */ +#define IOVA_MAG_SIZE 127 #define MAX_GLOBAL_MAGS 32 /* magazines per bin */ struct iova_magazine { -- cgit v1.2.3 From 3482c0b7307365fa9d00cfc1040ff8c39bb8604e Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Tue, 14 Jun 2022 16:01:35 -0700 Subject: iommu: arm-smmu-impl: Add 8250 display compatible to the client list. Required for turning on per-process page tables for the GPU.
Signed-off-by: Emma Anholt Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20220614230136.3726047-1-emma@anholt.net Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 7820711c4560..2d470d867887 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -233,6 +233,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sc7280-mdss" }, { .compatible = "qcom,sc7280-mss-pil" }, { .compatible = "qcom,sc8180x-mdss" }, + { .compatible = "qcom,sm8250-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, { } -- cgit v1.2.3 From 83874d51eb4a51725c9403e11476a433add98c34 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Fri, 1 Jul 2022 02:20:08 -0400 Subject: iommu/amd: Handle return of iommu_device_sysfs_add As iommu_device_sysfs_add() can fail, we should check the return value. Signed-off-by: Bo Liu Link: https://lore.kernel.org/r/20220701062008.6988-1-liubo03@inspur.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1d08f87e734b..298c836e174f 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1906,8 +1906,11 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) amd_iommu_erratum_746_workaround(iommu); amd_iommu_ats_write_check_workaround(iommu); - iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, + ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, amd_iommu_groups, "ivhd%d", iommu->index); + if (ret) + return ret; + iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); return pci_enable_device(iommu->dev); -- cgit v1.2.3 From d02674d71c5a625e5b2e4323edbf05ebedaf4273 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:07:51 +0530 Subject: iommu/amd: Update struct iommu_dev_data definition struct iommu_dev_data contains a "pdev" member pointing to the pci_dev. This is valid only for PCI devices; for other devices it will be NULL, which leads to unnecessary "pdev != NULL" checks in various places. Replace the "struct pci_dev" member with "struct device" and use to_pci_dev() to get the PCI device reference as needed. Also adjust the setup_aliases() and clone_aliases() functions accordingly. No functional change intended.
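The conversion follows a single pattern throughout (a minimal sketch distilled from the diff below):

	struct device *dev = dev_data->dev;	/* valid for PCI and non-PCI devices */
	struct pci_dev *pdev = NULL;

	if (dev_is_pci(dev))
		pdev = to_pci_dev(dev);

	if (pdev)	/* PCI-only work, e.g. walking DMA aliases */
		pci_for_each_dma_alias(pdev, clone_alias, NULL);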
Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 2 +- drivers/iommu/amd/iommu.c | 32 +++++++++++++++++++------------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 72d0f5e2f651..9b563f850f1d 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -689,7 +689,7 @@ struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ - struct pci_dev *pdev; + struct device *dev; u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ struct { diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 840831d5d2ad..efa8af5a9419 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -188,10 +188,13 @@ static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) return 0; } -static void clone_aliases(struct pci_dev *pdev) +static void clone_aliases(struct device *dev) { - if (!pdev) + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) return; + pdev = to_pci_dev(dev); /* * The IVRS alias stored in the alias table may not be @@ -203,14 +206,14 @@ static void clone_aliases(struct pci_dev *pdev) pci_for_each_dma_alias(pdev, clone_alias, NULL); } -static struct pci_dev *setup_aliases(struct device *dev) +static void setup_aliases(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); u16 ivrs_alias; /* For ACPI HID devices, there are no aliases */ if (!dev_is_pci(dev)) - return NULL; + return; /* * Add the IVRS alias to the pci aliases if it is on the same @@ -221,9 +224,7 @@ static struct pci_dev *setup_aliases(struct device *dev) PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1); - clone_aliases(pdev); - - return pdev; + clone_aliases(dev); } static struct iommu_dev_data *find_dev_data(u16 devid) @@ -331,7 +332,8 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; - dev_data->pdev = setup_aliases(dev); + dev_data->dev = dev; + setup_aliases(dev); /* * By default we use passthrough mode for IOMMUv2 capable device. 
@@ -1232,13 +1234,17 @@ static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data) static int device_flush_dte(struct iommu_dev_data *dev_data) { struct amd_iommu *iommu; + struct pci_dev *pdev = NULL; u16 alias; int ret; iommu = amd_iommu_rlookup_table[dev_data->devid]; - if (dev_data->pdev) - ret = pci_for_each_dma_alias(dev_data->pdev, + if (dev_is_pci(dev_data->dev)) + pdev = to_pci_dev(dev_data->dev); + + if (pdev) + ret = pci_for_each_dma_alias(pdev, device_flush_dte_alias, iommu); else ret = iommu_flush_dte(iommu, dev_data->devid); @@ -1561,7 +1567,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); - clone_aliases(dev_data->pdev); + clone_aliases(dev_data->dev); device_flush_dte(dev_data); } @@ -1577,7 +1583,7 @@ static void do_detach(struct iommu_dev_data *dev_data) dev_data->domain = NULL; list_del(&dev_data->list); clear_dte_entry(dev_data->devid); - clone_aliases(dev_data->pdev); + clone_aliases(dev_data->dev); /* Flush the DTE entry */ device_flush_dte(dev_data); @@ -1818,7 +1824,7 @@ static void update_device_table(struct protection_domain *domain) list_for_each_entry(dev_data, &domain->dev_list, list) { set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); - clone_aliases(dev_data->pdev); + clone_aliases(dev_data->dev); } } -- cgit v1.2.3 From 404ec4e4c169fb64da6b2a38b471c13ac0897c76 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:07:52 +0530 Subject: iommu/amd: Introduce pci segment structure Newer AMD systems can support multiple PCI segments, where each segment contains one or more IOMMU instances. However, an IOMMU instance can only support a single PCI segment. Current code assumes that the system contains only one PCI segment (segment 0) and creates global data structures such as the device table, rlookup table, etc. Introduce a per PCI segment data structure which holds the segment-specific data structures; these will eventually replace the global ones. Also update the `amd_iommu->pci_seg` variable to point to the PCI segment structure instead of the PCI segment ID. Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-3-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 24 ++++++++++++++++++- drivers/iommu/amd/init.c | 46 ++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 9b563f850f1d..2243b1a22d78 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -456,6 +456,11 @@ extern bool amdr_ivrs_remap_support; /* kmem_cache to get tables with 128 byte alignement */ extern struct kmem_cache *amd_iommu_irq_cache; +/* Make iterating over all pci segment easier */ +#define for_each_pci_segment(pci_seg) \ + list_for_each_entry((pci_seg), &amd_iommu_pci_seg_list, list) +#define for_each_pci_segment_safe(pci_seg, next) \ + list_for_each_entry_safe((pci_seg), (next), &amd_iommu_pci_seg_list, list) /* * Make iterating over all IOMMUs easier */ @@ -530,6 +535,17 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; +/* + * This structure contains information about one PCI segment in the system.
+ */ +struct amd_iommu_pci_seg { + /* List with all PCI segments in the system */ + struct list_head list; + + /* PCI segment number */ + u16 id; +}; + /* * Structure where we save information about one hardware AMD IOMMU in the * system. @@ -581,7 +597,7 @@ struct amd_iommu { u16 cap_ptr; /* pci domain of this IOMMU */ - u16 pci_seg; + struct amd_iommu_pci_seg *pci_seg; /* start of exclusion range of that IOMMU */ u64 exclusion_start; @@ -709,6 +725,12 @@ extern struct list_head ioapic_map; extern struct list_head hpet_map; extern struct list_head acpihid_map; +/* + * List with all PCI segments in the system. This list is not locked because + * it is only written at driver initialization time + */ +extern struct list_head amd_iommu_pci_seg_list; + /* * List with all IOMMUs in the system. This list is not locked because it is * only written and read at driver initialization or suspend time diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 298c836e174f..97f96f908052 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -164,6 +164,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ +LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -1458,6 +1459,43 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, return 0; } +/* Allocate PCI segment data structure */ +static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id) +{ + struct amd_iommu_pci_seg *pci_seg; + + pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL); + if (pci_seg == NULL) + return NULL; + + pci_seg->id = id; + list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); + + return pci_seg; +} + +static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id) +{ + struct amd_iommu_pci_seg *pci_seg; + + for_each_pci_segment(pci_seg) { + if (pci_seg->id == id) + return pci_seg; + } + + return alloc_pci_segment(id); +} + +static void __init free_pci_segments(void) +{ + struct amd_iommu_pci_seg *pci_seg, *next; + + for_each_pci_segment_safe(pci_seg, next) { + list_del(&pci_seg->list); + kfree(pci_seg); + } +} + static void __init free_iommu_one(struct amd_iommu *iommu) { free_cwwb_sem(iommu); @@ -1544,8 +1582,14 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) */ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { + struct amd_iommu_pci_seg *pci_seg; int ret; + pci_seg = get_pci_segment(h->pci_seg); + if (pci_seg == NULL) + return -ENOMEM; + iommu->pci_seg = pci_seg; + raw_spin_lock_init(&iommu->lock); iommu->cmd_sem_val = 0; @@ -1566,7 +1610,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) */ iommu->devid = h->devid; iommu->cap_ptr = h->cap_ptr; - iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; switch (h->type) { @@ -2614,6 +2657,7 @@ static void __init free_iommu_resources(void) amd_iommu_dev_table = NULL; free_iommu_all(); + free_pci_segments(); } /* SB IOAPIC is always on this device in AMD systems */ -- cgit v1.2.3 From 04230c119930299f2a30de783c0a643481a66eed Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:07:53 +0530 Subject: iommu/amd: Introduce per PCI segment device table Introduce per PCI segment device table. All IOMMUs within the segment will share this device table. This will replace global device table i.e. amd_iommu_dev_table. 
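With this in place, looking up a device table entry conceptually becomes a dereference through the segment (a short sketch using the names introduced by this series):

	struct dev_table_entry *dev_table = iommu->pci_seg->dev_table;
	struct dev_table_entry *dte = &dev_table[devid];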
Also introduce helper function to get the device table for the given IOMMU. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/amd_iommu_types.h | 10 ++++++++++ drivers/iommu/amd/init.c | 26 ++++++++++++++++++++++++-- drivers/iommu/amd/iommu.c | 12 ++++++++++++ 4 files changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 1ab31074f5b3..885570cd0d77 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -128,4 +128,5 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { } extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); +extern struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 2243b1a22d78..422ea87ae4c7 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -544,6 +544,16 @@ struct amd_iommu_pci_seg { /* PCI segment number */ u16 id; + + /* + * device table virtual address + * + * Pointer to the per PCI segment device table. + * It is indexed by the PCI device id or the HT unit id and contains + * information about the domain the device belongs to as well as the + * page table root pointer. + */ + struct dev_table_entry *dev_table; }; /* diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 97f96f908052..9f26601368eb 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -642,11 +642,29 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) * * The following functions belong to the code path which parses the ACPI table * the second time. In this ACPI parsing iteration we allocate IOMMU specific - * data structures, initialize the device/alias/rlookup table and also - * basically initialize the hardware. + * data structures, initialize the per PCI segment device/alias/rlookup table + * and also basically initialize the hardware. * ****************************************************************************/ +/* Allocate per PCI segment device table */ +static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) +{ + pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, + get_order(dev_table_size)); + if (!pci_seg->dev_table) + return -ENOMEM; + + return 0; +} + +static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) +{ + free_pages((unsigned long)pci_seg->dev_table, + get_order(dev_table_size)); + pci_seg->dev_table = NULL; +} + /* * Allocates the command buffer. This buffer is per AMD IOMMU. 
We can * write commands to that buffer later and the IOMMU will execute them @@ -1471,6 +1489,9 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id) pci_seg->id = id; list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); + if (alloc_dev_table(pci_seg)) + return NULL; + return pci_seg; } @@ -1492,6 +1513,7 @@ static void __init free_pci_segments(void) for_each_pci_segment_safe(pci_seg, next) { list_del(&pci_seg->list); + free_dev_table(pci_seg); kfree(pci_seg); } } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index efa8af5a9419..ac8f81f527b4 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -134,6 +134,18 @@ static inline int get_device_id(struct device *dev) return devid; } +struct dev_table_entry *get_dev_table(struct amd_iommu *iommu) +{ + struct dev_table_entry *dev_table; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + + BUG_ON(pci_seg == NULL); + dev_table = pci_seg->dev_table; + BUG_ON(dev_table == NULL); + + return dev_table; +} + static struct protection_domain *to_pdomain(struct iommu_domain *dom) { return container_of(dom, struct protection_domain, domain); -- cgit v1.2.3 From eda797a2779509280c84c557a9caf568b23db4e6 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:07:54 +0530 Subject: iommu/amd: Introduce per PCI segment rlookup table This will replace global rlookup table (amd_iommu_rlookup_table). Add helper functions to set/get rlookup table for the given device. Also add macros to get seg/devid from sbdf. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-5-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/amd_iommu_types.h | 11 ++++++++++ drivers/iommu/amd/init.c | 23 +++++++++++++++++++ drivers/iommu/amd/iommu.c | 44 +++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 885570cd0d77..2947239700ce 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -19,6 +19,7 @@ extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); extern int amd_iommu_init_api(void); +extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); #ifdef CONFIG_AMD_IOMMU_DEBUGFS void amd_iommu_debugfs_setup(struct amd_iommu *iommu); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 422ea87ae4c7..d0ee78c656ff 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -456,6 +456,9 @@ extern bool amdr_ivrs_remap_support; /* kmem_cache to get tables with 128 byte alignement */ extern struct kmem_cache *amd_iommu_irq_cache; +#define PCI_SBDF_TO_SEGID(sbdf) (((sbdf) >> 16) & 0xffff) +#define PCI_SBDF_TO_DEVID(sbdf) ((sbdf) & 0xffff) + /* Make iterating over all pci segment easier */ #define for_each_pci_segment(pci_seg) \ list_for_each_entry((pci_seg), &amd_iommu_pci_seg_list, list) @@ -490,6 +493,7 @@ struct amd_iommu_fault { }; +struct amd_iommu; struct iommu_domain; struct irq_domain; struct amd_irte_ops; @@ -554,6 +558,13 @@ struct amd_iommu_pci_seg { * page table root pointer. */ struct dev_table_entry *dev_table; + + /* + * The rlookup iommu table is used to find the IOMMU which is + * responsible for a specific device. It is indexed by the PCI + * device id. 
+ */ + struct amd_iommu **rlookup_table; }; /* diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 9f26601368eb..35863ba9f86e 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -665,6 +665,26 @@ static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) pci_seg->dev_table = NULL; } +/* Allocate per PCI segment IOMMU rlookup table. */ +static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + pci_seg->rlookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (pci_seg->rlookup_table == NULL) + return -ENOMEM; + + return 0; +} + +static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + free_pages((unsigned long)pci_seg->rlookup_table, + get_order(rlookup_table_size)); + pci_seg->rlookup_table = NULL; +} + + /* * Allocates the command buffer. This buffer is per AMD IOMMU. We can * write commands to that buffer later and the IOMMU will execute them @@ -1491,6 +1511,8 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id) if (alloc_dev_table(pci_seg)) return NULL; + if (alloc_rlookup_table(pci_seg)) + return NULL; return pci_seg; } @@ -1513,6 +1535,7 @@ static void __init free_pci_segments(void) for_each_pci_segment_safe(pci_seg, next) { list_del(&pci_seg->list); + free_rlookup_table(pci_seg); free_dev_table(pci_seg); kfree(pci_seg); } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index ac8f81f527b4..b0262b2e749d 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -146,6 +146,50 @@ struct dev_table_entry *get_dev_table(struct amd_iommu *iommu) return dev_table; } +static inline u16 get_device_segment(struct device *dev) +{ + u16 seg; + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + seg = pci_domain_nr(pdev->bus); + } else { + u32 devid = get_acpihid_device_id(dev, NULL); + + seg = PCI_SBDF_TO_SEGID(devid); + } + + return seg; +} + +/* Writes the specific IOMMU for a device into the PCI segment rlookup table */ +void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid) +{ + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + + pci_seg->rlookup_table[devid] = iommu; +} + +static struct amd_iommu *__rlookup_amd_iommu(u16 seg, u16 devid) +{ + struct amd_iommu_pci_seg *pci_seg; + + for_each_pci_segment(pci_seg) { + if (pci_seg->id == seg) + return pci_seg->rlookup_table[devid]; + } + return NULL; +} + +static struct amd_iommu *rlookup_amd_iommu(struct device *dev) +{ + u16 seg = get_device_segment(dev); + u16 devid = get_device_id(dev); + + return __rlookup_amd_iommu(seg, devid); +} + static struct protection_domain *to_pdomain(struct iommu_domain *dom) { return container_of(dom, struct protection_domain, domain); -- cgit v1.2.3 From 333e581bcdffc09e6c6c95a0c24bfc66b3b4dcfd Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:07:55 +0530 Subject: iommu/amd: Introduce per PCI segment irq_lookup_table This will replace global irq lookup table (irq_lookup_table). 
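Once later patches convert the users, an interrupt remapping table lookup is expected to go through the segment rather than the global array (a hedged sketch of the intended access pattern, not a helper added by this patch):

	struct irq_remap_table *table = iommu->pci_seg->irq_lookup_table[devid];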
Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-6-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 6 ++++++ drivers/iommu/amd/init.c | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index d0ee78c656ff..cfb5f0e44186 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -565,6 +565,12 @@ struct amd_iommu_pci_seg { * device id. */ struct amd_iommu **rlookup_table; + + /* + * This table is used to find the irq remapping table for a given + * device id quickly. + */ + struct irq_remap_table **irq_lookup_table; }; /* diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 35863ba9f86e..e05d792c57cf 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -684,6 +684,26 @@ static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) pci_seg->rlookup_table = NULL; } +static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + pci_seg->irq_lookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + kmemleak_alloc(pci_seg->irq_lookup_table, + rlookup_table_size, 1, GFP_KERNEL); + if (pci_seg->irq_lookup_table == NULL) + return -ENOMEM; + + return 0; +} + +static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + kmemleak_free(pci_seg->irq_lookup_table); + free_pages((unsigned long)pci_seg->irq_lookup_table, + get_order(rlookup_table_size)); + pci_seg->irq_lookup_table = NULL; +} /* * Allocates the command buffer. This buffer is per AMD IOMMU. We can @@ -1535,6 +1555,7 @@ static void __init free_pci_segments(void) for_each_pci_segment_safe(pci_seg, next) { list_del(&pci_seg->list); + free_irq_lookup_table(pci_seg); free_rlookup_table(pci_seg); free_dev_table(pci_seg); kfree(pci_seg); @@ -2900,6 +2921,7 @@ static int __init early_amd_iommu_init(void) amd_iommu_irq_remap = check_ioapic_information(); if (amd_iommu_irq_remap) { + struct amd_iommu_pci_seg *pci_seg; /* * Interrupt remapping enabled, create kmem_cache for the * remapping tables. @@ -2916,6 +2938,11 @@ static int __init early_amd_iommu_init(void) if (!amd_iommu_irq_cache) goto out; + for_each_pci_segment(pci_seg) { + if (alloc_irq_lookup_table(pci_seg)) + goto out; + } + irq_lookup_table = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, get_order(rlookup_table_size)); -- cgit v1.2.3 From 39a303ba4a57f263aa2ed4908161bca3583f35f9 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:07:56 +0530 Subject: iommu/amd: Introduce per PCI segment dev_data_list This will replace global dev_data_list. 
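A minimal sketch of the per-segment search that replaces the global list walk (compare search_dev_data() in the diff below):

	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
	struct iommu_dev_data *dev_data;

	llist_for_each_entry(dev_data, pci_seg->dev_data_list.first, dev_data_list)
		if (dev_data->devid == devid)
			return dev_data;
	return NULL;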
Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-7-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 3 +++ drivers/iommu/amd/init.c | 1 + drivers/iommu/amd/iommu.c | 21 ++++++++++----------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index cfb5f0e44186..5f3cc704f131 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -546,6 +546,9 @@ struct amd_iommu_pci_seg { /* List with all PCI segments in the system */ struct list_head list; + /* List of all available dev_data structures */ + struct llist_head dev_data_list; + /* PCI segment number */ u16 id; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index e05d792c57cf..5455b9d051e9 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1527,6 +1527,7 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id) return NULL; pci_seg->id = id; + init_llist_head(&pci_seg->dev_data_list); list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); if (alloc_dev_table(pci_seg)) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b0262b2e749d..48275da7fcb0 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -62,9 +62,6 @@ static DEFINE_SPINLOCK(pd_bitmap_lock); -/* List of all available dev_data structures */ -static LLIST_HEAD(dev_data_list); - LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); @@ -195,9 +192,10 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } -static struct iommu_dev_data *alloc_dev_data(u16 devid) +static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); if (!dev_data) @@ -207,19 +205,20 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) dev_data->devid = devid; ratelimit_default_init(&dev_data->rs); - llist_add(&dev_data->dev_data_list, &dev_data_list); + llist_add(&dev_data->dev_data_list, &pci_seg->dev_data_list); return dev_data; } -static struct iommu_dev_data *search_dev_data(u16 devid) +static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; struct llist_node *node; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; - if (llist_empty(&dev_data_list)) + if (llist_empty(&pci_seg->dev_data_list)) return NULL; - node = dev_data_list.first; + node = pci_seg->dev_data_list.first; llist_for_each_entry(dev_data, node, dev_data_list) { if (dev_data->devid == devid) return dev_data; @@ -288,10 +287,10 @@ static struct iommu_dev_data *find_dev_data(u16 devid) struct iommu_dev_data *dev_data; struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - dev_data = search_dev_data(devid); + dev_data = search_dev_data(iommu, devid); if (dev_data == NULL) { - dev_data = alloc_dev_data(devid); + dev_data = alloc_dev_data(iommu, devid); if (!dev_data) return NULL; @@ -3466,7 +3465,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data; struct amd_ir_data *ir_data = data->chip_data; struct irq_2_irte *irte_info = &ir_data->irq_2_irte; - struct iommu_dev_data *dev_data = 
search_dev_data(irte_info->devid); + struct iommu_dev_data *dev_data = search_dev_data(NULL, irte_info->devid); /* Note: * This device has never been set up for guest mode. -- cgit v1.2.3 From eb21ef0227915ba61c8927fddca53571a0a35de8 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:07:57 +0530 Subject: iommu/amd: Introduce per PCI segment old_dev_tbl_cpy This removes the global old_dev_tbl_cpy. Also update copy_device_table() to copy the device table for all PCI segments. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-8-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 6 ++ drivers/iommu/amd/init.c | 109 +++++++++++++++++++++--------------- 2 files changed, 70 insertions(+), 45 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 5f3cc704f131..3ef68d588cc7 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -574,6 +574,12 @@ struct amd_iommu_pci_seg { * device id quickly. */ struct irq_remap_table **irq_lookup_table; + + /* + * Pointer to a device table which the content of old device table + * will be copied to. It's only be used in kdump kernel. + */ + struct dev_table_entry *old_dev_tbl_cpy; }; /* diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 5455b9d051e9..a750707e725b 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -193,11 +193,6 @@ bool amd_iommu_force_isolation __read_mostly; * page table root pointer. */ struct dev_table_entry *amd_iommu_dev_table; -/* - * Pointer to a device table which the content of old device table - * will be copied to. It's only be used in kdump kernel.
- */ -static struct dev_table_entry *old_dev_tbl_cpy; /* * The alias table is a driver specific data structure which contains the @@ -992,39 +987,27 @@ static int get_dev_entry_bit(u16 devid, u8 bit) } -static bool copy_device_table(void) +static bool __copy_device_table(struct amd_iommu *iommu) { - u64 int_ctl, int_tab_len, entry = 0, last_entry = 0; + u64 int_ctl, int_tab_len, entry = 0; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; struct dev_table_entry *old_devtb = NULL; u32 lo, hi, devid, old_devtb_size; phys_addr_t old_devtb_phys; - struct amd_iommu *iommu; u16 dom_id, dte_v, irq_v; gfp_t gfp_flag; u64 tmp; - if (!amd_iommu_pre_enabled) - return false; - - pr_warn("Translation is already enabled - trying to copy translation structures\n"); - for_each_iommu(iommu) { - /* All IOMMUs should use the same device table with the same size */ - lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); - hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); - entry = (((u64) hi) << 32) + lo; - if (last_entry && last_entry != entry) { - pr_err("IOMMU:%d should use the same dev table as others!\n", - iommu->index); - return false; - } - last_entry = entry; + /* Each IOMMU use separate device table with the same size */ + lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); + hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); + entry = (((u64) hi) << 32) + lo; - old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; - if (old_devtb_size != dev_table_size) { - pr_err("The device table size of IOMMU:%d is not expected!\n", - iommu->index); - return false; - } + old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; + if (old_devtb_size != dev_table_size) { + pr_err("The device table size of IOMMU:%d is not expected!\n", + iommu->index); + return false; } /* @@ -1047,31 +1030,31 @@ static bool copy_device_table(void) return false; gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; - old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, - get_order(dev_table_size)); - if (old_dev_tbl_cpy == NULL) { + pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, + get_order(dev_table_size)); + if (pci_seg->old_dev_tbl_cpy == NULL) { pr_err("Failed to allocate memory for copying old device table!\n"); memunmap(old_devtb); return false; } for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - old_dev_tbl_cpy[devid] = old_devtb[devid]; + pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid]; dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; if (dte_v && dom_id) { - old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; - old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; + pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; + pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); /* If gcr3 table existed, mask it out */ if (old_devtb[devid].data[0] & DTE_FLAG_GV) { tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; - old_dev_tbl_cpy[devid].data[1] &= ~tmp; + pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp; tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; tmp |= DTE_FLAG_GV; - old_dev_tbl_cpy[devid].data[0] &= ~tmp; + pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp; } } @@ -1086,7 +1069,7 @@ static bool copy_device_table(void) return false; } - old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; + pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; } } memunmap(old_devtb); @@ -1094,6 +1077,33 @@ 
static bool copy_device_table(void) return true; } +static bool copy_device_table(void) +{ + struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; + + if (!amd_iommu_pre_enabled) + return false; + + pr_warn("Translation is already enabled - trying to copy translation structures\n"); + + /* + * All IOMMUs within PCI segment shares common device table. + * Hence copy device table only once per PCI segment. + */ + for_each_pci_segment(pci_seg) { + for_each_iommu(iommu) { + if (pci_seg->id != iommu->pci_seg->id) + continue; + if (!__copy_device_table(iommu)) + return false; + break; + } + } + + return true; +} + void amd_iommu_apply_erratum_63(u16 devid) { int sysmgt; @@ -2591,7 +2601,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) static void early_enable_iommus(void) { struct amd_iommu *iommu; - + struct amd_iommu_pci_seg *pci_seg; if (!copy_device_table()) { /* @@ -2601,9 +2611,14 @@ static void early_enable_iommus(void) */ if (amd_iommu_pre_enabled) pr_err("Failed to copy DEV table from previous kernel.\n"); - if (old_dev_tbl_cpy != NULL) - free_pages((unsigned long)old_dev_tbl_cpy, - get_order(dev_table_size)); + + for_each_pci_segment(pci_seg) { + if (pci_seg->old_dev_tbl_cpy != NULL) { + free_pages((unsigned long)pci_seg->old_dev_tbl_cpy, + get_order(dev_table_size)); + pci_seg->old_dev_tbl_cpy = NULL; + } + } for_each_iommu(iommu) { clear_translation_pre_enabled(iommu); @@ -2611,9 +2626,13 @@ static void early_enable_iommus(void) } } else { pr_info("Copied DEV table from previous kernel.\n"); - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); - amd_iommu_dev_table = old_dev_tbl_cpy; + + for_each_pci_segment(pci_seg) { + free_pages((unsigned long)pci_seg->dev_table, + get_order(dev_table_size)); + pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; + } + for_each_iommu(iommu) { iommu_disable_command_buffer(iommu); iommu_disable_event_buffer(iommu); -- cgit v1.2.3 From 99fc4ac3d2978cec1ae80dbc452fac1bb2cc976f Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:07:58 +0530 Subject: iommu/amd: Introduce per PCI segment alias_table This will replace global alias table (amd_iommu_alias_table). Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-9-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 7 +++++++ drivers/iommu/amd/init.c | 41 +++++++++++++++++++++++++++++-------- drivers/iommu/amd/iommu.c | 41 ++++++++++++++++++++++--------------- 3 files changed, 64 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 3ef68d588cc7..c9dd0ab37475 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -580,6 +580,13 @@ struct amd_iommu_pci_seg { * will be copied to. It's only be used in kdump kernel. */ struct dev_table_entry *old_dev_tbl_cpy; + + /* + * The alias table is a driver specific data structure which contains the + * mappings of the PCI device ids to the actual requestor ids on the IOMMU. + * More than one device can share the same requestor id. 
+ */ + u16 *alias_table; }; /* diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index a750707e725b..2c8dbf2332bd 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -700,6 +700,31 @@ static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) pci_seg->irq_lookup_table = NULL; } +static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) +{ + int i; + + pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(alias_table_size)); + if (!pci_seg->alias_table) + return -ENOMEM; + + /* + * let all alias entries point to itself + */ + for (i = 0; i <= amd_iommu_last_bdf; ++i) + pci_seg->alias_table[i] = i; + + return 0; +} + +static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) +{ + free_pages((unsigned long)pci_seg->alias_table, + get_order(alias_table_size)); + pci_seg->alias_table = NULL; +} + /* * Allocates the command buffer. This buffer is per AMD IOMMU. We can * write commands to that buffer later and the IOMMU will execute them @@ -1268,6 +1293,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, u32 dev_i, ext_flags = 0; bool alias = false; struct ivhd_entry *e; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; u32 ivhd_size; int ret; @@ -1349,7 +1375,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, devid_to = e->ext >> 8; set_dev_entry_from_acpi(iommu, devid , e->flags, 0); set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); - amd_iommu_alias_table[devid] = devid_to; + pci_seg->alias_table[devid] = devid_to; break; case IVHD_DEV_ALIAS_RANGE: @@ -1407,7 +1433,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, devid = e->devid; for (dev_i = devid_start; dev_i <= devid; ++dev_i) { if (alias) { - amd_iommu_alias_table[dev_i] = devid_to; + pci_seg->alias_table[dev_i] = devid_to; set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags); } @@ -1542,6 +1568,8 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id) if (alloc_dev_table(pci_seg)) return NULL; + if (alloc_alias_table(pci_seg)) + return NULL; if (alloc_rlookup_table(pci_seg)) return NULL; @@ -1568,6 +1596,7 @@ static void __init free_pci_segments(void) list_del(&pci_seg->list); free_irq_lookup_table(pci_seg); free_rlookup_table(pci_seg); + free_alias_table(pci_seg); free_dev_table(pci_seg); kfree(pci_seg); } @@ -2842,7 +2871,7 @@ static void __init ivinfo_init(void *ivrs) static int __init early_amd_iommu_init(void) { struct acpi_table_header *ivrs_base; - int i, remap_cache_sz, ret; + int remap_cache_sz, ret; acpi_status status; if (!amd_iommu_detected) @@ -2913,12 +2942,6 @@ static int __init early_amd_iommu_init(void) if (amd_iommu_pd_alloc_bitmap == NULL) goto out; - /* - * let all alias entries point to itself - */ - for (i = 0; i <= amd_iommu_last_bdf; ++i) - amd_iommu_alias_table[i] = i; - /* * never allocate domain 0 because its used as the non-allocated and * error value placeholder diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 48275da7fcb0..378fa68f2fdd 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -243,7 +243,7 @@ static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) return 0; } -static void clone_aliases(struct device *dev) +static void clone_aliases(struct amd_iommu *iommu, struct device *dev) { struct pci_dev *pdev; @@ -256,14 +256,15 @@ static void clone_aliases(struct device *dev) * part of the PCI DMA aliases if it's bus differs * from the original device. 
*/ - clone_alias(pdev, amd_iommu_alias_table[pci_dev_id(pdev)], NULL); + clone_alias(pdev, iommu->pci_seg->alias_table[pci_dev_id(pdev)], NULL); pci_for_each_dma_alias(pdev, clone_alias, NULL); } -static void setup_aliases(struct device *dev) +static void setup_aliases(struct amd_iommu *iommu, struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; u16 ivrs_alias; /* For ACPI HID devices, there are no aliases */ @@ -274,12 +275,12 @@ static void setup_aliases(struct device *dev) * Add the IVRS alias to the pci aliases if it is on the same * bus. The IVRS table may know about a quirk that we don't. */ - ivrs_alias = amd_iommu_alias_table[pci_dev_id(pdev)]; + ivrs_alias = pci_seg->alias_table[pci_dev_id(pdev)]; if (ivrs_alias != pci_dev_id(pdev) && PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1); - clone_aliases(dev); + clone_aliases(iommu, dev); } static struct iommu_dev_data *find_dev_data(u16 devid) @@ -371,7 +372,7 @@ static bool check_device(struct device *dev) return true; } -static int iommu_init_device(struct device *dev) +static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) { struct iommu_dev_data *dev_data; int devid; @@ -388,7 +389,7 @@ static int iommu_init_device(struct device *dev) return -ENOMEM; dev_data->dev = dev; - setup_aliases(dev); + setup_aliases(iommu, dev); /* * By default we use passthrough mode for IOMMUv2 capable device. @@ -409,7 +410,7 @@ static int iommu_init_device(struct device *dev) return 0; } -static void iommu_ignore_device(struct device *dev) +static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev) { int devid; @@ -420,7 +421,7 @@ static void iommu_ignore_device(struct device *dev) amd_iommu_rlookup_table[devid] = NULL; memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); - setup_aliases(dev); + setup_aliases(iommu, dev); } static void amd_iommu_uninit_device(struct device *dev) @@ -1290,6 +1291,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) { struct amd_iommu *iommu; struct pci_dev *pdev = NULL; + struct amd_iommu_pci_seg *pci_seg; u16 alias; int ret; @@ -1306,7 +1308,8 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) if (ret) return ret; - alias = amd_iommu_alias_table[dev_data->devid]; + pci_seg = iommu->pci_seg; + alias = pci_seg->alias_table[dev_data->devid]; if (alias != dev_data->devid) { ret = iommu_flush_dte(iommu, alias); if (ret) @@ -1622,7 +1625,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); - clone_aliases(dev_data->dev); + clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); } @@ -1638,7 +1641,7 @@ static void do_detach(struct iommu_dev_data *dev_data) dev_data->domain = NULL; list_del(&dev_data->list); clear_dte_entry(dev_data->devid); - clone_aliases(dev_data->dev); + clone_aliases(iommu, dev_data->dev); /* Flush the DTE entry */ device_flush_dte(dev_data); @@ -1821,12 +1824,12 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) if (dev_iommu_priv_get(dev)) return &iommu->iommu; - ret = iommu_init_device(dev); + ret = iommu_init_device(iommu, dev); if (ret) { if (ret != -ENOTSUPP) dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); iommu_dev = ERR_PTR(ret); - iommu_ignore_device(dev); + iommu_ignore_device(iommu, dev); } else { amd_iommu_set_pci_msi_domain(dev, iommu); iommu_dev = 
&iommu->iommu; @@ -1877,9 +1880,13 @@ static void update_device_table(struct protection_domain *domain) struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { + struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); + + if (!iommu) + continue; set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); - clone_aliases(dev_data->dev); + clone_aliases(iommu, dev_data->dev); } } @@ -2783,6 +2790,7 @@ static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) { struct irq_remap_table *table = NULL; struct irq_remap_table *new_table = NULL; + struct amd_iommu_pci_seg *pci_seg; struct amd_iommu *iommu; unsigned long flags; u16 alias; @@ -2793,11 +2801,12 @@ static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) if (!iommu) goto out_unlock; + pci_seg = iommu->pci_seg; table = irq_lookup_table[devid]; if (table) goto out_unlock; - alias = amd_iommu_alias_table[devid]; + alias = pci_seg->alias_table[devid]; table = irq_lookup_table[alias]; if (table) { set_remap_table_entry(iommu, devid, table); -- cgit v1.2.3 From b618ae6247bbafe0844355bafd948e59ebd77098 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:07:59 +0530 Subject: iommu/amd: Introduce per PCI segment unity map list Newer AMD systems can support multiple PCI segments. In order to support multiple PCI segments, the IVMD table in the IVRS structure is enhanced to include a PCI segment id. Update the ivmd_header structure to include "pci_seg". Also introduce a per PCI segment unity map list, which will replace the global amd_iommu_unity_map list. Note that the "pci_seg" id is carried in a previously reserved IVMD field that was always set to zero, which takes care of backward compatibility (a new kernel will work fine on older systems). Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-10-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 13 +++++++------ drivers/iommu/amd/init.c | 30 ++++++++++++++++++++---------- drivers/iommu/amd/iommu.c | 8 +++++++- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index c9dd0ab37475..3099a018cef0 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -587,6 +587,13 @@ struct amd_iommu_pci_seg { * More than one device can share the same requestor id. */ u16 *alias_table; + + /* + * A list of required unity mappings we find in ACPI. It is not locked + * because as runtime it is only read. It is created at ACPI table + * parsing time. + */ + struct list_head unity_map; }; /* @@ -813,12 +820,6 @@ struct unity_map_entry { int prot; }; -/* - * List of all unity mappings. It is not locked because as runtime it is only - * read. It is created at ACPI table parsing time.
- */ -extern struct list_head amd_iommu_unity_map; - /* * Data structures for device handling */ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 2c8dbf2332bd..779505610164 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -141,7 +141,8 @@ struct ivmd_header { u16 length; u16 devid; u16 aux; - u64 resv; + u16 pci_seg; + u8 resv[6]; u64 range_start; u64 range_length; } __attribute__((packed)); @@ -161,8 +162,6 @@ static int amd_iommu_target_ivhd_type; u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ -LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings - we find in ACPI */ LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the @@ -1564,6 +1563,7 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id) pci_seg->id = id; init_llist_head(&pci_seg->dev_data_list); + INIT_LIST_HEAD(&pci_seg->unity_map); list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); if (alloc_dev_table(pci_seg)) @@ -2401,10 +2401,13 @@ enable_faults: static void __init free_unity_maps(void) { struct unity_map_entry *entry, *next; + struct amd_iommu_pci_seg *p, *pci_seg; - list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { - list_del(&entry->list); - kfree(entry); + for_each_pci_segment_safe(pci_seg, p) { + list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) { + list_del(&entry->list); + kfree(entry); + } } } @@ -2412,8 +2415,13 @@ static void __init free_unity_maps(void) static int __init init_unity_map_range(struct ivmd_header *m) { struct unity_map_entry *e = NULL; + struct amd_iommu_pci_seg *pci_seg; char *s; + pci_seg = get_pci_segment(m->pci_seg); + if (pci_seg == NULL) + return -ENOMEM; + e = kzalloc(sizeof(*e), GFP_KERNEL); if (e == NULL) return -ENOMEM; @@ -2451,14 +2459,16 @@ static int __init init_unity_map_range(struct ivmd_header *m) if (m->flags & IVMD_FLAG_EXCL_RANGE) e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; - DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" - " range_start: %016llx range_end: %016llx flags: %x\n", s, + DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: " + "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx" + " flags: %x\n", s, m->pci_seg, PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), - PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end), + PCI_FUNC(e->devid_start), m->pci_seg, + PCI_BUS_NUM(e->devid_end), PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), e->address_start, e->address_end, m->flags); - list_add_tail(&e->list, &amd_iommu_unity_map); + list_add_tail(&e->list, &pci_seg->unity_map); return 0; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 378fa68f2fdd..53ccee57a7a0 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2237,13 +2237,19 @@ static void amd_iommu_get_resv_regions(struct device *dev, { struct iommu_resv_region *region; struct unity_map_entry *entry; + struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; int devid; devid = get_device_id(dev); if (devid < 0) return; + iommu = rlookup_amd_iommu(dev); + if (!iommu) + return; + pci_seg = iommu->pci_seg; - list_for_each_entry(entry, &amd_iommu_unity_map, list) { + list_for_each_entry(entry, &pci_seg->unity_map, list) { int type, prot = 0; size_t length; -- cgit v1.2.3 From 307959008d80cdfd67fb19a146932582bb68a399 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:08:00 +0530 Subject: iommu/amd: Introduce per PCI 
segment last_bdf Current code uses the global "amd_iommu_last_bdf" to track the last bdf supported by the system. This value is used for various memory allocations, device data flushing, etc. Introduce a per PCI segment last_bdf, which will be used to track the last bdf supported by the given PCI segment, and use this value for all per-segment memory allocations. Eventually it will replace the global "amd_iommu_last_bdf". Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-11-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 3 ++ drivers/iommu/amd/init.c | 69 ++++++++++++++++++++++--------------- 2 files changed, 45 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 3099a018cef0..8be8f3d6b44a 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -552,6 +552,9 @@ struct amd_iommu_pci_seg { /* PCI segment number */ u16 id; + /* Largest PCI device id we expect translation requests for */ + u16 last_bdf; + /* * device table virtual address * diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 779505610164..50be138108e7 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -552,6 +552,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) { u8 *p = (void *)h, *end = (void *)h; struct ivhd_entry *dev; + int last_devid = -EINVAL; u32 ivhd_size = get_ivhd_header_size(h); @@ -569,13 +570,15 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) case IVHD_DEV_ALL: /* Use maximum BDF value for DEV_ALL */ update_last_devid(0xffff); - break; + return 0xffff; case IVHD_DEV_SELECT: case IVHD_DEV_RANGE_END: case IVHD_DEV_ALIAS: case IVHD_DEV_EXT_SELECT: /* all the above subfield types refer to device ids */ update_last_devid(dev->devid); + if (dev->devid > last_devid) + last_devid = dev->devid; break; default: break; @@ -585,7 +588,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) WARN_ON(p != end); - return 0; + return last_devid; } static int __init check_ivrs_checksum(struct acpi_table_header *table) @@ -609,27 +612,31 @@ static int __init check_ivrs_checksum(struct acpi_table_header *table) * id which we need to handle. This is the first of three functions which parse * the ACPI table. So we check the checksum here. 
*/ -static int __init find_last_devid_acpi(struct acpi_table_header *table) +static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg) { u8 *p = (u8 *)table, *end = (u8 *)table; struct ivhd_header *h; + int last_devid, last_bdf = 0; p += IVRS_HEADER_LENGTH; end += table->length; while (p < end) { h = (struct ivhd_header *)p; - if (h->type == amd_iommu_target_ivhd_type) { - int ret = find_last_devid_from_ivhd(h); - - if (ret) - return ret; + if (h->pci_seg == pci_seg && + h->type == amd_iommu_target_ivhd_type) { + last_devid = find_last_devid_from_ivhd(h); + + if (last_devid < 0) + return -EINVAL; + if (last_devid > last_bdf) + last_bdf = last_devid; } p += h->length; } WARN_ON(p != end); - return 0; + return last_bdf; } /**************************************************************************** @@ -1553,14 +1560,28 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, } /* Allocate PCI segment data structure */ -static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id) +static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, + struct acpi_table_header *ivrs_base) { struct amd_iommu_pci_seg *pci_seg; + int last_bdf; + + /* + * First parse ACPI tables to find the largest Bus/Dev/Func we need to + * handle in this PCI segment. Upon this information the shared data + * structures for the PCI segments in the system will be allocated. + */ + last_bdf = find_last_devid_acpi(ivrs_base, id); + if (last_bdf < 0) + return NULL; pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL); if (pci_seg == NULL) return NULL; + pci_seg->last_bdf = last_bdf; + DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); + pci_seg->id = id; init_llist_head(&pci_seg->dev_data_list); INIT_LIST_HEAD(&pci_seg->unity_map); @@ -1576,7 +1597,8 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id) return pci_seg; } -static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id) +static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, + struct acpi_table_header *ivrs_base) { struct amd_iommu_pci_seg *pci_seg; @@ -1585,7 +1607,7 @@ static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id) return pci_seg; } - return alloc_pci_segment(id); + return alloc_pci_segment(id, ivrs_base); } static void __init free_pci_segments(void) @@ -1686,12 +1708,13 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) * together and also allocates the command buffer and programs the * hardware. It does NOT enable the IOMMU. This is done afterwards. 
*/ -static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) +static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, + struct acpi_table_header *ivrs_base) { struct amd_iommu_pci_seg *pci_seg; int ret; - pci_seg = get_pci_segment(h->pci_seg); + pci_seg = get_pci_segment(h->pci_seg, ivrs_base); if (pci_seg == NULL) return -ENOMEM; iommu->pci_seg = pci_seg; @@ -1866,7 +1889,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) if (iommu == NULL) return -ENOMEM; - ret = init_iommu_one(iommu, h); + ret = init_iommu_one(iommu, h, table); if (ret) return ret; } @@ -2412,13 +2435,14 @@ static void __init free_unity_maps(void) } /* called for unity map ACPI definition */ -static int __init init_unity_map_range(struct ivmd_header *m) +static int __init init_unity_map_range(struct ivmd_header *m, + struct acpi_table_header *ivrs_base) { struct unity_map_entry *e = NULL; struct amd_iommu_pci_seg *pci_seg; char *s; - pci_seg = get_pci_segment(m->pci_seg); + pci_seg = get_pci_segment(m->pci_seg, ivrs_base); if (pci_seg == NULL) return -ENOMEM; @@ -2485,7 +2509,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) while (p < end) { m = (struct ivmd_header *)p; if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) - init_unity_map_range(m); + init_unity_map_range(m, table); p += m->length; } @@ -2909,15 +2933,6 @@ static int __init early_amd_iommu_init(void) amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); - /* - * First parse ACPI tables to find the largest Bus/Dev/Func - * we need to handle. Upon this information the shared data - * structures for the IOMMUs in the system will be allocated - */ - ret = find_last_devid_acpi(ivrs_base); - if (ret) - goto out; - dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); -- cgit v1.2.3 From b5c852907e0e8a52f8660235390b831ce0a2b50c Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:08:01 +0530 Subject: iommu/amd: Introduce per PCI segment device table size With multiple PCI segment support, the number of BDFs supported by each segment may differ. Hence introduce a per-segment device table size which depends on last_bdf. This will replace the global "device_table_size" variable. 
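As a rough illustration of the sizing rule described above, here is a minimal sketch; it assumes the same page-granular rounding the in-tree tbl_size() helper performs and is not itself part of the patch:

/*
 * Sketch: deriving a per-segment table size from last_bdf, assuming
 * page-granular allocations as in tbl_size(). For the device table,
 * entry_size would be DEV_TABLE_ENTRY_SIZE (32 bytes per DTE).
 */
static inline u32 seg_tbl_size(u16 last_bdf, int entry_size)
{
	/* One entry per device id 0..last_bdf, rounded up to whole pages. */
	return 1U << (PAGE_SHIFT + get_order((last_bdf + 1) * entry_size));
}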
Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-12-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 3 +++ drivers/iommu/amd/init.c | 18 ++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 8be8f3d6b44a..1dbe9c7f973d 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -555,6 +555,9 @@ struct amd_iommu_pci_seg { /* Largest PCI device id we expect translation requests for */ u16 last_bdf; + /* Size of the device table */ + u32 dev_table_size; + /* * device table virtual address * diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 50be138108e7..2950184d5540 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -416,6 +416,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu) static void iommu_set_device_table(struct amd_iommu *iommu) { u64 entry; + u32 dev_table_size = iommu->pci_seg->dev_table_size; BUG_ON(iommu->mmio_base == NULL); @@ -652,7 +653,7 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) { pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, - get_order(dev_table_size)); + get_order(pci_seg->dev_table_size)); if (!pci_seg->dev_table) return -ENOMEM; @@ -662,7 +663,7 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) { free_pages((unsigned long)pci_seg->dev_table, - get_order(dev_table_size)); + get_order(pci_seg->dev_table_size)); pci_seg->dev_table = NULL; } @@ -1035,7 +1036,7 @@ static bool __copy_device_table(struct amd_iommu *iommu) entry = (((u64) hi) << 32) + lo; old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; - if (old_devtb_size != dev_table_size) { + if (old_devtb_size != pci_seg->dev_table_size) { pr_err("The device table size of IOMMU:%d is not expected!\n", iommu->index); return false; @@ -1054,15 +1055,15 @@ static bool __copy_device_table(struct amd_iommu *iommu) } old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel()) ? 
(__force void *)ioremap_encrypted(old_devtb_phys, - dev_table_size) - : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB); + pci_seg->dev_table_size) + : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB); if (!old_devtb) return false; gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, - get_order(dev_table_size)); + get_order(pci_seg->dev_table_size)); if (pci_seg->old_dev_tbl_cpy == NULL) { pr_err("Failed to allocate memory for copying old device table!\n"); memunmap(old_devtb); @@ -1581,6 +1582,7 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, pci_seg->last_bdf = last_bdf; DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); + pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); pci_seg->id = id; init_llist_head(&pci_seg->dev_data_list); @@ -2678,7 +2680,7 @@ static void early_enable_iommus(void) for_each_pci_segment(pci_seg) { if (pci_seg->old_dev_tbl_cpy != NULL) { free_pages((unsigned long)pci_seg->old_dev_tbl_cpy, - get_order(dev_table_size)); + get_order(pci_seg->dev_table_size)); pci_seg->old_dev_tbl_cpy = NULL; } } @@ -2692,7 +2694,7 @@ static void early_enable_iommus(void) for_each_pci_segment(pci_seg) { free_pages((unsigned long)pci_seg->dev_table, - get_order(dev_table_size)); + get_order(pci_seg->dev_table_size)); pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; } -- cgit v1.2.3 From 74ce42a9ab7a245c293c58a4cb964ad285e8fb2e Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:08:02 +0530 Subject: iommu/amd: Introduce per PCI segment alias table size It will replace global "alias_table_size" variable. Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-13-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 3 +++ drivers/iommu/amd/init.c | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 1dbe9c7f973d..8638b1107dd2 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -558,6 +558,9 @@ struct amd_iommu_pci_seg { /* Size of the device table */ u32 dev_table_size; + /* Size of the alias table */ + u32 alias_table_size; + /* * device table virtual address * diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 2950184d5540..b878a50e856d 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -712,7 +712,7 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) int i; pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL, - get_order(alias_table_size)); + get_order(pci_seg->alias_table_size)); if (!pci_seg->alias_table) return -ENOMEM; @@ -728,7 +728,7 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) { free_pages((unsigned long)pci_seg->alias_table, - get_order(alias_table_size)); + get_order(pci_seg->alias_table_size)); pci_seg->alias_table = NULL; } @@ -1583,6 +1583,7 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, pci_seg->last_bdf = last_bdf; DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); + pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); pci_seg->id = id; init_llist_head(&pci_seg->dev_data_list); -- cgit 
v1.2.3 From ec12dd139252cede736b8f98c5ced9644121de30 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:08:03 +0530 Subject: iommu/amd: Introduce per PCI segment rlookup table size It will replace global "rlookup_table_size" variable. Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-14-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 3 +++ drivers/iommu/amd/init.c | 11 ++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 8638b1107dd2..8d2d5fbdb57f 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -561,6 +561,9 @@ struct amd_iommu_pci_seg { /* Size of the alias table */ u32 alias_table_size; + /* Size of the rlookup table */ + u32 rlookup_table_size; + /* * device table virtual address * diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index b878a50e856d..2f1cdc8bd24c 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -672,7 +672,7 @@ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { pci_seg->rlookup_table = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); + get_order(pci_seg->rlookup_table_size)); if (pci_seg->rlookup_table == NULL) return -ENOMEM; @@ -682,7 +682,7 @@ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { free_pages((unsigned long)pci_seg->rlookup_table, - get_order(rlookup_table_size)); + get_order(pci_seg->rlookup_table_size)); pci_seg->rlookup_table = NULL; } @@ -690,9 +690,9 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se { pci_seg->irq_lookup_table = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); + get_order(pci_seg->rlookup_table_size)); kmemleak_alloc(pci_seg->irq_lookup_table, - rlookup_table_size, 1, GFP_KERNEL); + pci_seg->rlookup_table_size, 1, GFP_KERNEL); if (pci_seg->irq_lookup_table == NULL) return -ENOMEM; @@ -703,7 +703,7 @@ static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { kmemleak_free(pci_seg->irq_lookup_table); free_pages((unsigned long)pci_seg->irq_lookup_table, - get_order(rlookup_table_size)); + get_order(pci_seg->rlookup_table_size)); pci_seg->irq_lookup_table = NULL; } @@ -1584,6 +1584,7 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); + pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); pci_seg->id = id; init_llist_head(&pci_seg->dev_data_list); -- cgit v1.2.3 From 0217ed5a946b61cca3a608d1ba9a422f8f85baf6 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:08:04 +0530 Subject: iommu/amd: Convert to use per PCI segment irq_lookup_table Then, remove the global irq_lookup_table. 
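In sketch form (not part of the diff), the lookup that used to index a single global array now dereferences the segment first:

/*
 * Sketch: per-segment IRQ remap table lookup. Each PCI segment owns
 * its own irq_lookup_table, so the devid alone is no longer enough;
 * the IOMMU (and with it the segment) must be resolved first. The
 * helper name is illustrative only.
 */
static struct irq_remap_table *seg_irq_table(struct amd_iommu *iommu, u16 devid)
{
	return iommu->pci_seg->irq_lookup_table[devid];
}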
Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-15-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 2 -- drivers/iommu/amd/init.c | 19 ------------------- drivers/iommu/amd/iommu.c | 36 +++++++++++++++++++++++------------- 3 files changed, 23 insertions(+), 34 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 8d2d5fbdb57f..ca1a3d55cc83 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -445,8 +445,6 @@ struct irq_remap_table { u32 *table; }; -extern struct irq_remap_table **irq_lookup_table; - /* Interrupt remapping feature used? */ extern bool amd_iommu_irq_remap; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 2f1cdc8bd24c..a60bb6c5a6da 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -206,12 +206,6 @@ u16 *amd_iommu_alias_table; */ struct amd_iommu **amd_iommu_rlookup_table; -/* - * This table is used to find the irq remapping table for a given device id - * quickly. - */ -struct irq_remap_table **irq_lookup_table; - /* * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. @@ -2789,11 +2783,6 @@ static struct syscore_ops amd_iommu_syscore_ops = { static void __init free_iommu_resources(void) { - kmemleak_free(irq_lookup_table); - free_pages((unsigned long)irq_lookup_table, - get_order(rlookup_table_size)); - irq_lookup_table = NULL; - kmem_cache_destroy(amd_iommu_irq_cache); amd_iommu_irq_cache = NULL; @@ -3014,14 +3003,6 @@ static int __init early_amd_iommu_init(void) if (alloc_irq_lookup_table(pci_seg)) goto out; } - - irq_lookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - kmemleak_alloc(irq_lookup_table, rlookup_table_size, - 1, GFP_KERNEL); - if (!irq_lookup_table) - goto out; } ret = init_memory_definitions(ivrs_base); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 53ccee57a7a0..cfecd072e7a6 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2732,16 +2732,18 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) amd_iommu_dev_table[devid].data[2] = dte; } -static struct irq_remap_table *get_irq_table(u16 devid) +static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) { struct irq_remap_table *table; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; if (WARN_ONCE(!amd_iommu_rlookup_table[devid], "%s: no iommu for devid %x\n", __func__, devid)) return NULL; - table = irq_lookup_table[devid]; - if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid)) + table = pci_seg->irq_lookup_table[devid]; + if (WARN_ONCE(!table, "%s: no table for devid %x:%x\n", + __func__, pci_seg->id, devid)) return NULL; return table; @@ -2774,7 +2776,9 @@ static struct irq_remap_table *__alloc_irq_table(void) static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid, struct irq_remap_table *table) { - irq_lookup_table[devid] = table; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + + pci_seg->irq_lookup_table[devid] = table; set_dte_irq_entry(devid, table); iommu_flush_dte(iommu, devid); } @@ -2783,8 +2787,14 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, void *data) { struct irq_remap_table *table = data; + struct amd_iommu_pci_seg *pci_seg; + 
struct amd_iommu *iommu = rlookup_amd_iommu(&pdev->dev); - irq_lookup_table[alias] = table; + if (!iommu) + return -EINVAL; + + pci_seg = iommu->pci_seg; + pci_seg->irq_lookup_table[alias] = table; set_dte_irq_entry(alias, table); iommu_flush_dte(amd_iommu_rlookup_table[alias], alias); @@ -2808,12 +2818,12 @@ static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) goto out_unlock; pci_seg = iommu->pci_seg; - table = irq_lookup_table[devid]; + table = pci_seg->irq_lookup_table[devid]; if (table) goto out_unlock; alias = pci_seg->alias_table[devid]; - table = irq_lookup_table[alias]; + table = pci_seg->irq_lookup_table[alias]; if (table) { set_remap_table_entry(iommu, devid, table); goto out_wait; @@ -2827,11 +2837,11 @@ static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) spin_lock_irqsave(&iommu_table_lock, flags); - table = irq_lookup_table[devid]; + table = pci_seg->irq_lookup_table[devid]; if (table) goto out_unlock; - table = irq_lookup_table[alias]; + table = pci_seg->irq_lookup_table[alias]; if (table) { set_remap_table_entry(iommu, devid, table); goto out_wait; @@ -2925,7 +2935,7 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, if (iommu == NULL) return -EINVAL; - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return -ENOMEM; @@ -2966,7 +2976,7 @@ static int modify_irte(u16 devid, int index, union irte *irte) if (iommu == NULL) return -EINVAL; - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return -ENOMEM; @@ -2990,7 +3000,7 @@ static void free_irte(u16 devid, int index) if (iommu == NULL) return; - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return; @@ -3625,7 +3635,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data) if (!iommu) return -ENODEV; - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return -ENODEV; -- cgit v1.2.3 From 8b71c9bf4df8294f775b81babfd384d1b3c00aa8 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:05 +0530 Subject: iommu/amd: Convert to use rlookup_amd_iommu helper function Use rlookup_amd_iommu() helper function which will give per PCI segment rlookup_table. 
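The caller-side change in the hunks below follows one recurring pattern; a minimal sketch (example_op is a hypothetical caller, standing in for device_flush_iotlb() and friends):

/*
 * Sketch: replace the flat, devid-indexed global lookup with a
 * device-based, segment-aware one. The NULL check becomes mandatory
 * since the helper can fail for devices without an IOMMU.
 */
static int example_op(struct iommu_dev_data *dev_data)
{
	struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev);

	if (!iommu)
		return -EINVAL;	/* or skip the device, depending on the caller */

	/* ... proceed with a valid, segment-aware IOMMU pointer ... */
	return 0;
}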
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-16-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 64 ++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index cfecd072e7a6..19db4d54c337 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -229,13 +229,17 @@ static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) { + struct amd_iommu *iommu; u16 devid = pci_dev_id(pdev); if (devid == alias) return 0; - amd_iommu_rlookup_table[alias] = - amd_iommu_rlookup_table[devid]; + iommu = rlookup_amd_iommu(&pdev->dev); + if (!iommu) + return 0; + + amd_iommu_set_rlookup_table(iommu, alias); memcpy(amd_iommu_dev_table[alias].data, amd_iommu_dev_table[devid].data, sizeof(amd_iommu_dev_table[alias].data)); @@ -366,7 +370,7 @@ static bool check_device(struct device *dev) if (devid > amd_iommu_last_bdf) return false; - if (amd_iommu_rlookup_table[devid] == NULL) + if (rlookup_amd_iommu(dev) == NULL) return false; return true; @@ -1270,7 +1274,9 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, int qdep; qdep = dev_data->ats.qdep; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return -EINVAL; build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size); @@ -1295,7 +1301,9 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) u16 alias; int ret; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return -EINVAL; if (dev_is_pci(dev_data->dev)) pdev = to_pci_dev(dev_data->dev); @@ -1525,8 +1533,8 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } -static void set_dte_entry(u16 devid, struct protection_domain *domain, - bool ats, bool ppr) +static void set_dte_entry(struct amd_iommu *iommu, u16 devid, + struct protection_domain *domain, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; @@ -1545,8 +1553,6 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, flags |= DTE_FLAG_IOTLB; if (ppr) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - if (iommu_feature(iommu, FEATURE_EPHSUP)) pte_root |= 1ULL << DEV_ENTRY_PPR; } @@ -1590,8 +1596,6 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, * entries for the old domain ID that is being overwritten */ if (old_domid) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - amd_iommu_flush_tlb_domid(iommu, old_domid); } } @@ -1611,7 +1615,9 @@ static void do_attach(struct iommu_dev_data *dev_data, struct amd_iommu *iommu; bool ats; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return; ats = dev_data->ats.enabled; /* Update data structures */ @@ -1623,7 +1629,7 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - set_dte_entry(dev_data->devid, domain, + set_dte_entry(iommu, dev_data->devid, domain, ats, dev_data->iommu_v2); clone_aliases(iommu, dev_data->dev); @@ -1635,7 +1641,9 @@ static void do_detach(struct iommu_dev_data *dev_data) struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu; - iommu = 
amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return; /* Update data structures */ dev_data->domain = NULL; @@ -1813,13 +1821,14 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) { struct iommu_device *iommu_dev; struct amd_iommu *iommu; - int ret, devid; + int ret; if (!check_device(dev)) return ERR_PTR(-ENODEV); - devid = get_device_id(dev); - iommu = amd_iommu_rlookup_table[devid]; + iommu = rlookup_amd_iommu(dev); + if (!iommu) + return ERR_PTR(-ENODEV); if (dev_iommu_priv_get(dev)) return &iommu->iommu; @@ -1849,13 +1858,14 @@ static void amd_iommu_probe_finalize(struct device *dev) static void amd_iommu_release_device(struct device *dev) { - int devid = get_device_id(dev); struct amd_iommu *iommu; if (!check_device(dev)) return; - iommu = amd_iommu_rlookup_table[devid]; + iommu = rlookup_amd_iommu(dev); + if (!iommu) + return; amd_iommu_uninit_device(dev); iommu_completion_wait(iommu); @@ -1884,7 +1894,7 @@ static void update_device_table(struct protection_domain *domain) if (!iommu) continue; - set_dte_entry(dev_data->devid, domain, + set_dte_entry(iommu, dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(iommu, dev_data->dev); } @@ -2072,7 +2082,6 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); - int devid = get_device_id(dev); struct amd_iommu *iommu; if (!check_device(dev)) @@ -2081,7 +2090,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, if (dev_data->domain != NULL) detach_device(dev); - iommu = amd_iommu_rlookup_table[devid]; + iommu = rlookup_amd_iommu(dev); if (!iommu) return; @@ -2108,7 +2117,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, dev_data = dev_iommu_priv_get(dev); dev_data->defer_attach = false; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev); if (!iommu) return -EINVAL; @@ -2493,8 +2502,9 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, continue; qdep = dev_data->ats.qdep; - iommu = amd_iommu_rlookup_table[dev_data->devid]; - + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + continue; build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid, qdep, address, size); @@ -2656,7 +2666,9 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, struct iommu_cmd cmd; dev_data = dev_iommu_priv_get(&pdev->dev); - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(&pdev->dev); + if (!iommu) + return -ENODEV; build_complete_ppr(&cmd, dev_data->devid, pasid, status, tag, dev_data->pri_tlp); -- cgit v1.2.3 From 9873ae6e944d2e6f939ab81b25b2be7f0ee92d18 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:06 +0530 Subject: iommu/amd: Update irq_remapping_alloc to use IOMMU lookup helper function To allow IOMMU rlookup using both PCI segment and device ID. 
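get_devid() now returns a combined segment:devid value (an "sbdf"). A sketch of the split, assuming the bit layout implied by the PCI_SBDF_TO_SEGID()/PCI_SBDF_TO_DEVID() helpers used in the hunk below (a later hunk open-codes the same shifts):

/*
 * Sketch: decoding an sbdf value. Assumption: the upper 16 bits carry
 * the PCI segment id, the lower 16 bits the bus/device/function.
 */
static inline u16 sbdf_to_segid(int sbdf)
{
	return (sbdf >> 16) & 0xffff;
}

static inline u16 sbdf_to_devid(int sbdf)
{
	return sbdf & 0xffff;
}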
Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-17-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 19db4d54c337..5e4648cadff9 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3246,8 +3246,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, struct irq_alloc_info *info = arg; struct irq_data *irq_data; struct amd_ir_data *data = NULL; + struct amd_iommu *iommu; struct irq_cfg *cfg; - int i, ret, devid; + int i, ret, devid, seg, sbdf; int index; if (!info) @@ -3263,8 +3264,14 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; - devid = get_devid(info); - if (devid < 0) + sbdf = get_devid(info); + if (sbdf < 0) + return -EINVAL; + + seg = PCI_SBDF_TO_SEGID(sbdf); + devid = PCI_SBDF_TO_DEVID(sbdf); + iommu = __rlookup_amd_iommu(seg, devid); + if (!iommu) return -EINVAL; ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); @@ -3273,7 +3280,6 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { struct irq_remap_table *table; - struct amd_iommu *iommu; table = alloc_irq_table(devid, NULL); if (table) { @@ -3283,7 +3289,6 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, * interrupts. */ table->min_index = 32; - iommu = amd_iommu_rlookup_table[devid]; for (i = 0; i < 32; ++i) iommu->irte_ops->set_allocated(table, i); } -- cgit v1.2.3 From 9457d75c056ad52b6c33693c87500e38fdd41c6f Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:07 +0530 Subject: iommu/amd: Introduce struct amd_ir_data.iommu Add a pointer to struct amd_iommu to amd_ir_data structure, which can be used to correlate interrupt remapping data to a per-PCI-segment interrupt remapping table. 
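The lifetime rule this establishes, in sketch form: resolve the IOMMU once at allocation time, then let every later IRTE operation reuse the cached pointer instead of repeating a devid-indexed lookup (example_teardown is hypothetical; free_irte() is taken from the diff below):

/*
 * Sketch of the flow: the IOMMU resolved in irq_remapping_alloc() is
 * cached in amd_ir_data and reused everywhere afterwards.
 */
static void example_teardown(struct amd_ir_data *data, u16 devid, int index)
{
	/* No rlookup here: the pointer was cached at allocation time. */
	if (data->iommu)
		free_irte(data->iommu, devid, index);
}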
Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-18-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 1 + drivers/iommu/amd/iommu.c | 34 +++++++++++++++------------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index ca1a3d55cc83..693926afdd0f 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -989,6 +989,7 @@ struct irq_2_irte { struct amd_ir_data { u32 cached_ga_tag; + struct amd_iommu *iommu; struct irq_2_irte irq_2_irte; struct msi_msg msi_entry; void *entry; /* Pointer to union irte or struct irte_ga */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 5e4648cadff9..9f373b164762 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3002,16 +3002,11 @@ static int modify_irte(u16 devid, int index, union irte *irte) return 0; } -static void free_irte(u16 devid, int index) +static void free_irte(struct amd_iommu *iommu, u16 devid, int index) { struct irq_remap_table *table; - struct amd_iommu *iommu; unsigned long flags; - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - return; - table = get_irq_table(iommu, devid); if (!table) return; @@ -3195,7 +3190,7 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data, int devid, int index, int sub_handle) { struct irq_2_irte *irte_info = &data->irq_2_irte; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + struct amd_iommu *iommu = data->iommu; if (!iommu) return; @@ -3336,6 +3331,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, goto out_free_data; } + data->iommu = iommu; irq_data->hwirq = (devid << 16) + i; irq_data->chip_data = data; irq_data->chip = &amd_ir_chip; @@ -3352,7 +3348,7 @@ out_free_data: kfree(irq_data->chip_data); } for (i = 0; i < nr_irqs; i++) - free_irte(devid, index + i); + free_irte(iommu, devid, index + i); out_free_parent: irq_domain_free_irqs_common(domain, virq, nr_irqs); return ret; @@ -3371,7 +3367,7 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq, if (irq_data && irq_data->chip_data) { data = irq_data->chip_data; irte_info = &data->irq_2_irte; - free_irte(irte_info->devid, irte_info->index); + free_irte(data->iommu, irte_info->devid, irte_info->index); kfree(data->entry); kfree(data); } @@ -3389,7 +3385,7 @@ static int irq_remapping_activate(struct irq_domain *domain, { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; - struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct amd_iommu *iommu = data->iommu; struct irq_cfg *cfg = irqd_cfg(irq_data); if (!iommu) @@ -3406,7 +3402,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain, { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; - struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct amd_iommu *iommu = data->iommu; if (iommu) iommu->irte_ops->deactivate(data->entry, irte_info->devid, @@ -3502,12 +3498,16 @@ EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) { int ret; - struct amd_iommu *iommu; struct amd_iommu_pi_data *pi_data = vcpu_info; struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data; struct amd_ir_data *ir_data = 
data->chip_data; struct irq_2_irte *irte_info = &ir_data->irq_2_irte; - struct iommu_dev_data *dev_data = search_dev_data(NULL, irte_info->devid); + struct iommu_dev_data *dev_data; + + if (ir_data->iommu == NULL) + return -EINVAL; + + dev_data = search_dev_data(ir_data->iommu, irte_info->devid); /* Note: * This device has never been set up for guest mode. @@ -3529,10 +3529,6 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) pi_data->is_guest_mode = false; } - iommu = amd_iommu_rlookup_table[irte_info->devid]; - if (iommu == NULL) - return -EINVAL; - pi_data->prev_ga_tag = ir_data->cached_ga_tag; if (pi_data->is_guest_mode) { ir_data->ga_root_ptr = (pi_data->base >> 12); @@ -3578,7 +3574,7 @@ static int amd_ir_set_affinity(struct irq_data *data, struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct irq_cfg *cfg = irqd_cfg(data); struct irq_data *parent = data->parent_data; - struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct amd_iommu *iommu = ir_data->iommu; int ret; if (!iommu) @@ -3648,7 +3644,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data) !ref || !entry || !entry->lo.fields_vapic.guest_mode) return 0; - iommu = amd_iommu_rlookup_table[devid]; + iommu = ir_data->iommu; if (!iommu) return -ENODEV; -- cgit v1.2.3 From c4649a45f613a34012be82c700f9be5292893752 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:08 +0530 Subject: iommu/amd: Update amd_irte_ops functions Pass the amd_iommu structure as a parameter to the amd_irte_ops functions, since it is needed to activate/deactivate the IOMMU. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-19-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 6 ++--- drivers/iommu/amd/iommu.c | 51 +++++++++++++++----------------------- 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 693926afdd0f..67feb847fc13 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -1007,9 +1007,9 @@ struct amd_ir_data { struct amd_irte_ops { void (*prepare)(void *, u32, bool, u8, u32, int); - void (*activate)(void *, u16, u16); - void (*deactivate)(void *, u16, u16); - void (*set_affinity)(void *, u16, u16, u8, u32); + void (*activate)(struct amd_iommu *iommu, void *, u16, u16); + void (*deactivate)(struct amd_iommu *iommu, void *, u16, u16); + void (*set_affinity)(struct amd_iommu *iommu, void *, u16, u16, u8, u32); void *(*get)(struct irq_remap_table *, int); void (*set_allocated)(struct irq_remap_table *, int); bool (*is_allocated)(struct irq_remap_table *, int); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 9f373b164762..c4701fa957d0 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2934,19 +2934,14 @@ out: return index; } -static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, - struct amd_ir_data *data) +static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, + struct irte_ga *irte, struct amd_ir_data *data) { bool ret; struct irq_remap_table *table; - struct amd_iommu *iommu; unsigned long flags; struct irte_ga *entry; - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - return -EINVAL; - table = get_irq_table(iommu, devid); if (!table) return -ENOMEM; @@ -2978,16 +2973,12 @@ static int modify_irte_ga(u16 devid, int index, struct 
irte_ga *irte, return 0; } -static int modify_irte(u16 devid, int index, union irte *irte) +static int modify_irte(struct amd_iommu *iommu, + u16 devid, int index, union irte *irte) { struct irq_remap_table *table; - struct amd_iommu *iommu; unsigned long flags; - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - return -EINVAL; - table = get_irq_table(iommu, devid); if (!table) return -ENOMEM; @@ -3049,49 +3040,49 @@ static void irte_ga_prepare(void *entry, irte->lo.fields_remap.valid = 1; } -static void irte_activate(void *entry, u16 devid, u16 index) +static void irte_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { union irte *irte = (union irte *) entry; irte->fields.valid = 1; - modify_irte(devid, index, irte); + modify_irte(iommu, devid, index, irte); } -static void irte_ga_activate(void *entry, u16 devid, u16 index) +static void irte_ga_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { struct irte_ga *irte = (struct irte_ga *) entry; irte->lo.fields_remap.valid = 1; - modify_irte_ga(devid, index, irte, NULL); + modify_irte_ga(iommu, devid, index, irte, NULL); } -static void irte_deactivate(void *entry, u16 devid, u16 index) +static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { union irte *irte = (union irte *) entry; irte->fields.valid = 0; - modify_irte(devid, index, irte); + modify_irte(iommu, devid, index, irte); } -static void irte_ga_deactivate(void *entry, u16 devid, u16 index) +static void irte_ga_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { struct irte_ga *irte = (struct irte_ga *) entry; irte->lo.fields_remap.valid = 0; - modify_irte_ga(devid, index, irte, NULL); + modify_irte_ga(iommu, devid, index, irte, NULL); } -static void irte_set_affinity(void *entry, u16 devid, u16 index, +static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index, u8 vector, u32 dest_apicid) { union irte *irte = (union irte *) entry; irte->fields.vector = vector; irte->fields.destination = dest_apicid; - modify_irte(devid, index, irte); + modify_irte(iommu, devid, index, irte); } -static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, +static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index, u8 vector, u32 dest_apicid) { struct irte_ga *irte = (struct irte_ga *) entry; @@ -3102,7 +3093,7 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, APICID_TO_IRTE_DEST_LO(dest_apicid); irte->hi.fields.destination = APICID_TO_IRTE_DEST_HI(dest_apicid); - modify_irte_ga(devid, index, irte, NULL); + modify_irte_ga(iommu, devid, index, irte, NULL); } } @@ -3391,7 +3382,7 @@ static int irq_remapping_activate(struct irq_domain *domain, if (!iommu) return 0; - iommu->irte_ops->activate(data->entry, irte_info->devid, + iommu->irte_ops->activate(iommu, data->entry, irte_info->devid, irte_info->index); amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg); return 0; @@ -3405,7 +3396,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain, struct amd_iommu *iommu = data->iommu; if (iommu) - iommu->irte_ops->deactivate(data->entry, irte_info->devid, + iommu->irte_ops->deactivate(iommu, data->entry, irte_info->devid, irte_info->index); } @@ -3460,7 +3451,7 @@ int amd_iommu_activate_guest_mode(void *data) entry->hi.fields.vector = ir_data->ga_vector; entry->lo.fields_vapic.ga_tag = ir_data->ga_tag; - return modify_irte_ga(ir_data->irq_2_irte.devid, + return modify_irte_ga(ir_data->iommu, 
ir_data->irq_2_irte.devid, ir_data->irq_2_irte.index, entry, ir_data); } EXPORT_SYMBOL(amd_iommu_activate_guest_mode); @@ -3490,7 +3481,7 @@ int amd_iommu_deactivate_guest_mode(void *data) entry->hi.fields.destination = APICID_TO_IRTE_DEST_HI(cfg->dest_apicid); - return modify_irte_ga(ir_data->irq_2_irte.devid, + return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, ir_data->irq_2_irte.index, entry, ir_data); } EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); @@ -3562,7 +3553,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, * Atomically updates the IRTE with the new destination, vector * and flushes the interrupt entry cache. */ - iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid, + iommu->irte_ops->set_affinity(iommu, ir_data->entry, irte_info->devid, irte_info->index, cfg->vector, cfg->dest_apicid); } -- cgit v1.2.3 From e6457d7cfca14e8f7f2199cd724c4f03abe493c2 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:09 +0530 Subject: iommu/amd: Update alloc_irq_table and alloc_irq_index Pass the amd_iommu structure as a parameter to these functions, as it is needed to retrieve the per-segment tables inside them. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-20-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index c4701fa957d0..5ee1af9a0a54 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2814,21 +2814,17 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, return 0; } -static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) +static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, + u16 devid, struct pci_dev *pdev) { struct irq_remap_table *table = NULL; struct irq_remap_table *new_table = NULL; struct amd_iommu_pci_seg *pci_seg; - struct amd_iommu *iommu; unsigned long flags; u16 alias; spin_lock_irqsave(&iommu_table_lock, flags); - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - goto out_unlock; - pci_seg = iommu->pci_seg; table = pci_seg->irq_lookup_table[devid]; if (table) @@ -2884,18 +2880,14 @@ out_unlock: return table; } -static int alloc_irq_index(u16 devid, int count, bool align, - struct pci_dev *pdev) +static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, + bool align, struct pci_dev *pdev) { struct irq_remap_table *table; int index, c, alignment = 1; unsigned long flags; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - - if (!iommu) - return -ENODEV; - table = alloc_irq_table(devid, pdev); + table = alloc_irq_table(iommu, devid, pdev); if (!table) return -ENODEV; @@ -3267,7 +3259,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { struct irq_remap_table *table; - table = alloc_irq_table(devid, NULL); + table = alloc_irq_table(iommu, devid, NULL); if (table) { if (!table->min_index) { /* @@ -3287,10 +3279,10 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX) { bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI); - index = alloc_irq_index(devid, nr_irqs, align, 
msi_desc_to_pci_dev(info->desc)); } else { - index = alloc_irq_index(devid, nr_irqs, false, NULL); + index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL); } if (index < 0) { @@ -3416,8 +3408,8 @@ static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec, if (devid < 0) return 0; + iommu = __rlookup_amd_iommu((devid >> 16), (devid & 0xffff)); - iommu = amd_iommu_rlookup_table[devid]; return iommu && iommu->ir_domain == d; } -- cgit v1.2.3 From ccacd94fdacabe77f5a887c3b75334982b1ce9ca Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:08:10 +0530 Subject: iommu/amd: Convert to use per PCI segment rlookup_table Then, remove the global amd_iommu_rlookup_table and rlookup_table_size. Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-21-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 5 ----- drivers/iommu/amd/init.c | 23 ++--------------------- drivers/iommu/amd/iommu.c | 19 +++++++++---------- 3 files changed, 11 insertions(+), 36 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 67feb847fc13..d932c90329e4 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -846,11 +846,6 @@ extern struct dev_table_entry *amd_iommu_dev_table; */ extern u16 *amd_iommu_alias_table; -/* - * Reverse lookup table to find the IOMMU which translates a specific device. - */ -extern struct amd_iommu **amd_iommu_rlookup_table; - /* size of the dma_ops aperture as power of 2 */ extern unsigned amd_iommu_aperture_order; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index a60bb6c5a6da..d7dce20dca43 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -200,12 +200,6 @@ struct dev_table_entry *amd_iommu_dev_table; */ u16 *amd_iommu_alias_table; -/* - * The rlookup table is used to find the IOMMU which is responsible - * for a specific device. It is also indexed by the PCI device id. - */ -struct amd_iommu **amd_iommu_rlookup_table; - /* * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. @@ -214,7 +208,6 @@ unsigned long *amd_iommu_pd_alloc_bitmap; static u32 dev_table_size; /* size of the device table */ static u32 alias_table_size; /* size of the alias table */ -static u32 rlookup_table_size; /* size if the rlookup table */ enum iommu_init_state { IOMMU_START_STATE, @@ -1144,7 +1137,7 @@ void amd_iommu_apply_erratum_63(u16 devid) /* Writes the specific IOMMU for a device into the rlookup table */ static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) { - amd_iommu_rlookup_table[devid] = iommu; + iommu->pci_seg->rlookup_table[devid] = iommu; } /* @@ -1826,7 +1819,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, * Make sure IOMMU is not considered to translate itself. The IVRS * table tells us so, but this is a lie! 
*/ - amd_iommu_rlookup_table[iommu->devid] = NULL; + pci_seg->rlookup_table[iommu->devid] = NULL; return 0; } @@ -2786,10 +2779,6 @@ static void __init free_iommu_resources(void) kmem_cache_destroy(amd_iommu_irq_cache); amd_iommu_irq_cache = NULL; - free_pages((unsigned long)amd_iommu_rlookup_table, - get_order(rlookup_table_size)); - amd_iommu_rlookup_table = NULL; - free_pages((unsigned long)amd_iommu_alias_table, get_order(alias_table_size)); amd_iommu_alias_table = NULL; @@ -2928,7 +2917,6 @@ static int __init early_amd_iommu_init(void) dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); - rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); /* Device table - directly used by all IOMMUs */ ret = -ENOMEM; @@ -2947,13 +2935,6 @@ static int __init early_amd_iommu_init(void) if (amd_iommu_alias_table == NULL) goto out; - /* IOMMU rlookup table - find the IOMMU for a specific device */ - amd_iommu_rlookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - if (amd_iommu_rlookup_table == NULL) - goto out; - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, get_order(MAX_DOMAIN_ID/8)); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 5ee1af9a0a54..6e0cd9c4f57c 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -287,10 +287,9 @@ static void setup_aliases(struct amd_iommu *iommu, struct device *dev) clone_aliases(iommu, dev); } -static struct iommu_dev_data *find_dev_data(u16 devid) +static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; dev_data = search_dev_data(iommu, devid); @@ -388,7 +387,7 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) if (devid < 0) return devid; - dev_data = find_dev_data(devid); + dev_data = find_dev_data(iommu, devid); if (!dev_data) return -ENOMEM; @@ -403,9 +402,6 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) */ if ((iommu_default_passthrough() || !amd_iommu_force_isolation) && dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { - struct amd_iommu *iommu; - - iommu = amd_iommu_rlookup_table[dev_data->devid]; dev_data->iommu_v2 = iommu->is_iommu_v2; } @@ -416,13 +412,15 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev) { + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; int devid; devid = get_device_id(dev); if (devid < 0) return; - amd_iommu_rlookup_table[devid] = NULL; + + pci_seg->rlookup_table[devid] = NULL; memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); setup_aliases(iommu, dev); @@ -2749,8 +2747,9 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) struct irq_remap_table *table; struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; - if (WARN_ONCE(!amd_iommu_rlookup_table[devid], - "%s: no iommu for devid %x\n", __func__, devid)) + if (WARN_ONCE(!pci_seg->rlookup_table[devid], + "%s: no iommu for devid %x:%x\n", + __func__, pci_seg->id, devid)) return NULL; table = pci_seg->irq_lookup_table[devid]; @@ -2809,7 +2808,7 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, pci_seg->irq_lookup_table[alias] = table; set_dte_irq_entry(alias, table); - iommu_flush_dte(amd_iommu_rlookup_table[alias], alias); + 
iommu_flush_dte(pci_seg->rlookup_table[alias], alias); return 0; } -- cgit v1.2.3 From 54625ef1db1ce7e57f356d8f355095a4fbb71efb Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:11 +0530 Subject: iommu/amd: Update set_dte_entry and clear_dte_entry Start using per PCI segment data structures instead of global data structures. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-22-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 6e0cd9c4f57c..493cda5e0246 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1537,6 +1537,7 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, u64 pte_root = 0; u64 flags = 0; u32 old_domid; + struct dev_table_entry *dev_table = get_dev_table(iommu); if (domain->iop.mode != PAGE_MODE_NONE) pte_root = iommu_virt_to_phys(domain->iop.root); @@ -1545,7 +1546,7 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; - flags = amd_iommu_dev_table[devid].data[1]; + flags = dev_table[devid].data[1]; if (ats) flags |= DTE_FLAG_IOTLB; @@ -1584,9 +1585,9 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, flags &= ~DEV_DOMID_MASK; flags |= domain->id; - old_domid = amd_iommu_dev_table[devid].data[1] & DEV_DOMID_MASK; - amd_iommu_dev_table[devid].data[1] = flags; - amd_iommu_dev_table[devid].data[0] = pte_root; + old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK; + dev_table[devid].data[1] = flags; + dev_table[devid].data[0] = pte_root; /* * A kdump kernel might be replacing a domain ID that was copied from @@ -1598,11 +1599,13 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, } } -static void clear_dte_entry(u16 devid) +static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) { + struct dev_table_entry *dev_table = get_dev_table(iommu); + /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV; - amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK; + dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV; + dev_table[devid].data[1] &= DTE_FLAG_MASK; amd_iommu_apply_erratum_63(devid); } @@ -1646,7 +1649,7 @@ static void do_detach(struct iommu_dev_data *dev_data) /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); - clear_dte_entry(dev_data->devid); + clear_dte_entry(iommu, dev_data->devid); clone_aliases(iommu, dev_data->dev); /* Flush the DTE entry */ -- cgit v1.2.3 From ccbb091f3f2019c803480bb4ed63c84869da68dc Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:12 +0530 Subject: iommu/amd: Update iommu_ignore_device Start using per PCI segment device table instead of global device table. 
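The hunks above and below reach the device table through a get_dev_table() helper that lands earlier in this series and is not shown in this excerpt; as an assumption-flagged sketch, it amounts to:

/*
 * Sketch (assumed shape of the helper, which is introduced earlier in
 * the series): the device table is now owned by the PCI segment.
 */
static inline struct dev_table_entry *get_dev_table(struct amd_iommu *iommu)
{
	return iommu->pci_seg->dev_table;
}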
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-23-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 493cda5e0246..90755da7cff0 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -413,15 +413,15 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev) { struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + struct dev_table_entry *dev_table = get_dev_table(iommu); int devid; - devid = get_device_id(dev); + devid = (get_device_id(dev)) & 0xffff; if (devid < 0) return; - pci_seg->rlookup_table[devid] = NULL; - memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); + memset(&dev_table[devid], 0, sizeof(struct dev_table_entry)); setup_aliases(iommu, dev); } -- cgit v1.2.3 From 4cc053d7aea778a959a9300e8acff7854de1be7f Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:13 +0530 Subject: iommu/amd: Update dump_dte_entry Start using per PCI segment device table instead of global device table. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-24-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 90755da7cff0..790a3449e7b7 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -451,13 +451,13 @@ static void amd_iommu_uninit_device(struct device *dev) * ****************************************************************************/ -static void dump_dte_entry(u16 devid) +static void dump_dte_entry(struct amd_iommu *iommu, u16 devid) { int i; + struct dev_table_entry *dev_table = get_dev_table(iommu); for (i = 0; i < 4; ++i) - pr_err("DTE[%d]: %016llx\n", i, - amd_iommu_dev_table[devid].data[i]); + pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]); } static void dump_command(unsigned long phys_addr) @@ -618,7 +618,7 @@ retry: dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); - dump_dte_entry(devid); + dump_dte_entry(iommu, devid); break; case EVENT_TYPE_DEV_TAB_ERR: dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " -- cgit v1.2.3 From c7d311247b1b03906bc54d578db53f0bc2112674 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:14 +0530 Subject: iommu/amd: Update set_dte_irq_entry Start using per PCI segment device table instead of global device table. 
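For readability, the caller-side pattern this change enforces, condensed from the hunks below (a sketch; rlookup_amd_iommu() is the per-device IOMMU lookup used elsewhere in this series):

	struct amd_iommu *iommu = rlookup_amd_iommu(dev);

	if (iommu) {
		/* The write lands in iommu->pci_seg's device table. */
		set_dte_irq_entry(iommu, devid, table);
		iommu_flush_dte(iommu, devid);
	}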
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-25-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 790a3449e7b7..f1fab4168101 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2731,18 +2731,20 @@ EXPORT_SYMBOL(amd_iommu_device_info); static struct irq_chip amd_ir_chip; static DEFINE_SPINLOCK(iommu_table_lock); -static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) +static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, + struct irq_remap_table *table) { u64 dte; + struct dev_table_entry *dev_table = get_dev_table(iommu); - dte = amd_iommu_dev_table[devid].data[2]; + dte = dev_table[devid].data[2]; dte &= ~DTE_IRQ_PHYS_ADDR_MASK; dte |= iommu_virt_to_phys(table->table); dte |= DTE_IRQ_REMAP_INTCTL; dte |= DTE_INTTABLEN; dte |= DTE_IRQ_REMAP_ENABLE; - amd_iommu_dev_table[devid].data[2] = dte; + dev_table[devid].data[2] = dte; } static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) @@ -2793,7 +2795,7 @@ static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid, struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; pci_seg->irq_lookup_table[devid] = table; - set_dte_irq_entry(devid, table); + set_dte_irq_entry(iommu, devid, table); iommu_flush_dte(iommu, devid); } @@ -2809,8 +2811,7 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, pci_seg = iommu->pci_seg; pci_seg->irq_lookup_table[alias] = table; - set_dte_irq_entry(alias, table); - + set_dte_irq_entry(iommu, alias, table); iommu_flush_dte(pci_seg->rlookup_table[alias], alias); return 0; -- cgit v1.2.3 From 1ab5a15334529d3980a85abb2e06498c8e5ac8cc Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:15 +0530 Subject: iommu/amd: Update (un)init_device_table_dma() Include struct amd_iommu_pci_seg as a function parameter since we need to access per PCI segment device table. 
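The per-segment walk relies on the list iterator added earlier in the series; its definition is recalled here as an assumption (from amd_iommu_types.h, not this hunk), along with the resulting init-path shape:

	#define for_each_pci_segment(pci_seg) \
		list_for_each_entry((pci_seg), &amd_iommu_pci_seg_list, list)

	/* Device-table setup now runs once per discovered segment. */
	struct amd_iommu_pci_seg *pci_seg;

	for_each_pci_segment(pci_seg)
		init_device_table_dma(pci_seg);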
Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-26-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d7dce20dca43..d6a24032a948 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -238,7 +238,7 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); -static void init_device_table_dma(void); +static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); static bool amd_iommu_pre_enabled = true; @@ -2119,6 +2119,7 @@ static void print_iommu_info(void) static int __init amd_iommu_init_pci(void) { struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; int ret; for_each_iommu(iommu) { @@ -2149,7 +2150,8 @@ static int __init amd_iommu_init_pci(void) goto out; } - init_device_table_dma(); + for_each_pci_segment(pci_seg) + init_device_table_dma(pci_seg); for_each_iommu(iommu) iommu_flush_all_caches(iommu); @@ -2511,9 +2513,13 @@ static int __init init_memory_definitions(struct acpi_table_header *table) /* * Init the device table to not allow DMA access for devices */ -static void init_device_table_dma(void) +static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) { u32 devid; + struct dev_table_entry *dev_table = pci_seg->dev_table; + + if (dev_table == NULL) + return; for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { set_dev_entry_bit(devid, DEV_ENTRY_VALID); @@ -2521,13 +2527,17 @@ static void init_device_table_dma(void) } } -static void __init uninit_device_table_dma(void) +static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) { u32 devid; + struct dev_table_entry *dev_table = pci_seg->dev_table; + + if (dev_table == NULL) + return; for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - amd_iommu_dev_table[devid].data[0] = 0ULL; - amd_iommu_dev_table[devid].data[1] = 0ULL; + dev_table[devid].data[0] = 0ULL; + dev_table[devid].data[1] = 0ULL; } } @@ -3120,8 +3130,11 @@ static int __init state_next(void) free_iommu_resources(); } else { struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; + + for_each_pci_segment(pci_seg) + uninit_device_table_dma(pci_seg); - uninit_device_table_dma(); for_each_iommu(iommu) iommu_flush_all_caches(iommu); } -- cgit v1.2.3 From 56fb79514c52947107001ff9313870d76c4c8008 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:16 +0530 Subject: iommu/amd: Update set_dev_entry_bit() and get_dev_entry_bit() To include a pointer to per PCI segment device table. Also include struct amd_iommu as one of the function parameter to amd_iommu_apply_erratum_63() since it is needed when setting up DTE. 
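A worked example of the bit addressing these helpers hide (the constant 0x97 for DEV_ENTRY_SYSMGT1 is recalled from amd_iommu_types.h and should be treated as illustrative):

	__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_SYSMGT1);
	/*
	 * With DEV_ENTRY_SYSMGT1 == 0x97:
	 *   i    = (0x97 >> 6) & 0x03 = 2   -> 64-bit word data[2]
	 *   _bit =  0x97 & 0x3f       = 23  -> bit 23 of that word
	 * so the call reduces to: dev_table[devid].data[2] |= 1UL << 23;
	 */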
Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-27-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/init.c | 59 ++++++++++++++++++++++++++++--------------- drivers/iommu/amd/iommu.c | 2 +- 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 2947239700ce..64c954e168d7 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -13,7 +13,7 @@ extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d6a24032a948..bb8589750fb6 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -989,22 +989,37 @@ static void iommu_enable_gt(struct amd_iommu *iommu) } /* sets a specific bit in the device table entry. */ -static void set_dev_entry_bit(u16 devid, u8 bit) +static void __set_dev_entry_bit(struct dev_table_entry *dev_table, + u16 devid, u8 bit) { int i = (bit >> 6) & 0x03; int _bit = bit & 0x3f; - amd_iommu_dev_table[devid].data[i] |= (1UL << _bit); + dev_table[devid].data[i] |= (1UL << _bit); } -static int get_dev_entry_bit(u16 devid, u8 bit) +static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) +{ + struct dev_table_entry *dev_table = get_dev_table(iommu); + + return __set_dev_entry_bit(dev_table, devid, bit); +} + +static int __get_dev_entry_bit(struct dev_table_entry *dev_table, + u16 devid, u8 bit) { int i = (bit >> 6) & 0x03; int _bit = bit & 0x3f; - return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit; + return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit; } +static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) +{ + struct dev_table_entry *dev_table = get_dev_table(iommu); + + return __get_dev_entry_bit(dev_table, devid, bit); +} static bool __copy_device_table(struct amd_iommu *iommu) { @@ -1123,15 +1138,15 @@ static bool copy_device_table(void) return true; } -void amd_iommu_apply_erratum_63(u16 devid) +void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) { int sysmgt; - sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | - (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); + sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) | + (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1); if (sysmgt == 0x01) - set_dev_entry_bit(devid, DEV_ENTRY_IW); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); } /* Writes the specific IOMMU for a device into the rlookup table */ @@ -1148,21 +1163,21 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, u16 devid, u32 flags, u32 ext_flags) { if (flags & ACPI_DEVFLAG_INITPASS) - set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS); if (flags & ACPI_DEVFLAG_EXTINT) - set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS); if (flags & ACPI_DEVFLAG_NMI) - set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS); if (flags & 
ACPI_DEVFLAG_SYSMGT1) - set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1); if (flags & ACPI_DEVFLAG_SYSMGT2) - set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2); if (flags & ACPI_DEVFLAG_LINT0) - set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS); if (flags & ACPI_DEVFLAG_LINT1) - set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS); - amd_iommu_apply_erratum_63(devid); + amd_iommu_apply_erratum_63(iommu, devid); set_iommu_for_device(iommu, devid); } @@ -2522,8 +2537,8 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) return; for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - set_dev_entry_bit(devid, DEV_ENTRY_VALID); - set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); + __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); } } @@ -2543,13 +2558,17 @@ static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) static void init_device_table(void) { + struct amd_iommu_pci_seg *pci_seg; u32 devid; if (!amd_iommu_irq_remap) return; - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) - set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN); + for_each_pci_segment(pci_seg) { + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) + __set_dev_entry_bit(pci_seg->dev_table, + devid, DEV_ENTRY_IRQ_TBL_EN); + } } static void iommu_init_flags(struct amd_iommu *iommu) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f1fab4168101..7d431a0b80e5 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1607,7 +1607,7 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV; dev_table[devid].data[1] &= DTE_FLAG_MASK; - amd_iommu_apply_erratum_63(devid); + amd_iommu_apply_erratum_63(iommu, devid); } static void do_attach(struct iommu_dev_data *dev_data, -- cgit v1.2.3 From 401360ec98c61bf91a107f292bfbfc9245795eea Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:17 +0530 Subject: iommu/amd: Remove global amd_iommu_[dev_table/alias_table/last_bdf] Replace them with per PCI segment device table. Also remove dev_table_size, alias_table_size, amd_iommu_last_bdf variables. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-28-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 15 ------- drivers/iommu/amd/init.c | 89 +++++++------------------------------ drivers/iommu/amd/iommu.c | 18 +++++--- 3 files changed, 27 insertions(+), 95 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index d932c90329e4..65b02e2ae28f 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -834,24 +834,9 @@ struct unity_map_entry { * Data structures for device handling */ -/* - * Device table used by hardware. Read and write accesses by software are - * locked with the amd_iommu_pd_table lock. - */ -extern struct dev_table_entry *amd_iommu_dev_table; - -/* - * Alias table to find requestor ids to device ids. Not locked because only - * read on runtime. 
- */ -extern u16 *amd_iommu_alias_table; - /* size of the dma_ops aperture as power of 2 */ extern unsigned amd_iommu_aperture_order; -/* largest PCI device id we expect translation requests for */ -extern u16 amd_iommu_last_bdf; - /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index bb8589750fb6..a3a5f906679e 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -160,9 +160,6 @@ static bool amd_iommu_disabled __initdata; static bool amd_iommu_force_enable __initdata; static int amd_iommu_target_ivhd_type; -u16 amd_iommu_last_bdf; /* largest PCI device id we have - to handle */ - LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -185,30 +182,12 @@ bool amdr_ivrs_remap_support __read_mostly; bool amd_iommu_force_isolation __read_mostly; -/* - * Pointer to the device table which is shared by all AMD IOMMUs - * it is indexed by the PCI device id or the HT unit id and contains - * information about the domain the device belongs to as well as the - * page table root pointer. - */ -struct dev_table_entry *amd_iommu_dev_table; - -/* - * The alias table is a driver specific data structure which contains the - * mappings of the PCI device ids to the actual requestor ids on the IOMMU. - * More than one device can share the same requestor id. - */ -u16 *amd_iommu_alias_table; - /* * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. */ unsigned long *amd_iommu_pd_alloc_bitmap; -static u32 dev_table_size; /* size of the device table */ -static u32 alias_table_size; /* size of the alias table */ - enum iommu_init_state { IOMMU_START_STATE, IOMMU_IVRS_DETECTED, @@ -263,16 +242,10 @@ static void init_translation_status(struct amd_iommu *iommu) iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; } -static inline void update_last_devid(u16 devid) -{ - if (devid > amd_iommu_last_bdf) - amd_iommu_last_bdf = devid; -} - -static inline unsigned long tbl_size(int entry_size) +static inline unsigned long tbl_size(int entry_size, int last_bdf) { unsigned shift = PAGE_SHIFT + - get_order(((int)amd_iommu_last_bdf + 1) * entry_size); + get_order((last_bdf + 1) * entry_size); return 1UL << shift; } @@ -404,10 +377,11 @@ static void iommu_set_device_table(struct amd_iommu *iommu) { u64 entry; u32 dev_table_size = iommu->pci_seg->dev_table_size; + void *dev_table = (void *)get_dev_table(iommu); BUG_ON(iommu->mmio_base == NULL); - entry = iommu_virt_to_phys(amd_iommu_dev_table); + entry = iommu_virt_to_phys(dev_table); entry |= (dev_table_size >> 12) - 1; memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, &entry, sizeof(entry)); @@ -557,14 +531,12 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) switch (dev->type) { case IVHD_DEV_ALL: /* Use maximum BDF value for DEV_ALL */ - update_last_devid(0xffff); return 0xffff; case IVHD_DEV_SELECT: case IVHD_DEV_RANGE_END: case IVHD_DEV_ALIAS: case IVHD_DEV_EXT_SELECT: /* all the above subfield types refer to device ids */ - update_last_devid(dev->devid); if (dev->devid > last_devid) last_devid = dev->devid; break; @@ -706,7 +678,7 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) /* * let all alias entries point to itself */ - for (i = 0; i <= amd_iommu_last_bdf; ++i) + for (i = 0; i <= pci_seg->last_bdf; ++i) pci_seg->alias_table[i] = i; return 0; @@ -1072,7 
+1044,7 @@ static bool __copy_device_table(struct amd_iommu *iommu) return false; } - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid]; dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; @@ -1149,12 +1121,6 @@ void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); } -/* Writes the specific IOMMU for a device into the rlookup table */ -static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) -{ - iommu->pci_seg->rlookup_table[devid] = iommu; -} - /* * This function takes the device specific flags read from the ACPI * table and sets up the device table entry with that information @@ -1179,7 +1145,7 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, amd_iommu_apply_erratum_63(iommu, devid); - set_iommu_for_device(iommu, devid); + amd_iommu_set_rlookup_table(iommu, devid); } int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) @@ -1339,7 +1305,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); - for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i) + for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); break; case IVHD_DEV_SELECT: @@ -1584,9 +1550,9 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, pci_seg->last_bdf = last_bdf; DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); - pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); - pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); - pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); + pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); + pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); + pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); pci_seg->id = id; init_llist_head(&pci_seg->dev_data_list); @@ -2469,7 +2435,7 @@ static int __init init_unity_map_range(struct ivmd_header *m, case ACPI_IVMD_TYPE_ALL: s = "IVMD_TYPE_ALL\t\t"; e->devid_start = 0; - e->devid_end = amd_iommu_last_bdf; + e->devid_end = pci_seg->last_bdf; break; case ACPI_IVMD_TYPE_RANGE: s = "IVMD_TYPE_RANGE\t\t"; @@ -2536,7 +2502,7 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) if (dev_table == NULL) return; - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); } @@ -2550,7 +2516,7 @@ static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) if (dev_table == NULL) return; - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { dev_table[devid].data[0] = 0ULL; dev_table[devid].data[1] = 0ULL; } @@ -2565,7 +2531,7 @@ static void init_device_table(void) return; for_each_pci_segment(pci_seg) { - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) __set_dev_entry_bit(pci_seg->dev_table, devid, DEV_ENTRY_IRQ_TBL_EN); } @@ -2808,14 +2774,6 @@ static void __init free_iommu_resources(void) kmem_cache_destroy(amd_iommu_irq_cache); amd_iommu_irq_cache = NULL; - free_pages((unsigned long)amd_iommu_alias_table, - 
get_order(alias_table_size)); - amd_iommu_alias_table = NULL; - - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); - amd_iommu_dev_table = NULL; - free_iommu_all(); free_pci_segments(); } @@ -2944,25 +2902,8 @@ static int __init early_amd_iommu_init(void) amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); - dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); - alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); - /* Device table - directly used by all IOMMUs */ ret = -ENOMEM; - amd_iommu_dev_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO | GFP_DMA32, - get_order(dev_table_size)); - if (amd_iommu_dev_table == NULL) - goto out; - - /* - * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the - * IOMMU see for that device - */ - amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, - get_order(alias_table_size)); - if (amd_iommu_alias_table == NULL) - goto out; amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 7d431a0b80e5..94ebffe15960 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -230,6 +230,7 @@ static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) { struct amd_iommu *iommu; + struct dev_table_entry *dev_table; u16 devid = pci_dev_id(pdev); if (devid == alias) @@ -240,9 +241,10 @@ static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) return 0; amd_iommu_set_rlookup_table(iommu, alias); - memcpy(amd_iommu_dev_table[alias].data, - amd_iommu_dev_table[devid].data, - sizeof(amd_iommu_dev_table[alias].data)); + dev_table = get_dev_table(iommu); + memcpy(dev_table[alias].data, + dev_table[devid].data, + sizeof(dev_table[alias].data)); return 0; } @@ -356,6 +358,8 @@ static bool pci_iommuv2_capable(struct pci_dev *pdev) */ static bool check_device(struct device *dev) { + struct amd_iommu_pci_seg *pci_seg; + struct amd_iommu *iommu; int devid; if (!dev) @@ -365,11 +369,13 @@ static bool check_device(struct device *dev) if (devid < 0) return false; - /* Out of our scope? */ - if (devid > amd_iommu_last_bdf) + iommu = rlookup_amd_iommu(dev); + if (!iommu) return false; - if (rlookup_amd_iommu(dev) == NULL) + /* Out of our scope? */ + pci_seg = iommu->pci_seg; + if ((devid & 0xffff) > pci_seg->last_bdf) return false; return true; -- cgit v1.2.3 From a3cf6ab35751ea77a2bd47636dca761e835b282d Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:08:18 +0530 Subject: iommu/amd: Flush upto last_bdf only Fix amd_iommu_flush_dte_all() and amd_iommu_flush_tlb_all() to flush upto last_bdf only. 
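Condensed from the hunks below as a reading aid, the tightened bound (illustrative saving: a hypothetical segment whose highest enumerated BDF is 0x00ff now queues 0x100 INVALIDATE_DEVTAB_ENTRY commands per flush instead of 0x10000):

	u16 last_bdf = iommu->pci_seg->last_bdf;
	u32 devid;

	for (devid = 0; devid <= last_bdf; ++devid)
		iommu_flush_dte(iommu, devid);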
Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-29-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 94ebffe15960..6914911d4fb6 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1191,8 +1191,9 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) { u32 devid; + u16 last_bdf = iommu->pci_seg->last_bdf; - for (devid = 0; devid <= 0xffff; ++devid) + for (devid = 0; devid <= last_bdf; ++devid) iommu_flush_dte(iommu, devid); iommu_completion_wait(iommu); @@ -1205,8 +1206,9 @@ static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) { u32 dom_id; + u16 last_bdf = iommu->pci_seg->last_bdf; - for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { + for (dom_id = 0; dom_id <= last_bdf; ++dom_id) { struct iommu_cmd cmd; build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, dom_id, 1); @@ -1249,8 +1251,9 @@ static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) { u32 devid; + u16 last_bdf = iommu->pci_seg->last_bdf; - for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++) + for (devid = 0; devid <= last_bdf; devid++) iommu_flush_irt(iommu, devid); iommu_completion_wait(iommu); -- cgit v1.2.3 From bf87972ca664863dcfd38ab581589c1d87677cb6 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:19 +0530 Subject: iommu/amd: Introduce get_device_sbdf_id() helper function Current get_device_id() only provides the 16-bit PCI device ID (i.e. BDF). With multiple PCI segment support, we need to extend the helper function to include the PCI segment ID. So, introduce a new helper function get_device_sbdf_id() to replace the current get_pci_device_id().
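A worked packing example using the macros this patch builds on (the address 0001:00:14.5 is arbitrary; PCI_DEVID() and PCI_DEVFN() are the stock helpers from <linux/pci.h>):

	u16 seg   = 0x0001;
	u16 devid = PCI_DEVID(0x00, PCI_DEVFN(0x14, 0x5));	/* 0x00a5 */
	int sbdf  = PCI_SEG_DEVID_TO_SBDF(seg, devid);		/* 0x000100a5 */

	WARN_ON(PCI_SBDF_TO_SEGID(sbdf) != seg);
	WARN_ON(PCI_SBDF_TO_DEVID(sbdf) != devid);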
Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-30-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 7 +++++ drivers/iommu/amd/amd_iommu_types.h | 2 ++ drivers/iommu/amd/iommu.c | 58 ++++++++++++++++++------------------- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 64c954e168d7..e73bd48fc716 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -115,6 +115,13 @@ void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) amd_iommu_domain_set_pt_root(domain, 0); } +static inline int get_pci_sbdf_id(struct pci_dev *pdev) +{ + int seg = pci_domain_nr(pdev->bus); + u16 devid = pci_dev_id(pdev); + + return PCI_SEG_DEVID_TO_SBDF(seg, devid); +} extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool amd_iommu_is_attach_deferred(struct device *dev); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 65b02e2ae28f..ea238e8e6c99 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -456,6 +456,8 @@ extern struct kmem_cache *amd_iommu_irq_cache; #define PCI_SBDF_TO_SEGID(sbdf) (((sbdf) >> 16) & 0xffff) #define PCI_SBDF_TO_DEVID(sbdf) ((sbdf) & 0xffff) +#define PCI_SEG_DEVID_TO_SBDF(seg, devid) ((((u32)(seg) & 0xffff) << 16) | \ + ((devid) & 0xffff)) /* Make iterating over all pci segment easier */ #define for_each_pci_segment(pci_seg) \ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 6914911d4fb6..0751dda04a10 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -92,13 +92,6 @@ static void detach_device(struct device *dev); * ****************************************************************************/ -static inline u16 get_pci_device_id(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - return pci_dev_id(pdev); -} - static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { @@ -119,16 +112,16 @@ static inline int get_acpihid_device_id(struct device *dev, return -EINVAL; } -static inline int get_device_id(struct device *dev) +static inline int get_device_sbdf_id(struct device *dev) { - int devid; + int sbdf; if (dev_is_pci(dev)) - devid = get_pci_device_id(dev); + sbdf = get_pci_sbdf_id(to_pci_dev(dev)); else - devid = get_acpihid_device_id(dev, NULL); + sbdf = get_acpihid_device_id(dev, NULL); - return devid; + return sbdf; } struct dev_table_entry *get_dev_table(struct amd_iommu *iommu) @@ -182,9 +175,11 @@ static struct amd_iommu *__rlookup_amd_iommu(u16 seg, u16 devid) static struct amd_iommu *rlookup_amd_iommu(struct device *dev) { u16 seg = get_device_segment(dev); - u16 devid = get_device_id(dev); + int devid = get_device_sbdf_id(dev); - return __rlookup_amd_iommu(seg, devid); + if (devid < 0) + return NULL; + return __rlookup_amd_iommu(seg, PCI_SBDF_TO_DEVID(devid)); } static struct protection_domain *to_pdomain(struct iommu_domain *dom) @@ -360,14 +355,15 @@ static bool check_device(struct device *dev) { struct amd_iommu_pci_seg *pci_seg; struct amd_iommu *iommu; - int devid; + int devid, sbdf; if (!dev) return false; - devid = get_device_id(dev); - if (devid < 0) + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) return false; + devid = PCI_SBDF_TO_DEVID(sbdf); iommu = rlookup_amd_iommu(dev); if (!iommu) @@ -375,7 +371,7 @@ static bool 
check_device(struct device *dev) /* Out of our scope? */ pci_seg = iommu->pci_seg; - if ((devid & 0xffff) > pci_seg->last_bdf) + if (devid > pci_seg->last_bdf) return false; return true; @@ -384,15 +380,16 @@ static bool check_device(struct device *dev) static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) { struct iommu_dev_data *dev_data; - int devid; + int devid, sbdf; if (dev_iommu_priv_get(dev)) return 0; - devid = get_device_id(dev); - if (devid < 0) - return devid; + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) + return sbdf; + devid = PCI_SBDF_TO_DEVID(sbdf); dev_data = find_dev_data(iommu, devid); if (!dev_data) return -ENOMEM; @@ -420,12 +417,13 @@ static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev) { struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; struct dev_table_entry *dev_table = get_dev_table(iommu); - int devid; + int devid, sbdf; - devid = (get_device_id(dev)) & 0xffff; - if (devid < 0) + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) return; + devid = PCI_SBDF_TO_DEVID(sbdf); pci_seg->rlookup_table[devid] = NULL; memset(&dev_table[devid], 0, sizeof(struct dev_table_entry)); @@ -2258,11 +2256,13 @@ static void amd_iommu_get_resv_regions(struct device *dev, struct unity_map_entry *entry; struct amd_iommu *iommu; struct amd_iommu_pci_seg *pci_seg; - int devid; + int devid, sbdf; - devid = get_device_id(dev); - if (devid < 0) + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) return; + + devid = PCI_SBDF_TO_DEVID(sbdf); iommu = rlookup_amd_iommu(dev); if (!iommu) return; @@ -3156,7 +3156,7 @@ static int get_devid(struct irq_alloc_info *info) return get_hpet_devid(info->devid); case X86_IRQ_ALLOC_TYPE_PCI_MSI: case X86_IRQ_ALLOC_TYPE_PCI_MSIX: - return get_device_id(msi_desc_to_dev(info->desc)); + return get_device_sbdf_id(msi_desc_to_dev(info->desc)); default: WARN_ON_ONCE(1); return -1; -- cgit v1.2.3 From a45627baa7bc56d633ade47d5503bd2be53f1baa Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:20 +0530 Subject: iommu/amd: Include PCI segment ID when initialize IOMMU Extend current device ID variables to 32-bit to include the 16-bit segment ID when parsing device information from IVRS table to initialize each IOMMU. 
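An illustrative decode of the widened ID (the value 0x000100a0 is hypothetical), matching the %04x:%02x:%02x.%x format the DUMP_printk conversions below switch to:

	u32 devid = 0x000100a0;

	pr_info("%04x:%02x:%02x.%x\n",
		PCI_SBDF_TO_SEGID(devid),	/* 0001 */
		PCI_BUS_NUM(devid),		/* 00 */
		PCI_SLOT(devid),		/* 14 */
		PCI_FUNC(devid));		/* 0 */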
Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-31-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/amd_iommu_types.h | 6 ++-- drivers/iommu/amd/init.c | 56 +++++++++++++++++++------------------ drivers/iommu/amd/quirks.c | 4 +-- 4 files changed, 35 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index e73bd48fc716..9b7092182ca7 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -125,7 +125,7 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev) extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool amd_iommu_is_attach_deferred(struct device *dev); -extern int __init add_special_device(u8 type, u8 id, u16 *devid, +extern int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line); #ifdef CONFIG_DMI diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index ea238e8e6c99..1ca54803702a 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -744,8 +744,8 @@ struct acpihid_map_entry { struct list_head list; u8 uid[ACPIHID_UID_LEN]; u8 hid[ACPIHID_HID_LEN]; - u16 devid; - u16 root_devid; + u32 devid; + u32 root_devid; bool cmd_line; struct iommu_group *group; }; @@ -753,7 +753,7 @@ struct acpihid_map_entry { struct devid_map { struct list_head list; u8 id; - u16 devid; + u32 devid; bool cmd_line; }; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index a3a5f906679e..9011d9be6954 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1148,7 +1148,7 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, amd_iommu_set_rlookup_table(iommu, devid); } -int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) +int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) { struct devid_map *entry; struct list_head *list; @@ -1185,7 +1185,7 @@ int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) return 0; } -static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid, +static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, bool cmd_line) { struct acpihid_map_entry *entry; @@ -1264,7 +1264,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, { u8 *p = (u8 *)h; u8 *end = p, flags = 0; - u16 devid = 0, devid_start = 0, devid_to = 0; + u16 devid = 0, devid_start = 0, devid_to = 0, seg_id; u32 dev_i, ext_flags = 0; bool alias = false; struct ivhd_entry *e; @@ -1300,6 +1300,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, while (p < end) { e = (struct ivhd_entry *)p; + seg_id = pci_seg->id; + switch (e->type) { case IVHD_DEV_ALL: @@ -1310,9 +1312,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_SELECT: - DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " + DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x " "flags: %02x\n", - PCI_BUS_NUM(e->devid), + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags); @@ -1323,8 +1325,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_SELECT_RANGE_START: DUMP_printk(" DEV_SELECT_RANGE_START\t " - "devid: %02x:%02x.%x flags: %02x\n", - PCI_BUS_NUM(e->devid), + "devid: %04x:%02x:%02x.%x flags: %02x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), 
PCI_FUNC(e->devid), e->flags); @@ -1336,9 +1338,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS: - DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " + DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x " "flags: %02x devid_to: %02x:%02x.%x\n", - PCI_BUS_NUM(e->devid), + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, @@ -1355,13 +1357,13 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_ALIAS_RANGE: DUMP_printk(" DEV_ALIAS_RANGE\t\t " - "devid: %02x:%02x.%x flags: %02x " - "devid_to: %02x:%02x.%x\n", - PCI_BUS_NUM(e->devid), + "devid: %04x:%02x:%02x.%x flags: %02x " + "devid_to: %04x:%02x:%02x.%x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, - PCI_BUS_NUM(e->ext >> 8), + seg_id, PCI_BUS_NUM(e->ext >> 8), PCI_SLOT(e->ext >> 8), PCI_FUNC(e->ext >> 8)); @@ -1373,9 +1375,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT: - DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " + DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x " "flags: %02x ext: %08x\n", - PCI_BUS_NUM(e->devid), + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, e->ext); @@ -1387,8 +1389,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_EXT_SELECT_RANGE: DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " - "%02x:%02x.%x flags: %02x ext: %08x\n", - PCI_BUS_NUM(e->devid), + "%04x:%02x:%02x.%x flags: %02x ext: %08x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, e->ext); @@ -1400,8 +1402,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_RANGE_END: - DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", - PCI_BUS_NUM(e->devid), + DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); @@ -1419,11 +1421,11 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_SPECIAL: { u8 handle, type; const char *var; - u16 devid; + u32 devid; int ret; handle = e->ext & 0xff; - devid = (e->ext >> 8) & 0xffff; + devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8)); type = (e->ext >> 24) & 0xff; if (type == IVHD_SPECIAL_IOAPIC) @@ -1433,9 +1435,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, else var = "UNKNOWN"; - DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n", + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n", var, (int)handle, - PCI_BUS_NUM(devid), + seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); @@ -1453,7 +1455,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } case IVHD_DEV_ACPI_HID: { - u16 devid; + u32 devid; u8 hid[ACPIHID_HID_LEN]; u8 uid[ACPIHID_UID_LEN]; int ret; @@ -1496,9 +1498,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } - devid = e->devid; - DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n", - hid, uid, + devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); + DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n", + hid, uid, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); diff --git a/drivers/iommu/amd/quirks.c b/drivers/iommu/amd/quirks.c index 5120ce4fdce3..79dbb8f33b47 100644 --- a/drivers/iommu/amd/quirks.c +++ b/drivers/iommu/amd/quirks.c @@ -15,7 +15,7 @@ struct ivrs_quirk_entry { u8 id; - u16 devid; + u32 
devid; }; enum { @@ -49,7 +49,7 @@ static int __init ivrs_ioapic_quirk_cb(const struct dmi_system_id *d) const struct ivrs_quirk_entry *i; for (i = d->driver_data; i->id != 0 && i->devid != 0; i++) - add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u16 *)&i->devid, 0); + add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u32 *)&i->devid, 0); return 0; } -- cgit v1.2.3 From e5670e1822cf2858e4c9133fc4e834a1d4a8a4d2 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:21 +0530 Subject: iommu/amd: Specify PCI segment ID when getting pci device Upcoming AMD systems can have multiple PCI segments. Hence pass PCI segment ID to pci_get_domain_bus_and_slot() instead of '0'. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-32-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 6 ++++-- drivers/iommu/amd/iommu.c | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 9011d9be6954..0336740b41b0 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1962,7 +1962,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) int cap_ptr = iommu->cap_ptr; int ret; - iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid), + iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, + PCI_BUS_NUM(iommu->devid), iommu->devid & 0xff); if (!iommu->dev) return -ENODEV; @@ -2025,7 +2026,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) int i, j; iommu->root_pdev = - pci_get_domain_bus_and_slot(0, iommu->dev->bus->number, + pci_get_domain_bus_and_slot(iommu->pci_seg->id, + iommu->dev->bus->number, PCI_DEVFN(0, 0)); /* diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 0751dda04a10..2dbe17e49ffc 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -473,7 +473,7 @@ static void dump_command(unsigned long phys_addr) pr_err("CMD[%d]: %08x\n", i, cmd->data[i]); } -static void amd_iommu_report_rmp_hw_error(volatile u32 *event) +static void amd_iommu_report_rmp_hw_error(struct amd_iommu *iommu, volatile u32 *event) { struct iommu_dev_data *dev_data = NULL; int devid, vmg_tag, flags; @@ -485,7 +485,7 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event) flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; spa = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8); - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); @@ -505,7 +505,7 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event) pci_dev_put(pdev); } -static void amd_iommu_report_rmp_fault(volatile u32 *event) +static void amd_iommu_report_rmp_fault(struct amd_iommu *iommu, volatile u32 *event) { struct iommu_dev_data *dev_data = NULL; int devid, flags_rmp, vmg_tag, flags; @@ -518,7 +518,7 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event) flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; gpa = ((u64)event[3] << 32) | event[2]; - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); @@ -544,13 +544,14 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event) #define IS_WRITE_REQUEST(flags) \ 
((flags) & EVENT_FLAG_RW) -static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, +static void amd_iommu_report_page_fault(struct amd_iommu *iommu, + u16 devid, u16 domain_id, u64 address, int flags) { struct iommu_dev_data *dev_data = NULL; struct pci_dev *pdev; - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); @@ -613,7 +614,7 @@ retry: } if (type == EVENT_TYPE_IO_FAULT) { - amd_iommu_report_page_fault(devid, pasid, address, flags); + amd_iommu_report_page_fault(iommu, devid, pasid, address, flags); return; } @@ -654,10 +655,10 @@ retry: pasid, address, flags); break; case EVENT_TYPE_RMP_FAULT: - amd_iommu_report_rmp_fault(event); + amd_iommu_report_rmp_fault(iommu, event); break; case EVENT_TYPE_RMP_HW_ERR: - amd_iommu_report_rmp_hw_error(event); + amd_iommu_report_rmp_hw_error(iommu, event); break; case EVENT_TYPE_INV_PPR_REQ: pasid = PPR_PASID(*((u64 *)__evt)); -- cgit v1.2.3 From bbe3a106580c21bc883fb0c9fa3da01534392fe8 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:22 +0530 Subject: iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands By default, PCI segment is zero and can be omitted. To support system with non-zero PCI segment ID, modify the parsing functions to allow PCI segment ID. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-33-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- Documentation/admin-guide/kernel-parameters.txt | 34 ++++++++++++++----- drivers/iommu/amd/init.c | 44 +++++++++++++++---------- 2 files changed, 52 insertions(+), 26 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2522b11e593f..d45e58328ce6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2266,23 +2266,39 @@ ivrs_ioapic [HW,X86-64] Provide an override to the IOAPIC-ID<->DEVICE-ID - mapping provided in the IVRS ACPI table. For - example, to map IOAPIC-ID decimal 10 to - PCI device 00:14.0 write the parameter as: + mapping provided in the IVRS ACPI table. + By default, PCI segment is 0, and can be omitted. + For example: + * To map IOAPIC-ID decimal 10 to PCI device 00:14.0 + write the parameter as: ivrs_ioapic[10]=00:14.0 + * To map IOAPIC-ID decimal 10 to PCI segment 0x1 and + PCI device 00:14.0 write the parameter as: + ivrs_ioapic[10]=0001:00:14.0 ivrs_hpet [HW,X86-64] Provide an override to the HPET-ID<->DEVICE-ID - mapping provided in the IVRS ACPI table. For - example, to map HPET-ID decimal 0 to - PCI device 00:14.0 write the parameter as: + mapping provided in the IVRS ACPI table. + By default, PCI segment is 0, and can be omitted. + For example: + * To map HPET-ID decimal 0 to PCI device 00:14.0 + write the parameter as: ivrs_hpet[0]=00:14.0 + * To map HPET-ID decimal 10 to PCI segment 0x1 and + PCI device 00:14.0 write the parameter as: + ivrs_ioapic[10]=0001:00:14.0 ivrs_acpihid [HW,X86-64] Provide an override to the ACPI-HID:UID<->DEVICE-ID - mapping provided in the IVRS ACPI table. For - example, to map UART-HID:UID AMD0020:0 to - PCI device 00:14.5 write the parameter as: + mapping provided in the IVRS ACPI table. 
+ + For example, to map UART-HID:UID AMD0020:0 to + PCI segment 0x1 and PCI device ID 00:14.5, + write the parameter as: + ivrs_acpihid[0001:00:14.5]=AMD0020:0 + + By default, PCI segment is 0, and can be omitted. + For example, PCI device 00:14.5 write the parameter as: ivrs_acpihid[00:14.5]=AMD0020:0 js= [HW,JOY] Analog joystick diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 0336740b41b0..19da19eaba99 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -84,6 +84,10 @@ #define ACPI_DEVFLAG_ATSDIS 0x10000000 #define LOOP_TIMEOUT 2000000 + +#define IVRS_GET_SBDF_ID(seg, bus, dev, fd) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ + | ((dev & 0x1f) << 3) | (fn & 0x7)) + /* * ACPI table definitions * @@ -3291,15 +3295,17 @@ static int __init parse_amd_iommu_options(char *str) static int __init parse_ivrs_ioapic(char *str) { - unsigned int bus, dev, fn; + u32 seg = 0, bus, dev, fn; int ret, id, i; - u16 devid; + u32 devid; ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - pr_err("Invalid command line: ivrs_ioapic%s\n", str); - return 1; + ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); + if (ret != 5) { + pr_err("Invalid command line: ivrs_ioapic%s\n", str); + return 1; + } } if (early_ioapic_map_size == EARLY_MAP_SIZE) { @@ -3308,7 +3314,7 @@ static int __init parse_ivrs_ioapic(char *str) return 1; } - devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); cmdline_maps = true; i = early_ioapic_map_size++; @@ -3321,15 +3327,17 @@ static int __init parse_ivrs_ioapic(char *str) static int __init parse_ivrs_hpet(char *str) { - unsigned int bus, dev, fn; + u32 seg = 0, bus, dev, fn; int ret, id, i; - u16 devid; + u32 devid; ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - pr_err("Invalid command line: ivrs_hpet%s\n", str); - return 1; + ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); + if (ret != 5) { + pr_err("Invalid command line: ivrs_hpet%s\n", str); + return 1; + } } if (early_hpet_map_size == EARLY_MAP_SIZE) { @@ -3338,7 +3346,7 @@ static int __init parse_ivrs_hpet(char *str) return 1; } - devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); cmdline_maps = true; i = early_hpet_map_size++; @@ -3351,15 +3359,18 @@ static int __init parse_ivrs_hpet(char *str) static int __init parse_ivrs_acpihid(char *str) { - u32 bus, dev, fn; + u32 seg = 0, bus, dev, fn; char *hid, *uid, *p; char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; int ret, i; ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); if (ret != 4) { - pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); - return 1; + ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid); + if (ret != 5) { + pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); + return 1; + } } p = acpiid; @@ -3374,8 +3385,7 @@ static int __init parse_ivrs_acpihid(char *str) i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); - early_acpihid_map[i].devid = - ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); early_acpihid_map[i].cmd_line = true; return 1; -- cgit v1.2.3 From b36a5b0f1cedbb2de2f527883d4eda8a5025591d Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:08:23 +0530 Subject: iommu/amd: Print PCI segment ID in 
error log messages Print pci segment ID along with bdf. Useful for debugging. Co-developed-by: Suravee Suthikulpaint Signed-off-by: Suravee Suthikulpaint Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-34-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 10 +++++----- drivers/iommu/amd/iommu.c | 36 ++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 19da19eaba99..a3c4fdd40f04 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1855,11 +1855,11 @@ static int __init init_iommu_all(struct acpi_table_header *table) h = (struct ivhd_header *)p; if (*p == amd_iommu_target_ivhd_type) { - DUMP_printk("device: %02x:%02x.%01x cap: %04x " - "seg: %d flags: %01x info %04x\n", - PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid), - PCI_FUNC(h->devid), h->cap_ptr, - h->pci_seg, h->flags, h->info); + DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x " + "flags: %01x info %04x\n", + h->pci_seg, PCI_BUS_NUM(h->devid), + PCI_SLOT(h->devid), PCI_FUNC(h->devid), + h->cap_ptr, h->flags, h->info); DUMP_printk(" mmio-addr: %016llx\n", h->mmio_phys); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 2dbe17e49ffc..6a1db8f9f453 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -496,8 +496,8 @@ static void amd_iommu_report_rmp_hw_error(struct amd_iommu *iommu, volatile u32 vmg_tag, spa, flags); } } else { - pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), vmg_tag, spa, flags); } @@ -529,8 +529,8 @@ static void amd_iommu_report_rmp_fault(struct amd_iommu *iommu, volatile u32 *ev vmg_tag, gpa, flags_rmp, flags); } } else { - pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), vmg_tag, gpa, flags_rmp, flags); } @@ -576,8 +576,8 @@ static void amd_iommu_report_page_fault(struct amd_iommu *iommu, domain_id, address, flags); } } else { - pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%04x:%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), domain_id, address, flags); } @@ -620,20 +620,20 @@ retry: switch (type) { case EVENT_TYPE_ILL_DEV: - dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); dump_dte_entry(iommu, devid); break; case 
EVENT_TYPE_DEV_TAB_ERR: - dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x " "address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: - dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; case EVENT_TYPE_ILL_CMD: @@ -645,13 +645,13 @@ retry: address, flags); break; case EVENT_TYPE_IOTLB_INV_TO: - dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%llx]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%04x:%02x:%02x.%x address=0x%llx]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address); break; case EVENT_TYPE_INV_DEV_REQ: - dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; case EVENT_TYPE_RMP_FAULT: @@ -663,8 +663,8 @@ retry: case EVENT_TYPE_INV_PPR_REQ: pasid = PPR_PASID(*((u64 *)__evt)); tag = event[1] & 0x03FF; - dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags, tag); break; default: -- cgit v1.2.3 From 196dff712ea2c86f4957ad1a99bc7d65d4b01c5d Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:08:24 +0530 Subject: iommu/amd: Update device_state structure to include PCI seg ID Rename struct device_state.devid variable to struct device_state.sbdf and extend it to 32-bit to include the 16-bit PCI segment ID via the helper function get_pci_sbdf_id(). 
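The point of the widening, as a sketch (segment numbers hypothetical): two devices with the same bus/devfn on different segments must not alias in state_list, and a 16-bit key allowed exactly that collision:

	u32 a = PCI_SEG_DEVID_TO_SBDF(0x0000, 0x00a5);	/* 0000:00:14.5 */
	u32 b = PCI_SEG_DEVID_TO_SBDF(0x0001, 0x00a5);	/* 0001:00:14.5 */

	WARN_ON(a == b);	/* distinct now; a bare devid compared equal */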
Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-35-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu_v2.c | 58 ++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index afb3efd565b7..40484af2ffc2 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -51,7 +51,7 @@ struct pasid_state { struct device_state { struct list_head list; - u16 devid; + u32 sbdf; atomic_t count; struct pci_dev *pdev; struct pasid_state **states; @@ -83,35 +83,25 @@ static struct workqueue_struct *iommu_wq; static void free_pasid_states(struct device_state *dev_state); -static u16 device_id(struct pci_dev *pdev) -{ - u16 devid; - - devid = pdev->bus->number; - devid = (devid << 8) | pdev->devfn; - - return devid; -} - -static struct device_state *__get_device_state(u16 devid) +static struct device_state *__get_device_state(u32 sbdf) { struct device_state *dev_state; list_for_each_entry(dev_state, &state_list, list) { - if (dev_state->devid == devid) + if (dev_state->sbdf == sbdf) return dev_state; } return NULL; } -static struct device_state *get_device_state(u16 devid) +static struct device_state *get_device_state(u32 sbdf) { struct device_state *dev_state; unsigned long flags; spin_lock_irqsave(&state_lock, flags); - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state != NULL) atomic_inc(&dev_state->count); spin_unlock_irqrestore(&state_lock, flags); @@ -609,7 +599,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, struct pasid_state *pasid_state; struct device_state *dev_state; struct mm_struct *mm; - u16 devid; + u32 sbdf; int ret; might_sleep(); @@ -617,8 +607,8 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, if (!amd_iommu_v2_supported()) return -ENODEV; - devid = device_id(pdev); - dev_state = get_device_state(devid); + sbdf = get_pci_sbdf_id(pdev); + dev_state = get_device_state(sbdf); if (dev_state == NULL) return -EINVAL; @@ -692,15 +682,15 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid) { struct pasid_state *pasid_state; struct device_state *dev_state; - u16 devid; + u32 sbdf; might_sleep(); if (!amd_iommu_v2_supported()) return; - devid = device_id(pdev); - dev_state = get_device_state(devid); + sbdf = get_pci_sbdf_id(pdev); + dev_state = get_device_state(sbdf); if (dev_state == NULL) return; @@ -742,7 +732,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) struct iommu_group *group; unsigned long flags; int ret, tmp; - u16 devid; + u32 sbdf; might_sleep(); @@ -759,7 +749,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) if (pasids <= 0 || pasids > (PASID_MASK + 1)) return -EINVAL; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); if (dev_state == NULL) @@ -768,7 +758,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) spin_lock_init(&dev_state->lock); init_waitqueue_head(&dev_state->wq); dev_state->pdev = pdev; - dev_state->devid = devid; + dev_state->sbdf = sbdf; tmp = pasids; for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) @@ -806,7 +796,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) spin_lock_irqsave(&state_lock, flags); - if (__get_device_state(devid) != NULL) { + if (__get_device_state(sbdf) != NULL) { 
spin_unlock_irqrestore(&state_lock, flags); ret = -EBUSY; goto out_free_domain; @@ -838,16 +828,16 @@ void amd_iommu_free_device(struct pci_dev *pdev) { struct device_state *dev_state; unsigned long flags; - u16 devid; + u32 sbdf; if (!amd_iommu_v2_supported()) return; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); spin_lock_irqsave(&state_lock, flags); - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state == NULL) { spin_unlock_irqrestore(&state_lock, flags); return; } @@ -867,18 +857,18 @@ int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, { struct device_state *dev_state; unsigned long flags; - u16 devid; + u32 sbdf; int ret; if (!amd_iommu_v2_supported()) return -ENODEV; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); spin_lock_irqsave(&state_lock, flags); ret = -EINVAL; - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state == NULL) goto out_unlock; @@ -898,18 +888,18 @@ int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, { struct device_state *dev_state; unsigned long flags; - u16 devid; + u32 sbdf; int ret; if (!amd_iommu_v2_supported()) return -ENODEV; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); spin_lock_irqsave(&state_lock, flags); ret = -EINVAL; - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state == NULL) goto out_unlock; -- cgit v1.2.3 From 214a05c1c2314b92c9a2e5c594b8646930d788e4 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 6 Jul 2022 17:08:25 +0530 Subject: iommu/amd: Update amd_iommu_fault structure to include PCI seg ID Rename 'device_id' to 'sbdf' and extend it to 32-bit so that the PCI segment ID can be passed to ppr_notifier(). Also pass the PCI segment ID to pci_get_domain_bus_and_slot() instead of the default value.
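For illustration, the decode side and the cross-segment device lookup it enables can be sketched as below; sbdf_to_pdev() is a hypothetical wrapper and the macros are assumed to mirror the encode side:

#include <linux/pci.h>

#define PCI_SBDF_TO_SEGID(sbdf)	(((sbdf) >> 16) & 0xffff)
#define PCI_SBDF_TO_DEVID(sbdf)	((sbdf) & 0xffff)

static struct pci_dev *sbdf_to_pdev(u32 sbdf)
{
	u16 seg_id = PCI_SBDF_TO_SEGID(sbdf);	/* upper 16 bits */
	u16 devid = PCI_SBDF_TO_DEVID(sbdf);	/* bus << 8 | devfn */

	/* PCI_BUS_NUM(devid) extracts the bus, devid & 0xff the devfn */
	return pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid),
					   devid & 0xff);
}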
Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220706113825.25582-36-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 2 +- drivers/iommu/amd/iommu.c | 2 +- drivers/iommu/amd/iommu_v2.c | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 1ca54803702a..40f52d02c5b9 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -486,7 +486,7 @@ extern struct kmem_cache *amd_iommu_irq_cache; struct amd_iommu_fault { u64 address; /* IO virtual address of the fault*/ u32 pasid; /* Address space identifier */ - u16 device_id; /* Originating PCI device id */ + u32 sbdf; /* Originating PCI device id */ u16 tag; /* PPR tag */ u16 flags; /* Fault flags */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 6a1db8f9f453..a56a9ad3273e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -701,7 +701,7 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) fault.address = raw[1]; fault.pasid = PPR_PASID(raw[0]); - fault.device_id = PPR_DEVID(raw[0]); + fault.sbdf = PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, PPR_DEVID(raw[0])); fault.tag = PPR_TAG(raw[0]); fault.flags = PPR_FLAGS(raw[0]); diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 40484af2ffc2..696d5555be57 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -518,15 +518,16 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) unsigned long flags; struct fault *fault; bool finish; - u16 tag, devid; + u16 tag, devid, seg_id; int ret; iommu_fault = data; tag = iommu_fault->tag & 0x1ff; finish = (iommu_fault->tag >> 9) & 1; - devid = iommu_fault->device_id; - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf); + devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf); + pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid), devid & 0xff); if (!pdev) return -ENODEV; @@ -540,7 +541,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) goto out; } - dev_state = get_device_state(iommu_fault->device_id); + dev_state = get_device_state(iommu_fault->sbdf); if (dev_state == NULL) goto out; -- cgit v1.2.3 From bfdd231374181254742c5e2faef0bef2d30c0ee4 Mon Sep 17 00:00:00 2001 From: Yunfei Wang Date: Thu, 30 Jun 2022 17:29:25 +0800 Subject: iommu/io-pgtable-arm-v7s: Add a quirk to allow pgtable PA up to 35bit The single memory zone feature will remove ZONE_DMA32 and ZONE_DMA, which can leave page tables at physical addresses wider than 32 bits. Since MediaTek IOMMU hardware supports at most 35-bit PAs in the pgtable, add a quirk to allow page table PAs up to bit 35.
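The quirk reuses the PTE encoding that IO_PGTABLE_QUIRK_ARM_MTK_EXT already defines for leaf entries: PA bit 32 is carried in PTE bit 9, bit 33 in bit 4 and bit 34 in bit 5. A minimal sketch of that packing (the mask names in the comments refer to the driver's constants; mtk_pack_high_pa_bits() itself is illustrative):

#include <linux/bits.h>
#include <linux/types.h>

static u32 mtk_pack_high_pa_bits(u64 paddr, u32 pte)
{
	if (paddr & BIT_ULL(32))
		pte |= BIT(9);		/* ARM_V7S_ATTR_MTK_PA_BIT32 */
	if (paddr & BIT_ULL(33))
		pte |= BIT(4);		/* ARM_V7S_ATTR_MTK_PA_BIT33 */
	if (paddr & BIT_ULL(34))
		pte |= BIT(5);		/* ARM_V7S_ATTR_MTK_PA_BIT34 */

	return pte;
}

The patch applies the same packing when installing level-2 table pointers and extends the TTBR to carry the upper PA bits, which is why the level 1 table no longer has to be allocated from the GFP_DMA zone.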
Signed-off-by: Ning Li Signed-off-by: Yunfei Wang Reviewed-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/20220630092927.24925-2-yf.wang@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 75 ++++++++++++++++++++++++++++---------- include/linux/io-pgtable.h | 15 +++++--- 2 files changed, 66 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index be066c1503d3..ba3115fd0f86 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -182,14 +182,8 @@ static bool arm_v7s_is_mtk_enabled(struct io_pgtable_cfg *cfg) (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT); } -static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl, - struct io_pgtable_cfg *cfg) +static arm_v7s_iopte to_mtk_iopte(phys_addr_t paddr, arm_v7s_iopte pte) { - arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl); - - if (!arm_v7s_is_mtk_enabled(cfg)) - return pte; - if (paddr & BIT_ULL(32)) pte |= ARM_V7S_ATTR_MTK_PA_BIT32; if (paddr & BIT_ULL(33)) @@ -199,6 +193,17 @@ static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl, return pte; } +static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl, + struct io_pgtable_cfg *cfg) +{ + arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl); + + if (arm_v7s_is_mtk_enabled(cfg)) + return to_mtk_iopte(paddr, pte); + + return pte; +} + static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl, struct io_pgtable_cfg *cfg) { @@ -240,10 +245,17 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, dma_addr_t dma; size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg); void *table = NULL; + gfp_t gfp_l1; + + /* + * ARM_MTK_TTBR_EXT extend the translation table base support larger + * memory address. + */ + gfp_l1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT ? + GFP_KERNEL : ARM_V7S_TABLE_GFP_DMA; if (lvl == 1) - table = (void *)__get_free_pages( - __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size)); + table = (void *)__get_free_pages(gfp_l1 | __GFP_ZERO, get_order(size)); else if (lvl == 2) table = kmem_cache_zalloc(data->l2_tables, gfp); @@ -251,7 +263,8 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, return NULL; phys = virt_to_phys(table); - if (phys != (arm_v7s_iopte)phys) { + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT ? + phys >= (1ULL << cfg->oas) : phys != (arm_v7s_iopte)phys) { /* Doesn't fit in PTE */ dev_err(dev, "Page table does not fit in PTE: %pa", &phys); goto out_free; @@ -457,9 +470,14 @@ static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table, arm_v7s_iopte curr, struct io_pgtable_cfg *cfg) { + phys_addr_t phys = virt_to_phys(table); arm_v7s_iopte old, new; - new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE; + new = phys | ARM_V7S_PTE_TYPE_TABLE; + + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT) + new = to_mtk_iopte(phys, new); + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) new |= ARM_V7S_ATTR_NS_TABLE; @@ -779,6 +797,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) { struct arm_v7s_io_pgtable *data; + slab_flags_t slab_flag; + phys_addr_t paddr; if (cfg->ias > (arm_v7s_is_mtk_enabled(cfg) ? 
34 : ARM_V7S_ADDR_BITS)) return NULL; @@ -788,7 +808,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | - IO_PGTABLE_QUIRK_ARM_MTK_EXT)) + IO_PGTABLE_QUIRK_ARM_MTK_EXT | + IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT)) return NULL; /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */ @@ -796,15 +817,27 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS)) return NULL; + if ((cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT) && + !arm_v7s_is_mtk_enabled(cfg)) + return NULL; + data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return NULL; spin_lock_init(&data->split_lock); + + /* + * ARM_MTK_TTBR_EXT extend the translation table base support larger + * memory address. + */ + slab_flag = cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT ? + 0 : ARM_V7S_TABLE_SLAB_FLAGS; + data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", ARM_V7S_TABLE_SIZE(2, cfg), ARM_V7S_TABLE_SIZE(2, cfg), - ARM_V7S_TABLE_SLAB_FLAGS, NULL); + slab_flag, NULL); if (!data->l2_tables) goto out_free_data; @@ -850,12 +883,16 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, wmb(); /* TTBR */ - cfg->arm_v7s_cfg.ttbr = virt_to_phys(data->pgd) | ARM_V7S_TTBR_S | - (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS | - ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | - ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) : - (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) | - ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC))); + paddr = virt_to_phys(data->pgd); + if (arm_v7s_is_mtk_enabled(cfg)) + cfg->arm_v7s_cfg.ttbr = paddr | upper_32_bits(paddr); + else + cfg->arm_v7s_cfg.ttbr = paddr | ARM_V7S_TTBR_S | + (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS | + ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | + ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) : + (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) | + ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC))); return &data->iop; out_free_data: diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 86af6f0a00a2..ca98aeadcc80 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -74,17 +74,22 @@ struct io_pgtable_cfg { * to support up to 35 bits PA where the bit32, bit33 and bit34 are * encoded in the bit9, bit4 and bit5 of the PTE respectively. * + * IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT: (ARM v7s format) MediaTek IOMMUs + * extend the translation table base support up to 35 bits PA, the + * encoding format is same with IO_PGTABLE_QUIRK_ARM_MTK_EXT. + * * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table * for use in the upper half of a split address space. * * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability * attributes set in the TCR for a non-coherent page-table walker. 
*/ - #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) - #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) - #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) - #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) - #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) + #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) + #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) + #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) + #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) + #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) + #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; -- cgit v1.2.3 From 301c3ca125767e7cf931b092c353f89ade87cf2c Mon Sep 17 00:00:00 2001 From: Yunfei Wang Date: Thu, 30 Jun 2022 17:29:26 +0800 Subject: iommu/mediatek: Allow page table PA up to 35bit The single memory zone feature will remove ZONE_DMA32 and ZONE_DMA, so add the quirk IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT to let the level 1 and level 2 page tables support at most 35-bit PAs. Signed-off-by: Ning Li Signed-off-by: Yunfei Wang Reviewed-by: Yong Wu Reviewed-by: Robin Murphy Acked-by: Will Deacon Link: https://lore.kernel.org/r/20220630092927.24925-3-yf.wang@mediatek.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index b2ae84046249..a1c1a3a8e0a7 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -34,7 +34,6 @@ #include #define REG_MMU_PT_BASE_ADDR 0x000 -#define MMU_PT_ADDR_MASK GENMASK(31, 7) #define REG_MMU_INVALIDATE 0x020 #define F_ALL_INVLD 0x2 @@ -138,6 +137,7 @@ /* PM and clock always on. e.g. infra iommu */ #define PM_CLK_AO BIT(15) #define IFA_IOMMU_PCIE_SUPPORT BIT(16) +#define PGTABLE_PA_35_EN BIT(17) #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \ ((((pdata)->flags) & (mask)) == (_x)) @@ -596,6 +596,9 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, .iommu_dev = data->dev, }; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, PGTABLE_PA_35_EN)) + dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_4GB_MODE)) dom->cfg.oas = data->enable_4GB ?
33 : 32; else @@ -684,8 +687,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, goto err_unlock; } bank->m4u_dom = dom; - writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, - bank->base + REG_MMU_PT_BASE_ADDR); + writel(dom->cfg.arm_v7s_cfg.ttbr, bank->base + REG_MMU_PT_BASE_ADDR); pm_runtime_put(m4udev); } @@ -1374,8 +1376,7 @@ static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev) writel_relaxed(reg->int_control[i], base + REG_MMU_INT_CONTROL0); writel_relaxed(reg->int_main_control[i], base + REG_MMU_INT_MAIN_CONTROL); writel_relaxed(reg->ivrp_paddr[i], base + REG_MMU_IVRP_PADDR); - writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, - base + REG_MMU_PT_BASE_ADDR); + writel(m4u_dom->cfg.arm_v7s_cfg.ttbr, base + REG_MMU_PT_BASE_ADDR); } while (++i < data->plat_data->banks_num); /* @@ -1409,7 +1410,7 @@ static const struct mtk_iommu_plat_data mt2712_data = { static const struct mtk_iommu_plat_data mt6779_data = { .m4u_plat = M4U_MT6779, .flags = HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | WR_THROT_EN | - MTK_IOMMU_TYPE_MM, + MTK_IOMMU_TYPE_MM | PGTABLE_PA_35_EN, .inv_sel_reg = REG_MMU_INV_SEL_GEN2, .banks_num = 1, .banks_enable = {true}, -- cgit v1.2.3 From b9b721d117e9d360ba693e75ba6b9262dfcdf9e2 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Fri, 8 Jul 2022 15:12:30 +0530 Subject: iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts TLB sync timeouts can have various causes, such as TBU power-down or pending TCU/TBU invalidation/sync. Debugging them often requires dumping implementation-defined registers to learn the status of TBU/TCU operations, and some of those registers are not accessible from the non-secure world (such as the kernel); reading them requires SMC calls into the secure world. So, add debug support to dump the implementation-defined registers on TLB sync timeouts. Signed-off-by: Sai Prakash Ranjan Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220708094230.4349-1-quic_saipraka@quicinc.com Signed-off-by: Will Deacon --- drivers/iommu/Kconfig | 10 ++ drivers/iommu/arm/arm-smmu/Makefile | 1 + drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 142 +++++++++++++++++++++++ drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 32 ++++- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 28 +++++ drivers/iommu/arm/arm-smmu/arm-smmu.c | 6 +- drivers/iommu/arm/arm-smmu/arm-smmu.h | 1 + 7 files changed, 211 insertions(+), 9 deletions(-) create mode 100644 drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c create mode 100644 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c79a0df090c0..5c5cb5bee8b6 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -363,6 +363,16 @@ config ARM_SMMU_QCOM When running on a Qualcomm platform that has the custom variant of the ARM SMMU, this needs to be built into the SMMU driver. +config ARM_SMMU_QCOM_DEBUG + bool "ARM SMMU QCOM implementation defined debug support" + depends on ARM_SMMU_QCOM + help + Support for implementation specific debug features in ARM SMMU + hardware found in QTI platforms. + + Say Y here to enable debug for issues such as TLB sync timeouts + which requires implementation defined register dumps. + config ARM_SMMU_V3 tristate "ARM Ltd.
System MMU Version 3 (SMMUv3) Support" depends on ARM64 diff --git a/drivers/iommu/arm/arm-smmu/Makefile b/drivers/iommu/arm/arm-smmu/Makefile index b0cc01aa20c9..2a5a95e8e3f9 100644 --- a/drivers/iommu/arm/arm-smmu/Makefile +++ b/drivers/iommu/arm/arm-smmu/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o obj-$(CONFIG_ARM_SMMU) += arm_smmu.o arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o arm_smmu-$(CONFIG_ARM_SMMU_QCOM) += arm-smmu-qcom.o +arm_smmu-$(CONFIG_ARM_SMMU_QCOM_DEBUG) += arm-smmu-qcom-debug.o diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c new file mode 100644 index 000000000000..6eed8e67a0ca --- /dev/null +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include +#include +#include + +#include "arm-smmu.h" +#include "arm-smmu-qcom.h" + +enum qcom_smmu_impl_reg_offset { + QCOM_SMMU_TBU_PWR_STATUS, + QCOM_SMMU_STATS_SYNC_INV_TBU_ACK, + QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR, +}; + +struct qcom_smmu_config { + const u32 *reg_offset; +}; + +void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) +{ + int ret; + u32 tbu_pwr_status, sync_inv_ack, sync_inv_progress; + struct qcom_smmu *qsmmu = container_of(smmu, struct qcom_smmu, smmu); + const struct qcom_smmu_config *cfg; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + if (__ratelimit(&rs)) { + dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n"); + + cfg = qsmmu->cfg; + if (!cfg) + return; + + ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS], + &tbu_pwr_status); + if (ret) + dev_err(smmu->dev, + "Failed to read TBU power status: %d\n", ret); + + ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK], + &sync_inv_ack); + if (ret) + dev_err(smmu->dev, + "Failed to read TBU sync/inv ack status: %d\n", ret); + + ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR], + &sync_inv_progress); + if (ret) + dev_err(smmu->dev, + "Failed to read TCU syn/inv progress: %d\n", ret); + + dev_err(smmu->dev, + "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n", + tbu_pwr_status, sync_inv_ack, sync_inv_progress); + } +} + +/* Implementation Defined Register Space 0 register offsets */ +static const u32 qcom_smmu_impl0_reg_offset[] = { + [QCOM_SMMU_TBU_PWR_STATUS] = 0x2204, + [QCOM_SMMU_STATS_SYNC_INV_TBU_ACK] = 0x25dc, + [QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR] = 0x2670, +}; + +static const struct qcom_smmu_config qcm2290_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +static const struct qcom_smmu_config sc7180_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +static const struct qcom_smmu_config sc7280_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +static const struct qcom_smmu_config sc8180x_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +static const struct qcom_smmu_config sc8280xp_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +static const struct qcom_smmu_config sm6125_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +static const struct qcom_smmu_config sm6350_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +static const struct qcom_smmu_config sm8150_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, 
+}; + +static const struct qcom_smmu_config sm8250_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +static const struct qcom_smmu_config sm8350_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +static const struct qcom_smmu_config sm8450_smmu_cfg = { + .reg_offset = qcom_smmu_impl0_reg_offset, +}; + +static const struct of_device_id __maybe_unused qcom_smmu_impl_debug_match[] = { + { .compatible = "qcom,msm8998-smmu-v2" }, + { .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg }, + { .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg }, + { .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg}, + { .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg }, + { .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg }, + { .compatible = "qcom,sdm630-smmu-v2" }, + { .compatible = "qcom,sdm845-smmu-500" }, + { .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg}, + { .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg}, + { .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg }, + { .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg }, + { .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg }, + { .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg }, + { } +}; + +const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu) +{ + const struct of_device_id *match; + const struct device_node *np = smmu->dev->of_node; + + match = of_match_node(qcom_smmu_impl_debug_match, np); + if (!match) + return NULL; + + return match->data; +} diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 2d470d867887..de25071e33ab 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -5,23 +5,40 @@ #include #include +#include #include #include #include "arm-smmu.h" +#include "arm-smmu-qcom.h" -struct qcom_smmu { - struct arm_smmu_device smmu; - bool bypass_quirk; - u8 bypass_cbndx; - u32 stall_enabled; -}; +#define QCOM_DUMMY_VAL -1 static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu) { return container_of(smmu, struct qcom_smmu, smmu); } +static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page, + int sync, int status) +{ + unsigned int spin_cnt, delay; + u32 reg; + + arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL); + for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { + for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { + reg = arm_smmu_readl(smmu, page, status); + if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE)) + return; + cpu_relax(); + } + udelay(delay); + } + + qcom_smmu_tlb_sync_debug(smmu); +} + static void qcom_adreno_smmu_write_sctlr(struct arm_smmu_device *smmu, int idx, u32 reg) { @@ -375,6 +392,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = { .def_domain_type = qcom_smmu_def_domain_type, .reset = qcom_smmu500_reset, .write_s2cr = qcom_smmu_write_s2cr, + .tlb_sync = qcom_smmu_tlb_sync, }; static const struct arm_smmu_impl qcom_adreno_smmu_impl = { @@ -383,6 +401,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = { .reset = qcom_smmu500_reset, .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, .write_sctlr = qcom_adreno_smmu_write_sctlr, + .tlb_sync = qcom_smmu_tlb_sync, }; static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, @@ -399,6 +418,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return ERR_PTR(-ENOMEM); 
qsmmu->smmu.impl = impl; + qsmmu->cfg = qcom_smmu_impl_data(smmu); return &qsmmu->smmu; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h new file mode 100644 index 000000000000..99ec8f8629a0 --- /dev/null +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _ARM_SMMU_QCOM_H +#define _ARM_SMMU_QCOM_H + +struct qcom_smmu { + struct arm_smmu_device smmu; + const struct qcom_smmu_config *cfg; + bool bypass_quirk; + u8 bypass_cbndx; + u32 stall_enabled; +}; + +#ifdef CONFIG_ARM_SMMU_QCOM_DEBUG +void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu); +const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu); +#else +static inline void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { } +static inline const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu) +{ + return NULL; +} +#endif + +#endif /* _ARM_SMMU_QCOM_H */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 2ed3594f384e..41633e5484f8 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -2074,7 +2074,6 @@ err_reset_platform_ops: __maybe_unused; static int arm_smmu_device_probe(struct platform_device *pdev) { struct resource *res; - resource_size_t ioaddr; struct arm_smmu_device *smmu; struct device *dev = &pdev->dev; int num_irqs, i, err; @@ -2098,7 +2097,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(smmu->base)) return PTR_ERR(smmu->base); - ioaddr = res->start; + smmu->ioaddr = res->start; + /* * The resource size should effectively match the value of SMMU_TOP; * stash that temporarily until we know PAGESIZE to validate it with. */ @@ -2178,7 +2178,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) } err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL, - "smmu.%pa", &ioaddr); + "smmu.%pa", &smmu->ioaddr); if (err) { dev_err(dev, "Failed to register iommu in sysfs\n"); return err; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index 2b9b42fb6f30..703fd5817ec1 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -278,6 +278,7 @@ struct arm_smmu_device { struct device *dev; void __iomem *base; + phys_addr_t ioaddr; unsigned int numpage; unsigned int pgshift; -- cgit v1.2.3 From 7e62edd7a33accf02cb7abdc1bbe381d975b9b5a Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Mon, 6 Jun 2022 00:11:52 +0800 Subject: iommu/virtio: Add map/unmap_pages() callbacks implementation The map/unmap_pages() callbacks allow mapping and unmapping multiple pages of the same size in one call, which can improve performance by reducing the number of vmexits. With map/unmap_pages() implemented, the prior map/unmap() callbacks are deprecated.
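For reference, the shape of the new callback is sketched below; example_backend_map() is a hypothetical stand-in for a driver's single-request backend (for virtio-iommu, one VIRTIO_IOMMU_T_MAP request covers the whole range):

#include <linux/iommu.h>

static int example_backend_map(struct iommu_domain *domain, unsigned long iova,
			       phys_addr_t paddr, size_t size, int prot,
			       gfp_t gfp);	/* hypothetical backend */

/*
 * Illustrative sketch: the core hands over pgcount pages of pgsize
 * bytes at once, and the driver reports the amount actually mapped
 * through *mapped.
 */
static int example_map_pages(struct iommu_domain *domain, unsigned long iova,
			     phys_addr_t paddr, size_t pgsize, size_t pgcount,
			     int prot, gfp_t gfp, size_t *mapped)
{
	size_t size = pgsize * pgcount;		/* one request, many pages */
	int ret;

	ret = example_backend_map(domain, iova, paddr, size, prot, gfp);
	if (!ret && mapped)
		*mapped = size;

	return ret;
}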
Signed-off-by: Tina Zhang Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20220605161152.3171-1-tina.zhang@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/virtio-iommu.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 25be4b822aa0..3c943dbd9fd0 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -788,11 +788,13 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev) return 0; } -static int viommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) { int ret; u32 flags; + size_t size = pgsize * pgcount; u64 end = iova + size - 1; struct virtio_iommu_req_map map; struct viommu_domain *vdomain = to_viommu_domain(domain); @@ -823,17 +825,21 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova, ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map)); if (ret) viommu_del_mappings(vdomain, iova, end); + else if (mapped) + *mapped = size; return ret; } -static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *gather) +static size_t viommu_unmap_pages(struct iommu_domain *domain, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) { int ret = 0; size_t unmapped; struct virtio_iommu_req_unmap unmap; struct viommu_domain *vdomain = to_viommu_domain(domain); + size_t size = pgsize * pgcount; unmapped = viommu_del_mappings(vdomain, iova, iova + size - 1); if (unmapped < size) @@ -1018,8 +1024,8 @@ static struct iommu_ops viommu_ops = { .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = viommu_attach_dev, - .map = viommu_map, - .unmap = viommu_unmap, + .map_pages = viommu_map_pages, + .unmap_pages = viommu_unmap_pages, .iova_to_phys = viommu_iova_to_phys, .iotlb_sync = viommu_iotlb_sync, .free = viommu_domain_free, -- cgit v1.2.3 From 309c56e84602d894e7ca424b0b13837117568fca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Jul 2022 10:06:13 +0200 Subject: iommu: remove the unused dev_has_feat method This method is never actually called. 
Signed-off-by: Christoph Hellwig Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220708080616.238833-2-hch@lst.de Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 - include/linux/iommu.h | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index d9c1623ec1a9..1b6c17dd81ee 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2853,7 +2853,6 @@ static struct iommu_ops arm_smmu_ops = { .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, - .dev_has_feat = arm_smmu_dev_has_feature, .dev_feat_enabled = arm_smmu_dev_feature_enabled, .dev_enable_feat = arm_smmu_dev_enable_feature, .dev_disable_feat = arm_smmu_dev_disable_feature, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e6abd998dbe7..a3acdb46b939 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -164,8 +164,7 @@ struct iommu_iort_rmr_data { * supported, this feature must be enabled before and * disabled after %IOMMU_DEV_FEAT_SVA. * - * Device drivers query whether a feature is supported using - * iommu_dev_has_feature(), and enable it using iommu_dev_enable_feature(). + * Device drivers enable a feature using iommu_dev_enable_feature(). */ enum iommu_dev_features { IOMMU_DEV_FEAT_SVA, @@ -248,7 +247,6 @@ struct iommu_ops { bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ - bool (*dev_has_feat)(struct device *dev, enum iommu_dev_features f); bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f); int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); -- cgit v1.2.3 From a871765d5588531e1972902d1ae077b8be306c94 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Jul 2022 10:06:14 +0200 Subject: iommu: remove iommu_dev_feature_enabled Remove the unused iommu_dev_feature_enabled function. 
Signed-off-by: Christoph Hellwig Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220708080616.238833-3-hch@lst.de Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 - drivers/iommu/iommu.c | 13 ------------- include/linux/iommu.h | 9 --------- 3 files changed, 23 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 1b6c17dd81ee..4d30a8d2bc23 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2853,7 +2853,6 @@ static struct iommu_ops arm_smmu_ops = { .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, - .dev_feat_enabled = arm_smmu_dev_feature_enabled, .dev_enable_feat = arm_smmu_dev_enable_feature, .dev_disable_feat = arm_smmu_dev_disable_feature, .sva_bind = arm_smmu_sva_bind, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0aa141646bdf..1bb016a6a2aa 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2760,19 +2760,6 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) } EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); -bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) -{ - if (dev->iommu && dev->iommu->iommu_dev) { - const struct iommu_ops *ops = dev->iommu->iommu_dev->ops; - - if (ops->dev_feat_enabled) - return ops->dev_feat_enabled(dev, feat); - } - - return false; -} -EXPORT_SYMBOL_GPL(iommu_dev_feature_enabled); - /** * iommu_sva_bind_device() - Bind a process address space to a device * @dev: the device diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a3acdb46b939..0bc2eb14b026 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -215,7 +215,6 @@ struct iommu_iotlb_gather { * driver init to device driver init (default no) * @dev_has/enable/disable_feat: per device entries to check/enable/disable * iommu specific features. 
- * @dev_feat_enabled: check enabled feature * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device * @sva_get_pasid: Get PASID associated to a SVA handle @@ -248,7 +247,6 @@ struct iommu_ops { bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ - bool (*dev_feat_enabled)(struct device *dev, enum iommu_dev_features f); int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); @@ -670,7 +668,6 @@ void iommu_release_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); -bool iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features f); struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, @@ -997,12 +994,6 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) return NULL; } -static inline bool -iommu_dev_feature_enabled(struct device *dev, enum iommu_dev_features feat) -{ - return false; -} - static inline int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { -- cgit v1.2.3 From ae3ff39a51a0f5843960487962e110339f321b0f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Jul 2022 10:06:15 +0200 Subject: iommu: remove the put_resv_regions method All drivers that implement get_resv_regions just use generic_put_resv_regions to implement the put side. Remove the indirections and document the allocation constraints. Signed-off-by: Christoph Hellwig Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220708080616.238833-4-hch@lst.de Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 1 - drivers/iommu/apple-dart.c | 1 - drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 - drivers/iommu/arm/arm-smmu/arm-smmu.c | 1 - drivers/iommu/intel/iommu.c | 1 - drivers/iommu/iommu.c | 21 ++++----------------- drivers/iommu/mtk_iommu.c | 1 - drivers/iommu/virtio-iommu.c | 5 ++--- include/linux/iommu.h | 4 ---- 9 files changed, 6 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 840831d5d2ad..e66e071e8c3b 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2280,7 +2280,6 @@ const struct iommu_ops amd_iommu_ops = { .probe_finalize = amd_iommu_probe_finalize, .device_group = amd_iommu_device_group, .get_resv_regions = amd_iommu_get_resv_regions, - .put_resv_regions = generic_iommu_put_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, .def_domain_type = amd_iommu_def_domain_type, diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index e87d3cf54ed6..1b1725759262 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -768,7 +768,6 @@ static const struct iommu_ops apple_dart_iommu_ops = { .of_xlate = apple_dart_of_xlate, .def_domain_type = apple_dart_def_domain_type, .get_resv_regions = apple_dart_get_resv_regions, - .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during dart probe */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 4d30a8d2bc23..4a5e435567f1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2852,
+2852,6 @@ static struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, - .put_resv_regions = generic_iommu_put_resv_regions, .dev_enable_feat = arm_smmu_dev_enable_feature, .dev_disable_feat = arm_smmu_dev_disable_feature, .sva_bind = arm_smmu_sva_bind, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 588929bed1bc..2d4129a4ccfc 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1584,7 +1584,6 @@ static struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, - .put_resv_regions = generic_iommu_put_resv_regions, .def_domain_type = arm_smmu_def_domain_type, .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 44016594831d..49d616aa2148 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4911,7 +4911,6 @@ const struct iommu_ops intel_iommu_ops = { .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, - .put_resv_regions = generic_iommu_put_resv_regions, .device_group = intel_iommu_device_group, .dev_enable_feat = intel_iommu_dev_enable_feat, .dev_disable_feat = intel_iommu_dev_disable_feat, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 1bb016a6a2aa..f53f8b2d27a5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2579,27 +2579,14 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list) ops->get_resv_regions(dev, list); } -void iommu_put_resv_regions(struct device *dev, struct list_head *list) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (ops->put_resv_regions) - ops->put_resv_regions(dev, list); -} - /** - * generic_iommu_put_resv_regions - Reserved region driver helper + * iommu_put_resv_regions - release reserved regions * @dev: device for which to free reserved regions * @list: reserved region list for device * - * IOMMU drivers can use this to implement their .put_resv_regions() callback - * for simple reservations. If a per region callback is provided that will be - * used to free all memory allocations associated with the reserved region or - * else just free up the memory for the regions. If an IOMMU driver allocates - * additional resources per region, it is going to have to implement a custom - * callback. + * This releases a reserved region list acquired by iommu_get_resv_regions().
*/ -void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list) +void iommu_put_resv_regions(struct device *dev, struct list_head *list) { struct iommu_resv_region *entry, *next; @@ -2610,7 +2597,7 @@ void generic_iommu_put_resv_regions(struct device *dev, struct list_head *list) kfree(entry); } } -EXPORT_SYMBOL(generic_iommu_put_resv_regions); +EXPORT_SYMBOL(iommu_put_resv_regions); struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 5c3d9366c25c..95fd21c7207a 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -928,7 +928,6 @@ static const struct iommu_ops mtk_iommu_ops = { .device_group = mtk_iommu_device_group, .of_xlate = mtk_iommu_of_xlate, .get_resv_regions = mtk_iommu_get_resv_regions, - .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 55337796a5f8..feeb5fde72a3 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -964,7 +964,7 @@ static struct iommu_device *viommu_probe_device(struct device *dev) return &viommu->iommu; err_free_dev: - generic_iommu_put_resv_regions(dev, &vdev->resv_regions); + iommu_put_resv_regions(dev, &vdev->resv_regions); kfree(vdev); return ERR_PTR(ret); @@ -983,7 +983,7 @@ static void viommu_release_device(struct device *dev) { struct viommu_endpoint *vdev = dev_iommu_priv_get(dev); - generic_iommu_put_resv_regions(dev, &vdev->resv_regions); + iommu_put_resv_regions(dev, &vdev->resv_regions); kfree(vdev); } @@ -1007,7 +1007,6 @@ static struct iommu_ops viommu_ops = { .release_device = viommu_release_device, .device_group = viommu_device_group, .get_resv_regions = viommu_get_resv_regions, - .put_resv_regions = generic_iommu_put_resv_regions, .of_xlate = viommu_of_xlate, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0bc2eb14b026..ea30f00dc145 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -209,7 +209,6 @@ struct iommu_iotlb_gather { * group and attached to the groups domain * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device - * @put_resv_regions: Free list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) @@ -240,7 +239,6 @@ struct iommu_ops { /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); - void (*put_resv_regions)(struct device *dev, struct list_head *list); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); bool (*is_attach_deferred)(struct device *dev); @@ -454,8 +452,6 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain, extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); -extern void generic_iommu_put_resv_regions(struct device *dev, - struct list_head *list); extern void iommu_set_default_passthrough(bool cmd_line); extern void iommu_set_default_translated(bool cmd_line); extern bool 
iommu_default_passthrough(void); -- cgit v1.2.3 From 469b7b8ac552041d038d0451a5e1343a8d6080c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Jul 2022 10:06:16 +0200 Subject: iommu/arm-smmu-v3: cleanup arm_smmu_dev_{enable,disable}_feature Fold arm_smmu_dev_has_feature and arm_smmu_dev_feature_enabled into the main methods. Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Link: https://lore.kernel.org/r/20220708080616.238833-5-hch@lst.de Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 55 ++++++++--------------------- 1 file changed, 14 insertions(+), 41 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 4a5e435567f1..d32b02336411 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2765,58 +2765,27 @@ static void arm_smmu_get_resv_regions(struct device *dev, iommu_dma_get_resv_regions(dev, head); } -static bool arm_smmu_dev_has_feature(struct device *dev, - enum iommu_dev_features feat) -{ - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - if (!master) - return false; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - return arm_smmu_master_iopf_supported(master); - case IOMMU_DEV_FEAT_SVA: - return arm_smmu_master_sva_supported(master); - default: - return false; - } -} - -static bool arm_smmu_dev_feature_enabled(struct device *dev, - enum iommu_dev_features feat) -{ - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - if (!master) - return false; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - return master->iopf_enabled; - case IOMMU_DEV_FEAT_SVA: - return arm_smmu_master_sva_enabled(master); - default: - return false; - } -} - static int arm_smmu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); - if (!arm_smmu_dev_has_feature(dev, feat)) + if (!master) return -ENODEV; - if (arm_smmu_dev_feature_enabled(dev, feat)) - return -EBUSY; - switch (feat) { case IOMMU_DEV_FEAT_IOPF: + if (!arm_smmu_master_iopf_supported(master)) + return -EINVAL; + if (master->iopf_enabled) + return -EBUSY; master->iopf_enabled = true; return 0; case IOMMU_DEV_FEAT_SVA: + if (!arm_smmu_master_sva_supported(master)) + return -EINVAL; + if (arm_smmu_master_sva_enabled(master)) + return -EBUSY; return arm_smmu_master_enable_sva(master); default: return -EINVAL; @@ -2828,16 +2797,20 @@ static int arm_smmu_dev_disable_feature(struct device *dev, { struct arm_smmu_master *master = dev_iommu_priv_get(dev); - if (!arm_smmu_dev_feature_enabled(dev, feat)) + if (!master) return -EINVAL; switch (feat) { case IOMMU_DEV_FEAT_IOPF: + if (!master->iopf_enabled) + return -EINVAL; if (master->sva_enabled) return -EBUSY; master->iopf_enabled = false; return 0; case IOMMU_DEV_FEAT_SVA: + if (!arm_smmu_master_sva_enabled(master)) + return -EINVAL; return arm_smmu_master_disable_sva(master); default: return -EINVAL; -- cgit v1.2.3 From 933ab6d30153f937ffa9471ce64207e6d9acf56e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:44 +0800 Subject: iommu/vt-d: Move trace/events/intel_iommu.h under iommu This header file is private to the Intel IOMMU driver. Move it to the driver folder.
Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220514014322.2927339-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 2 +- drivers/iommu/intel/svm.c | 2 +- drivers/iommu/intel/trace.c | 2 +- drivers/iommu/intel/trace.h | 98 ++++++++++++++++++++++++++++++++++++++ include/trace/events/intel_iommu.h | 94 ------------------------------------ 5 files changed, 101 insertions(+), 97 deletions(-) create mode 100644 drivers/iommu/intel/trace.h delete mode 100644 include/trace/events/intel_iommu.h diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 9699ca101c62..f91b45be1d92 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -30,10 +30,10 @@ #include #include #include -#include #include "../irq_remapping.h" #include "perf.h" +#include "trace.h" typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *); struct dmar_res_callback { diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7ee37d996e15..70b40d007a52 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -21,11 +21,11 @@ #include #include #include -#include #include "pasid.h" #include "perf.h" #include "../iommu-sva-lib.h" +#include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); static void intel_svm_drain_prq(struct device *dev, u32 pasid); diff --git a/drivers/iommu/intel/trace.c b/drivers/iommu/intel/trace.c index bfb6a6e37a88..117e626e3ea9 100644 --- a/drivers/iommu/intel/trace.c +++ b/drivers/iommu/intel/trace.c @@ -11,4 +11,4 @@ #include #define CREATE_TRACE_POINTS -#include +#include "trace.h" diff --git a/drivers/iommu/intel/trace.h b/drivers/iommu/intel/trace.h new file mode 100644 index 000000000000..25cb7f88e1a2 --- /dev/null +++ b/drivers/iommu/intel/trace.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Intel IOMMU trace support + * + * Copyright (C) 2019 Intel Corporation + * + * Author: Lu Baolu + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM intel_iommu + +#if !defined(_TRACE_INTEL_IOMMU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_INTEL_IOMMU_H + +#include +#include + +#define MSG_MAX 256 + +TRACE_EVENT(qi_submit, + TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3), + + TP_ARGS(iommu, qw0, qw1, qw2, qw3), + + TP_STRUCT__entry( + __field(u64, qw0) + __field(u64, qw1) + __field(u64, qw2) + __field(u64, qw3) + __string(iommu, iommu->name) + ), + + TP_fast_assign( + __assign_str(iommu, iommu->name); + __entry->qw0 = qw0; + __entry->qw1 = qw1; + __entry->qw2 = qw2; + __entry->qw3 = qw3; + ), + + TP_printk("%s %s: 0x%llx 0x%llx 0x%llx 0x%llx", + __print_symbolic(__entry->qw0 & 0xf, + { QI_CC_TYPE, "cc_inv" }, + { QI_IOTLB_TYPE, "iotlb_inv" }, + { QI_DIOTLB_TYPE, "dev_tlb_inv" }, + { QI_IEC_TYPE, "iec_inv" }, + { QI_IWD_TYPE, "inv_wait" }, + { QI_EIOTLB_TYPE, "p_iotlb_inv" }, + { QI_PC_TYPE, "pc_inv" }, + { QI_DEIOTLB_TYPE, "p_dev_tlb_inv" }, + { QI_PGRP_RESP_TYPE, "page_grp_resp" }), + __get_str(iommu), + __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3 + ) +); + +TRACE_EVENT(prq_report, + TP_PROTO(struct intel_iommu *iommu, struct device *dev, + u64 dw0, u64 dw1, u64 dw2, u64 dw3, + unsigned long seq), + + TP_ARGS(iommu, dev, dw0, dw1, dw2, dw3, seq), + + TP_STRUCT__entry( + __field(u64, dw0) + __field(u64, dw1) + __field(u64, dw2) + __field(u64, dw3) + __field(unsigned long, seq) + __string(iommu, iommu->name) + __string(dev, 
dev_name(dev)) + __dynamic_array(char, buff, MSG_MAX) + ), + + TP_fast_assign( + __entry->dw0 = dw0; + __entry->dw1 = dw1; + __entry->dw2 = dw2; + __entry->dw3 = dw3; + __entry->seq = seq; + __assign_str(iommu, iommu->name); + __assign_str(dev, dev_name(dev)); + ), + + TP_printk("%s/%s seq# %ld: %s", + __get_str(iommu), __get_str(dev), __entry->seq, + decode_prq_descriptor(__get_str(buff), MSG_MAX, __entry->dw0, + __entry->dw1, __entry->dw2, __entry->dw3) + ) +); +#endif /* _TRACE_INTEL_IOMMU_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/iommu/intel/ +#define TRACE_INCLUDE_FILE trace +#include diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h deleted file mode 100644 index e5c1ca6d16ee..000000000000 --- a/include/trace/events/intel_iommu.h +++ /dev/null @@ -1,94 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Intel IOMMU trace support - * - * Copyright (C) 2019 Intel Corporation - * - * Author: Lu Baolu - */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM intel_iommu - -#if !defined(_TRACE_INTEL_IOMMU_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_INTEL_IOMMU_H - -#include -#include - -#define MSG_MAX 256 - -TRACE_EVENT(qi_submit, - TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3), - - TP_ARGS(iommu, qw0, qw1, qw2, qw3), - - TP_STRUCT__entry( - __field(u64, qw0) - __field(u64, qw1) - __field(u64, qw2) - __field(u64, qw3) - __string(iommu, iommu->name) - ), - - TP_fast_assign( - __assign_str(iommu, iommu->name); - __entry->qw0 = qw0; - __entry->qw1 = qw1; - __entry->qw2 = qw2; - __entry->qw3 = qw3; - ), - - TP_printk("%s %s: 0x%llx 0x%llx 0x%llx 0x%llx", - __print_symbolic(__entry->qw0 & 0xf, - { QI_CC_TYPE, "cc_inv" }, - { QI_IOTLB_TYPE, "iotlb_inv" }, - { QI_DIOTLB_TYPE, "dev_tlb_inv" }, - { QI_IEC_TYPE, "iec_inv" }, - { QI_IWD_TYPE, "inv_wait" }, - { QI_EIOTLB_TYPE, "p_iotlb_inv" }, - { QI_PC_TYPE, "pc_inv" }, - { QI_DEIOTLB_TYPE, "p_dev_tlb_inv" }, - { QI_PGRP_RESP_TYPE, "page_grp_resp" }), - __get_str(iommu), - __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3 - ) -); - -TRACE_EVENT(prq_report, - TP_PROTO(struct intel_iommu *iommu, struct device *dev, - u64 dw0, u64 dw1, u64 dw2, u64 dw3, - unsigned long seq), - - TP_ARGS(iommu, dev, dw0, dw1, dw2, dw3, seq), - - TP_STRUCT__entry( - __field(u64, dw0) - __field(u64, dw1) - __field(u64, dw2) - __field(u64, dw3) - __field(unsigned long, seq) - __string(iommu, iommu->name) - __string(dev, dev_name(dev)) - __dynamic_array(char, buff, MSG_MAX) - ), - - TP_fast_assign( - __entry->dw0 = dw0; - __entry->dw1 = dw1; - __entry->dw2 = dw2; - __entry->dw3 = dw3; - __entry->seq = seq; - __assign_str(iommu, iommu->name); - __assign_str(dev, dev_name(dev)); - ), - - TP_printk("%s/%s seq# %ld: %s", - __get_str(iommu), __get_str(dev), __entry->seq, - decode_prq_descriptor(__get_str(buff), MSG_MAX, __entry->dw0, - __entry->dw1, __entry->dw2, __entry->dw3) - ) -); -#endif /* _TRACE_INTEL_IOMMU_H */ - -/* This part must be outside protection */ -#include -- cgit v1.2.3 From f19e038c25079edcba4796328d99ff00f2b47b68 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:45 +0800 Subject: agp/intel: Use per device iommu check The IOMMU subsystem has already provided an interface to query whether the IOMMU hardware is enabled for a specific device. 
This changes the check from Intel specific intel_iommu_gfx_mapped (globally exported by the Intel IOMMU driver) to probing the presence of IOMMU on a specific device using the generic device_iommu_mapped(). This follows commit cca084692394a ("drm/i915: Use per device iommu check") which converted drm/i915 driver to use device_iommu_mapped(). Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220514014322.2927339-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/char/agp/intel-gtt.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 79a1b65527c2..cfcb450e9b59 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include "agp.h" @@ -573,18 +573,15 @@ static void intel_gtt_cleanup(void) */ static inline int needs_ilk_vtd_wa(void) { -#ifdef CONFIG_INTEL_IOMMU const unsigned short gpu_devid = intel_private.pcidev->device; - /* Query intel_iommu to see if we need the workaround. Presumably that - * was loaded first. + /* + * Query iommu subsystem to see if we need the workaround. Presumably + * that was loaded first. */ - if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG || - gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) && - intel_iommu_gfx_mapped) - return 1; -#endif - return 0; + return ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG || + gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) && + device_iommu_mapped(&intel_private.pcidev->dev)); } static bool intel_gtt_can_wc(void) -- cgit v1.2.3 From f9903555dd05a4096d8fef4eb823c0b94f710982 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:46 +0800 Subject: iommu/vt-d: Remove unnecessary exported symbol The exported symbol intel_iommu_gfx_mapped is not used anywhere in the tree. Remove it to avoid dead code. 
Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220514014322.2927339-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 6 ------ include/linux/intel-iommu.h | 1 - 2 files changed, 7 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5c0dce78586a..6564af7ec0dd 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -314,9 +314,6 @@ static int iommu_skip_te_disable; #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -int intel_iommu_gfx_mapped; -EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); - DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); @@ -4093,9 +4090,6 @@ int __init intel_iommu_init(void) if (list_empty(&dmar_satc_units)) pr_info("No SATC found\n"); - if (dmar_map_gfx) - intel_iommu_gfx_mapped = 1; - init_no_remapping_devices(); ret = init_dmars(); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 5fcf89faa31a..4ebf3c4da45e 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -787,7 +787,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; -extern int intel_iommu_gfx_mapped; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { -- cgit v1.2.3 From 3890f749c590a5a33f0034362be3304b69ae6d65 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:47 +0800 Subject: drm/i915: Remove unnecessary include intel-iommu.h is not needed in drm/i915 anymore. Remove its include. Signed-off-by: Lu Baolu Reviewed-by: Steve Wahl Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Acked-by: Jani Nikula Link: https://lore.kernel.org/r/20220514014322.2927339-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/gpu/drm/i915/display/intel_display.c | 1 - drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 806d50b302ab..493421759afa 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 30fe847c6664..193c7c83c70f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -6,7 +6,6 @@ #include #include -#include #include #include -- cgit v1.2.3 From bfd39a73879e80313bb634927f185ace60bb229b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:48 +0800 Subject: KVM: x86: Remove unnecessary include intel-iommu.h is not needed in kvm/x86 anymore. Remove its include. 
Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220514014322.2927339-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- arch/x86/kvm/x86.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1910e1e78b15..f7ec1e39eec7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 853788b9a66ff089053df3b4ed7d166e61def449 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:49 +0800 Subject: x86/boot/tboot: Move tboot_force_iommu() to Intel IOMMU tboot_force_iommu() is only called by the Intel IOMMU driver. Move the helper into that driver. No functional change intended. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220514014322.2927339-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- arch/x86/kernel/tboot.c | 15 --------------- drivers/iommu/intel/iommu.c | 14 ++++++++++++++ include/linux/tboot.h | 2 -- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 0c1154a1c403..3bacd935f840 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -6,7 +6,6 @@ * Copyright (c) 2006-2009, Intel Corporation */ -#include #include #include #include @@ -516,17 +515,3 @@ struct acpi_table_header *tboot_get_dmar_table(struct acpi_table_header *dmar_tb return dmar_tbl; } - -int tboot_force_iommu(void) -{ - if (!tboot_enabled()) - return 0; - - if (no_iommu || dmar_disabled) - pr_warn("Forcing Intel-IOMMU to enabled\n"); - - dmar_disabled = 0; - no_iommu = 0; - - return 1; -} diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6564af7ec0dd..c70948021e8e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4019,6 +4019,20 @@ static int __init probe_acpi_namespace_devices(void) return 0; } +static __init int tboot_force_iommu(void) +{ + if (!tboot_enabled()) + return 0; + + if (no_iommu || dmar_disabled) + pr_warn("Forcing Intel-IOMMU to enabled\n"); + + dmar_disabled = 0; + no_iommu = 0; + + return 1; +} + int __init intel_iommu_init(void) { int ret = -ENODEV; diff --git a/include/linux/tboot.h b/include/linux/tboot.h index 5146d2574e85..d2279160ef39 100644 --- a/include/linux/tboot.h +++ b/include/linux/tboot.h @@ -126,7 +126,6 @@ extern void tboot_probe(void); extern void tboot_shutdown(u32 shutdown_type); extern struct acpi_table_header *tboot_get_dmar_table( struct acpi_table_header *dmar_tbl); -extern int tboot_force_iommu(void); #else @@ -136,7 +135,6 @@ extern int tboot_force_iommu(void); #define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \ do { } while (0) #define tboot_get_dmar_table(dmar_tbl) (dmar_tbl) -#define tboot_force_iommu() 0 #endif /* !CONFIG_INTEL_TXT */ -- cgit v1.2.3 From 2585a2790e7fdb3dadfe309c9220bbc03704f84f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:50 +0800 Subject: iommu/vt-d: Move include/linux/intel-iommu.h under iommu This header file is private to the Intel IOMMU driver. Move it to the driver folder. 
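To make the effect of the move concrete, a short sketch of the include visibility before and after; both paths are taken from the patch itself:

/* Before the move: visible to the whole tree. */
#include <linux/intel-iommu.h>

/* After the move: a relative include, resolvable only from files
 * under drivers/iommu/intel/. */
#include "iommu.h"

Making the header reachable only by a relative include enforces the privacy by construction; any out-of-driver user now fails to compile rather than quietly depending on VT-d internals.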
Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220514014322.2927339-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- MAINTAINERS | 1 - drivers/iommu/intel/cap_audit.c | 2 +- drivers/iommu/intel/debugfs.c | 2 +- drivers/iommu/intel/dmar.c | 2 +- drivers/iommu/intel/iommu.c | 2 +- drivers/iommu/intel/iommu.h | 831 ++++++++++++++++++++++++++++++++++++ drivers/iommu/intel/irq_remapping.c | 2 +- drivers/iommu/intel/pasid.c | 2 +- drivers/iommu/intel/perf.c | 2 +- drivers/iommu/intel/svm.c | 2 +- drivers/iommu/intel/trace.h | 3 +- include/linux/intel-iommu.h | 831 ------------------------------------ 12 files changed, 841 insertions(+), 841 deletions(-) create mode 100644 drivers/iommu/intel/iommu.h delete mode 100644 include/linux/intel-iommu.h diff --git a/MAINTAINERS b/MAINTAINERS index f679152bdbad..8f9ed151ad4c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10077,7 +10077,6 @@ L: iommu@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/intel/ -F: include/linux/intel-iommu.h F: include/linux/intel-svm.h INTEL IOP-ADMA DMA DRIVER diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c index 71596fc62822..3ee68393122f 100644 --- a/drivers/iommu/intel/cap_audit.c +++ b/drivers/iommu/intel/cap_audit.c @@ -10,7 +10,7 @@ #define pr_fmt(fmt) "DMAR: " fmt -#include +#include "iommu.h" #include "cap_audit.h" static u64 intel_iommu_cap_sanity; diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index ed796eea4581..d927ef10641b 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -10,11 +10,11 @@ #include #include -#include #include #include +#include "iommu.h" #include "pasid.h" #include "perf.h" diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index f91b45be1d92..2a5e0f91e647 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -31,6 +30,7 @@ #include #include +#include "iommu.h" #include "../irq_remapping.h" #include "perf.h" #include "trace.h" diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index c70948021e8e..10bda4bec8fe 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -26,6 +25,7 @@ #include #include +#include "iommu.h" #include "../irq_remapping.h" #include "../iommu-sva-lib.h" #include "pasid.h" diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h new file mode 100644 index 000000000000..4ebf3c4da45e --- /dev/null +++ b/drivers/iommu/intel/iommu.h @@ -0,0 +1,831 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2006-2015, Intel Corporation. + * + * Authors: Ashok Raj + * Anil S Keshavamurthy + * David Woodhouse + */ + +#ifndef _INTEL_IOMMU_H_ +#define _INTEL_IOMMU_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * VT-d hardware uses 4KiB page size regardless of host page size. 
+ */ +#define VTD_PAGE_SHIFT (12) +#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) +#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) +#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) + +#define VTD_STRIDE_SHIFT (9) +#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) + +#define DMA_PTE_READ BIT_ULL(0) +#define DMA_PTE_WRITE BIT_ULL(1) +#define DMA_PTE_LARGE_PAGE BIT_ULL(7) +#define DMA_PTE_SNP BIT_ULL(11) + +#define DMA_FL_PTE_PRESENT BIT_ULL(0) +#define DMA_FL_PTE_US BIT_ULL(2) +#define DMA_FL_PTE_ACCESS BIT_ULL(5) +#define DMA_FL_PTE_DIRTY BIT_ULL(6) +#define DMA_FL_PTE_XD BIT_ULL(63) + +#define ADDR_WIDTH_5LEVEL (57) +#define ADDR_WIDTH_4LEVEL (48) + +#define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_DEV_IOTLB 1 +#define CONTEXT_TT_PASS_THROUGH 2 +#define CONTEXT_PASIDE BIT_ULL(3) + +/* + * Intel IOMMU register specification per version 1.0 public spec. + */ +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ +#define DMAR_GCMD_REG 0x18 /* Global command register */ +#define DMAR_GSTS_REG 0x1c /* Global status register */ +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ +#define DMAR_CCMD_REG 0x28 /* Context command reg */ +#define DMAR_FSTS_REG 0x34 /* Fault Status register */ +#define DMAR_FECTL_REG 0x38 /* Fault control register */ +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ +#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ +#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ +#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ +#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ +#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ +#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ +#define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ +#define DMAR_IQER_REG 0xb0 /* Invalidation queue error record register */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ +#define DMAR_PQH_REG 0xc0 /* Page request queue head register */ +#define DMAR_PQT_REG 0xc8 /* Page request queue tail register */ +#define DMAR_PQA_REG 0xd0 /* Page request queue address register */ +#define DMAR_PRS_REG 0xdc /* Page request status register */ +#define DMAR_PECTL_REG 0xe0 /* Page request event control register */ +#define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */ +#define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */ +#define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */ +#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability register */ +#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type register */ +#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */ +#define DMAR_MTRR_FIX16K_80000_REG 0x128 +#define DMAR_MTRR_FIX16K_A0000_REG 0x130 +#define DMAR_MTRR_FIX4K_C0000_REG 0x138 +#define DMAR_MTRR_FIX4K_C8000_REG 0x140 +#define DMAR_MTRR_FIX4K_D0000_REG 0x148 +#define DMAR_MTRR_FIX4K_D8000_REG 0x150 +#define DMAR_MTRR_FIX4K_E0000_REG 0x158 +#define 
DMAR_MTRR_FIX4K_E8000_REG 0x160 +#define DMAR_MTRR_FIX4K_F0000_REG 0x168 +#define DMAR_MTRR_FIX4K_F8000_REG 0x170 +#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */ +#define DMAR_MTRR_PHYSMASK0_REG 0x188 +#define DMAR_MTRR_PHYSBASE1_REG 0x190 +#define DMAR_MTRR_PHYSMASK1_REG 0x198 +#define DMAR_MTRR_PHYSBASE2_REG 0x1a0 +#define DMAR_MTRR_PHYSMASK2_REG 0x1a8 +#define DMAR_MTRR_PHYSBASE3_REG 0x1b0 +#define DMAR_MTRR_PHYSMASK3_REG 0x1b8 +#define DMAR_MTRR_PHYSBASE4_REG 0x1c0 +#define DMAR_MTRR_PHYSMASK4_REG 0x1c8 +#define DMAR_MTRR_PHYSBASE5_REG 0x1d0 +#define DMAR_MTRR_PHYSMASK5_REG 0x1d8 +#define DMAR_MTRR_PHYSBASE6_REG 0x1e0 +#define DMAR_MTRR_PHYSMASK6_REG 0x1e8 +#define DMAR_MTRR_PHYSBASE7_REG 0x1f0 +#define DMAR_MTRR_PHYSMASK7_REG 0x1f8 +#define DMAR_MTRR_PHYSBASE8_REG 0x200 +#define DMAR_MTRR_PHYSMASK8_REG 0x208 +#define DMAR_MTRR_PHYSBASE9_REG 0x210 +#define DMAR_MTRR_PHYSMASK9_REG 0x218 +#define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ +#define DMAR_VCMD_REG 0xe00 /* Virtual command register */ +#define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ + +#define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg) +#define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg) +#define DMAR_IQER_REG_ICESID(reg) FIELD_GET(GENMASK_ULL(63, 48), reg) + +#define OFFSET_STRIDE (9) + +#define dmar_readq(a) readq(a) +#define dmar_writeq(a,v) writeq(v,a) +#define dmar_readl(a) readl(a) +#define dmar_writel(a, v) writel(v, a) + +#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) +#define DMAR_VER_MINOR(v) ((v) & 0x0f) + +/* + * Decoding Capability Register + */ +#define cap_5lp_support(c) (((c) >> 60) & 1) +#define cap_pi_support(c) (((c) >> 59) & 1) +#define cap_fl1gp_support(c) (((c) >> 56) & 1) +#define cap_read_drain(c) (((c) >> 55) & 1) +#define cap_write_drain(c) (((c) >> 54) & 1) +#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) +#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) +#define cap_pgsel_inv(c) (((c) >> 39) & 1) + +#define cap_super_page_val(c) (((c) >> 34) & 0xf) +#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ + * OFFSET_STRIDE) + 21) + +#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) +#define cap_max_fault_reg_offset(c) \ + (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) + +#define cap_zlr(c) (((c) >> 22) & 1) +#define cap_isoch(c) (((c) >> 23) & 1) +#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) +#define cap_sagaw(c) (((c) >> 8) & 0x1f) +#define cap_caching_mode(c) (((c) >> 7) & 1) +#define cap_phmr(c) (((c) >> 6) & 1) +#define cap_plmr(c) (((c) >> 5) & 1) +#define cap_rwbf(c) (((c) >> 4) & 1) +#define cap_afl(c) (((c) >> 3) & 1) +#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) +/* + * Extended Capability Register + */ + +#define ecap_rps(e) (((e) >> 49) & 0x1) +#define ecap_smpwc(e) (((e) >> 48) & 0x1) +#define ecap_flts(e) (((e) >> 47) & 0x1) +#define ecap_slts(e) (((e) >> 46) & 0x1) +#define ecap_slads(e) (((e) >> 45) & 0x1) +#define ecap_vcs(e) (((e) >> 44) & 0x1) +#define ecap_smts(e) (((e) >> 43) & 0x1) +#define ecap_dit(e) (((e) >> 41) & 0x1) +#define ecap_pds(e) (((e) >> 42) & 0x1) +#define ecap_pasid(e) (((e) >> 40) & 0x1) +#define ecap_pss(e) (((e) >> 35) & 0x1f) +#define ecap_eafs(e) (((e) >> 34) & 0x1) +#define ecap_nwfs(e) (((e) >> 33) & 0x1) +#define ecap_srs(e) (((e) >> 31) & 0x1) +#define ecap_ers(e) (((e) >> 30) & 0x1) +#define ecap_prs(e) (((e) >> 29) & 0x1) +#define ecap_broken_pasid(e) (((e) >> 28) & 0x1) +#define 
ecap_dis(e) (((e) >> 27) & 0x1) +#define ecap_nest(e) (((e) >> 26) & 0x1) +#define ecap_mts(e) (((e) >> 25) & 0x1) +#define ecap_ecs(e) (((e) >> 24) & 0x1) +#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) +#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) +#define ecap_coherent(e) ((e) & 0x1) +#define ecap_qis(e) ((e) & 0x2) +#define ecap_pass_through(e) (((e) >> 6) & 0x1) +#define ecap_eim_support(e) (((e) >> 4) & 0x1) +#define ecap_ir_support(e) (((e) >> 3) & 0x1) +#define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1) +#define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) +#define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ + +/* Virtual command interface capability */ +#define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ + +/* IOTLB_REG */ +#define DMA_TLB_FLUSH_GRANU_OFFSET 60 +#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) +#define DMA_TLB_DSI_FLUSH (((u64)2) << 60) +#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) +#define DMA_TLB_IIRG(type) ((type >> 60) & 3) +#define DMA_TLB_IAIG(val) (((val) >> 57) & 3) +#define DMA_TLB_READ_DRAIN (((u64)1) << 49) +#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) +#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) +#define DMA_TLB_IVT (((u64)1) << 63) +#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_TLB_MAX_SIZE (0x3f) + +/* INVALID_DESC */ +#define DMA_CCMD_INVL_GRANU_OFFSET 61 +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 4) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 4) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 4) +#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) +#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) +#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) +#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_ID_TLB_ADDR(addr) (addr) +#define DMA_ID_TLB_ADDR_MASK(mask) (mask) + +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)<<31) +#define DMA_PMEN_PRS (((u32)1)<<0) + +/* GCMD_REG */ +#define DMA_GCMD_TE (((u32)1) << 31) +#define DMA_GCMD_SRTP (((u32)1) << 30) +#define DMA_GCMD_SFL (((u32)1) << 29) +#define DMA_GCMD_EAFL (((u32)1) << 28) +#define DMA_GCMD_WBF (((u32)1) << 27) +#define DMA_GCMD_QIE (((u32)1) << 26) +#define DMA_GCMD_SIRTP (((u32)1) << 24) +#define DMA_GCMD_IRE (((u32) 1) << 25) +#define DMA_GCMD_CFI (((u32) 1) << 23) + +/* GSTS_REG */ +#define DMA_GSTS_TES (((u32)1) << 31) +#define DMA_GSTS_RTPS (((u32)1) << 30) +#define DMA_GSTS_FLS (((u32)1) << 29) +#define DMA_GSTS_AFLS (((u32)1) << 28) +#define DMA_GSTS_WBFS (((u32)1) << 27) +#define DMA_GSTS_QIES (((u32)1) << 26) +#define DMA_GSTS_IRTPS (((u32)1) << 24) +#define DMA_GSTS_IRES (((u32)1) << 25) +#define DMA_GSTS_CFIS (((u32)1) << 23) + +/* DMA_RTADDR_REG */ +#define DMA_RTADDR_RTT (((u64)1) << 11) +#define DMA_RTADDR_SMT (((u64)1) << 10) + +/* CCMD_REG */ +#define DMA_CCMD_ICC (((u64)1) << 63) +#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) +#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) +#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) +#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) +#define DMA_CCMD_MASK_NOBIT 0 +#define DMA_CCMD_MASK_1BIT 1 +#define DMA_CCMD_MASK_2BIT 2 +#define DMA_CCMD_MASK_3BIT 3 +#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) +#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) + +/* FECTL_REG */ +#define DMA_FECTL_IM (((u32)1) << 31) + +/* FSTS_REG */ +#define DMA_FSTS_PFO (1 << 0) /* Primary Fault Overflow */ +#define DMA_FSTS_PPF (1 << 1) /* Primary Pending Fault */ +#define DMA_FSTS_IQE (1 << 4) /* Invalidation Queue Error */ +#define DMA_FSTS_ICE (1 << 5) /* Invalidation Completion 
Error */ +#define DMA_FSTS_ITE (1 << 6) /* Invalidation Time-out Error */ +#define DMA_FSTS_PRO (1 << 7) /* Page Request Overflow */ +#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) + +/* FRCD_REG, 32 bits access */ +#define DMA_FRCD_F (((u32)1) << 31) +#define dma_frcd_type(d) ((d >> 30) & 1) +#define dma_frcd_fault_reason(c) (c & 0xff) +#define dma_frcd_source_id(c) (c & 0xffff) +#define dma_frcd_pasid_value(c) (((c) >> 8) & 0xfffff) +#define dma_frcd_pasid_present(c) (((c) >> 31) & 1) +/* low 64 bit */ +#define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT)) + +/* PRS_REG */ +#define DMA_PRS_PPR ((u32)1) +#define DMA_PRS_PRO ((u32)2) + +#define DMA_VCS_PAS ((u64)1) + +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +do { \ + cycles_t start_time = get_cycles(); \ + while (1) { \ + sts = op(iommu->reg + offset); \ + if (cond) \ + break; \ + if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ + panic("DMAR hardware is malfunctioning\n"); \ + cpu_relax(); \ + } \ +} while (0) + +#define QI_LENGTH 256 /* queue length */ + +enum { + QI_FREE, + QI_IN_USE, + QI_DONE, + QI_ABORT +}; + +#define QI_CC_TYPE 0x1 +#define QI_IOTLB_TYPE 0x2 +#define QI_DIOTLB_TYPE 0x3 +#define QI_IEC_TYPE 0x4 +#define QI_IWD_TYPE 0x5 +#define QI_EIOTLB_TYPE 0x6 +#define QI_PC_TYPE 0x7 +#define QI_DEIOTLB_TYPE 0x8 +#define QI_PGRP_RESP_TYPE 0x9 +#define QI_PSTRM_RESP_TYPE 0xa + +#define QI_IEC_SELECTIVE (((u64)1) << 4) +#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) +#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) + +#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) +#define QI_IWD_STATUS_WRITE (((u64)1) << 5) +#define QI_IWD_FENCE (((u64)1) << 6) +#define QI_IWD_PRQ_DRAIN (((u64)1) << 7) + +#define QI_IOTLB_DID(did) (((u64)did) << 16) +#define QI_IOTLB_DR(dr) (((u64)dr) << 7) +#define QI_IOTLB_DW(dw) (((u64)dw) << 6) +#define QI_IOTLB_GRAN(gran) (((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4)) +#define QI_IOTLB_ADDR(addr) (((u64)addr) & VTD_PAGE_MASK) +#define QI_IOTLB_IH(ih) (((u64)ih) << 6) +#define QI_IOTLB_AM(am) (((u8)am) & 0x3f) + +#define QI_CC_FM(fm) (((u64)fm) << 48) +#define QI_CC_SID(sid) (((u64)sid) << 32) +#define QI_CC_DID(did) (((u64)did) << 16) +#define QI_CC_GRAN(gran) (((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4)) + +#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) +#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) +#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ + ((u64)((pfsid >> 4) & 0xfff) << 52)) +#define QI_DEV_IOTLB_SIZE 1 +#define QI_DEV_IOTLB_MAX_INVS 32 + +#define QI_PC_PASID(pasid) (((u64)pasid) << 32) +#define QI_PC_DID(did) (((u64)did) << 16) +#define QI_PC_GRAN(gran) (((u64)gran) << 4) + +/* PASID cache invalidation granu */ +#define QI_PC_ALL_PASIDS 0 +#define QI_PC_PASID_SEL 1 +#define QI_PC_GLOBAL 3 + +#define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_EIOTLB_IH(ih) (((u64)ih) << 6) +#define QI_EIOTLB_AM(am) (((u64)am) & 0x3f) +#define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32) +#define QI_EIOTLB_DID(did) (((u64)did) << 16) +#define QI_EIOTLB_GRAN(gran) (((u64)gran) << 4) + +/* QI Dev-IOTLB inv granu */ +#define QI_DEV_IOTLB_GRAN_ALL 1 +#define QI_DEV_IOTLB_GRAN_PASID_SEL 0 + +#define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK) +#define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) +#define QI_DEV_EIOTLB_PASID(p) ((u64)((p) & 0xfffff) << 32) +#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) +#define QI_DEV_EIOTLB_QDEP(qd) 
((u64)((qd) & 0x1f) << 4) +#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ + ((u64)((pfsid >> 4) & 0xfff) << 52)) +#define QI_DEV_EIOTLB_MAX_INVS 32 + +/* Page group response descriptor QW0 */ +#define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) +#define QI_PGRP_PDP(p) (((u64)(p)) << 5) +#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12) +#define QI_PGRP_DID(rid) (((u64)(rid)) << 16) +#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) + +/* Page group response descriptor QW1 */ +#define QI_PGRP_LPIG(x) (((u64)(x)) << 2) +#define QI_PGRP_IDX(idx) (((u64)(idx)) << 3) + + +#define QI_RESP_SUCCESS 0x0 +#define QI_RESP_INVALID 0x1 +#define QI_RESP_FAILURE 0xf + +#define QI_GRAN_NONG_PASID 2 +#define QI_GRAN_PSI_PASID 3 + +#define qi_shift(iommu) (DMAR_IQ_SHIFT + !!ecap_smts((iommu)->ecap)) + +struct qi_desc { + u64 qw0; + u64 qw1; + u64 qw2; + u64 qw3; +}; + +struct q_inval { + raw_spinlock_t q_lock; + void *desc; /* invalidation queue */ + int *desc_status; /* desc status */ + int free_head; /* first free entry */ + int free_tail; /* last free entry */ + int free_cnt; +}; + +struct dmar_pci_notify_info; + +#ifdef CONFIG_IRQ_REMAP +/* 1MB - maximum possible interrupt remapping table size */ +#define INTR_REMAP_PAGE_ORDER 8 +#define INTR_REMAP_TABLE_REG_SIZE 0xf +#define INTR_REMAP_TABLE_REG_SIZE_MASK 0xf + +#define INTR_REMAP_TABLE_ENTRIES 65536 + +struct irq_domain; + +struct ir_table { + struct irte *base; + unsigned long *bitmap; +}; + +void intel_irq_remap_add_device(struct dmar_pci_notify_info *info); +#else +static inline void +intel_irq_remap_add_device(struct dmar_pci_notify_info *info) { } +#endif + +struct iommu_flush { + void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, + u8 fm, u64 type); + void (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); +}; + +enum { + SR_DMAR_FECTL_REG, + SR_DMAR_FEDATA_REG, + SR_DMAR_FEADDR_REG, + SR_DMAR_FEUADDR_REG, + MAX_SR_DMAR_REGS +}; + +#define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) +#define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) +#define VTD_FLAG_SVM_CAPABLE (1 << 2) + +extern int intel_iommu_sm; +extern spinlock_t device_domain_lock; + +#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) +#define pasid_supported(iommu) (sm_supported(iommu) && \ + ecap_pasid((iommu)->ecap)) + +struct pasid_entry; +struct pasid_state_entry; +struct page_req_dsc; + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 lo; + u64 hi; +}; + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; + +/* + * When VT-d works in the scalable mode, it allows DMA translation to + * happen through either first level or second level page table. This + * bit marks that the DMA translation for the domain goes through the + * first level page table, otherwise, it goes through the second level. + */ +#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) + +struct dmar_domain { + int nid; /* node id */ + + unsigned int iommu_refcnt[DMAR_UNITS_SUPPORTED]; + /* Refcount of devices per iommu */ + + + u16 iommu_did[DMAR_UNITS_SUPPORTED]; + /* Domain ids per IOMMU. 
Use u16 since + * domain ids are 16 bit wide according + * to VT-d spec, section 9.3 */ + + u8 has_iotlb_device: 1; + u8 iommu_coherency: 1; /* indicate coherency of iommu access */ + u8 force_snooping : 1; /* Create IOPTEs with snoop control */ + u8 set_pte_snp:1; + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + + int flags; /* flags to find out type of domain */ + int iommu_superpage;/* Level of superpages supported: + 0 == 4KiB (no superpages), 1 == 2MiB, + 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ + u64 max_addr; /* maximum mapped address */ + + struct iommu_domain domain; /* generic domain data structure for + iommu core */ +}; + +struct intel_iommu { + void __iomem *reg; /* Pointer to hardware regs, virtual addr */ + u64 reg_phys; /* physical address of hw register set */ + u64 reg_size; /* size of hw register set */ + u64 cap; + u64 ecap; + u64 vccap; + u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ + raw_spinlock_t register_lock; /* protect register handling */ + int seq_id; /* sequence id of the iommu */ + int agaw; /* agaw of this iommu */ + int msagaw; /* max sagaw of this iommu */ + unsigned int irq, pr_irq; + u16 segment; /* PCI segment# */ + unsigned char name[13]; /* Device Name */ + +#ifdef CONFIG_INTEL_IOMMU + unsigned long *domain_ids; /* bitmap of domains */ + spinlock_t lock; /* protect context, domain ids */ + struct root_entry *root_entry; /* virtual address */ + + struct iommu_flush flush; +#endif +#ifdef CONFIG_INTEL_IOMMU_SVM + struct page_req_dsc *prq; + unsigned char prq_name[16]; /* Name for PRQ interrupt */ + struct completion prq_complete; + struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */ +#endif + struct iopf_queue *iopf_queue; + unsigned char iopfq_name[16]; + struct q_inval *qi; /* Queued invalidation info */ + u32 *iommu_state; /* Store iommu states between suspend and resume.*/ + +#ifdef CONFIG_IRQ_REMAP + struct ir_table *ir_table; /* Interrupt remapping info */ + struct irq_domain *ir_domain; + struct irq_domain *ir_msi_domain; +#endif + struct iommu_device iommu; /* IOMMU core code handle */ + int node; + u32 flags; /* Software defined flags */ + + struct dmar_drhd_unit *drhd; + void *perf_statistic; +}; + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u32 segment; /* PCI segment number */ + u8 bus; /* PCI bus number */ + u8 devfn; /* PCI devfn number */ + u16 pfsid; /* SRIOV physical function source ID */ + u8 pasid_supported:3; + u8 pasid_enabled:1; + u8 pri_supported:1; + u8 pri_enabled:1; + u8 ats_supported:1; + u8 ats_enabled:1; + u8 ats_qdep; + struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ + struct intel_iommu *iommu; /* IOMMU used by this device */ + struct dmar_domain *domain; /* pointer to domain */ + struct pasid_table *pasid_table; /* pasid table */ +}; + +static inline void __iommu_flush_cache( + struct intel_iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + +/* Convert generic struct iommu_domain to private struct dmar_domain */ +static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct dmar_domain, domain); +} + +/* 
+ * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-10: available + * 11: snoop behavior + * 12-63: Host physical address + */ +struct dma_pte { + u64 val; +}; + +static inline void dma_clear_pte(struct dma_pte *pte) +{ + pte->val = 0; +} + +static inline u64 dma_pte_addr(struct dma_pte *pte) +{ +#ifdef CONFIG_64BIT + return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD); +#else + /* Must have a full atomic 64-bit read */ + return __cmpxchg64(&pte->val, 0ULL, 0ULL) & + VTD_PAGE_MASK & (~DMA_FL_PTE_XD); +#endif +} + +static inline bool dma_pte_present(struct dma_pte *pte) +{ + return (pte->val & 3) != 0; +} + +static inline bool dma_pte_superpage(struct dma_pte *pte) +{ + return (pte->val & DMA_PTE_LARGE_PAGE); +} + +static inline bool first_pte_in_page(struct dma_pte *pte) +{ + return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE); +} + +static inline int nr_pte_to_next_page(struct dma_pte *pte) +{ + return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) : + (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte; +} + +extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); + +extern int dmar_enable_qi(struct intel_iommu *iommu); +extern void dmar_disable_qi(struct intel_iommu *iommu); +extern int dmar_reenable_qi(struct intel_iommu *iommu); +extern void qi_global_iec(struct intel_iommu *iommu); + +extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, + u8 fm, u64 type); +extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); +extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u16 qdep, u64 addr, unsigned mask); + +void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, + unsigned long npages, bool ih); + +void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u32 pasid, u16 qdep, u64 addr, + unsigned int size_order); +void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, + u32 pasid); + +int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, + unsigned int count, unsigned long options); +/* + * Options used in qi_submit_sync: + * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8. 
+ */ +#define QI_OPT_WAIT_DRAIN BIT(0) + +extern int dmar_ir_support(void); + +void *alloc_pgtable_page(int node); +void free_pgtable_page(void *vaddr); +struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); +void iommu_flush_write_buffer(struct intel_iommu *iommu); +int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); +struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); + +#ifdef CONFIG_INTEL_IOMMU_SVM +extern void intel_svm_check(struct intel_iommu *iommu); +extern int intel_svm_enable_prq(struct intel_iommu *iommu); +extern int intel_svm_finish_prq(struct intel_iommu *iommu); +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, + void *drvdata); +void intel_svm_unbind(struct iommu_sva *handle); +u32 intel_svm_get_pasid(struct iommu_sva *handle); +int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, + struct iommu_page_response *msg); + +struct intel_svm_dev { + struct list_head list; + struct rcu_head rcu; + struct device *dev; + struct intel_iommu *iommu; + struct iommu_sva sva; + unsigned long prq_seq_number; + u32 pasid; + int users; + u16 did; + u16 dev_iotlb:1; + u16 sid, qdep; +}; + +struct intel_svm { + struct mmu_notifier notifier; + struct mm_struct *mm; + + unsigned int flags; + u32 pasid; + struct list_head devs; +}; +#else +static inline void intel_svm_check(struct intel_iommu *iommu) {} +#endif + +#ifdef CONFIG_INTEL_IOMMU_DEBUGFS +void intel_iommu_debugfs_init(void); +#else +static inline void intel_iommu_debugfs_init(void) {} +#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ + +extern const struct attribute_group *intel_iommu_groups[]; +bool context_present(struct context_entry *context); +struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, + u8 devfn, int alloc); + +extern const struct iommu_ops intel_iommu_ops; + +#ifdef CONFIG_INTEL_IOMMU +extern int iommu_calculate_agaw(struct intel_iommu *iommu); +extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); +extern int dmar_disabled; +extern int intel_iommu_enabled; +#else +static inline int iommu_calculate_agaw(struct intel_iommu *iommu) +{ + return 0; +} +static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return 0; +} +#define dmar_disabled (1) +#define intel_iommu_enabled (0) +#endif + +static inline const char *decode_prq_descriptor(char *str, size_t size, + u64 dw0, u64 dw1, u64 dw2, u64 dw3) +{ + char *buf = str; + int bytes; + + bytes = snprintf(buf, size, + "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx", + FIELD_GET(GENMASK_ULL(31, 16), dw0), + FIELD_GET(GENMASK_ULL(63, 12), dw1), + dw1 & BIT_ULL(0) ? 'r' : '-', + dw1 & BIT_ULL(1) ? 'w' : '-', + dw0 & BIT_ULL(52) ? 'x' : '-', + dw0 & BIT_ULL(53) ? 'p' : '-', + dw1 & BIT_ULL(2) ? 
'l' : '-', + FIELD_GET(GENMASK_ULL(51, 32), dw0), + FIELD_GET(GENMASK_ULL(11, 3), dw1)); + + /* Private Data */ + if (dw0 & BIT_ULL(9)) { + size -= bytes; + buf += bytes; + snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3); + } + + return str; +} + +#endif diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index a67319597884..2e9683e970f8 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -21,6 +20,7 @@ #include #include +#include "iommu.h" #include "../irq_remapping.h" #include "cap_audit.h" diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 17cad7c1f62d..43f090381ec7 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -12,13 +12,13 @@ #include #include #include -#include #include #include #include #include #include +#include "iommu.h" #include "pasid.h" /* diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c index 0e8e03252d92..94ee70ac38e3 100644 --- a/drivers/iommu/intel/perf.c +++ b/drivers/iommu/intel/perf.c @@ -9,8 +9,8 @@ */ #include -#include +#include "iommu.h" #include "perf.h" static DEFINE_SPINLOCK(latency_lock); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 70b40d007a52..580713aa9e07 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -5,7 +5,6 @@ * Authors: David Woodhouse */ -#include #include #include #include @@ -22,6 +21,7 @@ #include #include +#include "iommu.h" #include "pasid.h" #include "perf.h" #include "../iommu-sva-lib.h" diff --git a/drivers/iommu/intel/trace.h b/drivers/iommu/intel/trace.h index 25cb7f88e1a2..93d96f93a89b 100644 --- a/drivers/iommu/intel/trace.h +++ b/drivers/iommu/intel/trace.h @@ -13,7 +13,8 @@ #define _TRACE_INTEL_IOMMU_H #include -#include + +#include "iommu.h" #define MSG_MAX 256 diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h deleted file mode 100644 index 4ebf3c4da45e..000000000000 --- a/include/linux/intel-iommu.h +++ /dev/null @@ -1,831 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright © 2006-2015, Intel Corporation. - * - * Authors: Ashok Raj - * Anil S Keshavamurthy - * David Woodhouse - */ - -#ifndef _INTEL_IOMMU_H_ -#define _INTEL_IOMMU_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/* - * VT-d hardware uses 4KiB page size regardless of host page size. - */ -#define VTD_PAGE_SHIFT (12) -#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT) -#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) -#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) - -#define VTD_STRIDE_SHIFT (9) -#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) - -#define DMA_PTE_READ BIT_ULL(0) -#define DMA_PTE_WRITE BIT_ULL(1) -#define DMA_PTE_LARGE_PAGE BIT_ULL(7) -#define DMA_PTE_SNP BIT_ULL(11) - -#define DMA_FL_PTE_PRESENT BIT_ULL(0) -#define DMA_FL_PTE_US BIT_ULL(2) -#define DMA_FL_PTE_ACCESS BIT_ULL(5) -#define DMA_FL_PTE_DIRTY BIT_ULL(6) -#define DMA_FL_PTE_XD BIT_ULL(63) - -#define ADDR_WIDTH_5LEVEL (57) -#define ADDR_WIDTH_4LEVEL (48) - -#define CONTEXT_TT_MULTI_LEVEL 0 -#define CONTEXT_TT_DEV_IOTLB 1 -#define CONTEXT_TT_PASS_THROUGH 2 -#define CONTEXT_PASIDE BIT_ULL(3) - -/* - * Intel IOMMU register specification per version 1.0 public spec. 
- */ -#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ -#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ -#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ -#define DMAR_GCMD_REG 0x18 /* Global command register */ -#define DMAR_GSTS_REG 0x1c /* Global status register */ -#define DMAR_RTADDR_REG 0x20 /* Root entry table */ -#define DMAR_CCMD_REG 0x28 /* Context command reg */ -#define DMAR_FSTS_REG 0x34 /* Fault Status register */ -#define DMAR_FECTL_REG 0x38 /* Fault control register */ -#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ -#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ -#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ -#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ -#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ -#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ -#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ -#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ -#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ -#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ -#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ -#define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ -#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ -#define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ -#define DMAR_IQER_REG 0xb0 /* Invalidation queue error record register */ -#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ -#define DMAR_PQH_REG 0xc0 /* Page request queue head register */ -#define DMAR_PQT_REG 0xc8 /* Page request queue tail register */ -#define DMAR_PQA_REG 0xd0 /* Page request queue address register */ -#define DMAR_PRS_REG 0xdc /* Page request status register */ -#define DMAR_PECTL_REG 0xe0 /* Page request event control register */ -#define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */ -#define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */ -#define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */ -#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability register */ -#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type register */ -#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */ -#define DMAR_MTRR_FIX16K_80000_REG 0x128 -#define DMAR_MTRR_FIX16K_A0000_REG 0x130 -#define DMAR_MTRR_FIX4K_C0000_REG 0x138 -#define DMAR_MTRR_FIX4K_C8000_REG 0x140 -#define DMAR_MTRR_FIX4K_D0000_REG 0x148 -#define DMAR_MTRR_FIX4K_D8000_REG 0x150 -#define DMAR_MTRR_FIX4K_E0000_REG 0x158 -#define DMAR_MTRR_FIX4K_E8000_REG 0x160 -#define DMAR_MTRR_FIX4K_F0000_REG 0x168 -#define DMAR_MTRR_FIX4K_F8000_REG 0x170 -#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */ -#define DMAR_MTRR_PHYSMASK0_REG 0x188 -#define DMAR_MTRR_PHYSBASE1_REG 0x190 -#define DMAR_MTRR_PHYSMASK1_REG 0x198 -#define DMAR_MTRR_PHYSBASE2_REG 0x1a0 -#define DMAR_MTRR_PHYSMASK2_REG 0x1a8 -#define DMAR_MTRR_PHYSBASE3_REG 0x1b0 -#define DMAR_MTRR_PHYSMASK3_REG 0x1b8 -#define DMAR_MTRR_PHYSBASE4_REG 0x1c0 -#define DMAR_MTRR_PHYSMASK4_REG 0x1c8 -#define DMAR_MTRR_PHYSBASE5_REG 0x1d0 -#define DMAR_MTRR_PHYSMASK5_REG 0x1d8 -#define DMAR_MTRR_PHYSBASE6_REG 0x1e0 -#define DMAR_MTRR_PHYSMASK6_REG 0x1e8 -#define DMAR_MTRR_PHYSBASE7_REG 0x1f0 -#define DMAR_MTRR_PHYSMASK7_REG 0x1f8 -#define DMAR_MTRR_PHYSBASE8_REG 0x200 -#define DMAR_MTRR_PHYSMASK8_REG 0x208 -#define DMAR_MTRR_PHYSBASE9_REG 0x210 -#define 
DMAR_MTRR_PHYSMASK9_REG 0x218 -#define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ -#define DMAR_VCMD_REG 0xe00 /* Virtual command register */ -#define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ - -#define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg) -#define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg) -#define DMAR_IQER_REG_ICESID(reg) FIELD_GET(GENMASK_ULL(63, 48), reg) - -#define OFFSET_STRIDE (9) - -#define dmar_readq(a) readq(a) -#define dmar_writeq(a,v) writeq(v,a) -#define dmar_readl(a) readl(a) -#define dmar_writel(a, v) writel(v, a) - -#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) -#define DMAR_VER_MINOR(v) ((v) & 0x0f) - -/* - * Decoding Capability Register - */ -#define cap_5lp_support(c) (((c) >> 60) & 1) -#define cap_pi_support(c) (((c) >> 59) & 1) -#define cap_fl1gp_support(c) (((c) >> 56) & 1) -#define cap_read_drain(c) (((c) >> 55) & 1) -#define cap_write_drain(c) (((c) >> 54) & 1) -#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) -#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) -#define cap_pgsel_inv(c) (((c) >> 39) & 1) - -#define cap_super_page_val(c) (((c) >> 34) & 0xf) -#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ - * OFFSET_STRIDE) + 21) - -#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) -#define cap_max_fault_reg_offset(c) \ - (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) - -#define cap_zlr(c) (((c) >> 22) & 1) -#define cap_isoch(c) (((c) >> 23) & 1) -#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) -#define cap_sagaw(c) (((c) >> 8) & 0x1f) -#define cap_caching_mode(c) (((c) >> 7) & 1) -#define cap_phmr(c) (((c) >> 6) & 1) -#define cap_plmr(c) (((c) >> 5) & 1) -#define cap_rwbf(c) (((c) >> 4) & 1) -#define cap_afl(c) (((c) >> 3) & 1) -#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) -/* - * Extended Capability Register - */ - -#define ecap_rps(e) (((e) >> 49) & 0x1) -#define ecap_smpwc(e) (((e) >> 48) & 0x1) -#define ecap_flts(e) (((e) >> 47) & 0x1) -#define ecap_slts(e) (((e) >> 46) & 0x1) -#define ecap_slads(e) (((e) >> 45) & 0x1) -#define ecap_vcs(e) (((e) >> 44) & 0x1) -#define ecap_smts(e) (((e) >> 43) & 0x1) -#define ecap_dit(e) (((e) >> 41) & 0x1) -#define ecap_pds(e) (((e) >> 42) & 0x1) -#define ecap_pasid(e) (((e) >> 40) & 0x1) -#define ecap_pss(e) (((e) >> 35) & 0x1f) -#define ecap_eafs(e) (((e) >> 34) & 0x1) -#define ecap_nwfs(e) (((e) >> 33) & 0x1) -#define ecap_srs(e) (((e) >> 31) & 0x1) -#define ecap_ers(e) (((e) >> 30) & 0x1) -#define ecap_prs(e) (((e) >> 29) & 0x1) -#define ecap_broken_pasid(e) (((e) >> 28) & 0x1) -#define ecap_dis(e) (((e) >> 27) & 0x1) -#define ecap_nest(e) (((e) >> 26) & 0x1) -#define ecap_mts(e) (((e) >> 25) & 0x1) -#define ecap_ecs(e) (((e) >> 24) & 0x1) -#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) -#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) -#define ecap_coherent(e) ((e) & 0x1) -#define ecap_qis(e) ((e) & 0x2) -#define ecap_pass_through(e) (((e) >> 6) & 0x1) -#define ecap_eim_support(e) (((e) >> 4) & 0x1) -#define ecap_ir_support(e) (((e) >> 3) & 0x1) -#define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1) -#define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) -#define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ - -/* Virtual command interface capability */ -#define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ - -/* IOTLB_REG */ -#define DMA_TLB_FLUSH_GRANU_OFFSET 60 -#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) -#define 
DMA_TLB_DSI_FLUSH (((u64)2) << 60) -#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) -#define DMA_TLB_IIRG(type) ((type >> 60) & 3) -#define DMA_TLB_IAIG(val) (((val) >> 57) & 3) -#define DMA_TLB_READ_DRAIN (((u64)1) << 49) -#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) -#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) -#define DMA_TLB_IVT (((u64)1) << 63) -#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) -#define DMA_TLB_MAX_SIZE (0x3f) - -/* INVALID_DESC */ -#define DMA_CCMD_INVL_GRANU_OFFSET 61 -#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 4) -#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 4) -#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 4) -#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) -#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) -#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) -#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) -#define DMA_ID_TLB_ADDR(addr) (addr) -#define DMA_ID_TLB_ADDR_MASK(mask) (mask) - -/* PMEN_REG */ -#define DMA_PMEN_EPM (((u32)1)<<31) -#define DMA_PMEN_PRS (((u32)1)<<0) - -/* GCMD_REG */ -#define DMA_GCMD_TE (((u32)1) << 31) -#define DMA_GCMD_SRTP (((u32)1) << 30) -#define DMA_GCMD_SFL (((u32)1) << 29) -#define DMA_GCMD_EAFL (((u32)1) << 28) -#define DMA_GCMD_WBF (((u32)1) << 27) -#define DMA_GCMD_QIE (((u32)1) << 26) -#define DMA_GCMD_SIRTP (((u32)1) << 24) -#define DMA_GCMD_IRE (((u32) 1) << 25) -#define DMA_GCMD_CFI (((u32) 1) << 23) - -/* GSTS_REG */ -#define DMA_GSTS_TES (((u32)1) << 31) -#define DMA_GSTS_RTPS (((u32)1) << 30) -#define DMA_GSTS_FLS (((u32)1) << 29) -#define DMA_GSTS_AFLS (((u32)1) << 28) -#define DMA_GSTS_WBFS (((u32)1) << 27) -#define DMA_GSTS_QIES (((u32)1) << 26) -#define DMA_GSTS_IRTPS (((u32)1) << 24) -#define DMA_GSTS_IRES (((u32)1) << 25) -#define DMA_GSTS_CFIS (((u32)1) << 23) - -/* DMA_RTADDR_REG */ -#define DMA_RTADDR_RTT (((u64)1) << 11) -#define DMA_RTADDR_SMT (((u64)1) << 10) - -/* CCMD_REG */ -#define DMA_CCMD_ICC (((u64)1) << 63) -#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) -#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) -#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) -#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) -#define DMA_CCMD_MASK_NOBIT 0 -#define DMA_CCMD_MASK_1BIT 1 -#define DMA_CCMD_MASK_2BIT 2 -#define DMA_CCMD_MASK_3BIT 3 -#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) -#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) - -/* FECTL_REG */ -#define DMA_FECTL_IM (((u32)1) << 31) - -/* FSTS_REG */ -#define DMA_FSTS_PFO (1 << 0) /* Primary Fault Overflow */ -#define DMA_FSTS_PPF (1 << 1) /* Primary Pending Fault */ -#define DMA_FSTS_IQE (1 << 4) /* Invalidation Queue Error */ -#define DMA_FSTS_ICE (1 << 5) /* Invalidation Completion Error */ -#define DMA_FSTS_ITE (1 << 6) /* Invalidation Time-out Error */ -#define DMA_FSTS_PRO (1 << 7) /* Page Request Overflow */ -#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) - -/* FRCD_REG, 32 bits access */ -#define DMA_FRCD_F (((u32)1) << 31) -#define dma_frcd_type(d) ((d >> 30) & 1) -#define dma_frcd_fault_reason(c) (c & 0xff) -#define dma_frcd_source_id(c) (c & 0xffff) -#define dma_frcd_pasid_value(c) (((c) >> 8) & 0xfffff) -#define dma_frcd_pasid_present(c) (((c) >> 31) & 1) -/* low 64 bit */ -#define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT)) - -/* PRS_REG */ -#define DMA_PRS_PPR ((u32)1) -#define DMA_PRS_PRO ((u32)2) - -#define DMA_VCS_PAS ((u64)1) - -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ -do { \ - cycles_t start_time = get_cycles(); \ - while (1) { \ - sts = op(iommu->reg + offset); \ - if (cond) \ - break; \ - if 
(DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ - panic("DMAR hardware is malfunctioning\n"); \ - cpu_relax(); \ - } \ -} while (0) - -#define QI_LENGTH 256 /* queue length */ - -enum { - QI_FREE, - QI_IN_USE, - QI_DONE, - QI_ABORT -}; - -#define QI_CC_TYPE 0x1 -#define QI_IOTLB_TYPE 0x2 -#define QI_DIOTLB_TYPE 0x3 -#define QI_IEC_TYPE 0x4 -#define QI_IWD_TYPE 0x5 -#define QI_EIOTLB_TYPE 0x6 -#define QI_PC_TYPE 0x7 -#define QI_DEIOTLB_TYPE 0x8 -#define QI_PGRP_RESP_TYPE 0x9 -#define QI_PSTRM_RESP_TYPE 0xa - -#define QI_IEC_SELECTIVE (((u64)1) << 4) -#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) -#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) - -#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) -#define QI_IWD_STATUS_WRITE (((u64)1) << 5) -#define QI_IWD_FENCE (((u64)1) << 6) -#define QI_IWD_PRQ_DRAIN (((u64)1) << 7) - -#define QI_IOTLB_DID(did) (((u64)did) << 16) -#define QI_IOTLB_DR(dr) (((u64)dr) << 7) -#define QI_IOTLB_DW(dw) (((u64)dw) << 6) -#define QI_IOTLB_GRAN(gran) (((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4)) -#define QI_IOTLB_ADDR(addr) (((u64)addr) & VTD_PAGE_MASK) -#define QI_IOTLB_IH(ih) (((u64)ih) << 6) -#define QI_IOTLB_AM(am) (((u8)am) & 0x3f) - -#define QI_CC_FM(fm) (((u64)fm) << 48) -#define QI_CC_SID(sid) (((u64)sid) << 32) -#define QI_CC_DID(did) (((u64)did) << 16) -#define QI_CC_GRAN(gran) (((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4)) - -#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) -#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) -#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) -#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ - ((u64)((pfsid >> 4) & 0xfff) << 52)) -#define QI_DEV_IOTLB_SIZE 1 -#define QI_DEV_IOTLB_MAX_INVS 32 - -#define QI_PC_PASID(pasid) (((u64)pasid) << 32) -#define QI_PC_DID(did) (((u64)did) << 16) -#define QI_PC_GRAN(gran) (((u64)gran) << 4) - -/* PASID cache invalidation granu */ -#define QI_PC_ALL_PASIDS 0 -#define QI_PC_PASID_SEL 1 -#define QI_PC_GLOBAL 3 - -#define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) -#define QI_EIOTLB_IH(ih) (((u64)ih) << 6) -#define QI_EIOTLB_AM(am) (((u64)am) & 0x3f) -#define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32) -#define QI_EIOTLB_DID(did) (((u64)did) << 16) -#define QI_EIOTLB_GRAN(gran) (((u64)gran) << 4) - -/* QI Dev-IOTLB inv granu */ -#define QI_DEV_IOTLB_GRAN_ALL 1 -#define QI_DEV_IOTLB_GRAN_PASID_SEL 0 - -#define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK) -#define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) -#define QI_DEV_EIOTLB_PASID(p) ((u64)((p) & 0xfffff) << 32) -#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) -#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) -#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ - ((u64)((pfsid >> 4) & 0xfff) << 52)) -#define QI_DEV_EIOTLB_MAX_INVS 32 - -/* Page group response descriptor QW0 */ -#define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) -#define QI_PGRP_PDP(p) (((u64)(p)) << 5) -#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12) -#define QI_PGRP_DID(rid) (((u64)(rid)) << 16) -#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) - -/* Page group response descriptor QW1 */ -#define QI_PGRP_LPIG(x) (((u64)(x)) << 2) -#define QI_PGRP_IDX(idx) (((u64)(idx)) << 3) - - -#define QI_RESP_SUCCESS 0x0 -#define QI_RESP_INVALID 0x1 -#define QI_RESP_FAILURE 0xf - -#define QI_GRAN_NONG_PASID 2 -#define QI_GRAN_PSI_PASID 3 - -#define qi_shift(iommu) (DMAR_IQ_SHIFT + !!ecap_smts((iommu)->ecap)) - -struct qi_desc { - u64 qw0; - u64 qw1; - u64 qw2; - u64 qw3; -}; - 
-struct q_inval { - raw_spinlock_t q_lock; - void *desc; /* invalidation queue */ - int *desc_status; /* desc status */ - int free_head; /* first free entry */ - int free_tail; /* last free entry */ - int free_cnt; -}; - -struct dmar_pci_notify_info; - -#ifdef CONFIG_IRQ_REMAP -/* 1MB - maximum possible interrupt remapping table size */ -#define INTR_REMAP_PAGE_ORDER 8 -#define INTR_REMAP_TABLE_REG_SIZE 0xf -#define INTR_REMAP_TABLE_REG_SIZE_MASK 0xf - -#define INTR_REMAP_TABLE_ENTRIES 65536 - -struct irq_domain; - -struct ir_table { - struct irte *base; - unsigned long *bitmap; -}; - -void intel_irq_remap_add_device(struct dmar_pci_notify_info *info); -#else -static inline void -intel_irq_remap_add_device(struct dmar_pci_notify_info *info) { } -#endif - -struct iommu_flush { - void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, - u8 fm, u64 type); - void (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type); -}; - -enum { - SR_DMAR_FECTL_REG, - SR_DMAR_FEDATA_REG, - SR_DMAR_FEADDR_REG, - SR_DMAR_FEUADDR_REG, - MAX_SR_DMAR_REGS -}; - -#define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) -#define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) -#define VTD_FLAG_SVM_CAPABLE (1 << 2) - -extern int intel_iommu_sm; -extern spinlock_t device_domain_lock; - -#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) -#define pasid_supported(iommu) (sm_supported(iommu) && \ - ecap_pasid((iommu)->ecap)) - -struct pasid_entry; -struct pasid_state_entry; -struct page_req_dsc; - -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 lo; - u64 hi; -}; - -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; - -/* - * When VT-d works in the scalable mode, it allows DMA translation to - * happen through either first level or second level page table. This - * bit marks that the DMA translation for the domain goes through the - * first level page table, otherwise, it goes through the second level. - */ -#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) - -struct dmar_domain { - int nid; /* node id */ - - unsigned int iommu_refcnt[DMAR_UNITS_SUPPORTED]; - /* Refcount of devices per iommu */ - - - u16 iommu_did[DMAR_UNITS_SUPPORTED]; - /* Domain ids per IOMMU. 
Use u16 since - * domain ids are 16 bit wide according - * to VT-d spec, section 9.3 */ - - u8 has_iotlb_device: 1; - u8 iommu_coherency: 1; /* indicate coherency of iommu access */ - u8 force_snooping : 1; /* Create IOPTEs with snoop control */ - u8 set_pte_snp:1; - - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - - int flags; /* flags to find out type of domain */ - int iommu_superpage;/* Level of superpages supported: - 0 == 4KiB (no superpages), 1 == 2MiB, - 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ - u64 max_addr; /* maximum mapped address */ - - struct iommu_domain domain; /* generic domain data structure for - iommu core */ -}; - -struct intel_iommu { - void __iomem *reg; /* Pointer to hardware regs, virtual addr */ - u64 reg_phys; /* physical address of hw register set */ - u64 reg_size; /* size of hw register set */ - u64 cap; - u64 ecap; - u64 vccap; - u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ - raw_spinlock_t register_lock; /* protect register handling */ - int seq_id; /* sequence id of the iommu */ - int agaw; /* agaw of this iommu */ - int msagaw; /* max sagaw of this iommu */ - unsigned int irq, pr_irq; - u16 segment; /* PCI segment# */ - unsigned char name[13]; /* Device Name */ - -#ifdef CONFIG_INTEL_IOMMU - unsigned long *domain_ids; /* bitmap of domains */ - spinlock_t lock; /* protect context, domain ids */ - struct root_entry *root_entry; /* virtual address */ - - struct iommu_flush flush; -#endif -#ifdef CONFIG_INTEL_IOMMU_SVM - struct page_req_dsc *prq; - unsigned char prq_name[16]; /* Name for PRQ interrupt */ - struct completion prq_complete; - struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */ -#endif - struct iopf_queue *iopf_queue; - unsigned char iopfq_name[16]; - struct q_inval *qi; /* Queued invalidation info */ - u32 *iommu_state; /* Store iommu states between suspend and resume.*/ - -#ifdef CONFIG_IRQ_REMAP - struct ir_table *ir_table; /* Interrupt remapping info */ - struct irq_domain *ir_domain; - struct irq_domain *ir_msi_domain; -#endif - struct iommu_device iommu; /* IOMMU core code handle */ - int node; - u32 flags; /* Software defined flags */ - - struct dmar_drhd_unit *drhd; - void *perf_statistic; -}; - -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u32 segment; /* PCI segment number */ - u8 bus; /* PCI bus number */ - u8 devfn; /* PCI devfn number */ - u16 pfsid; /* SRIOV physical function source ID */ - u8 pasid_supported:3; - u8 pasid_enabled:1; - u8 pri_supported:1; - u8 pri_enabled:1; - u8 ats_supported:1; - u8 ats_enabled:1; - u8 ats_qdep; - struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ - struct intel_iommu *iommu; /* IOMMU used by this device */ - struct dmar_domain *domain; /* pointer to domain */ - struct pasid_table *pasid_table; /* pasid table */ -}; - -static inline void __iommu_flush_cache( - struct intel_iommu *iommu, void *addr, int size) -{ - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(addr, size); -} - -/* Convert generic struct iommu_domain to private struct dmar_domain */ -static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct dmar_domain, domain); -} - -/* 
- * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-10: available - * 11: snoop behavior - * 12-63: Host physical address - */ -struct dma_pte { - u64 val; -}; - -static inline void dma_clear_pte(struct dma_pte *pte) -{ - pte->val = 0; -} - -static inline u64 dma_pte_addr(struct dma_pte *pte) -{ -#ifdef CONFIG_64BIT - return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD); -#else - /* Must have a full atomic 64-bit read */ - return __cmpxchg64(&pte->val, 0ULL, 0ULL) & - VTD_PAGE_MASK & (~DMA_FL_PTE_XD); -#endif -} - -static inline bool dma_pte_present(struct dma_pte *pte) -{ - return (pte->val & 3) != 0; -} - -static inline bool dma_pte_superpage(struct dma_pte *pte) -{ - return (pte->val & DMA_PTE_LARGE_PAGE); -} - -static inline bool first_pte_in_page(struct dma_pte *pte) -{ - return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE); -} - -static inline int nr_pte_to_next_page(struct dma_pte *pte) -{ - return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) : - (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte; -} - -extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); - -extern int dmar_enable_qi(struct intel_iommu *iommu); -extern void dmar_disable_qi(struct intel_iommu *iommu); -extern int dmar_reenable_qi(struct intel_iommu *iommu); -extern void qi_global_iec(struct intel_iommu *iommu); - -extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, - u8 fm, u64 type); -extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type); -extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, - u16 qdep, u64 addr, unsigned mask); - -void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, - unsigned long npages, bool ih); - -void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, - u32 pasid, u16 qdep, u64 addr, - unsigned int size_order); -void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, - u32 pasid); - -int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, - unsigned int count, unsigned long options); -/* - * Options used in qi_submit_sync: - * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8. 
*/ -#define QI_OPT_WAIT_DRAIN BIT(0) - -extern int dmar_ir_support(void); - -void *alloc_pgtable_page(int node); -void free_pgtable_page(void *vaddr); -struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); -void iommu_flush_write_buffer(struct intel_iommu *iommu); -int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); -struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); - -#ifdef CONFIG_INTEL_IOMMU_SVM -extern void intel_svm_check(struct intel_iommu *iommu); -extern int intel_svm_enable_prq(struct intel_iommu *iommu); -extern int intel_svm_finish_prq(struct intel_iommu *iommu); -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, - void *drvdata); -void intel_svm_unbind(struct iommu_sva *handle); -u32 intel_svm_get_pasid(struct iommu_sva *handle); -int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, - struct iommu_page_response *msg); - -struct intel_svm_dev { - struct list_head list; - struct rcu_head rcu; - struct device *dev; - struct intel_iommu *iommu; - struct iommu_sva sva; - unsigned long prq_seq_number; - u32 pasid; - int users; - u16 did; - u16 dev_iotlb:1; - u16 sid, qdep; -}; - -struct intel_svm { - struct mmu_notifier notifier; - struct mm_struct *mm; - - unsigned int flags; - u32 pasid; - struct list_head devs; -}; -#else -static inline void intel_svm_check(struct intel_iommu *iommu) {} -#endif - -#ifdef CONFIG_INTEL_IOMMU_DEBUGFS -void intel_iommu_debugfs_init(void); -#else -static inline void intel_iommu_debugfs_init(void) {} -#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ - -extern const struct attribute_group *intel_iommu_groups[]; -bool context_present(struct context_entry *context); -struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, - u8 devfn, int alloc); - -extern const struct iommu_ops intel_iommu_ops; - -#ifdef CONFIG_INTEL_IOMMU -extern int iommu_calculate_agaw(struct intel_iommu *iommu); -extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); -extern int dmar_disabled; -extern int intel_iommu_enabled; -#else -static inline int iommu_calculate_agaw(struct intel_iommu *iommu) -{ - return 0; -} -static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) -{ - return 0; -} -#define dmar_disabled (1) -#define intel_iommu_enabled (0) -#endif - -static inline const char *decode_prq_descriptor(char *str, size_t size, - u64 dw0, u64 dw1, u64 dw2, u64 dw3) -{ - char *buf = str; - int bytes; - - bytes = snprintf(buf, size, - "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx", - FIELD_GET(GENMASK_ULL(31, 16), dw0), - FIELD_GET(GENMASK_ULL(63, 12), dw1), - dw1 & BIT_ULL(0) ? 'r' : '-', - dw1 & BIT_ULL(1) ? 'w' : '-', - dw0 & BIT_ULL(52) ? 'x' : '-', - dw0 & BIT_ULL(53) ? 'p' : '-', - dw1 & BIT_ULL(2) ? 'l' : '-', - FIELD_GET(GENMASK_ULL(51, 32), dw0), - FIELD_GET(GENMASK_ULL(11, 3), dw1)); - - /* Private Data */ - if (dw0 & BIT_ULL(9)) { - size -= bytes; - buf += bytes; - snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3); - } - - return str; -} - -#endif -- cgit v1.2.3 From 9f18abab6063ded860097dbbd7c8b3cef198e4dd Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:51 +0800 Subject: iommu/vt-d: Remove unused iovad from dmar_domain Not used anywhere. Clean it up to avoid dead code.
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220527053424.3111186-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 4ebf3c4da45e..8285fcfa5fea 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -543,7 +543,6 @@ struct dmar_domain { u8 set_pte_snp:1; struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ int gaw; /* max guest address width */ -- cgit v1.2.3 From 983ebe57b3af05288aa41ee721f01430424bbf31 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:52 +0800 Subject: iommu/vt-d: debugfs: Remove device_domain_lock usage The domain_translation_struct debugfs node is used to dump the DMAR page tables for the PCI devices. It potentially races with setting domains to devices. The existing code uses the global spinlock device_domain_lock to avoid the races. This removes the use of device_domain_lock outside of iommu.c by replacing it with the group mutex lock. Using the group mutex lock is cleaner and more compatible to following cleanups. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/debugfs.c | 43 +++++++++++++++++++++++++++++++------------ drivers/iommu/intel/iommu.c | 2 +- drivers/iommu/intel/iommu.h | 1 - 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index d927ef10641b..6e1a3f88abc8 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -342,13 +342,13 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde, } } -static int show_device_domain_translation(struct device *dev, void *data) +static int __show_device_domain_translation(struct device *dev, void *data) { - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct dmar_domain *domain = info->domain; + struct dmar_domain *domain; struct seq_file *m = data; u64 path[6] = { 0 }; + domain = to_dmar_domain(iommu_get_domain_for_dev(dev)); if (!domain) return 0; @@ -359,20 +359,39 @@ static int show_device_domain_translation(struct device *dev, void *data) pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path); seq_putc(m, '\n'); - return 0; + /* Don't iterate */ + return 1; } -static int domain_translation_struct_show(struct seq_file *m, void *unused) +static int show_device_domain_translation(struct device *dev, void *data) { - unsigned long flags; - int ret; + struct iommu_group *group; - spin_lock_irqsave(&device_domain_lock, flags); - ret = bus_for_each_dev(&pci_bus_type, NULL, m, - show_device_domain_translation); - spin_unlock_irqrestore(&device_domain_lock, flags); + group = iommu_group_get(dev); + if (group) { + /* + * The group->mutex is held across the callback, which will + * block calls to iommu_attach/detach_group/device. Hence, + * the domain of the device will not change during traversal. + * + * All devices in an iommu group share a single domain, hence + * we only dump the domain of the first device. Even though, + * this code still possibly races with the iommu_unmap() + * interface. This could be solved by RCU-freeing the page + * table pages in the iommu_unmap() path. 
+ */ + iommu_group_for_each_dev(group, data, + __show_device_domain_translation); + iommu_group_put(group); + } - return ret; + return 0; +} + +static int domain_translation_struct_show(struct seq_file *m, void *unused) +{ + return bus_for_each_dev(&pci_bus_type, NULL, m, + show_device_domain_translation); } DEFINE_SHOW_ATTRIBUTE(domain_translation_struct); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 10bda4bec8fe..3b6571681bdd 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -314,7 +314,7 @@ static int iommu_skip_te_disable; #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -DEFINE_SPINLOCK(device_domain_lock); +static DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); const struct iommu_ops intel_iommu_ops; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 8285fcfa5fea..8deb745d8b36 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -480,7 +480,6 @@ enum { #define VTD_FLAG_SVM_CAPABLE (1 << 2) extern int intel_iommu_sm; -extern spinlock_t device_domain_lock; #define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) #define pasid_supported(iommu) (sm_supported(iommu) && \ -- cgit v1.2.3 From 98f7b0db4976690e47701b702ae314375101e327 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:53 +0800 Subject: iommu/vt-d: Remove clearing translation data in disable_dmar_iommu() The disable_dmar_iommu() is called when IOMMU initialization fails or the IOMMU is hot-removed from the system. In both cases, there is no need to clear the IOMMU translation data structures for devices. On the initialization path, the device probing only happens after the IOMMU is initialized successfully, hence there're no translation data structures. On the hot-remove path, there is no real use case where the IOMMU is hot-removed, but the devices that it manages are still alive in the system. The translation data structures were torn down during device release, hence there's no need to repeat it in IOMMU hot-remove path either. This removes the unnecessary code and only leaves a check. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 21 +++++++-------------- drivers/iommu/intel/pasid.h | 1 + 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 3b6571681bdd..43aaec5bdd84 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1716,23 +1716,16 @@ static int iommu_init_domains(struct intel_iommu *iommu) static void disable_dmar_iommu(struct intel_iommu *iommu) { - struct device_domain_info *info, *tmp; - unsigned long flags; - if (!iommu->domain_ids) return; - spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry_safe(info, tmp, &device_domain_list, global) { - if (info->iommu != iommu) - continue; - - if (!info->dev || !info->domain) - continue; - - __dmar_remove_one_dev_info(info); - } - spin_unlock_irqrestore(&device_domain_lock, flags); + /* + * All iommu domains must have been detached from the devices, + * hence there should be no domain IDs in use. 
+ */ + if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap)) + > NUM_RESERVED_DID)) + return; if (iommu->gcmd & DMA_GCMD_TE) iommu_disable_translation(iommu); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index bf5b937848b4..20c54e50f533 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -39,6 +39,7 @@ * only and pass-through transfer modes. */ #define FLPT_DEFAULT_DID 1 +#define NUM_RESERVED_DID 2 /* * The SUPERVISOR_MODE flag indicates a first level translation which -- cgit v1.2.3 From 8ac0b64b9735ae896d200dd952c22999742b4414 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:54 +0800 Subject: iommu/vt-d: Use pci_get_domain_bus_and_slot() in pgtable_walk() Use pci_get_domain_bus_and_slot() instead of searching the global list to retrieve the pci device pointer. This also removes the global device_domain_list as there isn't any consumer anymore. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 34 ++++++---------------------------- drivers/iommu/intel/iommu.h | 1 - 2 files changed, 6 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 43aaec5bdd84..77915d61f7ec 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -131,8 +131,6 @@ static struct intel_iommu **g_iommus; static void __init check_tylersburg_isoch(void); static int rwbf_quirk; -static inline struct device_domain_info * -dmar_search_domain_by_dev_info(int segment, int bus, int devfn); /* * set to 1 to panic kernel if can't successfully enable VT-d @@ -315,8 +313,6 @@ static int iommu_skip_te_disable; #define IDENTMAP_AZALIA 4 static DEFINE_SPINLOCK(device_domain_lock); -static LIST_HEAD(device_domain_list); - const struct iommu_ops intel_iommu_ops; static bool translation_pre_enabled(struct intel_iommu *iommu) @@ -846,9 +842,14 @@ static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u struct device_domain_info *info; struct dma_pte *parent, *pte; struct dmar_domain *domain; + struct pci_dev *pdev; int offset, level; - info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); + pdev = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn); + if (!pdev) + return; + + info = dev_iommu_priv_get(&pdev->dev); if (!info || !info->domain) { pr_info("device [%02x:%02x.%d] not probed\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -2357,19 +2358,6 @@ static void domain_remove_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags); } -static inline struct device_domain_info * -dmar_search_domain_by_dev_info(int segment, int bus, int devfn) -{ - struct device_domain_info *info; - - list_for_each_entry(info, &device_domain_list, global) - if (info->segment == segment && info->bus == bus && - info->devfn == devfn) - return info; - - return NULL; -} - static int domain_setup_first_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, @@ -4573,7 +4561,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) struct pci_dev *pdev = dev_is_pci(dev) ? 
to_pci_dev(dev) : NULL; struct device_domain_info *info; struct intel_iommu *iommu; - unsigned long flags; u8 bus, devfn; iommu = device_to_iommu(dev, &bus, &devfn); @@ -4616,10 +4603,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } } - spin_lock_irqsave(&device_domain_lock, flags); - list_add(&info->global, &device_domain_list); dev_iommu_priv_set(dev, info); - spin_unlock_irqrestore(&device_domain_lock, flags); return &iommu->iommu; } @@ -4627,15 +4611,9 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); - unsigned long flags; dmar_remove_one_dev_info(dev); - - spin_lock_irqsave(&device_domain_lock, flags); dev_iommu_priv_set(dev, NULL); - list_del(&info->global); - spin_unlock_irqrestore(&device_domain_lock, flags); - kfree(info); set_dma_ops(dev, NULL); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 8deb745d8b36..198c6c822ef4 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -609,7 +609,6 @@ struct intel_iommu { /* PCI domain-device relationship */ struct device_domain_info { struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ -- cgit v1.2.3 From 2e1c8dafb8c2c459baa5aef5b338c15edd9b9b5b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:55 +0800 Subject: iommu/vt-d: Unnecessary spinlock for root table alloc and free The IOMMU root table is allocated and freed in the IOMMU initialization code in static boot or hot-remove paths. There's no need for a spinlock. 
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 77915d61f7ec..ff49c9460ede 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -809,14 +809,12 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) static void free_context_table(struct intel_iommu *iommu) { - int i; - unsigned long flags; struct context_entry *context; + int i; + + if (!iommu->root_entry) + return; - spin_lock_irqsave(&iommu->lock, flags); - if (!iommu->root_entry) { - goto out; - } for (i = 0; i < ROOT_ENTRY_NR; i++) { context = iommu_context_addr(iommu, i, 0, 0); if (context) @@ -828,12 +826,10 @@ static void free_context_table(struct intel_iommu *iommu) context = iommu_context_addr(iommu, i, 0x80, 0); if (context) free_pgtable_page(context); - } + free_pgtable_page(iommu->root_entry); iommu->root_entry = NULL; -out: - spin_unlock_irqrestore(&iommu->lock, flags); } #ifdef CONFIG_DMAR_DEBUG @@ -1232,7 +1228,6 @@ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - unsigned long flags; root = (struct root_entry *)alloc_pgtable_page(iommu->node); if (!root) { @@ -1242,10 +1237,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) } __iommu_flush_cache(iommu, root, ROOT_SIZE); - - spin_lock_irqsave(&iommu->lock, flags); iommu->root_entry = root; - spin_unlock_irqrestore(&iommu->lock, flags); return 0; } -- cgit v1.2.3 From ffd5869d935303b3e8dedf0d6fbe202ace66c653 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:56 +0800 Subject: iommu/vt-d: Replace spin_lock_irqsave() with spin_lock() The iommu->lock is used to protect changes in root/context/pasid tables and domain ID allocation. There's no use case to change these resources in any interrupt context. Therefore, it is unnecessary to disable the interrupts when the spinlock is held. The same thing happens on the device_domain_lock side, which protects the device domain attachment information. This replaces spin_lock/unlock_irqsave/irqrestore() calls with the normal spin_lock/unlock(). 
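As a minimal sketch of the rule being applied here (hypothetical lock and counter, not code from this driver): the _irqsave variants only matter when the same lock can also be taken from interrupt context, so a lock that is only ever acquired in process context can use plain spin_lock():

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(stats_lock);	/* hypothetical; never taken from IRQ context */
    static unsigned long stats_counter;

    static void stats_inc(void)
    {
    	/* Process context only, so there is nothing to gain from irqsave. */
    	spin_lock(&stats_lock);
    	stats_counter++;
    	spin_unlock(&stats_lock);
    }

    /*
     * Only if an interrupt handler could also take stats_lock would the
     * irqsave form be required to avoid self-deadlock:
     *
     *	unsigned long flags;
     *
     *	spin_lock_irqsave(&stats_lock, flags);
     *	stats_counter++;
     *	spin_unlock_irqrestore(&stats_lock, flags);
     */

Beyond being shorter, the plain form avoids needlessly disabling interrupts on the local CPU for the duration of the critical section.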
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/debugfs.c | 6 ++-- drivers/iommu/intel/iommu.c | 66 ++++++++++++++++++------------------------- drivers/iommu/intel/svm.c | 6 ++-- 3 files changed, 32 insertions(+), 46 deletions(-) diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index 6e1a3f88abc8..1f925285104e 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -263,10 +263,9 @@ static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 bus) static void root_tbl_walk(struct seq_file *m, struct intel_iommu *iommu) { - unsigned long flags; u16 bus; - spin_lock_irqsave(&iommu->lock, flags); + spin_lock(&iommu->lock); seq_printf(m, "IOMMU %s: Root Table Address: 0x%llx\n", iommu->name, (u64)virt_to_phys(iommu->root_entry)); seq_puts(m, "B.D.F\tRoot_entry\t\t\t\tContext_entry\t\t\t\tPASID\tPASID_table_entry\n"); @@ -278,8 +277,7 @@ static void root_tbl_walk(struct seq_file *m, struct intel_iommu *iommu) */ for (bus = 0; bus < 256; bus++) ctx_tbl_walk(m, iommu, bus); - - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); } static int dmar_translation_struct_show(struct seq_file *m, void *unused) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index ff49c9460ede..93f01082dce1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -797,13 +797,12 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) { struct context_entry *context; int ret = 0; - unsigned long flags; - spin_lock_irqsave(&iommu->lock, flags); + spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); if (context) ret = context_present(context); - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); return ret; } @@ -1508,17 +1507,15 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, static void iommu_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, unsigned mask) { - unsigned long flags; struct device_domain_info *info; if (!domain->has_iotlb_device) return; - spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&device_domain_lock); list_for_each_entry(info, &domain->devices, link) __iommu_flush_dev_iotlb(info, addr, mask); - - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock(&device_domain_lock); } static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, @@ -1917,7 +1914,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int translation = CONTEXT_TT_MULTI_LEVEL; struct device_domain_info *info = NULL; struct context_entry *context; - unsigned long flags; int ret; WARN_ON(did == 0); @@ -1930,7 +1926,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, BUG_ON(!domain->pgd); - spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&device_domain_lock); spin_lock(&iommu->lock); ret = -ENOMEM; @@ -2052,7 +2048,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, out_unlock: spin_unlock(&iommu->lock); - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock(&device_domain_lock); return ret; } @@ -2296,16 +2292,15 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 { struct intel_iommu *iommu = info->iommu; struct context_entry *context; - unsigned long flags; u16 did_old; if (!iommu) return; - spin_lock_irqsave(&iommu->lock, flags); + 
spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); if (!context) { - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); return; } @@ -2320,7 +2315,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); iommu->flush.flush_context(iommu, did_old, (((u16)bus) << 8) | devfn, @@ -2342,12 +2337,11 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 static void domain_remove_dev_info(struct dmar_domain *domain) { struct device_domain_info *info, *tmp; - unsigned long flags; - spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&device_domain_lock); list_for_each_entry_safe(info, tmp, &domain->devices, link) __dmar_remove_one_dev_info(info); - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock(&device_domain_lock); } static int domain_setup_first_level(struct intel_iommu *iommu, @@ -2469,7 +2463,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu; - unsigned long flags; u8 bus, devfn; int ret; @@ -2477,17 +2470,17 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) if (!iommu) return -ENODEV; - spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&device_domain_lock); info->domain = domain; spin_lock(&iommu->lock); ret = domain_attach_iommu(domain, iommu); spin_unlock(&iommu->lock); if (ret) { - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock(&device_domain_lock); return ret; } list_add(&info->link, &domain->devices); - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock(&device_domain_lock); /* PASID table is mandatory for a PCI device in scalable mode. 
*/ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { @@ -2499,7 +2492,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) } /* Setup the PASID entry for requests without PASID: */ - spin_lock_irqsave(&iommu->lock, flags); + spin_lock(&iommu->lock); if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, dev, PASID_RID2PASID); @@ -2509,7 +2502,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) else ret = intel_pasid_setup_second_level(iommu, domain, dev, PASID_RID2PASID); - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); dmar_remove_one_dev_info(dev); @@ -2777,7 +2770,6 @@ static int copy_translation_tables(struct intel_iommu *iommu) struct root_entry *old_rt; phys_addr_t old_rt_phys; int ctxt_table_entries; - unsigned long flags; u64 rtaddr_reg; int bus, ret; bool new_ext, ext; @@ -2820,7 +2812,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) } } - spin_lock_irqsave(&iommu->lock, flags); + spin_lock(&iommu->lock); /* Context tables are copied, now write them to the root_entry table */ for (bus = 0; bus < 256; bus++) { @@ -2839,7 +2831,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) iommu->root_entry[bus].hi = val; } - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); kfree(ctxt_tbls); @@ -4166,7 +4158,6 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) { struct dmar_domain *domain; struct intel_iommu *iommu; - unsigned long flags; assert_spin_locked(&device_domain_lock); @@ -4188,21 +4179,20 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) list_del(&info->link); - spin_lock_irqsave(&iommu->lock, flags); + spin_lock(&iommu->lock); domain_detach_iommu(domain, iommu); - spin_unlock_irqrestore(&iommu->lock, flags); + spin_unlock(&iommu->lock); } static void dmar_remove_one_dev_info(struct device *dev) { struct device_domain_info *info; - unsigned long flags; - spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&device_domain_lock); info = dev_iommu_priv_get(dev); if (info) __dmar_remove_one_dev_info(info); - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock(&device_domain_lock); } static int md_domain_init(struct dmar_domain *domain, int guest_width) @@ -4518,20 +4508,19 @@ static void domain_set_force_snooping(struct dmar_domain *domain) static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); - unsigned long flags; if (dmar_domain->force_snooping) return true; - spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&device_domain_lock); if (!domain_support_force_snooping(dmar_domain)) { - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock(&device_domain_lock); return false; } domain_set_force_snooping(dmar_domain); dmar_domain->force_snooping = true; - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock(&device_domain_lock); return true; } @@ -4678,7 +4667,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct context_entry *context; struct dmar_domain *domain; - unsigned long flags; u64 ctx_lo; int ret; @@ -4686,7 +4674,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) if (!domain) return -EINVAL; - 
spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&device_domain_lock); spin_lock(&iommu->lock); ret = -EINVAL; @@ -4718,7 +4706,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) out: spin_unlock(&iommu->lock); - spin_unlock_irqrestore(&device_domain_lock, flags); + spin_unlock(&device_domain_lock); return ret; } diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 580713aa9e07..82288a50660d 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -328,9 +328,9 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, unsigned int flags) { struct device_domain_info *info = dev_iommu_priv_get(dev); - unsigned long iflags, sflags; struct intel_svm_dev *sdev; struct intel_svm *svm; + unsigned long sflags; int ret = 0; svm = pasid_private_find(mm->pasid); @@ -394,10 +394,10 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? PASID_FLAG_SUPERVISOR_MODE : 0; sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; - spin_lock_irqsave(&iommu->lock, iflags); + spin_lock(&iommu->lock); ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, FLPT_DEFAULT_DID, sflags); - spin_unlock_irqrestore(&iommu->lock, iflags); + spin_unlock(&iommu->lock); if (ret) goto free_sdev; -- cgit v1.2.3 From 2c3262f9e881ae403b6d92a7e4824531cdb63829 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:57 +0800 Subject: iommu/vt-d: Acquiring lock in domain ID allocation helpers The iommu->lock is used to protect the per-IOMMU domain ID resource. Moving the lock into the ID alloc/free helpers makes the code more compact. At the same time, the device_domain_lock is irrelevant to the domain ID resource, remove its assertion as well. 
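A minimal sketch of the resulting pattern, using a hypothetical bitmap-backed ID pool rather than the driver's actual domain_attach_iommu()/domain_detach_iommu(): the helpers take the lock themselves, so callers need no locking discipline of their own:

    #include <linux/bitmap.h>
    #include <linux/errno.h>
    #include <linux/spinlock.h>

    #define POOL_MAX_IDS	64			/* hypothetical pool size */

    static DEFINE_SPINLOCK(pool_lock);		/* protects pool_ids and nothing else */
    static DECLARE_BITMAP(pool_ids, POOL_MAX_IDS);

    static int pool_id_alloc(void)		/* self-contained: takes the lock itself */
    {
    	int id;

    	spin_lock(&pool_lock);
    	id = find_first_zero_bit(pool_ids, POOL_MAX_IDS);
    	if (id >= POOL_MAX_IDS) {
    		spin_unlock(&pool_lock);
    		return -ENOSPC;
    	}
    	set_bit(id, pool_ids);
    	spin_unlock(&pool_lock);

    	return id;
    }

    static void pool_id_free(int id)
    {
    	spin_lock(&pool_lock);
    	clear_bit(id, pool_ids);
    	spin_unlock(&pool_lock);
    }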
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 93f01082dce1..7f03576e72d7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1779,16 +1779,13 @@ static struct dmar_domain *alloc_domain(unsigned int type) return domain; } -/* Must be called with iommu->lock */ static int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) { unsigned long ndomains; - int num; - - assert_spin_locked(&device_domain_lock); - assert_spin_locked(&iommu->lock); + int num, ret = 0; + spin_lock(&iommu->lock); domain->iommu_refcnt[iommu->seq_id] += 1; if (domain->iommu_refcnt[iommu->seq_id] == 1) { ndomains = cap_ndoms(iommu->cap); @@ -1797,7 +1794,8 @@ static int domain_attach_iommu(struct dmar_domain *domain, if (num >= ndomains) { pr_err("%s: No free domain ids\n", iommu->name); domain->iommu_refcnt[iommu->seq_id] -= 1; - return -ENOSPC; + ret = -ENOSPC; + goto out_unlock; } set_bit(num, iommu->domain_ids); @@ -1806,7 +1804,9 @@ static int domain_attach_iommu(struct dmar_domain *domain, domain_update_iommu_cap(domain); } - return 0; +out_unlock: + spin_unlock(&iommu->lock); + return ret; } static void domain_detach_iommu(struct dmar_domain *domain, @@ -1814,9 +1814,7 @@ static void domain_detach_iommu(struct dmar_domain *domain, { int num; - assert_spin_locked(&device_domain_lock); - assert_spin_locked(&iommu->lock); - + spin_lock(&iommu->lock); domain->iommu_refcnt[iommu->seq_id] -= 1; if (domain->iommu_refcnt[iommu->seq_id] == 0) { num = domain->iommu_did[iommu->seq_id]; @@ -1824,6 +1822,7 @@ static void domain_detach_iommu(struct dmar_domain *domain, domain_update_iommu_cap(domain); domain->iommu_did[iommu->seq_id] = 0; } + spin_unlock(&iommu->lock); } static inline int guestwidth_to_adjustwidth(int gaw) @@ -2472,9 +2471,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) spin_lock(&device_domain_lock); info->domain = domain; - spin_lock(&iommu->lock); ret = domain_attach_iommu(domain, iommu); - spin_unlock(&iommu->lock); if (ret) { spin_unlock(&device_domain_lock); return ret; @@ -4178,10 +4175,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) } list_del(&info->link); - - spin_lock(&iommu->lock); domain_detach_iommu(domain, iommu); - spin_unlock(&iommu->lock); } static void dmar_remove_one_dev_info(struct device *dev) -- cgit v1.2.3 From 8430fd3f3287013cb3f04b465c0686c41c86a8c8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:58 +0800 Subject: iommu/vt-d: Acquiring lock in pasid manipulation helpers The iommu->lock is used to protect the per-IOMMU pasid directory table and pasid table. Move the spinlock acquisition/release into the helpers to make the code self-contained. 
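In outline, each setup helper now does the entry lookup, the present check and the population inside a single iommu->lock section, so a concurrent caller sees either a free entry or a fully formed one. A sketch of that shape with a hypothetical entry type (not the actual pasid table code):

    #include <linux/errno.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct entry {			/* hypothetical stand-in for a pasid entry */
    	u64 val;			/* bit 0 plays the role of the present bit */
    };

    static int setup_entry(spinlock_t *lock, struct entry *e, u64 newval)
    {
    	spin_lock(lock);
    	if (!e) {			/* the lookup may have failed */
    		spin_unlock(lock);
    		return -ENODEV;
    	}
    	if (e->val & 1) {		/* already present: refuse to clobber it */
    		spin_unlock(lock);
    		return -EBUSY;
    	}
    	e->val = newval | 1;		/* populate and mark present under the lock */
    	spin_unlock(lock);

    	return 0;
    }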
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 2 - drivers/iommu/intel/pasid.c | 103 +++++++++++++++++++++++--------------------- drivers/iommu/intel/svm.c | 3 -- 3 files changed, 55 insertions(+), 53 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7f03576e72d7..3d53de8c7634 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2489,7 +2489,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) } /* Setup the PASID entry for requests without PASID: */ - spin_lock(&iommu->lock); if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, dev, PASID_RID2PASID); @@ -2499,7 +2498,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) else ret = intel_pasid_setup_second_level(iommu, domain, dev, PASID_RID2PASID); - spin_unlock(&iommu->lock); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); dmar_remove_one_dev_info(dev); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 43f090381ec7..7792a1b2ebc4 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -450,17 +450,17 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, struct pasid_entry *pte; u16 did, pgtt; + spin_lock(&iommu->lock); pte = intel_pasid_get_entry(dev, pasid); - if (WARN_ON(!pte)) - return; - - if (!pasid_pte_is_present(pte)) + if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) { + spin_unlock(&iommu->lock); return; + } did = pasid_get_domain_id(pte); pgtt = pasid_pte_get_pgtt(pte); - intel_pasid_clear_entry(dev, pasid, fault_ignore); + spin_unlock(&iommu->lock); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte)); @@ -496,22 +496,6 @@ static void pasid_flush_caches(struct intel_iommu *iommu, } } -static inline int pasid_enable_wpe(struct pasid_entry *pte) -{ -#ifdef CONFIG_X86 - unsigned long cr0 = read_cr0(); - - /* CR0.WP is normally set but just to be sure */ - if (unlikely(!(cr0 & X86_CR0_WP))) { - pr_err_ratelimited("No CPU write protect!\n"); - return -EINVAL; - } -#endif - pasid_set_wpe(pte); - - return 0; -}; - /* * Set up the scalable mode pasid table entry for first only * translation type. @@ -528,39 +512,52 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, return -EINVAL; } - pte = intel_pasid_get_entry(dev, pasid); - if (WARN_ON(!pte)) + if (flags & PASID_FLAG_SUPERVISOR_MODE) { +#ifdef CONFIG_X86 + unsigned long cr0 = read_cr0(); + + /* CR0.WP is normally set but just to be sure */ + if (unlikely(!(cr0 & X86_CR0_WP))) { + pr_err("No CPU write protect!\n"); + return -EINVAL; + } +#endif + if (!ecap_srs(iommu->ecap)) { + pr_err("No supervisor request support on %s\n", + iommu->name); + return -EINVAL; + } + } + + if ((flags & PASID_FLAG_FL5LP) && !cap_5lp_support(iommu->cap)) { + pr_err("No 5-level paging support for first-level on %s\n", + iommu->name); return -EINVAL; + } - /* Caller must ensure PASID entry is not in use. 
*/ - if (pasid_pte_is_present(pte)) + spin_lock(&iommu->lock); + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + spin_unlock(&iommu->lock); + return -ENODEV; + } + + if (pasid_pte_is_present(pte)) { + spin_unlock(&iommu->lock); return -EBUSY; + } pasid_clear_entry(pte); /* Setup the first level page table pointer: */ pasid_set_flptr(pte, (u64)__pa(pgd)); if (flags & PASID_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap)) { - pr_err("No supervisor request support on %s\n", - iommu->name); - return -EINVAL; - } pasid_set_sre(pte); - if (pasid_enable_wpe(pte)) - return -EINVAL; - + pasid_set_wpe(pte); } - if (flags & PASID_FLAG_FL5LP) { - if (cap_5lp_support(iommu->cap)) { - pasid_set_flpm(pte, 1); - } else { - pr_err("No 5-level paging support for first-level\n"); - pasid_clear_entry(pte); - return -EINVAL; - } - } + if (flags & PASID_FLAG_FL5LP) + pasid_set_flpm(pte, 1); if (flags & PASID_FLAG_PAGE_SNOOP) pasid_set_pgsnp(pte); @@ -572,6 +569,8 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, /* Setup Present and PASID Granular Transfer Type: */ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY); pasid_set_present(pte); + spin_unlock(&iommu->lock); + pasid_flush_caches(iommu, pte, pasid, did); return 0; @@ -629,15 +628,17 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, pgd_val = virt_to_phys(pgd); did = domain->iommu_did[iommu->seq_id]; + spin_lock(&iommu->lock); pte = intel_pasid_get_entry(dev, pasid); if (!pte) { - dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid); + spin_unlock(&iommu->lock); return -ENODEV; } - /* Caller must ensure PASID entry is not in use. */ - if (pasid_pte_is_present(pte)) + if (pasid_pte_is_present(pte)) { + spin_unlock(&iommu->lock); return -EBUSY; + } pasid_clear_entry(pte); pasid_set_domain_id(pte, did); @@ -654,6 +655,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, if (pasid != PASID_RID2PASID) pasid_set_sre(pte); pasid_set_present(pte); + spin_unlock(&iommu->lock); + pasid_flush_caches(iommu, pte, pasid, did); return 0; @@ -669,15 +672,17 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, u16 did = FLPT_DEFAULT_DID; struct pasid_entry *pte; + spin_lock(&iommu->lock); pte = intel_pasid_get_entry(dev, pasid); if (!pte) { - dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid); + spin_unlock(&iommu->lock); return -ENODEV; } - /* Caller must ensure PASID entry is not in use. */ - if (pasid_pte_is_present(pte)) + if (pasid_pte_is_present(pte)) { + spin_unlock(&iommu->lock); return -EBUSY; + } pasid_clear_entry(pte); pasid_set_domain_id(pte, did); @@ -692,6 +697,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, */ pasid_set_sre(pte); pasid_set_present(pte); + spin_unlock(&iommu->lock); + pasid_flush_caches(iommu, pte, pasid, did); return 0; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 82288a50660d..64072e628bbd 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -394,11 +394,8 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? PASID_FLAG_SUPERVISOR_MODE : 0; sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? 
PASID_FLAG_FL5LP : 0; - spin_lock(&iommu->lock); ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, FLPT_DEFAULT_DID, sflags); - spin_unlock(&iommu->lock); - if (ret) goto free_sdev; -- cgit v1.2.3 From 79d82ce4027f30c4d40fdf377888203f76b7ddcc Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:08:59 +0800 Subject: iommu/vt-d: Check device list of domain in domain free path When the IOMMU domain is about to be freed, it should not be set on any device. Instead of silently dealing with some bug cases, it's better to trigger a warning to report and fix any potential bugs at the first time. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 3d53de8c7634..a94fb69e1f9a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -294,7 +294,6 @@ static LIST_HEAD(dmar_satc_units); /* bitmap for indexing intel_iommus */ static int g_num_of_iommus; -static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); @@ -1841,10 +1840,6 @@ static inline int guestwidth_to_adjustwidth(int gaw) static void domain_exit(struct dmar_domain *domain) { - - /* Remove associated devices and clear attached or cached domains */ - domain_remove_dev_info(domain); - if (domain->pgd) { LIST_HEAD(freelist); @@ -1852,6 +1847,9 @@ static void domain_exit(struct dmar_domain *domain) put_pages_list(&freelist); } + if (WARN_ON(!list_empty(&domain->devices))) + return; + kfree(domain); } @@ -2333,16 +2331,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH); } -static void domain_remove_dev_info(struct dmar_domain *domain) -{ - struct device_domain_info *info, *tmp; - - spin_lock(&device_domain_lock); - list_for_each_entry_safe(info, tmp, &domain->devices, link) - __dmar_remove_one_dev_info(info); - spin_unlock(&device_domain_lock); -} - static int domain_setup_first_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, -- cgit v1.2.3 From db75c9573b08e28109cf7b4ceb62b5f11154742e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:09:00 +0800 Subject: iommu/vt-d: Fold __dmar_remove_one_dev_info() into its caller Fold __dmar_remove_one_dev_info() into dmar_remove_one_dev_info() which is its only caller. Make the spin lock critical range only cover the device list change code and remove some unnecessary checks. 
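The shape this produces, sketched with hypothetical names: per-device teardown runs unlocked, and the lock covers only the list unlink:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct dev_info {			/* hypothetical stand-in */
    	struct list_head link;
    };

    static DEFINE_SPINLOCK(list_lock);

    static void remove_dev(struct dev_info *info)
    {
    	/* Per-device teardown touches no shared state: run it unlocked. */

    	/* Hold the lock only for the shared-list manipulation itself. */
    	spin_lock(&list_lock);
    	list_del(&info->link);
    	spin_unlock(&list_lock);

    	/* Detaching from the domain also happens outside the lock. */
    }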
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-10-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a94fb69e1f9a..ccddf634fae9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -295,7 +295,6 @@ static LIST_HEAD(dmar_satc_units); static int g_num_of_iommus; static void dmar_remove_one_dev_info(struct device *dev); -static void __dmar_remove_one_dev_info(struct device_domain_info *info); int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); @@ -4137,20 +4136,12 @@ static void domain_context_clear(struct device_domain_info *info) &domain_context_clear_one_cb, info); } -static void __dmar_remove_one_dev_info(struct device_domain_info *info) +static void dmar_remove_one_dev_info(struct device *dev) { - struct dmar_domain *domain; - struct intel_iommu *iommu; - - assert_spin_locked(&device_domain_lock); - - if (WARN_ON(!info)) - return; - - iommu = info->iommu; - domain = info->domain; + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; - if (info->dev && !dev_is_real_dma_subdevice(info->dev)) { + if (!dev_is_real_dma_subdevice(info->dev)) { if (dev_is_pci(info->dev) && sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, info->dev, PASID_RID2PASID, false); @@ -4160,19 +4151,12 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) intel_pasid_free_table(info->dev); } - list_del(&info->link); - domain_detach_iommu(domain, iommu); -} - -static void dmar_remove_one_dev_info(struct device *dev) -{ - struct device_domain_info *info; - spin_lock(&device_domain_lock); - info = dev_iommu_priv_get(dev); - if (info) - __dmar_remove_one_dev_info(info); + list_del(&info->link); spin_unlock(&device_domain_lock); + + domain_detach_iommu(info->domain, iommu); + info->domain = NULL; } static int md_domain_init(struct dmar_domain *domain, int guest_width) -- cgit v1.2.3 From 969aaefbaaf2b444852c1980ac0cbbb66463fdae Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:09:01 +0800 Subject: iommu/vt-d: Use device_domain_lock accurately The device_domain_lock is used to protect the device tracking list of a domain. Remove unnecessary spin_lock/unlock()'s and move the necessary ones around the list access. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-11-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 57 +++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index ccddf634fae9..ae42a9e43997 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -535,15 +535,8 @@ static int domain_update_device_node(struct dmar_domain *domain) struct device_domain_info *info; int nid = NUMA_NO_NODE; - assert_spin_locked(&device_domain_lock); - - if (list_empty(&domain->devices)) - return NUMA_NO_NODE; - + spin_lock(&device_domain_lock); list_for_each_entry(info, &domain->devices, link) { - if (!info->dev) - continue; - /* * There could possibly be multiple device numa nodes as devices * within the same domain may sit behind different IOMMUs. 
There @@ -554,6 +547,7 @@ static int domain_update_device_node(struct dmar_domain *domain) if (nid != NUMA_NO_NODE) break; } + spin_unlock(&device_domain_lock); return nid; } @@ -1376,23 +1370,23 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, } static struct device_domain_info * -iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, - u8 bus, u8 devfn) +iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu, + u8 bus, u8 devfn) { struct device_domain_info *info; - assert_spin_locked(&device_domain_lock); - if (!iommu->qi) return NULL; - list_for_each_entry(info, &domain->devices, link) + spin_lock(&device_domain_lock); + list_for_each_entry(info, &domain->devices, link) { if (info->iommu == iommu && info->bus == bus && info->devfn == devfn) { - if (info->ats_supported && info->dev) - return info; - break; + spin_unlock(&device_domain_lock); + return info->ats_supported ? info : NULL; } + } + spin_unlock(&device_domain_lock); return NULL; } @@ -1402,23 +1396,21 @@ static void domain_update_iotlb(struct dmar_domain *domain) struct device_domain_info *info; bool has_iotlb_device = false; - assert_spin_locked(&device_domain_lock); - - list_for_each_entry(info, &domain->devices, link) + spin_lock(&device_domain_lock); + list_for_each_entry(info, &domain->devices, link) { if (info->ats_enabled) { has_iotlb_device = true; break; } - + } domain->has_iotlb_device = has_iotlb_device; + spin_unlock(&device_domain_lock); } static void iommu_enable_dev_iotlb(struct device_domain_info *info) { struct pci_dev *pdev; - assert_spin_locked(&device_domain_lock); - if (!info || !dev_is_pci(info->dev)) return; @@ -1464,8 +1456,6 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info) { struct pci_dev *pdev; - assert_spin_locked(&device_domain_lock); - if (!dev_is_pci(info->dev)) return; @@ -1906,9 +1896,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct pasid_table *table, u8 bus, u8 devfn) { + struct device_domain_info *info = + iommu_support_dev_iotlb(domain, iommu, bus, devfn); u16 did = domain->iommu_did[iommu->seq_id]; int translation = CONTEXT_TT_MULTI_LEVEL; - struct device_domain_info *info = NULL; struct context_entry *context; int ret; @@ -1922,9 +1913,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, BUG_ON(!domain->pgd); - spin_lock(&device_domain_lock); spin_lock(&iommu->lock); - ret = -ENOMEM; context = iommu_context_addr(iommu, bus, devfn, 1); if (!context) @@ -1975,7 +1964,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, * Setup the Device-TLB enable bit and Page request * Enable bit: */ - info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); if (info && info->ats_supported) context_set_sm_dte(context); if (info && info->pri_supported) @@ -1998,7 +1986,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, goto out_unlock; } - info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); if (info && info->ats_supported) translation = CONTEXT_TT_DEV_IOTLB; else @@ -2044,7 +2031,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, out_unlock: spin_unlock(&iommu->lock); - spin_unlock(&device_domain_lock); return ret; } @@ -2456,13 +2442,11 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) if (!iommu) return -ENODEV; - spin_lock(&device_domain_lock); - info->domain = domain; ret = domain_attach_iommu(domain, iommu); - if (ret) { - spin_unlock(&device_domain_lock); + if (ret) 
return ret; - } + info->domain = domain; + spin_lock(&device_domain_lock); list_add(&info->link, &domain->devices); spin_unlock(&device_domain_lock); @@ -4638,9 +4622,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) if (!domain) return -EINVAL; - spin_lock(&device_domain_lock); spin_lock(&iommu->lock); - ret = -EINVAL; if (!info->pasid_supported) goto out; @@ -4670,7 +4652,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) out: spin_unlock(&iommu->lock); - spin_unlock(&device_domain_lock); return ret; } -- cgit v1.2.3 From 5eaafdf0c05577f9b6cf8c64dee7d39ccdafa63a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:09:02 +0800 Subject: iommu/vt-d: Convert global spinlock into per domain lock Using a global device_domain_lock spinlock to protect per-domain device tracking lists is an inefficient way, especially considering this lock is also needed in the hot paths. This optimizes the locking mechanism by converting the global lock to per domain lock. On the other hand, as the device tracking lists are never accessed in any interrupt context, there is no need to disable interrupts while spinning. Replace irqsave variant with spinlock calls. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220706025524.2904370-12-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 42 +++++++++++++++++++++--------------------- drivers/iommu/intel/iommu.h | 1 + 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index ae42a9e43997..769e850b9767 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -310,7 +310,6 @@ static int iommu_skip_te_disable; #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -static DEFINE_SPINLOCK(device_domain_lock); const struct iommu_ops intel_iommu_ops; static bool translation_pre_enabled(struct intel_iommu *iommu) @@ -535,7 +534,7 @@ static int domain_update_device_node(struct dmar_domain *domain) struct device_domain_info *info; int nid = NUMA_NO_NODE; - spin_lock(&device_domain_lock); + spin_lock(&domain->lock); list_for_each_entry(info, &domain->devices, link) { /* * There could possibly be multiple device numa nodes as devices @@ -547,7 +546,7 @@ static int domain_update_device_node(struct dmar_domain *domain) if (nid != NUMA_NO_NODE) break; } - spin_unlock(&device_domain_lock); + spin_unlock(&domain->lock); return nid; } @@ -1378,15 +1377,15 @@ iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu, if (!iommu->qi) return NULL; - spin_lock(&device_domain_lock); + spin_lock(&domain->lock); list_for_each_entry(info, &domain->devices, link) { if (info->iommu == iommu && info->bus == bus && info->devfn == devfn) { - spin_unlock(&device_domain_lock); + spin_unlock(&domain->lock); return info->ats_supported ? 
info : NULL; } } - spin_unlock(&device_domain_lock); + spin_unlock(&domain->lock); return NULL; } @@ -1396,7 +1395,7 @@ static void domain_update_iotlb(struct dmar_domain *domain) struct device_domain_info *info; bool has_iotlb_device = false; - spin_lock(&device_domain_lock); + spin_lock(&domain->lock); list_for_each_entry(info, &domain->devices, link) { if (info->ats_enabled) { has_iotlb_device = true; @@ -1404,7 +1403,7 @@ static void domain_update_iotlb(struct dmar_domain *domain) } } domain->has_iotlb_device = has_iotlb_device; - spin_unlock(&device_domain_lock); + spin_unlock(&domain->lock); } static void iommu_enable_dev_iotlb(struct device_domain_info *info) @@ -1500,10 +1499,10 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, if (!domain->has_iotlb_device) return; - spin_lock(&device_domain_lock); + spin_lock(&domain->lock); list_for_each_entry(info, &domain->devices, link) __iommu_flush_dev_iotlb(info, addr, mask); - spin_unlock(&device_domain_lock); + spin_unlock(&domain->lock); } static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, @@ -1763,6 +1762,7 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); + spin_lock_init(&domain->lock); return domain; } @@ -2446,9 +2446,9 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) if (ret) return ret; info->domain = domain; - spin_lock(&device_domain_lock); + spin_lock(&domain->lock); list_add(&info->link, &domain->devices); - spin_unlock(&device_domain_lock); + spin_unlock(&domain->lock); /* PASID table is mandatory for a PCI device in scalable mode. */ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { @@ -4123,6 +4123,7 @@ static void domain_context_clear(struct device_domain_info *info) static void dmar_remove_one_dev_info(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *domain = info->domain; struct intel_iommu *iommu = info->iommu; if (!dev_is_real_dma_subdevice(info->dev)) { @@ -4135,11 +4136,11 @@ static void dmar_remove_one_dev_info(struct device *dev) intel_pasid_free_table(info->dev); } - spin_lock(&device_domain_lock); + spin_lock(&domain->lock); list_del(&info->link); - spin_unlock(&device_domain_lock); + spin_unlock(&domain->lock); - domain_detach_iommu(info->domain, iommu); + domain_detach_iommu(domain, iommu); info->domain = NULL; } @@ -4422,7 +4423,7 @@ static bool domain_support_force_snooping(struct dmar_domain *domain) struct device_domain_info *info; bool support = true; - assert_spin_locked(&device_domain_lock); + assert_spin_locked(&domain->lock); list_for_each_entry(info, &domain->devices, link) { if (!ecap_sc_support(info->iommu->ecap)) { support = false; @@ -4437,8 +4438,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain) { struct device_domain_info *info; - assert_spin_locked(&device_domain_lock); - + assert_spin_locked(&domain->lock); /* * Second level page table supports per-PTE snoop control. The * iommu_map() interface will handle this by setting SNP bit. 
@@ -4460,15 +4460,15 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) if (dmar_domain->force_snooping) return true; - spin_lock(&device_domain_lock); + spin_lock(&dmar_domain->lock); if (!domain_support_force_snooping(dmar_domain)) { - spin_unlock(&device_domain_lock); + spin_unlock(&dmar_domain->lock); return false; } domain_set_force_snooping(dmar_domain); dmar_domain->force_snooping = true; - spin_unlock(&device_domain_lock); + spin_unlock(&dmar_domain->lock); return true; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 198c6c822ef4..df64d3d9c49a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -541,6 +541,7 @@ struct dmar_domain { u8 force_snooping : 1; /* Create IOPTEs with snoop control */ u8 set_pte_snp:1; + spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ struct dma_pte *pgd; /* virtual address */ -- cgit v1.2.3 From c3f27c834ae56c315a1fc47d71b027e2c400f4de Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:09:03 +0800 Subject: iommu/vt-d: Remove unused domain_get_iommu() It is not used anywhere. Remove it to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220702015610.2849494-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 18 ------------------ drivers/iommu/intel/iommu.h | 1 - 2 files changed, 19 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 769e850b9767..55a055a8ed4b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -445,24 +445,6 @@ int iommu_calculate_agaw(struct intel_iommu *iommu) return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); } -/* This functionin only returns single iommu in a domain */ -struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) -{ - int iommu_id; - - /* si_domain and vm domain should not get here. */ - if (WARN_ON(!iommu_is_dma_domain(&domain->domain))) - return NULL; - - for_each_domain_iommu(iommu_id, domain) - break; - - if (iommu_id < 0 || iommu_id >= g_num_of_iommus) - return NULL; - - return g_iommus[iommu_id]; -} - static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu) { return sm_supported(iommu) ? diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index df64d3d9c49a..56e0d8cd2102 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -725,7 +725,6 @@ extern int dmar_ir_support(void); void *alloc_pgtable_page(int node); void free_pgtable_page(void *vaddr); -struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); void iommu_flush_write_buffer(struct intel_iommu *iommu); int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); -- cgit v1.2.3 From 913432f217c843a69ff9d11a6474a7982033087b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:09:04 +0800 Subject: iommu/vt-d: Use IDA interface to manage iommu sequence id Switch dmar unit sequence id allocation and release from bitmap to IDA interface. 
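For reference, a usage sketch of the IDA interface adopted here (a standalone example, not the patch itself): ida_alloc_range() returns the lowest free ID in the requested range or a negative errno, ida_free() releases it, and the IDA does its own internal locking:

    #include <linux/idr.h>

    static DEFINE_IDA(example_ids);

    static int example_get_id(void)
    {
    	/* Lowest free ID in [0, 63], or a negative errno such as -ENOSPC. */
    	return ida_alloc_range(&example_ids, 0, 63, GFP_KERNEL);
    }

    static void example_put_id(int id)
    {
    	ida_free(&example_ids, id);
    }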
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220702015610.2849494-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 2a5e0f91e647..6327b34f5aa7 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -60,7 +60,7 @@ LIST_HEAD(dmar_drhd_units); struct acpi_table_header * __initdata dmar_tbl; static int dmar_dev_scope_status = 1; -static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)]; +static DEFINE_IDA(dmar_seq_ids); static int alloc_iommu(struct dmar_drhd_unit *drhd); static void free_iommu(struct intel_iommu *iommu); @@ -1023,28 +1023,6 @@ out: return err; } -static int dmar_alloc_seq_id(struct intel_iommu *iommu) -{ - iommu->seq_id = find_first_zero_bit(dmar_seq_ids, - DMAR_UNITS_SUPPORTED); - if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) { - iommu->seq_id = -1; - } else { - set_bit(iommu->seq_id, dmar_seq_ids); - sprintf(iommu->name, "dmar%d", iommu->seq_id); - } - - return iommu->seq_id; -} - -static void dmar_free_seq_id(struct intel_iommu *iommu) -{ - if (iommu->seq_id >= 0) { - clear_bit(iommu->seq_id, dmar_seq_ids); - iommu->seq_id = -1; - } -} - static int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; @@ -1062,11 +1040,14 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) if (!iommu) return -ENOMEM; - if (dmar_alloc_seq_id(iommu) < 0) { + iommu->seq_id = ida_alloc_range(&dmar_seq_ids, 0, + DMAR_UNITS_SUPPORTED - 1, GFP_KERNEL); + if (iommu->seq_id < 0) { pr_err("Failed to allocate seq_id\n"); - err = -ENOSPC; + err = iommu->seq_id; goto error; } + sprintf(iommu->name, "dmar%d", iommu->seq_id); err = map_iommu(iommu, drhd->reg_base_addr); if (err) { @@ -1150,7 +1131,7 @@ err_sysfs: err_unmap: unmap_iommu(iommu); error_free_seq_id: - dmar_free_seq_id(iommu); + ida_free(&dmar_seq_ids, iommu->seq_id); error: kfree(iommu); return err; @@ -1183,7 +1164,7 @@ static void free_iommu(struct intel_iommu *iommu) if (iommu->reg) unmap_iommu(iommu); - dmar_free_seq_id(iommu); + ida_free(&dmar_seq_ids, iommu->seq_id); kfree(iommu); } -- cgit v1.2.3 From ba949f4cd4c39c587e9b722ac7eb7f7e8a42dace Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:09:05 +0800 Subject: iommu/vt-d: Refactor iommu information of each domain When a DMA domain is attached to a device, it needs to allocate a domain ID from its IOMMU. Currently, the domain ID information is stored in two static arrays embedded in the domain structure. This can lead to memory waste when the driver is running on a small platform. Replace the static arrays with an xarray so that the per-IOMMU information is allocated on demand.
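A condensed sketch of the on-demand attach pattern introduced below (locking and allocation-failure handling trimmed; the diff has the full version):

	struct iommu_domain_info *info = xa_load(&domain->iommu_array, iommu->seq_id);

	if (info) {
		info->refcnt++;		/* this IOMMU is already tracked: take a reference */
	} else {
		info = kzalloc(sizeof(*info), GFP_KERNEL);
		info->refcnt = 1;	/* plus info->iommu, info->did, ... */
		/* xa_cmpxchg() returns the old entry; NULL means our store won */
		if (xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
			       NULL, info, GFP_ATOMIC))
			kfree(info);	/* lost a race (or got an xa_err()): drop our copy */
	}

Readers then walk the attached IOMMUs with xa_for_each() instead of the old for_each_domain_iommu() bitmap scan.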
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220702015610.2849494-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 123 +++++++++++++++++++++++++------------------- drivers/iommu/intel/iommu.h | 29 +++++++---- drivers/iommu/intel/pasid.c | 2 +- drivers/iommu/intel/svm.c | 2 +- 4 files changed, 93 insertions(+), 63 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 55a055a8ed4b..36b3649a83e8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -254,10 +254,6 @@ static inline void context_clear_entry(struct context_entry *context) static struct dmar_domain *si_domain; static int hw_pass_through = 1; -#define for_each_domain_iommu(idx, domain) \ - for (idx = 0; idx < g_num_of_iommus; idx++) \ - if (domain->iommu_refcnt[idx]) - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -453,16 +449,16 @@ static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu) static void domain_update_iommu_coherency(struct dmar_domain *domain) { + struct iommu_domain_info *info; struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; bool found = false; - int i; + unsigned long i; domain->iommu_coherency = true; - - for_each_domain_iommu(i, domain) { + xa_for_each(&domain->iommu_array, i, info) { found = true; - if (!iommu_paging_structure_coherency(g_iommus[i])) { + if (!iommu_paging_structure_coherency(info->iommu)) { domain->iommu_coherency = false; break; } @@ -1495,7 +1491,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, unsigned int aligned_pages = __roundup_pow_of_two(pages); unsigned int mask = ilog2(aligned_pages); uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; - u16 did = domain->iommu_did[iommu->seq_id]; + u16 did = domain_id_iommu(domain, iommu); BUG_ON(pages == 0); @@ -1565,11 +1561,12 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu, static void intel_flush_iotlb_all(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); - int idx; + struct iommu_domain_info *info; + unsigned long idx; - for_each_domain_iommu(idx, dmar_domain) { - struct intel_iommu *iommu = g_iommus[idx]; - u16 did = dmar_domain->iommu_did[iommu->seq_id]; + xa_for_each(&dmar_domain->iommu_array, idx, info) { + struct intel_iommu *iommu = info->iommu; + u16 did = domain_id_iommu(dmar_domain, iommu); if (domain_use_first_level(dmar_domain)) qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); @@ -1745,6 +1742,7 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); spin_lock_init(&domain->lock); + xa_init(&domain->iommu_array); return domain; } @@ -1752,45 +1750,66 @@ static struct dmar_domain *alloc_domain(unsigned int type) static int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) { + struct iommu_domain_info *info, *curr; unsigned long ndomains; - int num, ret = 0; + int num, ret = -ENOSPC; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; spin_lock(&iommu->lock); - domain->iommu_refcnt[iommu->seq_id] += 1; - if (domain->iommu_refcnt[iommu->seq_id] == 1) { - ndomains = cap_ndoms(iommu->cap); - num = find_first_zero_bit(iommu->domain_ids, ndomains); - - if (num >= ndomains) { - pr_err("%s: No free domain ids\n", iommu->name); - domain->iommu_refcnt[iommu->seq_id] -= 1; - ret = 
-ENOSPC; - goto out_unlock; - } + curr = xa_load(&domain->iommu_array, iommu->seq_id); + if (curr) { + curr->refcnt++; + spin_unlock(&iommu->lock); + kfree(info); + return 0; + } - set_bit(num, iommu->domain_ids); - domain->iommu_did[iommu->seq_id] = num; - domain->nid = iommu->node; - domain_update_iommu_cap(domain); + ndomains = cap_ndoms(iommu->cap); + num = find_first_zero_bit(iommu->domain_ids, ndomains); + if (num >= ndomains) { + pr_err("%s: No free domain ids\n", iommu->name); + goto err_unlock; } -out_unlock: + set_bit(num, iommu->domain_ids); + info->refcnt = 1; + info->did = num; + info->iommu = iommu; + curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id, + NULL, info, GFP_ATOMIC); + if (curr) { + ret = xa_err(curr) ? : -EBUSY; + goto err_clear; + } + domain_update_iommu_cap(domain); + spin_unlock(&iommu->lock); + return 0; + +err_clear: + clear_bit(info->did, iommu->domain_ids); +err_unlock: + spin_unlock(&iommu->lock); + kfree(info); return ret; } static void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) { - int num; + struct iommu_domain_info *info; spin_lock(&iommu->lock); - domain->iommu_refcnt[iommu->seq_id] -= 1; - if (domain->iommu_refcnt[iommu->seq_id] == 0) { - num = domain->iommu_did[iommu->seq_id]; - clear_bit(num, iommu->domain_ids); + info = xa_load(&domain->iommu_array, iommu->seq_id); + if (--info->refcnt == 0) { + clear_bit(info->did, iommu->domain_ids); + xa_erase(&domain->iommu_array, iommu->seq_id); + domain->nid = NUMA_NO_NODE; domain_update_iommu_cap(domain); - domain->iommu_did[iommu->seq_id] = 0; + kfree(info); } spin_unlock(&iommu->lock); } @@ -1880,7 +1899,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, { struct device_domain_info *info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); - u16 did = domain->iommu_did[iommu->seq_id]; + u16 did = domain_id_iommu(domain, iommu); int translation = CONTEXT_TT_MULTI_LEVEL; struct context_entry *context; int ret; @@ -2129,8 +2148,9 @@ static void switch_to_super_page(struct dmar_domain *domain, unsigned long end_pfn, int level) { unsigned long lvl_pages = lvl_to_nr_pages(level); + struct iommu_domain_info *info; struct dma_pte *pte = NULL; - int i; + unsigned long i; while (start_pfn <= end_pfn) { if (!pte) @@ -2141,8 +2161,8 @@ static void switch_to_super_page(struct dmar_domain *domain, start_pfn + lvl_pages - 1, level + 1); - for_each_domain_iommu(i, domain) - iommu_flush_iotlb_psi(g_iommus[i], domain, + xa_for_each(&domain->iommu_array, i, info) + iommu_flush_iotlb_psi(info->iommu, domain, start_pfn, lvl_pages, 0, 0); } @@ -2272,7 +2292,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 if (hw_pass_through && domain_type_is_si(info->domain)) did_old = FLPT_DEFAULT_DID; else - did_old = info->domain->iommu_did[iommu->seq_id]; + did_old = domain_id_iommu(info->domain, iommu); } else { did_old = context_domain_id(context); } @@ -2330,7 +2350,7 @@ static int domain_setup_first_level(struct intel_iommu *iommu, flags |= PASID_FLAG_PAGE_SNOOP; return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid, - domain->iommu_did[iommu->seq_id], + domain_id_iommu(domain, iommu), flags); } @@ -4368,15 +4388,16 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, struct dmar_domain *dmar_domain = to_dmar_domain(domain); unsigned long iova_pfn = IOVA_PFN(gather->start); size_t size = gather->end - gather->start; + struct iommu_domain_info *info; unsigned long start_pfn; unsigned long nrpages; - int iommu_id; + 
unsigned long i; nrpages = aligned_nrpages(gather->start, size); start_pfn = mm_to_dma_pfn(iova_pfn); - for_each_domain_iommu(iommu_id, dmar_domain) - iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain, + xa_for_each(&dmar_domain->iommu_array, i, info) + iommu_flush_iotlb_psi(info->iommu, dmar_domain, start_pfn, nrpages, list_empty(&gather->freelist), 0); @@ -4620,7 +4641,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, - domain->iommu_did[iommu->seq_id], + domain_id_iommu(domain, iommu), PCI_DEVID(info->bus, info->devfn), DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); @@ -4757,13 +4778,11 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, struct dmar_domain *dmar_domain = to_dmar_domain(domain); unsigned long pages = aligned_nrpages(iova, size); unsigned long pfn = iova >> VTD_PAGE_SHIFT; - struct intel_iommu *iommu; - int iommu_id; + struct iommu_domain_info *info; + unsigned long i; - for_each_domain_iommu(iommu_id, dmar_domain) { - iommu = g_iommus[iommu_id]; - __mapping_notify_one(iommu, dmar_domain, pfn, pages); - } + xa_for_each(&dmar_domain->iommu_array, i, info) + __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); } const struct iommu_ops intel_iommu_ops = { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 56e0d8cd2102..fae45bbb0c7f 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -524,17 +525,17 @@ struct context_entry { */ #define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1) -struct dmar_domain { - int nid; /* node id */ - - unsigned int iommu_refcnt[DMAR_UNITS_SUPPORTED]; - /* Refcount of devices per iommu */ - - - u16 iommu_did[DMAR_UNITS_SUPPORTED]; - /* Domain ids per IOMMU. Use u16 since +struct iommu_domain_info { + struct intel_iommu *iommu; + unsigned int refcnt; /* Refcount of devices per iommu */ + u16 did; /* Domain ids per IOMMU. 
Use u16 since * domain ids are 16 bit wide according * to VT-d spec, section 9.3 */ +}; + +struct dmar_domain { + int nid; /* node id */ + struct xarray iommu_array; /* Attached IOMMU array */ u8 has_iotlb_device: 1; u8 iommu_coherency: 1; /* indicate coherency of iommu access */ @@ -640,6 +641,16 @@ static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) return container_of(dom, struct dmar_domain, domain); } +/* Retrieve the domain ID which has allocated to the domain */ +static inline u16 +domain_id_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) +{ + struct iommu_domain_info *info = + xa_load(&domain->iommu_array, iommu->seq_id); + + return info->did; +} + /* * 0: readable * 1: writable diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 7792a1b2ebc4..c5e7e8b020a5 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -626,7 +626,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, } pgd_val = virt_to_phys(pgd); - did = domain->iommu_did[iommu->seq_id]; + did = domain_id_iommu(domain, iommu); spin_lock(&iommu->lock); pte = intel_pasid_get_entry(dev, pasid); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 64072e628bbd..8bcfb93dda56 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -541,7 +541,7 @@ static void intel_svm_drain_prq(struct device *dev, u32 pasid) domain = info->domain; pdev = to_pci_dev(dev); sid = PCI_DEVID(info->bus, info->devfn); - did = domain->iommu_did[iommu->seq_id]; + did = domain_id_iommu(domain, iommu); qdep = pci_ats_queue_depth(pdev); /* -- cgit v1.2.3 From 97a79de99a4083056184bd4790723fe64c3fba66 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:09:06 +0800 Subject: iommu/vt-d: Remove unnecessary check in intel_iommu_add() The Intel IOMMU hot-add process starts from dmar_device_hotplug(). It uses the global dmar_global_lock to synchronize all the hot-add and hot-remove paths. In the hot-add path, the new IOMMU data structures are allocated first by dmar_parse_one_drhd() and then initialized by dmar_hp_add_drhd(). All the IOMMU units are allocated and initialized in the same synchronized path. There is no case where any IOMMU unit is created and then initialized multiple times. This removes the unnecessary check in intel_iommu_add(), which was the last place the global IOMMU array was referenced. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220702015610.2849494-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 36b3649a83e8..2f64ff6e6e75 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3460,9 +3460,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) int sp, ret; struct intel_iommu *iommu = dmaru->iommu; - if (g_iommus[iommu->seq_id]) - return 0; - ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu); if (ret) goto out; -- cgit v1.2.3 From bdb46d175872af56d3bffe361f21be2b87784039 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:09:07 +0800 Subject: iommu/vt-d: Remove global g_iommus array The g_iommus and g_num_of_iommus variables are not used anywhere. Remove them to avoid dead code.
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Steve Wahl Link: https://lore.kernel.org/r/20220702015610.2849494-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 44 -------------------------------------------- 1 file changed, 44 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2f64ff6e6e75..3ba0d7e76efd 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -126,9 +126,6 @@ static inline unsigned long virt_to_dma_pfn(void *p) return page_to_dma_pfn(virt_to_page(p)); } -/* global iommu list, set NULL for ignored DMAR units */ -static struct intel_iommu **g_iommus; - static void __init check_tylersburg_isoch(void); static int rwbf_quirk; @@ -287,9 +284,6 @@ static LIST_HEAD(dmar_satc_units); #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) -/* bitmap for indexing intel_iommus */ -static int g_num_of_iommus; - static void dmar_remove_one_dev_info(struct device *dev); int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); @@ -1694,8 +1688,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu) iommu->domain_ids = NULL; } - g_iommus[iommu->seq_id] = NULL; - /* free context mapping */ free_context_table(iommu); @@ -2898,36 +2890,6 @@ static int __init init_dmars(void) struct intel_iommu *iommu; int ret; - /* - * for each drhd - * allocate root - * initialize and program root entry to not present - * endfor - */ - for_each_drhd_unit(drhd) { - /* - * lock not needed as this is only incremented in the single - * threaded kernel __init code path all other access are read - * only - */ - if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) { - g_num_of_iommus++; - continue; - } - pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED); - } - - /* Preallocate enough resources for IOMMU hot-addition */ - if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) - g_num_of_iommus = DMAR_UNITS_SUPPORTED; - - g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), - GFP_KERNEL); - if (!g_iommus) { - ret = -ENOMEM; - goto error; - } - ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL); if (ret) goto free_iommu; @@ -2950,8 +2912,6 @@ static int __init init_dmars(void) intel_pasid_max_id); } - g_iommus[iommu->seq_id] = iommu; - intel_iommu_init_qi(iommu); ret = iommu_init_domains(iommu); @@ -3077,9 +3037,6 @@ free_iommu: free_dmar_iommu(iommu); } - kfree(g_iommus); - -error: return ret; } @@ -3483,7 +3440,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (iommu->gcmd & DMA_GCMD_TE) iommu_disable_translation(iommu); - g_iommus[iommu->seq_id] = iommu; ret = iommu_init_domains(iommu); if (ret == 0) ret = iommu_alloc_root_entry(iommu); -- cgit v1.2.3 From 25357900f4e67094dd09b7dcb8a5f47c3f5a159d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 12 Jul 2022 08:09:08 +0800 Subject: iommu/vt-d: Make DMAR_UNITS_SUPPORTED default 1024 If the available hardware exceeds DMAR_UNITS_SUPPORTED (previously set to MAX_IO_APICS, or 128), it causes these messages: "DMAR: Failed to allocate seq_id", "DMAR: Parse DMAR table failure.", and "x2apic: IRQ remapping doesn't support X2APIC mode x2apic disabled"; and the system fails to boot properly. To support up to 64 sockets with 10 DMAR units each (640), make the value of DMAR_UNITS_SUPPORTED default 1024. 
Signed-off-by: Steve Wahl Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Steve Wahl Link: https://lore.kernel.org/linux-iommu/20220615183650.32075-1-steve.wahl@hpe.com/ Link: https://lore.kernel.org/r/20220702015610.2849494-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index cbd714a198a0..d81a51978d01 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -18,11 +18,7 @@ struct acpi_dmar_header; -#ifdef CONFIG_X86 -# define DMAR_UNITS_SUPPORTED MAX_IO_APICS -#else -# define DMAR_UNITS_SUPPORTED 64 -#endif +#define DMAR_UNITS_SUPPORTED 1024 /* DMAR Flags */ #define DMAR_INTR_REMAP 0x1 -- cgit v1.2.3 From 3168010d2ab445a65338aec9714dbad845a9469f Mon Sep 17 00:00:00 2001 From: Nícolas F. R. A. Prado Date: Tue, 12 Jul 2022 17:44:27 -0400 Subject: iommu/mediatek: Log with dev_err_probe when failing to parse dts mtk_iommu_mm_dts_parse() can fail with EPROBE_DEFER if not all larbs have probed yet, so use dev_err_probe() to avoid logging as an error in that case. Also drop the return value from the message since it's already printed by dev_err_probe(), and add the missing newline at the end. Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220712214427.544860-1-nfraprado@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index a1c1a3a8e0a7..17597740cd47 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1216,7 +1216,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { ret = mtk_iommu_mm_dts_parse(dev, &match, data); if (ret) { - dev_err(dev, "mm dts parse fail(%d).", ret); + dev_err_probe(dev, ret, "mm dts parse fail\n"); goto out_runtime_disable; } } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) { -- cgit v1.2.3 From bc0d9af21f463b1705b35ea3f8a7266ac333fc14 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Thu, 14 Jul 2022 19:55:45 +0300 Subject: iommu/exynos: Reuse SysMMU constants for page size and order Using SZ_4K in the context of the SysMMU driver is better than using PAGE_SIZE, as PAGE_SIZE might have a different value on different platforms. Though it would be even better to use the more specific constants that already exist in the SysMMU driver. Make the code more strict by using the SPAGE_ORDER and SPAGE_SIZE constants. This also makes sense because __sysmmu_tlb_invalidate_entry() uses SPAGE_* constants for further calculations with the num_inv parameter, so num_inv should be calculated from SPAGE_* values in the first place.
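For context, a sketch of how these constants relate (values as conventionally defined in exynos-iommu.c, shown here for illustration):

	#define SPAGE_ORDER	12			/* small page: 4 KiB */
	#define SPAGE_SIZE	(1 << SPAGE_ORDER)	/* == SZ_4K */

	/* hence: pgd / SZ_4K == pgd >> SPAGE_ORDER, and size / SZ_4K == size / SPAGE_SIZE */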
Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Acked-by: Marek Szyprowski Link: https://lore.kernel.org/r/20220714165550.8884-2-semen.protsenko@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 9c060505a46e..e3527ae5d96f 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -340,7 +340,7 @@ static void __sysmmu_set_ptbase(struct sysmmu_drvdata *data, phys_addr_t pgd) if (MMU_MAJ_VER(data->version) < 5) writel(pgd, data->sfrbase + REG_PT_BASE_ADDR); else - writel(pgd / SZ_4K, data->sfrbase + REG_V5_PT_BASE_PFN); + writel(pgd >> SPAGE_ORDER, data->sfrbase + REG_V5_PT_BASE_PFN); __sysmmu_tlb_invalidate(data); } @@ -550,7 +550,7 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, * 64KB page can be one of 16 consecutive sets. */ if (MMU_MAJ_VER(data->version) == 2) - num_inv = min_t(unsigned int, size / SZ_4K, 64); + num_inv = min_t(unsigned int, size / SPAGE_SIZE, 64); if (sysmmu_block(data)) { __sysmmu_tlb_invalidate_entry(data, iova, num_inv); -- cgit v1.2.3 From fce398d2d02c0a9a2bedf7c7201b123e153e8963 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Thu, 14 Jul 2022 19:55:46 +0300 Subject: iommu/exynos: Handle failed IOMMU device registration properly If iommu_device_register() fails in exynos_sysmmu_probe(), the previous calls have to be cleaned up. In this case, iommu_device_sysfs_add() should be undone by calling its remove counterpart, iommu_device_sysfs_remove(). Fixes: d2c302b6e8b1 ("iommu/exynos: Make use of iommu_device_register interface") Signed-off-by: Sam Protsenko Reviewed-by: Krzysztof Kozlowski Acked-by: Marek Szyprowski Link: https://lore.kernel.org/r/20220714165550.8884-3-semen.protsenko@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index e3527ae5d96f..7058f08e8301 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -629,7 +629,7 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev); if (ret) - return ret; + goto err_iommu_register; platform_set_drvdata(pdev, data); @@ -656,6 +656,10 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) pm_runtime_enable(dev); return 0; + +err_iommu_register: + iommu_device_sysfs_remove(&data->iommu); + return ret; } static int __maybe_unused exynos_sysmmu_suspend(struct device *dev) -- cgit v1.2.3 From 5f26ad58be8c0ca4dc8b13788a35e71b42cb3b4e Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Thu, 14 Jul 2022 19:55:47 +0300 Subject: iommu/exynos: Set correct dma mask for SysMMU v5+ SysMMU v5+ supports a 36-bit physical address space. Set the corresponding DMA mask to avoid falling back to SWIOTLB usage in dma_map_single() because of a failed dma_capable() check. The original code for this fix was suggested by Marek.
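For reference, DMA_BIT_MASK() (from include/linux/dma-mapping.h) builds the mask being set:

	#define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

	/* DMA_BIT_MASK(36) == 0xfffffffff: the device may address up to 64 GiB. */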
Signed-off-by: Sam Protsenko Co-developed-by: Marek Szyprowski Signed-off-by: Marek Szyprowski Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220714165550.8884-4-semen.protsenko@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 7058f08e8301..ca3656e18f25 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -646,6 +646,14 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) } } + if (MMU_MAJ_VER(data->version) >= 5) { + ret = dma_set_mask(dev, DMA_BIT_MASK(36)); + if (ret) { + dev_err(dev, "Unable to set DMA mask: %d\n", ret); + goto err_dma_set_mask; + } + } + /* * use the first registered sysmmu device for performing * dma mapping operations on iommu page tables (cpu cache flush) @@ -657,6 +665,8 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) return 0; +err_dma_set_mask: + iommu_device_unregister(&data->iommu); err_iommu_register: iommu_device_sysfs_remove(&data->iommu); return ret; -- cgit v1.2.3 From 2125afbed8418b212c336509ddd5458aac01744d Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Thu, 14 Jul 2022 19:55:48 +0300 Subject: iommu/exynos: Abstract non-common registers on different variants At the moment the driver supports SysMMU v1..v5 versions. SysMMU v5 has different register layout than SysMMU v1..v3. Instead of checking the version each time before reading/writing the registers, let's create corresponding register structure for each SysMMU version and set the needed structure on init, checking the SysMMU version one single time. This way is faster and more elegant. No behavior changes from the user's point of view, it's only a refactoring patch. Signed-off-by: Sam Protsenko Acked-by: Marek Szyprowski Link: https://lore.kernel.org/r/20220714165550.8884-5-semen.protsenko@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 100 ++++++++++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 40 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index ca3656e18f25..5bd1558d6bf3 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -148,26 +148,12 @@ static u32 lv2ent_offset(sysmmu_iova_t iova) #define MAKE_MMU_VER(maj, min) ((((maj) & 0xF) << 7) | ((min) & 0x7F)) /* v1.x - v3.x registers */ -#define REG_MMU_FLUSH 0x00C -#define REG_MMU_FLUSH_ENTRY 0x010 -#define REG_PT_BASE_ADDR 0x014 -#define REG_INT_STATUS 0x018 -#define REG_INT_CLEAR 0x01C - #define REG_PAGE_FAULT_ADDR 0x024 #define REG_AW_FAULT_ADDR 0x028 #define REG_AR_FAULT_ADDR 0x02C #define REG_DEFAULT_SLAVE_ADDR 0x030 /* v5.x registers */ -#define REG_V5_PT_BASE_PFN 0x00C -#define REG_V5_MMU_FLUSH_ALL 0x010 -#define REG_V5_MMU_FLUSH_ENTRY 0x014 -#define REG_V5_MMU_FLUSH_RANGE 0x018 -#define REG_V5_MMU_FLUSH_START 0x020 -#define REG_V5_MMU_FLUSH_END 0x024 -#define REG_V5_INT_STATUS 0x060 -#define REG_V5_INT_CLEAR 0x064 #define REG_V5_FAULT_AR_VA 0x070 #define REG_V5_FAULT_AW_VA 0x080 @@ -250,6 +236,21 @@ struct exynos_iommu_domain { struct iommu_domain domain; /* generic domain data structure */ }; +/* + * SysMMU version specific data. Contains offsets for the registers which can + * be found in different SysMMU variants, but have different offset values. 
+ */ +struct sysmmu_variant { + u32 pt_base; /* page table base address (physical) */ + u32 flush_all; /* invalidate all TLB entries */ + u32 flush_entry; /* invalidate specific TLB entry */ + u32 flush_range; /* invalidate TLB entries in specified range */ + u32 flush_start; /* start address of range invalidation */ + u32 flush_end; /* end address of range invalidation */ + u32 int_status; /* interrupt status information */ + u32 int_clear; /* clear the interrupt */ +}; + /* * This structure hold all data of a single SYSMMU controller, this includes * hw resources like registers and clocks, pointers and list nodes to connect @@ -274,6 +275,30 @@ struct sysmmu_drvdata { unsigned int version; /* our version */ struct iommu_device iommu; /* IOMMU core handle */ + const struct sysmmu_variant *variant; /* version specific data */ +}; + +#define SYSMMU_REG(data, reg) ((data)->sfrbase + (data)->variant->reg) + +/* SysMMU v1..v3 */ +static const struct sysmmu_variant sysmmu_v1_variant = { + .flush_all = 0x0c, + .flush_entry = 0x10, + .pt_base = 0x14, + .int_status = 0x18, + .int_clear = 0x1c, +}; + +/* SysMMU v5 */ +static const struct sysmmu_variant sysmmu_v5_variant = { + .pt_base = 0x0c, + .flush_all = 0x10, + .flush_entry = 0x14, + .flush_range = 0x18, + .flush_start = 0x20, + .flush_end = 0x24, + .int_status = 0x60, + .int_clear = 0x64, }; static struct exynos_iommu_domain *to_exynos_domain(struct iommu_domain *dom) @@ -304,10 +329,7 @@ static bool sysmmu_block(struct sysmmu_drvdata *data) static void __sysmmu_tlb_invalidate(struct sysmmu_drvdata *data) { - if (MMU_MAJ_VER(data->version) < 5) - writel(0x1, data->sfrbase + REG_MMU_FLUSH); - else - writel(0x1, data->sfrbase + REG_V5_MMU_FLUSH_ALL); + writel(0x1, SYSMMU_REG(data, flush_all)); } static void __sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, @@ -315,33 +337,30 @@ static void __sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, { unsigned int i; - if (MMU_MAJ_VER(data->version) < 5) { + if (MMU_MAJ_VER(data->version) < 5 || num_inv == 1) { for (i = 0; i < num_inv; i++) { writel((iova & SPAGE_MASK) | 1, - data->sfrbase + REG_MMU_FLUSH_ENTRY); + SYSMMU_REG(data, flush_entry)); iova += SPAGE_SIZE; } } else { - if (num_inv == 1) { - writel((iova & SPAGE_MASK) | 1, - data->sfrbase + REG_V5_MMU_FLUSH_ENTRY); - } else { - writel((iova & SPAGE_MASK), - data->sfrbase + REG_V5_MMU_FLUSH_START); - writel((iova & SPAGE_MASK) + (num_inv - 1) * SPAGE_SIZE, - data->sfrbase + REG_V5_MMU_FLUSH_END); - writel(1, data->sfrbase + REG_V5_MMU_FLUSH_RANGE); - } + writel(iova & SPAGE_MASK, SYSMMU_REG(data, flush_start)); + writel((iova & SPAGE_MASK) + (num_inv - 1) * SPAGE_SIZE, + SYSMMU_REG(data, flush_end)); + writel(0x1, SYSMMU_REG(data, flush_range)); } } static void __sysmmu_set_ptbase(struct sysmmu_drvdata *data, phys_addr_t pgd) { + u32 pt_base; + if (MMU_MAJ_VER(data->version) < 5) - writel(pgd, data->sfrbase + REG_PT_BASE_ADDR); + pt_base = pgd; else - writel(pgd >> SPAGE_ORDER, data->sfrbase + REG_V5_PT_BASE_PFN); + pt_base = pgd >> SPAGE_ORDER; + writel(pt_base, SYSMMU_REG(data, pt_base)); __sysmmu_tlb_invalidate(data); } @@ -378,6 +397,11 @@ static void __sysmmu_get_version(struct sysmmu_drvdata *data) dev_dbg(data->sysmmu, "hardware version: %d.%d\n", MMU_MAJ_VER(data->version), MMU_MIN_VER(data->version)); + if (MMU_MAJ_VER(data->version) < 5) + data->variant = &sysmmu_v1_variant; + else + data->variant = &sysmmu_v5_variant; + __sysmmu_disable_clocks(data); } @@ -405,19 +429,14 @@ static irqreturn_t exynos_sysmmu_irq(int irq, 
void *dev_id) const struct sysmmu_fault_info *finfo; unsigned int i, n, itype; sysmmu_iova_t fault_addr; - unsigned short reg_status, reg_clear; int ret = -ENOSYS; WARN_ON(!data->active); if (MMU_MAJ_VER(data->version) < 5) { - reg_status = REG_INT_STATUS; - reg_clear = REG_INT_CLEAR; finfo = sysmmu_faults; n = ARRAY_SIZE(sysmmu_faults); } else { - reg_status = REG_V5_INT_STATUS; - reg_clear = REG_V5_INT_CLEAR; finfo = sysmmu_v5_faults; n = ARRAY_SIZE(sysmmu_v5_faults); } @@ -426,7 +445,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) clk_enable(data->clk_master); - itype = __ffs(readl(data->sfrbase + reg_status)); + itype = __ffs(readl(SYSMMU_REG(data, int_status))); for (i = 0; i < n; i++, finfo++) if (finfo->bit == itype) break; @@ -443,7 +462,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) /* fault is not recovered by fault handler */ BUG_ON(ret != 0); - writel(1 << itype, data->sfrbase + reg_clear); + writel(1 << itype, SYSMMU_REG(data, int_clear)); sysmmu_unblock(data); @@ -622,6 +641,8 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) data->sysmmu = dev; spin_lock_init(&data->lock); + __sysmmu_get_version(data); + ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL, dev_name(data->sysmmu)); if (ret) @@ -633,7 +654,6 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); - __sysmmu_get_version(data); if (PG_ENT_SHIFT < 0) { if (MMU_MAJ_VER(data->version) < 5) { PG_ENT_SHIFT = SYSMMU_PG_ENT_SHIFT; -- cgit v1.2.3 From 0892c4986b6c9eae90ba56e7541bd020576abf69 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Thu, 14 Jul 2022 19:55:49 +0300 Subject: iommu/exynos: Add SysMMU v7 register set SysMMU v7 might have different register layouts (VM capable or non-VM capable). Virtual Machine registers (if present) implement multiple translation domains. If VM registers are not present, the driver shouldn't try to access those. Check which layout is implemented in current SysMMU module (by reading the capability registers) and prepare the corresponding variant structure for further usage. 
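Condensed, the probe-time selection added by this patch reads roughly as follows (a sketch; see the diff below for the exact code):

	if (MMU_MAJ_VER(data->version) < 5) {
		data->variant = &sysmmu_v1_variant;
	} else if (MMU_MAJ_VER(data->version) < 7) {
		data->variant = &sysmmu_v5_variant;
	} else {
		/* v7: probe CAPA0/CAPA1 to pick the VM or non-VM register layout */
		if (__sysmmu_has_capa1(data))
			__sysmmu_get_vcr(data);
		data->variant = data->has_vcr ? &sysmmu_v7_vm_variant
					      : &sysmmu_v5_variant;
	}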
Signed-off-by: Sam Protsenko Acked-by: Marek Szyprowski Link: https://lore.kernel.org/r/20220714165550.8884-6-semen.protsenko@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 50 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 5bd1558d6bf3..0a773b6a525d 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -135,6 +135,9 @@ static u32 lv2ent_offset(sysmmu_iova_t iova) #define CFG_SYSSEL (1 << 22) /* System MMU 3.2 only */ #define CFG_FLPDCACHE (1 << 20) /* System MMU 3.2+ only */ +#define CAPA0_CAPA1_EXIST BIT(11) +#define CAPA1_VCR_ENABLED BIT(14) + /* common registers */ #define REG_MMU_CTRL 0x000 #define REG_MMU_CFG 0x004 @@ -157,6 +160,10 @@ static u32 lv2ent_offset(sysmmu_iova_t iova) #define REG_V5_FAULT_AR_VA 0x070 #define REG_V5_FAULT_AW_VA 0x080 +/* v7.x registers */ +#define REG_V7_CAPA0 0x870 +#define REG_V7_CAPA1 0x874 + #define has_sysmmu(dev) (dev_iommu_priv_get(dev) != NULL) static struct device *dma_dev; @@ -276,6 +283,9 @@ struct sysmmu_drvdata { struct iommu_device iommu; /* IOMMU core handle */ const struct sysmmu_variant *variant; /* version specific data */ + + /* v7 fields */ + bool has_vcr; /* virtual machine control register */ }; #define SYSMMU_REG(data, reg) ((data)->sfrbase + (data)->variant->reg) @@ -289,7 +299,7 @@ static const struct sysmmu_variant sysmmu_v1_variant = { .int_clear = 0x1c, }; -/* SysMMU v5 */ +/* SysMMU v5 and v7 (non-VM capable) */ static const struct sysmmu_variant sysmmu_v5_variant = { .pt_base = 0x0c, .flush_all = 0x10, @@ -301,6 +311,18 @@ static const struct sysmmu_variant sysmmu_v5_variant = { .int_clear = 0x64, }; +/* SysMMU v7: VM capable register set */ +static const struct sysmmu_variant sysmmu_v7_vm_variant = { + .pt_base = 0x800c, + .flush_all = 0x8010, + .flush_entry = 0x8014, + .flush_range = 0x8018, + .flush_start = 0x8020, + .flush_end = 0x8024, + .int_status = 0x60, + .int_clear = 0x64, +}; + static struct exynos_iommu_domain *to_exynos_domain(struct iommu_domain *dom) { return container_of(dom, struct exynos_iommu_domain, domain); @@ -380,6 +402,20 @@ static void __sysmmu_disable_clocks(struct sysmmu_drvdata *data) clk_disable_unprepare(data->clk_master); } +static bool __sysmmu_has_capa1(struct sysmmu_drvdata *data) +{ + u32 capa0 = readl(data->sfrbase + REG_V7_CAPA0); + + return capa0 & CAPA0_CAPA1_EXIST; +} + +static void __sysmmu_get_vcr(struct sysmmu_drvdata *data) +{ + u32 capa1 = readl(data->sfrbase + REG_V7_CAPA1); + + data->has_vcr = capa1 & CAPA1_VCR_ENABLED; +} + static void __sysmmu_get_version(struct sysmmu_drvdata *data) { u32 ver; @@ -397,10 +433,18 @@ static void __sysmmu_get_version(struct sysmmu_drvdata *data) dev_dbg(data->sysmmu, "hardware version: %d.%d\n", MMU_MAJ_VER(data->version), MMU_MIN_VER(data->version)); - if (MMU_MAJ_VER(data->version) < 5) + if (MMU_MAJ_VER(data->version) < 5) { data->variant = &sysmmu_v1_variant; - else + } else if (MMU_MAJ_VER(data->version) < 7) { data->variant = &sysmmu_v5_variant; + } else { + if (__sysmmu_has_capa1(data)) + __sysmmu_get_vcr(data); + if (data->has_vcr) + data->variant = &sysmmu_v7_vm_variant; + else + data->variant = &sysmmu_v5_variant; + } __sysmmu_disable_clocks(data); } -- cgit v1.2.3 From 7fee5d6f41b4a3ceadf0f8ca48cc78c754d3515b Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Thu, 14 Jul 2022 19:55:50 +0300 Subject: iommu/exynos: Enable default VM instance on SysMMU v7 In order 
to enable SysMMU v7 with VM register layout, at least the default VM instance (n=0) must be enabled, in addition to enabling the SysMMU itself. To do so, add corresponding write to MMU_CTRL_VM[0] register, before writing to MMU_CTRL register. Signed-off-by: Sam Protsenko Acked-by: Marek Szyprowski Link: https://lore.kernel.org/r/20220714165550.8884-7-semen.protsenko@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 0a773b6a525d..768594d107ad 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -135,6 +135,8 @@ static u32 lv2ent_offset(sysmmu_iova_t iova) #define CFG_SYSSEL (1 << 22) /* System MMU 3.2 only */ #define CFG_FLPDCACHE (1 << 20) /* System MMU 3.2+ only */ +#define CTRL_VM_ENABLE BIT(0) +#define CTRL_VM_FAULT_MODE_STALL BIT(3) #define CAPA0_CAPA1_EXIST BIT(11) #define CAPA1_VCR_ENABLED BIT(14) @@ -163,6 +165,7 @@ static u32 lv2ent_offset(sysmmu_iova_t iova) /* v7.x registers */ #define REG_V7_CAPA0 0x870 #define REG_V7_CAPA1 0x874 +#define REG_V7_CTRL_VM 0x8000 #define has_sysmmu(dev) (dev_iommu_priv_get(dev) != NULL) @@ -548,6 +551,18 @@ static void __sysmmu_init_config(struct sysmmu_drvdata *data) writel(cfg, data->sfrbase + REG_MMU_CFG); } +static void __sysmmu_enable_vid(struct sysmmu_drvdata *data) +{ + u32 ctrl; + + if (MMU_MAJ_VER(data->version) < 7 || !data->has_vcr) + return; + + ctrl = readl(data->sfrbase + REG_V7_CTRL_VM); + ctrl |= CTRL_VM_ENABLE | CTRL_VM_FAULT_MODE_STALL; + writel(ctrl, data->sfrbase + REG_V7_CTRL_VM); +} + static void __sysmmu_enable(struct sysmmu_drvdata *data) { unsigned long flags; @@ -558,6 +573,7 @@ static void __sysmmu_enable(struct sysmmu_drvdata *data) writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL); __sysmmu_init_config(data); __sysmmu_set_ptbase(data, data->pgtable); + __sysmmu_enable_vid(data); writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL); data->active = true; spin_unlock_irqrestore(&data->lock, flags); -- cgit v1.2.3 From 60b51e3e333b9986721d77a465c41577c226eb3b Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 13 Jul 2022 17:56:43 -0500 Subject: iommu/amd: Change macro for IOMMU control register bit shift to decimal value There is no functional change. 
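These CONTROL_* values are bit positions rather than masks, so the shift to 64-bit happens at the point of use. A sketch modeled on the driver's enable helper (simplified; assumes MMIO_CONTROL_OFFSET from amd_iommu_types.h):

	static void example_feature_enable(struct amd_iommu *iommu, u8 bit)
	{
		u64 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);

		ctrl |= 1ULL << bit;	/* e.g. bit = CONTROL_GA_EN (17) */
		writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
	}

Because the ULL suffix lives in the shift expression, plain decimal bit numbers are sufficient, including positions above 31 such as CONTROL_XT_EN (50).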
Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220713225651.20758-2-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 42 ++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 40f52d02c5b9..7c830e3c7a91 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -143,27 +143,27 @@ #define EVENT_FLAG_I 0x008 /* feature control bits */ -#define CONTROL_IOMMU_EN 0x00ULL -#define CONTROL_HT_TUN_EN 0x01ULL -#define CONTROL_EVT_LOG_EN 0x02ULL -#define CONTROL_EVT_INT_EN 0x03ULL -#define CONTROL_COMWAIT_EN 0x04ULL -#define CONTROL_INV_TIMEOUT 0x05ULL -#define CONTROL_PASSPW_EN 0x08ULL -#define CONTROL_RESPASSPW_EN 0x09ULL -#define CONTROL_COHERENT_EN 0x0aULL -#define CONTROL_ISOC_EN 0x0bULL -#define CONTROL_CMDBUF_EN 0x0cULL -#define CONTROL_PPRLOG_EN 0x0dULL -#define CONTROL_PPRINT_EN 0x0eULL -#define CONTROL_PPR_EN 0x0fULL -#define CONTROL_GT_EN 0x10ULL -#define CONTROL_GA_EN 0x11ULL -#define CONTROL_GAM_EN 0x19ULL -#define CONTROL_GALOG_EN 0x1CULL -#define CONTROL_GAINT_EN 0x1DULL -#define CONTROL_XT_EN 0x32ULL -#define CONTROL_INTCAPXT_EN 0x33ULL +#define CONTROL_IOMMU_EN 0 +#define CONTROL_HT_TUN_EN 1 +#define CONTROL_EVT_LOG_EN 2 +#define CONTROL_EVT_INT_EN 3 +#define CONTROL_COMWAIT_EN 4 +#define CONTROL_INV_TIMEOUT 5 +#define CONTROL_PASSPW_EN 8 +#define CONTROL_RESPASSPW_EN 9 +#define CONTROL_COHERENT_EN 10 +#define CONTROL_ISOC_EN 11 +#define CONTROL_CMDBUF_EN 12 +#define CONTROL_PPRLOG_EN 13 +#define CONTROL_PPRINT_EN 14 +#define CONTROL_PPR_EN 15 +#define CONTROL_GT_EN 16 +#define CONTROL_GA_EN 17 +#define CONTROL_GAM_EN 25 +#define CONTROL_GALOG_EN 28 +#define CONTROL_GAINT_EN 29 +#define CONTROL_XT_EN 50 +#define CONTROL_INTCAPXT_EN 51 #define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) #define CTRL_INV_TO_NONE 0 -- cgit v1.2.3 From 1e98a35da49ee7d5b0d101518da51a109a02f238 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 13 Jul 2022 17:56:44 -0500 Subject: iommu/amd: Introduce Support for Extended Feature 2 Register AMD IOMMU spec introduces additional extended feature register in the IVRS IVHD offset 80h (for IVHD type 11h and 40h) and MMIO offset 1A0h. 
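A support bit in the new register would then be tested against features2; a hypothetical helper mirroring the existing iommu_feature() (not part of this patch):

	static inline bool example_iommu_feature2(struct amd_iommu *iommu, u64 mask)
	{
		return !!(iommu->features2 & mask);
	}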
Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220713225651.20758-3-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 4 ++++ drivers/iommu/amd/init.c | 24 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 7c830e3c7a91..3c1205ba636a 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -67,6 +67,7 @@ #define MMIO_INTCAPXT_EVT_OFFSET 0x0170 #define MMIO_INTCAPXT_PPR_OFFSET 0x0178 #define MMIO_INTCAPXT_GALOG_OFFSET 0x0180 +#define MMIO_EXT_FEATURES2 0x01A0 #define MMIO_CMD_HEAD_OFFSET 0x2000 #define MMIO_CMD_TAIL_OFFSET 0x2008 #define MMIO_EVT_HEAD_OFFSET 0x2010 @@ -645,6 +646,9 @@ struct amd_iommu { /* Extended features */ u64 features; + /* Extended features 2 */ + u64 features2; + /* IOMMUv2 */ bool is_iommu_v2; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index a3c4fdd40f04..15a836b4a221 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -114,7 +114,7 @@ struct ivhd_header { /* Following only valid on IVHD type 11h and 40h */ u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ - u64 res; + u64 efr_reg2; } __attribute__((packed)); /* @@ -283,8 +283,10 @@ static bool check_feature_on_all_iommus(u64 mask) static void __init early_iommu_features_init(struct amd_iommu *iommu, struct ivhd_header *h) { - if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) + if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) { iommu->features = h->efr_reg; + iommu->features2 = h->efr_reg2; + } if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP) amdr_ivrs_remap_support = true; } @@ -1912,7 +1914,7 @@ static ssize_t amd_iommu_show_features(struct device *dev, char *buf) { struct amd_iommu *iommu = dev_to_amd_iommu(dev); - return sprintf(buf, "%llx\n", iommu->features); + return sprintf(buf, "%llx:%llx\n", iommu->features2, iommu->features); } static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); @@ -1939,16 +1941,18 @@ static const struct attribute_group *amd_iommu_groups[] = { */ static void __init late_iommu_features_init(struct amd_iommu *iommu) { - u64 features; + u64 features, features2; if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) return; /* read extended feature bits */ features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); + features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); if (!iommu->features) { iommu->features = features; + iommu->features2 = features2; return; } @@ -1956,9 +1960,13 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) * Sanity check and warn if EFR values from * IVHD and MMIO conflict. */ - if (features != iommu->features) - pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n", - features, iommu->features); + if (features != iommu->features || + features2 != iommu->features2) { + pr_warn(FW_WARN + "EFR mismatch. 
Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", + features, iommu->features, + features2, iommu->features2); + } } static int __init iommu_init_pci(struct amd_iommu *iommu) @@ -2083,7 +2091,7 @@ static void print_iommu_info(void) pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("Extended features (%#llx):", iommu->features); + pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2); for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) -- cgit v1.2.3 From 9dd299d8c6cd9cd50fe49c067a82f091df87565f Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 13 Jul 2022 17:56:45 -0500 Subject: iommu/amd: Introduce global variable for storing common EFR and EFR2 Some IOMMU features require that all IOMMUs must support the feature, which is determined by checking the support bit in the Extended Feature Register 1 and 2 (EFR/EFR2) on all IOMMUs. This check is done by the function check_feature_on_all_iommus(), which iterates through all IOMMUs everytime it is called. Instead, introduce a global variable to store common EFR/EFR2 among all IOMMUs. In case of inconsistent EFR/EFR2 masks are detected on an IOMMU, a FW_BUG warning is reported. Suggested-by: Joerg Roedel Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220713225651.20758-4-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 3 +++ drivers/iommu/amd/init.c | 45 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 9b7092182ca7..1b945d47741c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -137,4 +137,7 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { } extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); extern struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); + +extern u64 amd_iommu_efr; +extern u64 amd_iommu_efr2; #endif diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 15a836b4a221..3ab0fc37904a 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -164,6 +164,10 @@ static bool amd_iommu_disabled __initdata; static bool amd_iommu_force_enable __initdata; static int amd_iommu_target_ivhd_type; +/* Global EFR and EFR2 registers */ +u64 amd_iommu_efr; +u64 amd_iommu_efr2; + LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -259,21 +263,46 @@ int amd_iommu_get_num_iommus(void) return amd_iommus_present; } -#ifdef CONFIG_IRQ_REMAP -static bool check_feature_on_all_iommus(u64 mask) +/* + * Iterate through all the IOMMUs to get common EFR + * masks among all IOMMUs and warn if found inconsistency. 
+ */ static void get_global_efr(void) { - bool ret = false; struct amd_iommu *iommu; for_each_iommu(iommu) { - ret = iommu_feature(iommu, mask); - if (!ret) - return false; + u64 tmp = iommu->features; + u64 tmp2 = iommu->features2; + + if (list_is_first(&iommu->list, &amd_iommu_list)) { + amd_iommu_efr = tmp; + amd_iommu_efr2 = tmp2; + continue; + } + + if (amd_iommu_efr == tmp && + amd_iommu_efr2 == tmp2) + continue; + + pr_err(FW_BUG + "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n", + tmp, tmp2, amd_iommu_efr, amd_iommu_efr2, + iommu->index, iommu->pci_seg->id, + PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid), + PCI_FUNC(iommu->devid)); + + amd_iommu_efr &= tmp; + amd_iommu_efr2 &= tmp2; } - return true; + pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2); +} + +static bool check_feature_on_all_iommus(u64 mask) +{ + return !!(amd_iommu_efr & mask); } -#endif /* * For IVHD type 0x11/0x40, EFR is also available via IVHD. -- cgit v1.2.3 From ae180ba42662afe2ed7620bdb863c7c6b61f6f2d Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 13 Jul 2022 17:56:46 -0500 Subject: iommu/amd: Process all IVHDs before enabling IOMMU features The ACPI IVRS table can contain multiple IVHD blocks. Each block contains information used to initialize each IOMMU instance. Currently, init_iommu_all() sequentially processes each IVHD block and initializes the corresponding IOMMU instance one by one. However, certain features require all IOMMUs to be configured in the same way system-wide. In case certain IVHD blocks contain inconsistent information (most likely FW bugs), the driver needs to go through and try to revert settings on IOMMUs that have already been configured. A solution is to split IOMMU initialization into 3 phases, as sketched below: Phase 1: Process the IVRS table information for all IOMMU instances. This allows all IVHDs to be processed prior to enabling features. Phase 2: Early feature support check on all IOMMUs (using information in the IVHD blocks). Phase 3: Iterate through all IOMMU instances and enable features.
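Schematically, the reworked init_iommu_all() control flow looks like this (a sketch of the structure only; the diff below has the real loop bounds and error handling):

	/* Phase 1: parse every IVHD block, allocating one IOMMU instance each */
	while (p < end) {
		h = (struct ivhd_header *)p;
		if (*p == amd_iommu_target_ivhd_type)
			init_iommu_one(iommu, h, table);
		p += h->length;
	}

	/* Phase 2: derive the global EFR/EFR2 masks across all instances */
	get_global_efr();

	/* Phase 3: enable features on each instance */
	for_each_iommu(iommu) {
		ret = init_iommu_one_late(iommu);
		if (ret)
			return ret;
	}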
Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220713225651.20758-5-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 3ab0fc37904a..371f0b276696 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1721,7 +1721,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, struct acpi_table_header *ivrs_base) { struct amd_iommu_pci_seg *pci_seg; - int ret; pci_seg = get_pci_segment(h->pci_seg, ivrs_base); if (pci_seg == NULL) @@ -1802,6 +1801,13 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, if (!iommu->mmio_base) return -ENOMEM; + return init_iommu_from_acpi(iommu, h); +} + +static int __init init_iommu_one_late(struct amd_iommu *iommu) +{ + int ret; + if (alloc_cwwb_sem(iommu)) return -ENOMEM; @@ -1823,10 +1829,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, if (amd_iommu_pre_enabled) amd_iommu_pre_enabled = translation_pre_enabled(iommu); - ret = init_iommu_from_acpi(iommu, h); - if (ret) - return ret; - if (amd_iommu_irq_remap) { ret = amd_iommu_create_irq_domain(iommu); if (ret) @@ -1837,7 +1839,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, * Make sure IOMMU is not considered to translate itself. The IVRS * table tells us so, but this is a lie! */ - pci_seg->rlookup_table[iommu->devid] = NULL; + iommu->pci_seg->rlookup_table[iommu->devid] = NULL; return 0; } @@ -1882,6 +1884,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) end += table->length; p += IVRS_HEADER_LENGTH; + /* Phase 1: Process all IVHD blocks */ while (p < end) { h = (struct ivhd_header *)p; if (*p == amd_iommu_target_ivhd_type) { @@ -1907,6 +1910,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) } WARN_ON(p != end); + /* Phase 2 : Early feature support check */ + get_global_efr(); + + /* Phase 3 : Enabling IOMMU features */ + for_each_iommu(iommu) { + ret = init_iommu_one_late(iommu); + if (ret) + return ret; + } + return 0; } -- cgit v1.2.3 From 02c6f31d0e010a7eef849ca6457e5801de2b42d1 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 13 Jul 2022 17:56:47 -0500 Subject: iommu/amd: Globally detect SNP support Modify existing SNP feature check to use the helper function check_feature_on_all_iommus() to ensure consistency among all IOMMUs. Also report IOMMU SNP support information for each IOMMU. 
Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220713225651.20758-6-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 371f0b276696..7c6e61451ff7 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -389,7 +389,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu) u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); u64 entry = start & PM_ADDR_MASK; - if (!iommu_feature(iommu, FEATURE_SNP)) + if (!check_feature_on_all_iommus(FEATURE_SNP)) return; /* Note: @@ -804,7 +804,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, void *buf = (void *)__get_free_pages(gfp, order); if (buf && - iommu_feature(iommu, FEATURE_SNP) && + check_feature_on_all_iommus(FEATURE_SNP) && set_memory_4k((unsigned long)buf, (1 << order))) { free_pages((unsigned long)buf, order); buf = NULL; @@ -2143,6 +2143,9 @@ static void print_iommu_info(void) if (iommu->features & FEATURE_GAM_VAPIC) pr_cont(" GA_vAPIC"); + if (iommu->features & FEATURE_SNP) + pr_cont(" SNP"); + pr_cont("\n"); } } -- cgit v1.2.3 From fb2accadaa9427a374fa9f482ea24ca731f675ba Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 13 Jul 2022 17:56:48 -0500 Subject: iommu/amd: Introduce function to check and enable SNP To support SNP, the IOMMU must be enabled, and SNP prohibits IOMMU configurations where DTE[Mode]=0. This means SNP cannot be supported with the IOMMU passthrough domain (a.k.a. IOMMU_DOMAIN_IDENTITY), nor when the AMD IOMMU driver is configured not to use the IOMMU host (v1) page table. Otherwise, RMP table initialization could cause the system to crash. The request to enable SNP support in the IOMMU must be made before the IOMMU driver reaches its PCI initialization state, because enabling SNP affects how the IOMMU driver sets up its data structures (i.e. the DTE). Unlike other IOMMU features, the SNP feature does not have an enable bit in the IOMMU control register. Instead, the IOMMU driver introduces an amd_iommu_snp_en variable to track the SNP enabling state. Introduce amd_iommu_snp_enable() for other drivers to request enabling SNP support in the IOMMU; it checks all prerequisites and determines if the feature can be safely enabled. Please see the IOMMU spec section 2.12 for further details. Reviewed-by: Robin Murphy Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Brijesh Singh Link: https://lore.kernel.org/r/20220713225651.20758-7-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 ++ drivers/iommu/amd/init.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/linux/amd-iommu.h | 4 ++++ 3 files changed, 48 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 1b945d47741c..84e5bb1bf01b 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -140,4 +140,6 @@ extern struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); extern u64 amd_iommu_efr; extern u64 amd_iommu_efr2; + +extern bool amd_iommu_snp_en; #endif diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 7c6e61451ff7..cddcb6f7e3c9 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -168,6 +168,10 @@ static int amd_iommu_target_ivhd_type; u64 amd_iommu_efr; u64 amd_iommu_efr2; +/* SNP is enabled on the system?
*/ bool amd_iommu_snp_en; +EXPORT_SYMBOL(amd_iommu_snp_en); + LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -3556,3 +3560,41 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); } + +#ifdef CONFIG_AMD_MEM_ENCRYPT +int amd_iommu_snp_enable(void) +{ + /* + * The SNP support requires that IOMMU must be enabled, and is + * not configured in the passthrough mode. + */ + if (no_iommu || iommu_default_passthrough()) { + pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported"); + return -EINVAL; + } + + /* + * Prevent enabling SNP after IOMMU_ENABLED state because this process + * affect how IOMMU driver sets up data structures and configures + * IOMMU hardware. + */ + if (init_state > IOMMU_ENABLED) { + pr_err("SNP: Too late to enable SNP for IOMMU.\n"); + return -EINVAL; + } + + amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP); + if (!amd_iommu_snp_en) + return -EINVAL; + + pr_info("SNP enabled\n"); + + /* Enforce IOMMU v1 pagetable when SNP is enabled. */ + if (amd_iommu_pgtable != AMD_IOMMU_V1) { + pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n"); + amd_iommu_pgtable = AMD_IOMMU_V1; + } + + return 0; +} +#endif diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 58e6c3806c09..953e6f12fa1c 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -206,4 +206,8 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value); struct amd_iommu *get_amd_iommu(unsigned int idx); +#ifdef CONFIG_AMD_MEM_ENCRYPT +int amd_iommu_snp_enable(void); +#endif + #endif /* _ASM_X86_AMD_IOMMU_H */ -- cgit v1.2.3 From b9f0043e1ea68c8d2c0f75800a7ac70ab7774a5f Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 13 Jul 2022 17:56:49 -0500 Subject: iommu/amd: Set translation valid bit only when IO page tables are in use On AMD systems with SNP enabled, the IOMMU hardware checks the host translation valid (TV) and guest translation valid (GV) bits in the device table entry (DTE) before accessing the corresponding page tables. However, the current IOMMU driver sets the TV bit for all devices regardless of whether the host page table is in use. This results in an ILLEGAL_DEV_TABLE_ENTRY event for devices that do not have the host page table root pointer set up. Therefore, when SNP is enabled, set the TV bit only when DMA remapping is in use, which is when the domain ID in the AMD IOMMU device table entry (DTE) is non-zero.
Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220713225651.20758-8-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 3 ++- drivers/iommu/amd/iommu.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index cddcb6f7e3c9..538c750ceed6 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2569,7 +2569,8 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); - __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); + if (!amd_iommu_snp_en) + __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); } } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index a56a9ad3273e..aedeff8af929 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1552,7 +1552,15 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; - pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; + + pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; + + /* + * When SNP is enabled, Only set TV bit when IOMMU + * page translation is in use. + */ + if (!amd_iommu_snp_en || (domain->id != 0)) + pte_root |= DTE_FLAG_TV; flags = dev_table[devid].data[1]; @@ -1612,7 +1620,11 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) struct dev_table_entry *dev_table = get_dev_table(iommu); /* remove entry from the device table seen by the hardware */ - dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV; + dev_table[devid].data[0] = DTE_FLAG_V; + + if (!amd_iommu_snp_en) + dev_table[devid].data[0] |= DTE_FLAG_TV; + dev_table[devid].data[1] &= DTE_FLAG_MASK; amd_iommu_apply_erratum_63(iommu, devid); -- cgit v1.2.3 From 8388f7df936bae004026bd718466127f27bbc398 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 13 Jul 2022 17:56:50 -0500 Subject: iommu/amd: Do not support IOMMU_DOMAIN_IDENTITY after SNP is enabled Once SNP is enabled (by executing the SNP_INIT command), the IOMMU can no longer support the passthrough domain (i.e. IOMMU_DOMAIN_IDENTITY). The SNP_INIT command is issued early in the boot process and would fail if the kernel is configured to default to passthrough mode. After the system has booted, users can still try to change the IOMMU domain type of a particular IOMMU group. In this case, the IOMMU driver needs to check the SNP-enable status and fail any request to change the domain type to identity. Therefore, return failure when trying to allocate an identity domain. Reviewed-by: Robin Murphy Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220713225651.20758-9-suravee.suthikulpanit@amd.com [ joro: Removed WARN_ON_ONCE() ] Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index aedeff8af929..f287dca85990 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2068,6 +2068,13 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *domain; + /* + * Since DTE[Mode]=0 is prohibited on SNP-enabled system, + * default to use IOMMU_DOMAIN_DMA[_FQ]. 
+ */ + if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY)) + return NULL; + domain = protection_domain_alloc(type); if (!domain) return NULL; -- cgit v1.2.3 From 30315e71b42603307989d813a620781ff8238f3c Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 13 Jul 2022 17:56:51 -0500 Subject: iommu/amd: Do not support IOMMUv2 APIs when SNP is enabled The IOMMUv2 APIs (for supporting shared virtual memory with PASID) configure the domain with an IOMMU v2 page table and set DTE[Mode]=0. This configuration cannot be supported on an SNP-enabled system. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220713225651.20758-10-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 538c750ceed6..d86496114ca5 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3458,7 +3458,12 @@ __setup("ivrs_acpihid", parse_ivrs_acpihid); bool amd_iommu_v2_supported(void) { - return amd_iommu_v2_present; + /* + * Since DTE[Mode]=0 is prohibited on SNP-enabled system + * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without + * setting up IOMMUv1 page table. + */ + return amd_iommu_v2_present && !amd_iommu_snp_en; } EXPORT_SYMBOL(amd_iommu_v2_supported); -- cgit v1.2.3 From 1dcef3d2f345810472cc7385384014a412a4682c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 15 Jul 2022 13:03:34 +0200 Subject: MAINTAINERS: Add Robin Murphy as IOMMU SUBSYSTEM reviewer Robin has been acting as a reviewer of the IOMMU subsystem for a long time. He is also de facto maintaining the IOMMU DMA-API layer, so make both roles official by adding Robin to the MAINTAINERS file. Signed-off-by: Joerg Roedel Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20220715110334.6969-1-joro@8bytes.org --- MAINTAINERS | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3cf9842d9233..b61dd738a11b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10352,9 +10352,20 @@ T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git F: fs/iomap/ F: include/linux/iomap.h -IOMMU DRIVERS +IOMMU DMA-API LAYER +M: Robin Murphy +L: iommu@lists.linux.dev +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git +F: drivers/iommu/dma-iommu.c +F: drivers/iommu/iova.c +F: include/linux/dma-iommu.h +F: include/linux/iova.h + +IOMMU SUBSYSTEM M: Joerg Roedel M: Will Deacon +R: Robin Murphy L: iommu@lists.linux-foundation.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git -- cgit v1.2.3 From 743302d4ad6cf51f6abffed4f12d2d0c24e3288a Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 16 Jul 2022 21:32:22 +0200 Subject: dt-bindings: arm-smmu: Add compatible for Qualcomm SM6375 Add a compatible for Qualcomm SM6375's broken-as-usual MMU500 implementation. 
Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20220716193223.455859-1-konrad.dybcio@somainline.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 76fc2c0f4d54..9066e6df1ba1 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -42,6 +42,7 @@ properties: - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 - qcom,sm6350-smmu-500 + - qcom,sm6375-smmu-500 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 - qcom,sm8350-smmu-500 -- cgit v1.2.3 From ef660de424cf06eab262a055b18cdf704aaf5fb1 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 16 Jul 2022 21:32:23 +0200 Subject: iommu/arm-smmu-qcom: Add SM6375 SMMU compatible Add a compatible for SM6375 to the qcom impl match list. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20220716193223.455859-2-konrad.dybcio@somainline.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index de25071e33ab..b2708de25ea3 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -434,6 +434,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sdm845-smmu-500" }, { .compatible = "qcom,sm6125-smmu-500" }, { .compatible = "qcom,sm6350-smmu-500" }, + { .compatible = "qcom,sm6375-smmu-500" }, { .compatible = "qcom,sm8150-smmu-500" }, { .compatible = "qcom,sm8250-smmu-500" }, { .compatible = "qcom,sm8350-smmu-500" }, -- cgit v1.2.3 From a91eb6803c1c715738682fece095145cbd68fe0b Mon Sep 17 00:00:00 2001 From: Liang He Date: Tue, 19 Jul 2022 20:49:55 +0800 Subject: iommu/arm-smmu: qcom_iommu: Add of_node_put() when breaking out of loop In qcom_iommu_has_secure_context(), we must call of_node_put() on 'child' when breaking out of for_each_child_of_node(): the iterator automatically takes a reference on each child and drops it when advancing to the next one, so an early exit leaves one reference held. 
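To see why the early exit would otherwise leak a reference, note that the iterator expands to roughly the following (a simplified sketch of the include/linux/of.h macro):

/*
 * Simplified sketch: of_get_next_child() drops the reference held on
 * the previous child and takes one on the next, so a normal run to
 * completion (child == NULL) leaves no reference held. Breaking out
 * early exits with the current child's reference still taken, and
 * only an explicit of_node_put() can release it.
 */
#define for_each_child_of_node(parent, child) \
	for (child = of_get_next_child(parent, NULL); child != NULL; \
	     child = of_get_next_child(parent, child))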
Fixes: d051f28c8807 ("iommu/qcom: Initialize secure page table") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20220719124955.1242171-1-windhl@126.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 4c077c38fbd6..c11d2c2cbb62 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -750,9 +750,12 @@ static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu) { struct device_node *child; - for_each_child_of_node(qcom_iommu->dev->of_node, child) - if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) + for_each_child_of_node(qcom_iommu->dev->of_node, child) { + if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) { + of_node_put(child); return true; + } + } return false; } -- cgit v1.2.3 From f066b8f7d961b0f3ec08678a27f2f1a2f0148380 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Thu, 21 Jul 2022 14:03:31 -0700 Subject: drivers: iommu: fix clang -Wformat warning When building with Clang we encounter the following warning: | drivers/iommu/msm_iommu.c:603:6: error: format specifies type 'unsigned | short' but the argument has type 'int' [-Werror,-Wformat] sid); `sid` is an int, so use the proper format specifier `%x`. Link: https://github.com/ClangBuiltLinux/linux/issues/378 Reported-by: Nathan Chancellor Suggested-by: Nathan Chancellor Signed-off-by: Justin Stitt Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20220721210331.4012015-1-justinstitt@google.com Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index f09aedfdd462..a07076baa4b5 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -603,7 +603,7 @@ static int insert_iommu_master(struct device *dev, for (sid = 0; sid < master->num_mids; sid++) if (master->mids[sid] == spec->args[0]) { - dev_warn(dev, "Stream ID 0x%hx repeated; ignoring\n", + dev_warn(dev, "Stream ID 0x%x repeated; ignoring\n", sid); return 0; } -- cgit v1.2.3 From de0269765b268717b70f16f809560e83d5a8d0e7 Mon Sep 17 00:00:00 2001 From: Ren Zhijie Date: Tue, 26 Jul 2022 11:35:20 +0800 Subject: ACPI/IORT: Fix build error implicit-function-declaration If CONFIG_ACPI_IORT=y and CONFIG_IOMMU_API is not set, building with make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- fails like this: drivers/acpi/arm64/iort.c: In function ‘iort_get_rmr_sids’: drivers/acpi/arm64/iort.c:1406:2: error: implicit declaration of function ‘iort_iommu_rmr_get_resv_regions’; did you mean ‘iort_iommu_get_resv_regions’? [-Werror=implicit-function-declaration] iort_iommu_rmr_get_resv_regions(iommu_fwnode, NULL, head); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ iort_iommu_get_resv_regions cc1: some warnings being treated as errors make[3]: *** [drivers/acpi/arm64/iort.o] Error 1 The function iort_iommu_rmr_get_resv_regions() is only declared under CONFIG_IOMMU_API, and the callers of iort_get_rmr_sids() and iort_put_rmr_sids() select IOMMU_API. To fix this error, move the definitions under #ifdef CONFIG_IOMMU_API. 
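The shape of the fix reduces to the following sketch (hypothetical names, not the actual iort.c layout): a definition that calls a guarded helper must live under the same guard.

#ifdef CONFIG_IOMMU_API
static void rmr_helper(void)
{
	/* IOMMU-specific work, only compiled when IOMMU_API is set */
}

void exported_rmr_api(void)
{
	/*
	 * Caller and helper now sit under the same #ifdef, so the
	 * helper's declaration is always visible where this is built.
	 */
	rmr_helper();
}
#endif /* CONFIG_IOMMU_API */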
Fixes: e302eea8f497 ("ACPI/IORT: Add a helper to retrieve RMR info directly") Signed-off-by: Ren Zhijie Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20220726033520.47865-1-renzhijie2@huawei.com Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/iort.c | 56 +++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index cd1349d3544e..ca2aed86b540 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1162,6 +1162,34 @@ void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head) iort_iommu_rmr_get_resv_regions(fwspec->iommu_fwnode, dev, head); } +/** + * iort_get_rmr_sids - Retrieve IORT RMR node reserved regions with + * associated StreamIDs information. + * @iommu_fwnode: fwnode associated with IOMMU + * @head: Resereved region list + */ +void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode, + struct list_head *head) +{ + iort_iommu_rmr_get_resv_regions(iommu_fwnode, NULL, head); +} +EXPORT_SYMBOL_GPL(iort_get_rmr_sids); + +/** + * iort_put_rmr_sids - Free memory allocated for RMR reserved regions. + * @iommu_fwnode: fwnode associated with IOMMU + * @head: Resereved region list + */ +void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode, + struct list_head *head) +{ + struct iommu_resv_region *entry, *next; + + list_for_each_entry_safe(entry, next, head, list) + entry->free(NULL, entry); +} +EXPORT_SYMBOL_GPL(iort_put_rmr_sids); + static inline bool iort_iommu_driver_enabled(u8 type) { switch (type) { @@ -1394,34 +1422,6 @@ int iort_dma_get_ranges(struct device *dev, u64 *size) return nc_dma_get_range(dev, size); } -/** - * iort_get_rmr_sids - Retrieve IORT RMR node reserved regions with - * associated StreamIDs information. - * @iommu_fwnode: fwnode associated with IOMMU - * @head: Resereved region list - */ -void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode, - struct list_head *head) -{ - iort_iommu_rmr_get_resv_regions(iommu_fwnode, NULL, head); -} -EXPORT_SYMBOL_GPL(iort_get_rmr_sids); - -/** - * iort_put_rmr_sids - Free memory allocated for RMR reserved regions. - * @iommu_fwnode: fwnode associated with IOMMU - * @head: Resereved region list - */ -void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode, - struct list_head *head) -{ - struct iommu_resv_region *entry, *next; - - list_for_each_entry_safe(entry, next, head, list) - entry->free(NULL, entry); -} -EXPORT_SYMBOL_GPL(iort_put_rmr_sids); - static void __init acpi_iort_register_irq(int hwirq, const char *name, int trigger, struct resource *res) -- cgit v1.2.3 From c5e1a1eb9279fdb28d47ca2a4493a4c53b7d6a0b Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 26 Jul 2022 08:43:47 -0500 Subject: iommu/amd: Simplify and Consolidate Virtual APIC (AVIC) Enablement Currently, enabling AVIC requires individually detecting and enabling the GAM and GALOG features on each IOMMU, which is difficult to keep track of on a multi-IOMMU system where the features need to be enabled system-wide. In addition, these features do not need to be enabled at an early stage; this can be delayed until after amd_iommu_init_pci(). Therefore, consolidate the logic for detecting and enabling the IOMMU GAM and GALOG features into a helper function, enable_iommus_vapic(), which uses the check_feature_on_all_iommus() helper to ensure system-wide support of the features before enabling them, and postpone the enablement until after amd_iommu_init_pci(). 
The new function also checks for and cleans up feature enablement residue from a previous boot (e.g. in case of booting into a kdump kernel), which otherwise triggers a WARN_ON (shown below) introduced by commit a8d4a37d1bb9 ("iommu/amd: Restore GA log/tail pointer on host resume") in iommu_ga_log_enable(). [ 7.731955] ------------[ cut here ]------------ [ 7.736575] WARNING: CPU: 0 PID: 1 at drivers/iommu/amd/init.c:829 iommu_ga_log_enable.isra.0+0x16f/0x190 [ 7.746135] Modules linked in: [ 7.749193] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W -------- --- 5.19.0-0.rc7.53.eln120.x86_64 #1 [ 7.759706] Hardware name: Dell Inc. PowerEdge R7525/04D5GJ, BIOS 2.1.6 03/09/2021 [ 7.767274] RIP: 0010:iommu_ga_log_enable.isra.0+0x16f/0x190 [ 7.772931] Code: 20 20 00 00 8b 00 f6 c4 01 74 da 48 8b 44 24 08 65 48 2b 04 25 28 00 00 00 75 13 48 83 c4 10 5b 5d e9 f5 00 72 00 0f 0b eb e1 <0f> 0b eb dd e8 f8 66 42 00 48 8b 15 f1 85 53 01 e9 29 ff ff ff 48 [ 7.791679] RSP: 0018:ffffc90000107d20 EFLAGS: 00010206 [ 7.796905] RAX: ffffc90000780000 RBX: 0000000000000100 RCX: ffffc90000780000 [ 7.804038] RDX: 0000000000000001 RSI: ffffc90000780000 RDI: ffff8880451f9800 [ 7.811170] RBP: ffff8880451f9800 R08: ffffffffffffffff R09: 0000000000000000 [ 7.818303] R10: 0000000000000000 R11: 0000000000000000 R12: 0008000000000000 [ 7.825435] R13: ffff8880462ea900 R14: 0000000000000021 R15: 0000000000000000 [ 7.832572] FS: 0000000000000000(0000) GS:ffff888054a00000(0000) knlGS:0000000000000000 [ 7.840657] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7.846400] CR2: ffff888054dff000 CR3: 0000000053210000 CR4: 0000000000350eb0 [ 7.853533] Call Trace: [ 7.855979] [ 7.858085] amd_iommu_enable_interrupts+0x180/0x270 [ 7.863051] ? iommu_setup+0x271/0x271 [ 7.866803] state_next+0x197/0x2c0 [ 7.870295] ? iommu_setup+0x271/0x271 [ 7.874049] iommu_go_to_state+0x24/0x2c [ 7.877976] amd_iommu_init+0xf/0x29 [ 7.881554] pci_iommu_init+0xe/0x36 [ 7.885133] do_one_initcall+0x44/0x200 [ 7.888975] do_initcalls+0xc8/0xe1 [ 7.892466] kernel_init_freeable+0x14c/0x199 [ 7.896826] ? 
rest_init+0xd0/0xd0 [ 7.900231] kernel_init+0x16/0x130 [ 7.903723] ret_from_fork+0x22/0x30 [ 7.907306] [ 7.909497] ---[ end trace 0000000000000000 ]--- Fixes: commit a8d4a37d1bb9 ("iommu/amd: Restore GA log/tail pointer on host resume") Reported-by: Jerry Snitselaar Cc: Joerg Roedel Cc: Maxim Levitsky Cc: Will Deacon (maintainer:IOMMU DRIVERS) Signed-off-by: Suravee Suthikulpanit Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20220726134348.6438-2-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 85 +++++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d86496114ca5..4cd94d716122 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -908,11 +908,6 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) if (!iommu->ga_log) return -EINVAL; - /* Check if already running */ - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - if (WARN_ON(status & (MMIO_STATUS_GALOG_RUN_MASK))) - return 0; - entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, &entry, sizeof(entry)); @@ -2068,10 +2063,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) return -ENOMEM; - ret = iommu_init_ga_log(iommu); - if (ret) - return ret; - if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { pr_info("Using strict mode due to virtualization\n"); iommu_set_dma_strict(); @@ -2155,8 +2146,6 @@ static void print_iommu_info(void) } if (irq_remapping_enabled) { pr_info("Interrupt remapping enabled\n"); - if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) - pr_info("Virtual APIC enabled\n"); if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) pr_info("X2APIC enabled\n"); } @@ -2446,9 +2435,6 @@ enable_faults: if (iommu->ppr_log != NULL) iommu_feature_enable(iommu, CONTROL_PPRINT_EN); - - iommu_ga_log_enable(iommu); - return 0; } @@ -2678,8 +2664,6 @@ static void iommu_enable_ga(struct amd_iommu *iommu) #ifdef CONFIG_IRQ_REMAP switch (amd_iommu_guest_ir) { case AMD_IOMMU_GUEST_IR_VAPIC: - iommu_feature_enable(iommu, CONTROL_GAM_EN); - fallthrough; case AMD_IOMMU_GUEST_IR_LEGACY_GA: iommu_feature_enable(iommu, CONTROL_GA_EN); iommu->irte_ops = &irte_128_ops; @@ -2759,19 +2743,6 @@ static void early_enable_iommus(void) iommu_flush_all_caches(iommu); } } - -#ifdef CONFIG_IRQ_REMAP - /* - * Note: We have already checked GASup from IVRS table. - * Now, we need to make sure that GAMSup is set. - */ - if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && - !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) - amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; - - if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) - amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); -#endif } static void enable_iommus_v2(void) @@ -2784,10 +2755,63 @@ static void enable_iommus_v2(void) } } +static void enable_iommus_vapic(void) +{ +#ifdef CONFIG_IRQ_REMAP + u32 status, i; + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + /* + * Disable GALog if already running. It could have been enabled + * in the previous boot before kdump. 
*/ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) + continue; + + iommu_feature_disable(iommu, CONTROL_GALOG_EN); + iommu_feature_disable(iommu, CONTROL_GAINT_EN); + + /* + * Need to set and poll check the GALOGRun bit to zero before + * we can set/ modify GA Log registers safely. + */ + for (i = 0; i < LOOP_TIMEOUT; ++i) { + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) + break; + udelay(10); + } + + if (WARN_ON(i >= LOOP_TIMEOUT)) + return; + } + + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && + !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) { + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; + return; + } + + /* Enabling GAM support */ + for_each_iommu(iommu) { + if (iommu_init_ga_log(iommu) || + iommu_ga_log_enable(iommu)) + return; + + iommu_feature_enable(iommu, CONTROL_GAM_EN); + } + + amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); + pr_info("Virtual APIC enabled\n"); +#endif +} + static void enable_iommus(void) { early_enable_iommus(); - + enable_iommus_vapic(); enable_iommus_v2(); } @@ -3126,6 +3150,7 @@ static int __init state_next(void) register_syscore_ops(&amd_iommu_syscore_ops); ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; + enable_iommus_vapic(); enable_iommus_v2(); break; case IOMMU_PCI_INIT: -- cgit v1.2.3 From 432e5dfc7eb75681b412c2b0cafb8604475e0aba Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 26 Jul 2022 08:43:48 -0500 Subject: iommu/amd: Add support for AVIC when SNP is enabled In order to support AVIC on an SNP-enabled system, the IOMMU driver needs to check EFR2[SNPAVICSup] and enable the support by setting the SNPAVICEn bit in the IOMMU control register (MMIO offset 18h). For details, please see the section "SEV-SNP Guest Virtual APIC Support" of the AMD I/O Virtualization Technology (IOMMU) Specification (https://www.amd.com/system/files/TechDocs/48882_IOMMU.pdf). Signed-off-by: Suravee Suthikulpanit Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20220726134348.6438-3-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 7 +++++++ drivers/iommu/amd/init.c | 11 ++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 3c1205ba636a..5b1019dab328 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -103,6 +103,12 @@ #define FEATURE_GLXVAL_SHIFT 14 #define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT) +/* Extended Feature 2 Bits */ +#define FEATURE_SNPAVICSUP_SHIFT 5 +#define FEATURE_SNPAVICSUP_MASK (0x07ULL << FEATURE_SNPAVICSUP_SHIFT) +#define FEATURE_SNPAVICSUP_GAM(x) \ + ((x & FEATURE_SNPAVICSUP_MASK) >> FEATURE_SNPAVICSUP_SHIFT == 0x1) + /* Note: * The current driver only support 16-bit PASID. 
* Currently, hardware only implement upto 16-bit PASID @@ -165,6 +171,7 @@ #define CONTROL_GAINT_EN 29 #define CONTROL_XT_EN 50 #define CONTROL_INTCAPXT_EN 51 +#define CONTROL_SNPAVIC_EN 61 #define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) #define CTRL_INV_TO_NONE 0 diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 4cd94d716122..6bbaf6b971e8 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2794,13 +2794,22 @@ static void enable_iommus_vapic(void) return; } - /* Enabling GAM support */ + if (amd_iommu_snp_en && + !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) { + pr_warn("Force to disable Virtual APIC due to SNP\n"); + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; + return; + } + + /* Enabling GAM and SNPAVIC support */ for_each_iommu(iommu) { if (iommu_init_ga_log(iommu) || iommu_ga_log_enable(iommu)) return; iommu_feature_enable(iommu, CONTROL_GAM_EN); + if (amd_iommu_snp_en) + iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN); } amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); -- cgit v1.2.3 From be280ea763f7db492e0e30ba22873433aea0f468 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 29 Jul 2022 12:04:32 +0200 Subject: iommu/amd: Fix compile warning in init code A recent commit introduced these compile warnings: CC drivers/iommu/amd/init.o drivers/iommu/amd/init.c:938:12: error: ‘iommu_init_ga_log’ defined but not used [-Werror=unused-function] 938 | static int iommu_init_ga_log(struct amd_iommu *iommu) | ^~~~~~~~~~~~~~~~~ drivers/iommu/amd/init.c:902:12: error: ‘iommu_ga_log_enable’ defined but not used [-Werror=unused-function] 902 | static int iommu_ga_log_enable(struct amd_iommu *iommu) | ^~~~~~~~~~~~~~~~~~~ The warnings appear because both functions are defined when IRQ remapping is not enabled, but only used when IRQ remapping is enabled. Fix it by only defining the functions when IRQ remapping is enabled. Fixes: c5e1a1eb9279 ("iommu/amd: Simplify and Consolidate Virtual APIC (AVIC) Enablement") Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20220729100432.22474-1-joro@8bytes.org --- drivers/iommu/amd/init.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 6bbaf6b971e8..fdc642362c14 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -899,9 +899,9 @@ static void free_ga_log(struct amd_iommu *iommu) #endif } +#ifdef CONFIG_IRQ_REMAP static int iommu_ga_log_enable(struct amd_iommu *iommu) { -#ifdef CONFIG_IRQ_REMAP u32 status, i; u64 entry; @@ -931,13 +931,12 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) if (WARN_ON(i >= LOOP_TIMEOUT)) return -EINVAL; -#endif /* CONFIG_IRQ_REMAP */ + return 0; } static int iommu_init_ga_log(struct amd_iommu *iommu) { -#ifdef CONFIG_IRQ_REMAP if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0; @@ -955,10 +954,8 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) err_out: free_ga_log(iommu); return -EINVAL; -#else - return 0; -#endif /* CONFIG_IRQ_REMAP */ } +#endif /* CONFIG_IRQ_REMAP */ static int __init alloc_cwwb_sem(struct amd_iommu *iommu) { -- cgit v1.2.3
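As a closing note on the SNP-AVIC patch above, its EFR2[SNPAVICSup] decode can be sanity-checked in isolation. This user-space sketch reuses the mask/shift values from that diff verbatim; the comments on the field encodings follow the macro itself, so treat them as illustrative:

#include <assert.h>
#include <stdint.h>

#define FEATURE_SNPAVICSUP_SHIFT 5
#define FEATURE_SNPAVICSUP_MASK (0x07ULL << FEATURE_SNPAVICSUP_SHIFT)
#define FEATURE_SNPAVICSUP_GAM(x) \
	((x & FEATURE_SNPAVICSUP_MASK) >> FEATURE_SNPAVICSUP_SHIFT == 0x1)

int main(void)
{
	/* Encoding 1h in the 3-bit field reports guest AVIC support */
	assert(FEATURE_SNPAVICSUP_GAM(0x1ULL << FEATURE_SNPAVICSUP_SHIFT));

	/* Any other encoding (here 2h) does not */
	assert(!FEATURE_SNPAVICSUP_GAM(0x2ULL << FEATURE_SNPAVICSUP_SHIFT));

	return 0;
}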