From 91c64a4f1cb01cf0ec50f0372ff8ca9d3022b7d0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 2 Oct 2021 22:23:34 -0700 Subject: drm/i915/gvt: clean up kernel-doc in gtt.c Fix kernel-doc warnings in gtt.c: gtt.c:1152: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Check if can do 2M page gtt.c:1152: warning: missing initial short description on line: * Check if can do 2M page gtt.c:2209: warning: expecting prototype for intel_vgpu_emulate_gtt_mmio_read(). Prototype was for intel_vgpu_emulate_ggtt_mmio_read() instead Fixes: a752b070a678 ("drm/i915/gvt: Fix function comment doc errors") Fixes: 2707e4446688 ("drm/i915/gvt: vGPU graphics memory virtualization") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Zhenyu Wang Cc: Zhi Wang Cc: Colin Xu Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Cc: intel-gvt-dev@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Zhi Wang Link: http://patchwork.freedesktop.org/patch/msgid/20211003052334.15764-1-rdunlap@infradead.org Reviewed-by: Zhi Wang Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 53d0cb327539..93ce786905dd 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1148,7 +1148,7 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, ops->set_pfn(se, s->shadow_page.mfn); } -/** +/* * Check if can do 2M page * @vgpu: target vgpu * @entry: target pfn's gtt entry @@ -2193,7 +2193,7 @@ static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, } /** - * intel_vgpu_emulate_gtt_mmio_read - emulate GTT MMIO register read + * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read * @vgpu: a vGPU * @off: register offset * @p_data: data will be returned to guest -- cgit v1.2.3 From d72d69abfdb6e0375981cfdda8eb45143f12c77d Mon Sep 17 00:00:00 2001 From: Siva Mullati Date: Fri, 7 Jan 2022 15:22:35 +0530 Subject: drm/i915/gvt: Make DRM_I915_GVT depend on X86 GVT is not supported on non-x86 platforms, So add dependency of X86 on config parameter DRM_I915_GVT. Fixes: 0ad35fed618c ("drm/i915: gvt: Introduce the basic architecture of GVT-g") Signed-off-by: Siva Mullati Signed-off-by: Zhi Wang Link: http://patchwork.freedesktop.org/patch/msgid/20220107095235.243448-1-siva.mullati@intel.com Reviewed-by: Zhi Wang Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 84b6fc70cbf5..0bddb75fa7e0 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -101,6 +101,7 @@ config DRM_I915_USERPTR config DRM_I915_GVT bool "Enable Intel GVT-g graphics virtualization host support" depends on DRM_I915 + depends on X86 depends on 64BIT default n help -- cgit v1.2.3 From ba1b71b008e97fd747845ff3a818420b11bbe830 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 30 Dec 2021 07:27:51 +0000 Subject: mtd: rawnand: ingenic: Fix missing put_device in ingenic_ecc_get If of_find_device_by_node() succeeds, ingenic_ecc_get() doesn't have a corresponding put_device(). Thus add put_device() to fix the exception handling. Fixes: 15de8c6efd0e ("mtd: rawnand: ingenic: Separate top-level and SoC specific code") Signed-off-by: Miaoqian Lin Reviewed-by: Paul Cercueil Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211230072751.21622-1-linmq006@gmail.com --- drivers/mtd/nand/raw/ingenic/ingenic_ecc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c index efe0ffe4f1ab..9054559e52dd 100644 --- a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c +++ b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c @@ -68,9 +68,14 @@ static struct ingenic_ecc *ingenic_ecc_get(struct device_node *np) struct ingenic_ecc *ecc; pdev = of_find_device_by_node(np); - if (!pdev || !platform_get_drvdata(pdev)) + if (!pdev) return ERR_PTR(-EPROBE_DEFER); + if (!platform_get_drvdata(pdev)) { + put_device(&pdev->dev); + return ERR_PTR(-EPROBE_DEFER); + } + ecc = platform_get_drvdata(pdev); clk_prepare_enable(ecc->clk); -- cgit v1.2.3 From 5c23b3f965bc9ee696bf2ed4bdc54d339dd9a455 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Mon, 3 Jan 2022 03:03:15 +0000 Subject: mtd: rawnand: qcom: Fix clock sequencing in qcom_nandc_probe() Interacting with a NAND chip on an IPQ6018 I found that the qcomsmem NAND partition parser was returning -EPROBE_DEFER waiting for the main smem driver to load. This caused the board to reset. Playing about with the probe() function shows that the problem lies in the core clock being switched off before the nandc_unalloc() routine has completed. If we look at how qcom_nandc_remove() tears down allocated resources we see the expected order is qcom_nandc_unalloc(nandc); clk_disable_unprepare(nandc->aon_clk); clk_disable_unprepare(nandc->core_clk); dma_unmap_resource(&pdev->dev, nandc->base_dma, resource_size(res), DMA_BIDIRECTIONAL, 0); Tweaking probe() to both bring up and tear-down in that order removes the reset if we end up deferring elsewhere. Fixes: c76b78d8ec05 ("mtd: nand: Qualcomm NAND controller driver") Signed-off-by: Bryan O'Donoghue Reviewed-by: Manivannan Sadhasivam Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220103030316.58301-2-bryan.odonoghue@linaro.org --- drivers/mtd/nand/raw/qcom_nandc.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 7c6efa3b6255..1a77542c6d67 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2,7 +2,6 @@ /* * Copyright (c) 2016, The Linux Foundation. All rights reserved. */ - #include #include #include @@ -3073,10 +3072,6 @@ static int qcom_nandc_probe(struct platform_device *pdev) if (dma_mapping_error(dev, nandc->base_dma)) return -ENXIO; - ret = qcom_nandc_alloc(nandc); - if (ret) - goto err_nandc_alloc; - ret = clk_prepare_enable(nandc->core_clk); if (ret) goto err_core_clk; @@ -3085,6 +3080,10 @@ static int qcom_nandc_probe(struct platform_device *pdev) if (ret) goto err_aon_clk; + ret = qcom_nandc_alloc(nandc); + if (ret) + goto err_nandc_alloc; + ret = qcom_nandc_setup(nandc); if (ret) goto err_setup; @@ -3096,15 +3095,14 @@ static int qcom_nandc_probe(struct platform_device *pdev) return 0; err_setup: + qcom_nandc_unalloc(nandc); +err_nandc_alloc: clk_disable_unprepare(nandc->aon_clk); err_aon_clk: clk_disable_unprepare(nandc->core_clk); err_core_clk: - qcom_nandc_unalloc(nandc); -err_nandc_alloc: dma_unmap_resource(dev, res->start, resource_size(res), DMA_BIDIRECTIONAL, 0); - return ret; } -- cgit v1.2.3 From 079e6bdb2b1cc1da8b5c602229db782732668ae7 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Mon, 3 Jan 2022 03:03:16 +0000 Subject: mtd: parsers: qcom: Don't print error message on -EPROBE_DEFER Its possible for the main smem driver to not be loaded by the time we come along to parse the smem partition description but, this is a perfectly normal thing. No need to print out an error message in this case. Signed-off-by: Bryan O'Donoghue Reviewed-by: Manivannan Sadhasivam Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220103030316.58301-3-bryan.odonoghue@linaro.org --- drivers/mtd/parsers/qcomsmempart.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/parsers/qcomsmempart.c b/drivers/mtd/parsers/qcomsmempart.c index 06a818cd2433..b2a57fe8479f 100644 --- a/drivers/mtd/parsers/qcomsmempart.c +++ b/drivers/mtd/parsers/qcomsmempart.c @@ -75,7 +75,8 @@ static int parse_qcomsmem_part(struct mtd_info *mtd, pr_debug("Parsing partition table info from SMEM\n"); ptable = qcom_smem_get(SMEM_APPS, SMEM_AARM_PARTITION_TABLE, &len); if (IS_ERR(ptable)) { - pr_err("Error reading partition table header\n"); + if (PTR_ERR(ptable) != -EPROBE_DEFER) + pr_err("Error reading partition table header\n"); return PTR_ERR(ptable); } -- cgit v1.2.3 From 65d003cca335cabc0160d3cd7daa689eaa9dd3cd Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Sun, 16 Jan 2022 04:22:10 +0100 Subject: mtd: parsers: qcom: Fix kernel panic on skipped partition In the event of a skipped partition (case when the entry name is empty) the kernel panics in the cleanup function as the name entry is NULL. Rework the parser logic by first checking the real partition number and then allocate the space and set the data for the valid partitions. The logic was also fundamentally wrong as with a skipped partition, the parts number returned was incorrect by not decreasing it for the skipped partitions. Fixes: 803eb124e1a6 ("mtd: parsers: Add Qcom SMEM parser") Signed-off-by: Ansuel Smith Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220116032211.9728-1-ansuelsmth@gmail.com --- drivers/mtd/parsers/qcomsmempart.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/parsers/qcomsmempart.c b/drivers/mtd/parsers/qcomsmempart.c index b2a57fe8479f..469d466f568f 100644 --- a/drivers/mtd/parsers/qcomsmempart.c +++ b/drivers/mtd/parsers/qcomsmempart.c @@ -58,11 +58,11 @@ static int parse_qcomsmem_part(struct mtd_info *mtd, const struct mtd_partition **pparts, struct mtd_part_parser_data *data) { + size_t len = SMEM_FLASH_PTABLE_HDR_LEN; + int ret, i, j, tmpparts, numparts = 0; struct smem_flash_pentry *pentry; struct smem_flash_ptable *ptable; - size_t len = SMEM_FLASH_PTABLE_HDR_LEN; struct mtd_partition *parts; - int ret, i, numparts; char *name, *c; if (IS_ENABLED(CONFIG_MTD_SPI_NOR_USE_4K_SECTORS) @@ -88,8 +88,8 @@ static int parse_qcomsmem_part(struct mtd_info *mtd, } /* Ensure that # of partitions is less than the max we have allocated */ - numparts = le32_to_cpu(ptable->numparts); - if (numparts > SMEM_FLASH_PTABLE_MAX_PARTS_V4) { + tmpparts = le32_to_cpu(ptable->numparts); + if (tmpparts > SMEM_FLASH_PTABLE_MAX_PARTS_V4) { pr_err("Partition numbers exceed the max limit\n"); return -EINVAL; } @@ -117,11 +117,17 @@ static int parse_qcomsmem_part(struct mtd_info *mtd, return PTR_ERR(ptable); } + for (i = 0; i < tmpparts; i++) { + pentry = &ptable->pentry[i]; + if (pentry->name[0] != '\0') + numparts++; + } + parts = kcalloc(numparts, sizeof(*parts), GFP_KERNEL); if (!parts) return -ENOMEM; - for (i = 0; i < numparts; i++) { + for (i = 0, j = 0; i < tmpparts; i++) { pentry = &ptable->pentry[i]; if (pentry->name[0] == '\0') continue; @@ -136,24 +142,25 @@ static int parse_qcomsmem_part(struct mtd_info *mtd, for (c = name; *c != '\0'; c++) *c = tolower(*c); - parts[i].name = name; - parts[i].offset = le32_to_cpu(pentry->offset) * mtd->erasesize; - parts[i].mask_flags = pentry->attr; - parts[i].size = le32_to_cpu(pentry->length) * mtd->erasesize; + parts[j].name = name; + parts[j].offset = le32_to_cpu(pentry->offset) * mtd->erasesize; + parts[j].mask_flags = pentry->attr; + parts[j].size = le32_to_cpu(pentry->length) * mtd->erasesize; pr_debug("%d: %s offs=0x%08x size=0x%08x attr:0x%08x\n", i, pentry->name, le32_to_cpu(pentry->offset), le32_to_cpu(pentry->length), pentry->attr); + j++; } pr_debug("SMEM partition table found: ver: %d len: %d\n", - le32_to_cpu(ptable->version), numparts); + le32_to_cpu(ptable->version), tmpparts); *pparts = parts; return numparts; out_free_parts: - while (--i >= 0) - kfree(parts[i].name); + while (--j >= 0) + kfree(parts[j].name); kfree(parts); *pparts = NULL; -- cgit v1.2.3 From 3dd8ba961b9356c4113b96541c752c73d98fef70 Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Sun, 16 Jan 2022 04:22:11 +0100 Subject: mtd: parsers: qcom: Fix missing free for pparts in cleanup Mtdpart doesn't free pparts when a cleanup function is declared. Add missing free for pparts in cleanup function for smem to fix the leak. Fixes: 10f3b4d79958 ("mtd: parsers: qcom: Fix leaking of partition name") Signed-off-by: Ansuel Smith Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220116032211.9728-2-ansuelsmth@gmail.com --- drivers/mtd/parsers/qcomsmempart.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/parsers/qcomsmempart.c b/drivers/mtd/parsers/qcomsmempart.c index 469d466f568f..4311b89d8df0 100644 --- a/drivers/mtd/parsers/qcomsmempart.c +++ b/drivers/mtd/parsers/qcomsmempart.c @@ -174,6 +174,8 @@ static void parse_qcomsmem_cleanup(const struct mtd_partition *pparts, for (i = 0; i < nr_parts; i++) kfree(pparts[i].name); + + kfree(pparts); } static const struct of_device_id qcomsmem_of_match_table[] = { -- cgit v1.2.3 From 4cd335dae3cf25412427938d8abbaf04d46e63b5 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 18 Jan 2022 14:35:25 +0200 Subject: mtd: rawnand: omap2: Prevent invalid configuration and build error We need to select MEMORY as well otherwise OMAP_GPMC will not be built. For simplicity let's select MEMORY and OMAP_GPMC unconditionally as this driver depends on OMAP_GPMC driver and uses symbols from there. Fixes: dbcb124acebd ("mtd: rawnand: omap2: Select GPMC device driver for ARCH_K3") Reported-by: kernel test robot Signed-off-by: Roger Quadros Reviewed-by: Krzysztof Kozlowski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220118123525.8020-1-rogerq@kernel.org --- drivers/mtd/nand/raw/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig index 20408b7db540..d986ab4e4c35 100644 --- a/drivers/mtd/nand/raw/Kconfig +++ b/drivers/mtd/nand/raw/Kconfig @@ -42,7 +42,8 @@ config MTD_NAND_OMAP2 tristate "OMAP2, OMAP3, OMAP4 and Keystone NAND controller" depends on ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST depends on HAS_IOMEM - select OMAP_GPMC if ARCH_K3 + select MEMORY + select OMAP_GPMC help Support for NAND flash on Texas Instruments OMAP2, OMAP3, OMAP4 and Keystone platforms. -- cgit v1.2.3 From 3e3765875b1b8864898603768fd5c93eeb552211 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Jan 2022 14:55:05 +0300 Subject: mtd: phram: Prevent divide by zero bug in phram_setup() The problem is that "erasesize" is a uint64_t type so it might be non-zero but the lower 32 bits are zero so when it's truncated, "(uint32_t)erasesize", then that value is zero. This leads to a divide by zero bug. Avoid the bug by delaying the divide until after we have validated that "erasesize" is non-zero and within the uint32_t range. Fixes: dc2b3e5cbc80 ("mtd: phram: use div_u64_rem to stop overwrite len in phram_setup") Signed-off-by: Dan Carpenter Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220121115505.GI1978@kadam --- drivers/mtd/devices/phram.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index 6ed6c51fac69..d503821a3e60 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -264,16 +264,20 @@ static int phram_setup(const char *val) } } - if (erasesize) - div_u64_rem(len, (uint32_t)erasesize, &rem); - if (len == 0 || erasesize == 0 || erasesize > len - || erasesize > UINT_MAX || rem) { + || erasesize > UINT_MAX) { parse_err("illegal erasesize or len\n"); ret = -EINVAL; goto error; } + div_u64_rem(len, (uint32_t)erasesize, &rem); + if (rem) { + parse_err("len is not multiple of erasesize\n"); + ret = -EINVAL; + goto error; + } + ret = register_device(name, start, len, (uint32_t)erasesize); if (ret) goto error; -- cgit v1.2.3 From 9161f365c91614e5a3f5c6dcc44c3b1b33bc59c0 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Tue, 25 Jan 2022 09:16:19 +0100 Subject: mtd: rawnand: gpmi: don't leak PM reference in error path If gpmi_nfc_apply_timings() fails, the PM runtime usage counter must be dropped. Reported-by: Pavel Machek Fixes: f53d4c109a66 ("mtd: rawnand: gpmi: Add ERR007117 protection for nfc_apply_timings") Signed-off-by: Christian Eggers Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220125081619.6286-1-ceggers@arri.de --- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 1b64c5a5140d..ded4df473928 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -2285,7 +2285,7 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip, this->hw.must_apply_timings = false; ret = gpmi_nfc_apply_timings(this); if (ret) - return ret; + goto out_pm; } dev_dbg(this->dev, "%s: %d instructions\n", __func__, op->ninstrs); @@ -2414,6 +2414,7 @@ unmap: this->bch = false; +out_pm: pm_runtime_mark_last_busy(this->dev); pm_runtime_put_autosuspend(this->dev); -- cgit v1.2.3 From fa4300f060e5c4ca670b705f1e9b93685ad30c5b Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Thu, 27 Jan 2022 13:26:43 -0600 Subject: of: unittest: update text of expected warnings The text of various warning messages triggered by unittest has changed. Update the text of expected warnings to match. The expected vs actual warnings are most easily seen by filtering the boot console messages with the of_unittest_expect program at https://github.com/frowand/dt_tools.git. The filter prefixes problem lines with '***', and prefixes lines that match expected errors with 'ok '. All other lines are prefixed with ' '. Unrelated lines have been deleted in the following examples. The mismatch appears as: -> ### dt-test ### start of unittest - you will see error messages OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1 ** of_unittest_expect WARNING - not found ---> OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1 OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1 ** of_unittest_expect WARNING - not found ---> OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1 OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found 1 ** of_unittest_expect WARNING - not found ---> OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found -1 platform testcase-data:testcase-device2: error -ENXIO: IRQ index 0 not found ** of_unittest_expect WARNING - not found ---> platform testcase-data:testcase-device2: IRQ index 0 not found -> ### dt-test ### end of unittest - 254 passed, 0 failed ** EXPECT statistics: ** ** EXPECT found : 42 ** EXPECT not found : 4 With this commit applied, the mismatch is resolved: -> ### dt-test ### start of unittest - you will see error messages ok OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1 ok OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1 ok OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found 1 ok platform testcase-data:testcase-device2: error -ENXIO: IRQ index 0 not found -> ### dt-test ### end of unittest - 254 passed, 0 failed ** EXPECT statistics: ** ** EXPECT found : 46 ** EXPECT not found : 0 Fixes: 2043727c2882 ("driver core: platform: Make use of the helper function dev_err_probe()") Fixes: 94a4950a4acf ("of: base: Fix phandle argument length mismatch error message") Signed-off-by: Frank Rowand Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220127192643.2534941-1-frowand.list@gmail.com --- drivers/of/unittest.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 70992103c07d..2c2fb161b572 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -513,24 +513,24 @@ static void __init of_unittest_parse_phandle_with_args(void) memset(&args, 0, sizeof(args)); EXPECT_BEGIN(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1"); + "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1"); rc = of_parse_phandle_with_args(np, "phandle-list-bad-args", "#phandle-cells", 1, &args); EXPECT_END(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1"); + "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1"); unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); EXPECT_BEGIN(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1"); + "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1"); rc = of_count_phandle_with_args(np, "phandle-list-bad-args", "#phandle-cells"); EXPECT_END(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found -1"); + "OF: /testcase-data/phandle-tests/consumer-a: #phandle-cells = 3 found 1"); unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); } @@ -670,12 +670,12 @@ static void __init of_unittest_parse_phandle_with_args_map(void) memset(&args, 0, sizeof(args)); EXPECT_BEGIN(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found -1"); + "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found 1"); rc = of_parse_phandle_with_args_map(np, "phandle-list-bad-args", "phandle", 1, &args); EXPECT_END(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found -1"); + "OF: /testcase-data/phandle-tests/consumer-b: #phandle-cells = 2 found 1"); unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); } @@ -1257,12 +1257,12 @@ static void __init of_unittest_platform_populate(void) unittest(pdev, "device 2 creation failed\n"); EXPECT_BEGIN(KERN_INFO, - "platform testcase-data:testcase-device2: IRQ index 0 not found"); + "platform testcase-data:testcase-device2: error -ENXIO: IRQ index 0 not found"); irq = platform_get_irq(pdev, 0); EXPECT_END(KERN_INFO, - "platform testcase-data:testcase-device2: IRQ index 0 not found"); + "platform testcase-data:testcase-device2: error -ENXIO: IRQ index 0 not found"); unittest(irq < 0 && irq != -EPROBE_DEFER, "device parsing error failed - %d\n", irq); -- cgit v1.2.3 From 36415a7964711822e63695ea67fede63979054d9 Mon Sep 17 00:00:00 2001 From: david regan Date: Wed, 26 Jan 2022 23:43:44 +0100 Subject: mtd: rawnand: brcmnand: Fixed incorrect sub-page ECC status The brcmnand driver contains a bug in which if a page (example 2k byte) is read from the parallel/ONFI NAND and within that page a subpage (512 byte) has correctable errors which is followed by a subpage with uncorrectable errors, the page read will return the wrong status of correctable (as opposed to the actual status of uncorrectable.) The bug is in function brcmnand_read_by_pio where there is a check for uncorrectable bits which will be preempted if a previous status for correctable bits is detected. The fix is to stop checking for bad bits only if we already have a bad bits status. Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller") Signed-off-by: david regan Reviewed-by: Florian Fainelli Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/trinity-478e0c09-9134-40e8-8f8c-31c371225eda-1643237024774@3c-app-mailcom-lxa02 --- drivers/mtd/nand/raw/brcmnand/brcmnand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index f75929783b94..aee78f5f4f15 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -2106,7 +2106,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip, mtd->oobsize / trans, host->hwcfg.sector_size_1k); - if (!ret) { + if (ret != -EBADMSG) { *err_addr = brcmnand_get_uncorrecc_addr(ctrl); if (*err_addr) -- cgit v1.2.3 From da5462a4dc446e7028e6d150a4b02684d1d3ea81 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 17 Jan 2022 18:15:22 -0800 Subject: power: supply: fix table problem in sysfs-class-power Add a bottom table border to complete the table format and prevent a documentation build warning. Documentation/ABI/testing/sysfs-class-power:459: WARNING: Malformed table. No bottom table border found. Fixes: 1b0b6cc8030d0 ("power: supply: add charge_behaviour attributes") Signed-off-by: Randy Dunlap Cc: Thomas Weißschuh Cc: Hans de Goede Cc: Sebastian Reichel Cc: linux-pm@vger.kernel.org Cc: Stephen Rothwell Signed-off-by: Sebastian Reichel --- Documentation/ABI/testing/sysfs-class-power | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power index fde21d900420..859501366777 100644 --- a/Documentation/ABI/testing/sysfs-class-power +++ b/Documentation/ABI/testing/sysfs-class-power @@ -468,6 +468,7 @@ Description: auto: Charge normally, respect thresholds inhibit-charge: Do not charge while AC is attached force-discharge: Force discharge while AC is attached + ================ ==================================== What: /sys/class/power_supply//technology Date: May 2007 -- cgit v1.2.3 From 2b56a9a28a6bf09890a43161402948ed62963f36 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 5 Jan 2022 10:37:00 +0800 Subject: power: supply: core: fix application of sizeof to pointer The coccinelle check report: ./drivers/power/supply/cw2015_battery.c:692:12-18: ERROR: application of sizeof to pointer Reported-by: Abaci Robot Fixes: 25fd330370ac ("power: supply_core: Pass pointer to battery info") Signed-off-by: Yang Li Reviewed-by: Linus Walleij Signed-off-by: Sebastian Reichel --- drivers/power/supply/cw2015_battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/cw2015_battery.c b/drivers/power/supply/cw2015_battery.c index 0c87ad0dbf71..728e2a6cc9c3 100644 --- a/drivers/power/supply/cw2015_battery.c +++ b/drivers/power/supply/cw2015_battery.c @@ -689,7 +689,7 @@ static int cw_bat_probe(struct i2c_client *client) if (ret) { /* Allocate an empty battery */ cw_bat->battery = devm_kzalloc(&client->dev, - sizeof(cw_bat->battery), + sizeof(*cw_bat->battery), GFP_KERNEL); if (!cw_bat->battery) return -ENOMEM; -- cgit v1.2.3 From 564778d7b1ea465f9487eedeece7527a033549c5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Feb 2022 15:56:26 +0000 Subject: ASoC: ops: Fix stereo change notifications in snd_soc_put_volsw() When writing out a stereo control we discard the change notification from the first channel, meaning that events are only generated based on changes to the second channel. Ensure that we report a change if either channel has changed. Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220201155629.120510-2-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 9833611b83d1..2ce73e3391f4 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -308,7 +308,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, unsigned int sign_bit = mc->sign_bit; unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; - int err; + int err, ret; bool type_2r = false; unsigned int val2 = 0; unsigned int val, val_mask; @@ -350,12 +350,18 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, err = snd_soc_component_update_bits(component, reg, val_mask, val); if (err < 0) return err; + ret = err; - if (type_2r) + if (type_2r) { err = snd_soc_component_update_bits(component, reg2, val_mask, - val2); + val2); + /* Don't discard any error code or drop change flag */ + if (ret == 0 || err < 0) { + ret = err; + } + } - return err; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_put_volsw); -- cgit v1.2.3 From 7f3d90a3519680dfa23e750f80bfdefc0f5eda4a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Feb 2022 15:56:27 +0000 Subject: ASoC: ops: Fix stereo change notifications in snd_soc_put_volsw_sx() When writing out a stereo control we discard the change notification from the first channel, meaning that events are only generated based on changes to the second channel. Ensure that we report a change if either channel has changed. Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220201155629.120510-3-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 2ce73e3391f4..4987c58cd59a 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -427,6 +427,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, int min = mc->min; unsigned int mask = (1U << (fls(min + max) - 1)) - 1; int err = 0; + int ret; unsigned int val, val_mask; if (ucontrol->value.integer.value[0] < 0) @@ -443,6 +444,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, err = snd_soc_component_update_bits(component, reg, val_mask, val); if (err < 0) return err; + ret = err; if (snd_soc_volsw_is_stereo(mc)) { unsigned int val2; @@ -453,6 +455,11 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, err = snd_soc_component_update_bits(component, reg2, val_mask, val2); + + /* Don't discard any error code or drop change flag */ + if (ret == 0 || err < 0) { + ret = err; + } } return err; } -- cgit v1.2.3 From 650204ded3703b5817bd4b6a77fa47d333c4f902 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Feb 2022 15:56:28 +0000 Subject: ASoC: ops: Fix stereo change notifications in snd_soc_put_volsw_range() When writing out a stereo control we discard the change notification from the first channel, meaning that events are only generated based on changes to the second channel. Ensure that we report a change if either channel has changed. Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220201155629.120510-4-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 4987c58cd59a..2e57f814bcc2 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -519,7 +519,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; unsigned int val, val_mask; - int ret; + int err, ret; if (invert) val = (max - ucontrol->value.integer.value[0]) & mask; @@ -528,9 +528,10 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; - ret = snd_soc_component_update_bits(component, reg, val_mask, val); - if (ret < 0) - return ret; + err = snd_soc_component_update_bits(component, reg, val_mask, val); + if (err < 0) + return err; + ret = err; if (snd_soc_volsw_is_stereo(mc)) { if (invert) @@ -540,8 +541,12 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; - ret = snd_soc_component_update_bits(component, rreg, val_mask, + err = snd_soc_component_update_bits(component, rreg, val_mask, val); + /* Don't discard any error code or drop change flag */ + if (ret == 0 || err < 0) { + ret = err; + } } return ret; -- cgit v1.2.3 From 2b7c46369f09c358164d31d17e5695185403185e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Feb 2022 15:56:29 +0000 Subject: ASoC: ops: Fix stereo change notifications in snd_soc_put_xr_sx() When writing out a stereo control we discard the change notification from the first channel, meaning that events are only generated based on changes to the second channel. Ensure that we report a change if either channel has changed. Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220201155629.120510-5-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 2e57f814bcc2..03ea9591fb16 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -895,6 +895,7 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, unsigned long mask = (1UL<nbits)-1; long max = mc->max; long val = ucontrol->value.integer.value[0]; + int ret = 0; unsigned int i; if (val < mc->min || val > mc->max) @@ -909,9 +910,11 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, regmask, regval); if (err < 0) return err; + if (err > 0) + ret = err; } - return 0; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_put_xr_sx); -- cgit v1.2.3 From 7fa5c33d043160eba3be9fb8e21588dff2a467c7 Mon Sep 17 00:00:00 2001 From: V sujith kumar Reddy Date: Tue, 1 Feb 2022 02:02:15 +0530 Subject: ASoC: amd: acp: Set gpio_spkr_en to None for max speaker amplifer in machine driver Maxim codec driver already enabling/disabling spk_en_gpio in form of sd_mode gpio hence remove such gpio access control from machine driver to avoid conflict Signed-off-by: V sujith kumar Reddy Link: https://lore.kernel.org/r/20220131203225.1418648-1-vsujithkumar.reddy@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-mach.h | 1 - sound/soc/amd/acp/acp-sof-mach.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/amd/acp/acp-mach.h b/sound/soc/amd/acp/acp-mach.h index fd6299844ebe..c855f50d6b34 100644 --- a/sound/soc/amd/acp/acp-mach.h +++ b/sound/soc/amd/acp/acp-mach.h @@ -21,7 +21,6 @@ #include #define EN_SPKR_GPIO_GB 0x11F -#define EN_SPKR_GPIO_NK 0x146 #define EN_SPKR_GPIO_NONE -EINVAL enum be_id { diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c index 07de46142655..4cc431e54fe1 100644 --- a/sound/soc/amd/acp/acp-sof-mach.c +++ b/sound/soc/amd/acp/acp-sof-mach.c @@ -37,7 +37,7 @@ static struct acp_card_drvdata sof_rt5682_max_data = { .hs_codec_id = RT5682, .amp_codec_id = MAX98360A, .dmic_codec_id = DMIC, - .gpio_spkr_en = EN_SPKR_GPIO_NK, + .gpio_spkr_en = EN_SPKR_GPIO_NONE, }; static struct acp_card_drvdata sof_rt5682s_max_data = { @@ -47,7 +47,7 @@ static struct acp_card_drvdata sof_rt5682s_max_data = { .hs_codec_id = RT5682S, .amp_codec_id = MAX98360A, .dmic_codec_id = DMIC, - .gpio_spkr_en = EN_SPKR_GPIO_NK, + .gpio_spkr_en = EN_SPKR_GPIO_NONE, }; static const struct snd_kcontrol_new acp_controls[] = { -- cgit v1.2.3 From 868d7618d75f2cac23c2be6ca8d55ae1380c36d1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 3 Feb 2022 11:33:02 +0100 Subject: platform/x86: thinkpad_acpi: Add dual-fan quirk for T15g (2nd gen) The ThinkPad T15g Gen 2 has 2 fan, add a TPACPI_FAN_2CTL quirk entry for it to the fan_quirk_table[] so that both fans can be controllerd. Reported-and-tested-by: David Dreschner Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220203103302.49401-1-hdegoede@redhat.com --- drivers/platform/x86/thinkpad_acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index bd045486b933..3424b080db77 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8703,6 +8703,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '4', '0', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (4nd gen) */ TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL), /* P15 (1st gen) / P15v (1st gen) */ TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL), /* X1 Carbon (9th gen) */ + TPACPI_Q_LNV3('N', '3', '7', TPACPI_FAN_2CTL), /* T15g (2nd gen) */ TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ }; -- cgit v1.2.3 From e3d13da7f77d73c64981b62591c21614a6cf688f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 5 Feb 2022 12:28:40 +0100 Subject: platform/x86: asus-wmi: Fix regression when probing for fan curve control The fan curve control patches introduced a regression for at least the TUF FX506 and possibly other TUF series laptops that do not have support for fan curve control. As part of the probing process, asus_wmi_evaluate_method_buf is called to get the factory default fan curve . The WMI management function returns 0 on certain laptops to indicate lack of fan curve control instead of ASUS_WMI_UNSUPPORTED_METHOD. This 0 is transformed to -ENODATA which results in failure when probing. Fixes: 0f0ac158d28f ("platform/x86: asus-wmi: Add support for custom fan curves") Reported-and-tested-by: Abhijeet V Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220205112840.33095-1-hdegoede@redhat.com --- drivers/platform/x86/asus-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index a3b83b22a3b1..2104a2621e50 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2223,7 +2223,7 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available, err = fan_curve_get_factory_default(asus, fan_dev); if (err) { - if (err == -ENODEV) + if (err == -ENODEV || err == -ENODATA) return 0; return err; } -- cgit v1.2.3 From 9bb162fa26ed76031ed0e7dbc77ccea0bf977758 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 7 Dec 2021 06:10:05 +0000 Subject: powerpc/603: Fix boot failure with DEBUG_PAGEALLOC and KFENCE Allthough kernel text is always mapped with BATs, we still have inittext mapped with pages, so TLB miss handling is required when CONFIG_DEBUG_PAGEALLOC or CONFIG_KFENCE is set. The final solution should be to set a BAT that also maps inittext but that BAT then needs to be cleared at end of init, and it will require more changes to be able to do it properly. As DEBUG_PAGEALLOC or KFENCE are debugging, performance is not a big deal so let's fix it simply for now to enable easy stable application. Fixes: 035b19a15a98 ("powerpc/32s: Always map kernel text and rodata with BATs") Cc: stable@vger.kernel.org # v5.11+ Reported-by: Maxime Bizon Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/aea33b4813a26bdb9378b5f273f00bd5d4abe240.1638857364.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index fa84744d6b24..b876ef8c70a7 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -421,14 +421,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -- cgit v1.2.3 From 9495b9b31abe525ebd93da58de2c88b9f66d3a0e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 23 Feb 2018 22:42:31 +0100 Subject: i2c: bcm2835: Avoid clock stretching timeouts The CLKT register contains at poweron 0x40, which at our typical 100kHz bus rate means .64ms. But there is no specified limit to how long devices should be able to stretch the clocks, so just disable the timeout. We still have a timeout wrapping the entire transfer. Signed-off-by: Eric Anholt Signed-off-by: Stefan Wahren BugLink: https://github.com/raspberrypi/linux/issues/3064 Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-bcm2835.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index dfc534065595..5149454eef4a 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -23,6 +23,11 @@ #define BCM2835_I2C_FIFO 0x10 #define BCM2835_I2C_DIV 0x14 #define BCM2835_I2C_DEL 0x18 +/* + * 16-bit field for the number of SCL cycles to wait after rising SCL + * before deciding the slave is not responding. 0 disables the + * timeout detection. + */ #define BCM2835_I2C_CLKT 0x1c #define BCM2835_I2C_C_READ BIT(0) @@ -474,6 +479,12 @@ static int bcm2835_i2c_probe(struct platform_device *pdev) adap->dev.of_node = pdev->dev.of_node; adap->quirks = of_device_get_match_data(&pdev->dev); + /* + * Disable the hardware clock stretching timeout. SMBUS + * specifies a limit for how long the device can stretch the + * clock, but core I2C doesn't. + */ + bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_CLKT, 0); bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, 0); ret = i2c_add_adapter(adap); -- cgit v1.2.3 From 946eb87114af37c9c13c618a7c1cdaca936905fa Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 7 Feb 2022 08:09:23 -0800 Subject: ASoC: Revert "ASoC: mediatek: Check for error clk pointer" This reverts commit 9de2b9286a6d ("ASoC: mediatek: Check for error clk pointer"). With this patch in the tree, Chromebooks running the affected hardware no longer boot. Bisect points to this patch, and reverting it fixes the problem. An analysis of the code with this patch applied shows: ret = init_clks(pdev, clk); if (ret) return ERR_PTR(ret); ... for (j = 0; j < MAX_CLKS && data->clk_id[j]; j++) { struct clk *c = clk[data->clk_id[j]]; if (IS_ERR(c)) { dev_err(&pdev->dev, "%s: clk unavailable\n", data->name); return ERR_CAST(c); } scpd->clk[j] = c; } Not all clocks in the clk_names array have to be present. Only the clocks in the data->clk_id array are actually needed. The code already checks if the required clocks are available and bails out if not. The assumption that all clocks have to be present is wrong, and commit 9de2b9286a6d ("ASoC: mediatek: Check for error clk pointer") needs to be reverted. Cc: Jiasheng Jiang Cc: Mark Brown Cc: James Liao Cc: Kevin Hilman Cc: Matthias Brugger Reported-by: Frank Wunderlich Reported-by: Daniel Golle Fixes: 9de2b9286a6d ("ASoC: mediatek: Check for error clk pointer") Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20220207160923.3911501-1-linux@roeck-us.net Signed-off-by: Mark Brown --- drivers/soc/mediatek/mtk-scpsys.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/soc/mediatek/mtk-scpsys.c b/drivers/soc/mediatek/mtk-scpsys.c index 670cc82d17dc..ca75b14931ec 100644 --- a/drivers/soc/mediatek/mtk-scpsys.c +++ b/drivers/soc/mediatek/mtk-scpsys.c @@ -411,17 +411,12 @@ out: return ret; } -static int init_clks(struct platform_device *pdev, struct clk **clk) +static void init_clks(struct platform_device *pdev, struct clk **clk) { int i; - for (i = CLK_NONE + 1; i < CLK_MAX; i++) { + for (i = CLK_NONE + 1; i < CLK_MAX; i++) clk[i] = devm_clk_get(&pdev->dev, clk_names[i]); - if (IS_ERR(clk[i])) - return PTR_ERR(clk[i]); - } - - return 0; } static struct scp *init_scp(struct platform_device *pdev, @@ -431,7 +426,7 @@ static struct scp *init_scp(struct platform_device *pdev, { struct genpd_onecell_data *pd_data; struct resource *res; - int i, j, ret; + int i, j; struct scp *scp; struct clk *clk[CLK_MAX]; @@ -486,9 +481,7 @@ static struct scp *init_scp(struct platform_device *pdev, pd_data->num_domains = num; - ret = init_clks(pdev, clk); - if (ret) - return ERR_PTR(ret); + init_clks(pdev, clk); for (i = 0; i < num; i++) { struct scp_domain *scpd = &scp->domains[i]; -- cgit v1.2.3 From 307f31452078792aab94a729fce33200c6e42dc4 Mon Sep 17 00:00:00 2001 From: Martin Povišer Date: Fri, 4 Feb 2022 10:53:01 +0100 Subject: ASoC: tas2770: Insert post reset delay Per TAS2770 datasheet there must be a 1 ms delay from reset to first command. So insert delays into the driver where appropriate. Fixes: 1a476abc723e ("tas2770: add tas2770 smart PA kernel driver") Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20220204095301.5554-1-povik+lin@cutebit.org Signed-off-by: Mark Brown --- sound/soc/codecs/tas2770.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 6549e7fef3e3..c5ea3b115966 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -38,10 +38,12 @@ static void tas2770_reset(struct tas2770_priv *tas2770) gpiod_set_value_cansleep(tas2770->reset_gpio, 0); msleep(20); gpiod_set_value_cansleep(tas2770->reset_gpio, 1); + usleep_range(1000, 2000); } snd_soc_component_write(tas2770->component, TAS2770_SW_RST, TAS2770_RST); + usleep_range(1000, 2000); } static int tas2770_set_bias_level(struct snd_soc_component *component, @@ -110,6 +112,7 @@ static int tas2770_codec_resume(struct snd_soc_component *component) if (tas2770->sdz_gpio) { gpiod_set_value_cansleep(tas2770->sdz_gpio, 1); + usleep_range(1000, 2000); } else { ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, TAS2770_PWR_CTRL_MASK, @@ -510,8 +513,10 @@ static int tas2770_codec_probe(struct snd_soc_component *component) tas2770->component = component; - if (tas2770->sdz_gpio) + if (tas2770->sdz_gpio) { gpiod_set_value_cansleep(tas2770->sdz_gpio, 1); + usleep_range(1000, 2000); + } tas2770_reset(tas2770); -- cgit v1.2.3 From d7b530fdc45e75a54914a194c4becd9672a4e24f Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Mon, 7 Feb 2022 17:29:58 +0200 Subject: ASoC: rt5682s: do not block workqueue if card is unbound The current rt5682s_jack_detect_handler() assumes the component and card will always show up and implements an infinite usleep loop waiting for them to show up. This does not hold true if a codec interrupt (or other event) occurs when the card is unbound. The codec driver's remove or shutdown functions cannot cancel the workqueue due to the wait loop. As a result, code can either end up blocking the workqueue, or hit a kernel oops when the card is freed. Fix the issue by rescheduling the jack detect handler in case the card is not ready. In case card never shows up, the shutdown/remove/suspend calls can now cancel the detect task. Signed-off-by: Kai Vehmanen Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Shuming Fan Link: https://lore.kernel.org/r/20220207153000.3452802-1-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682s.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/rt5682s.c b/sound/soc/codecs/rt5682s.c index efa1016831dd..1e662d1be2b3 100644 --- a/sound/soc/codecs/rt5682s.c +++ b/sound/soc/codecs/rt5682s.c @@ -824,11 +824,13 @@ static void rt5682s_jack_detect_handler(struct work_struct *work) container_of(work, struct rt5682s_priv, jack_detect_work.work); int val, btn_type; - while (!rt5682s->component) - usleep_range(10000, 15000); - - while (!rt5682s->component->card->instantiated) - usleep_range(10000, 15000); + if (!rt5682s->component || !rt5682s->component->card || + !rt5682s->component->card->instantiated) { + /* card not yet ready, try later */ + mod_delayed_work(system_power_efficient_wq, + &rt5682s->jack_detect_work, msecs_to_jiffies(15)); + return; + } mutex_lock(&rt5682s->jdet_mutex); mutex_lock(&rt5682s->calibrate_mutex); -- cgit v1.2.3 From a6d78661dc903d90a327892bbc34268f3a5f4b9c Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Mon, 7 Feb 2022 17:29:59 +0200 Subject: ASoC: rt5668: do not block workqueue if card is unbound The current rt5668_jack_detect_handler() assumes the component and card will always show up and implements an infinite usleep loop waiting for them to show up. This does not hold true if a codec interrupt (or other event) occurs when the card is unbound. The codec driver's remove or shutdown functions cannot cancel the workqueue due to the wait loop. As a result, code can either end up blocking the workqueue, or hit a kernel oops when the card is freed. Fix the issue by rescheduling the jack detect handler in case the card is not ready. In case card never shows up, the shutdown/remove/suspend calls can now cancel the detect task. Signed-off-by: Kai Vehmanen Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Shuming Fan Link: https://lore.kernel.org/r/20220207153000.3452802-2-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5668.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/rt5668.c b/sound/soc/codecs/rt5668.c index fb09715bf932..5b12cbf2ba21 100644 --- a/sound/soc/codecs/rt5668.c +++ b/sound/soc/codecs/rt5668.c @@ -1022,11 +1022,13 @@ static void rt5668_jack_detect_handler(struct work_struct *work) container_of(work, struct rt5668_priv, jack_detect_work.work); int val, btn_type; - while (!rt5668->component) - usleep_range(10000, 15000); - - while (!rt5668->component->card->instantiated) - usleep_range(10000, 15000); + if (!rt5668->component || !rt5668->component->card || + !rt5668->component->card->instantiated) { + /* card not yet ready, try later */ + mod_delayed_work(system_power_efficient_wq, + &rt5668->jack_detect_work, msecs_to_jiffies(15)); + return; + } mutex_lock(&rt5668->calibrate_mutex); -- cgit v1.2.3 From 4c33de0673ced9c7c37b3bbd9bfe0fda72340b2a Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Mon, 7 Feb 2022 17:30:00 +0200 Subject: ASoC: rt5682: do not block workqueue if card is unbound The current rt5682_jack_detect_handler() assumes the component and card will always show up and implements an infinite usleep loop waiting for them to show up. This does not hold true if a codec interrupt (or other event) occurs when the card is unbound. The codec driver's remove or shutdown functions cannot cancel the workqueue due to the wait loop. As a result, code can either end up blocking the workqueue, or hit a kernel oops when the card is freed. Fix the issue by rescheduling the jack detect handler in case the card is not ready. In case card never shows up, the shutdown/remove/suspend calls can now cancel the detect task. Signed-off-by: Kai Vehmanen Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Shuming Fan Link: https://lore.kernel.org/r/20220207153000.3452802-3-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 0a0ec4a021e1..be68d573a490 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -1092,11 +1092,13 @@ void rt5682_jack_detect_handler(struct work_struct *work) struct snd_soc_dapm_context *dapm; int val, btn_type; - while (!rt5682->component) - usleep_range(10000, 15000); - - while (!rt5682->component->card->instantiated) - usleep_range(10000, 15000); + if (!rt5682->component || !rt5682->component->card || + !rt5682->component->card->instantiated) { + /* card not yet ready, try later */ + mod_delayed_work(system_power_efficient_wq, + &rt5682->jack_detect_work, msecs_to_jiffies(15)); + return; + } dapm = snd_soc_component_get_dapm(rt5682->component); -- cgit v1.2.3 From 37ef4c19b4c659926ce65a7ac709ceaefb211c40 Mon Sep 17 00:00:00 2001 From: José Expósito Date: Tue, 8 Feb 2022 09:59:16 -0800 Subject: Input: clear BTN_RIGHT/MIDDLE on buttonpads Buttonpads are expected to map the INPUT_PROP_BUTTONPAD property bit and the BTN_LEFT key bit. As explained in the specification, where a device has a button type value of 0 (click-pad) or 1 (pressure-pad) there should not be discrete buttons: https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/touchpad-windows-precision-touchpad-collection#device-capabilities-feature-report However, some drivers map the BTN_RIGHT and/or BTN_MIDDLE key bits even though the device is a buttonpad and therefore does not have those buttons. This behavior has forced userspace applications like libinput to implement different workarounds and quirks to detect buttonpads and offer to the user the right set of features and configuration options. For more information: https://gitlab.freedesktop.org/libinput/libinput/-/merge_requests/726 In order to avoid this issue clear the BTN_RIGHT and BTN_MIDDLE key bits when the input device is register if the INPUT_PROP_BUTTONPAD property bit is set. Notice that this change will not affect udev because it does not check for buttons. See systemd/src/udev/udev-builtin-input_id.c. List of known affected hardware: - Chuwi AeroBook Plus - Chuwi Gemibook - Framework Laptop - GPD Win Max - Huawei MateBook 2020 - Prestigio Smartbook 141 C2 - Purism Librem 14v1 - StarLite Mk II - AMI firmware - StarLite Mk II - Coreboot firmware - StarLite Mk III - AMI firmware - StarLite Mk III - Coreboot firmware - StarLabTop Mk IV - AMI firmware - StarLabTop Mk IV - Coreboot firmware - StarBook Mk V Acked-by: Peter Hutterer Acked-by: Benjamin Tissoires Acked-by: Jiri Kosina Signed-off-by: José Expósito Link: https://lore.kernel.org/r/20220208174806.17183-1-jose.exposito89@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/input.c b/drivers/input/input.c index ccaeb2426385..c3139bc2aa0d 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -2285,6 +2285,12 @@ int input_register_device(struct input_dev *dev) /* KEY_RESERVED is not supposed to be transmitted to userspace. */ __clear_bit(KEY_RESERVED, dev->keybit); + /* Buttonpads should not map BTN_RIGHT and/or BTN_MIDDLE. */ + if (test_bit(INPUT_PROP_BUTTONPAD, dev->propbit)) { + __clear_bit(BTN_RIGHT, dev->keybit); + __clear_bit(BTN_MIDDLE, dev->keybit); + } + /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); -- cgit v1.2.3 From e4e3a93c6e267572ca2345d8d86053e166843a8c Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Tue, 8 Feb 2022 11:12:42 +0800 Subject: MAINTAINERS: update cros_ec_codec maintainers Updates cros_ec_codec maintainers. Signed-off-by: Tzung-Bi Shih Acked-By: Cheng-Yi Chiang Acked-By: Benson Leung Link: https://lore.kernel.org/r/20220208031242.227563-1-tzungbi@google.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml | 1 + MAINTAINERS | 1 + 2 files changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml index 77adbebed824..c3e9f3485449 100644 --- a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml +++ b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml @@ -8,6 +8,7 @@ title: Audio codec controlled by ChromeOS EC maintainers: - Cheng-Yi Chiang + - Tzung-Bi Shih description: | Google's ChromeOS EC codec is a digital mic codec provided by the diff --git a/MAINTAINERS b/MAINTAINERS index 09734251e1de..9256c3540a0f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4476,6 +4476,7 @@ F: drivers/platform/chrome/ CHROMEOS EC CODEC DRIVER M: Cheng-Yi Chiang +M: Tzung-Bi Shih R: Guenter Roeck S: Maintained F: Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml -- cgit v1.2.3 From 647474b8d980256b26b1cd112d7333a4dbd4260a Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 31 Jan 2022 09:55:20 +0100 Subject: drm/mediatek: mtk_dsi: Avoid EPROBE_DEFER loop with external bridge DRM bridge drivers are now attaching their DSI device at probe time, which requires us to register our DSI host in order to let the bridge to probe: this recently started producing an endless -EPROBE_DEFER loop on some machines that are using external bridges, like the parade-ps8640, found on the ACER Chromebook R13. Now that the DSI hosts/devices probe sequence is documented, we can do adjustments to the mtk_dsi driver as to both fix now and make sure to avoid this situation in the future: for this, following what is documented in drm_bridge.c, move the mtk_dsi component_add() to the mtk_dsi_ops.attach callback and delete it in the detach callback; keeping in mind that we are registering a drm_bridge for our DSI, which is only used/attached if the DSI Host is bound, it wouldn't make sense to keep adding our bridge at probe time (as it would be useless to have it if mtk_dsi_ops.attach() fails!), so also move that one to the dsi host attach function (and remove it in detach). Cc: # 5.15.x Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Andrzej Hajda Reviewed-by: Jagan Teki Tested-by: Nícolas F. R. A. Prado Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dsi.c | 167 +++++++++++++++++++------------------ 1 file changed, 84 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 5d90d2eb0019..bced4c7d668e 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -786,18 +786,101 @@ void mtk_dsi_ddp_stop(struct device *dev) mtk_dsi_poweroff(dsi); } +static int mtk_dsi_encoder_init(struct drm_device *drm, struct mtk_dsi *dsi) +{ + int ret; + + ret = drm_simple_encoder_init(drm, &dsi->encoder, + DRM_MODE_ENCODER_DSI); + if (ret) { + DRM_ERROR("Failed to encoder init to drm\n"); + return ret; + } + + dsi->encoder.possible_crtcs = mtk_drm_find_possible_crtc_by_comp(drm, dsi->host.dev); + + ret = drm_bridge_attach(&dsi->encoder, &dsi->bridge, NULL, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (ret) + goto err_cleanup_encoder; + + dsi->connector = drm_bridge_connector_init(drm, &dsi->encoder); + if (IS_ERR(dsi->connector)) { + DRM_ERROR("Unable to create bridge connector\n"); + ret = PTR_ERR(dsi->connector); + goto err_cleanup_encoder; + } + drm_connector_attach_encoder(dsi->connector, &dsi->encoder); + + return 0; + +err_cleanup_encoder: + drm_encoder_cleanup(&dsi->encoder); + return ret; +} + +static int mtk_dsi_bind(struct device *dev, struct device *master, void *data) +{ + int ret; + struct drm_device *drm = data; + struct mtk_dsi *dsi = dev_get_drvdata(dev); + + ret = mtk_dsi_encoder_init(drm, dsi); + if (ret) + return ret; + + return device_reset_optional(dev); +} + +static void mtk_dsi_unbind(struct device *dev, struct device *master, + void *data) +{ + struct mtk_dsi *dsi = dev_get_drvdata(dev); + + drm_encoder_cleanup(&dsi->encoder); +} + +static const struct component_ops mtk_dsi_component_ops = { + .bind = mtk_dsi_bind, + .unbind = mtk_dsi_unbind, +}; + static int mtk_dsi_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { struct mtk_dsi *dsi = host_to_dsi(host); + struct device *dev = host->dev; + int ret; dsi->lanes = device->lanes; dsi->format = device->format; dsi->mode_flags = device->mode_flags; + dsi->next_bridge = devm_drm_of_get_bridge(dev, dev->of_node, 0, 0); + if (IS_ERR(dsi->next_bridge)) + return PTR_ERR(dsi->next_bridge); + + drm_bridge_add(&dsi->bridge); + + ret = component_add(host->dev, &mtk_dsi_component_ops); + if (ret) { + DRM_ERROR("failed to add dsi_host component: %d\n", ret); + drm_bridge_remove(&dsi->bridge); + return ret; + } return 0; } +static int mtk_dsi_host_detach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct mtk_dsi *dsi = host_to_dsi(host); + + component_del(host->dev, &mtk_dsi_component_ops); + drm_bridge_remove(&dsi->bridge); + return 0; +} + static void mtk_dsi_wait_for_idle(struct mtk_dsi *dsi) { int ret; @@ -938,73 +1021,14 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host, static const struct mipi_dsi_host_ops mtk_dsi_ops = { .attach = mtk_dsi_host_attach, + .detach = mtk_dsi_host_detach, .transfer = mtk_dsi_host_transfer, }; -static int mtk_dsi_encoder_init(struct drm_device *drm, struct mtk_dsi *dsi) -{ - int ret; - - ret = drm_simple_encoder_init(drm, &dsi->encoder, - DRM_MODE_ENCODER_DSI); - if (ret) { - DRM_ERROR("Failed to encoder init to drm\n"); - return ret; - } - - dsi->encoder.possible_crtcs = mtk_drm_find_possible_crtc_by_comp(drm, dsi->host.dev); - - ret = drm_bridge_attach(&dsi->encoder, &dsi->bridge, NULL, - DRM_BRIDGE_ATTACH_NO_CONNECTOR); - if (ret) - goto err_cleanup_encoder; - - dsi->connector = drm_bridge_connector_init(drm, &dsi->encoder); - if (IS_ERR(dsi->connector)) { - DRM_ERROR("Unable to create bridge connector\n"); - ret = PTR_ERR(dsi->connector); - goto err_cleanup_encoder; - } - drm_connector_attach_encoder(dsi->connector, &dsi->encoder); - - return 0; - -err_cleanup_encoder: - drm_encoder_cleanup(&dsi->encoder); - return ret; -} - -static int mtk_dsi_bind(struct device *dev, struct device *master, void *data) -{ - int ret; - struct drm_device *drm = data; - struct mtk_dsi *dsi = dev_get_drvdata(dev); - - ret = mtk_dsi_encoder_init(drm, dsi); - if (ret) - return ret; - - return device_reset_optional(dev); -} - -static void mtk_dsi_unbind(struct device *dev, struct device *master, - void *data) -{ - struct mtk_dsi *dsi = dev_get_drvdata(dev); - - drm_encoder_cleanup(&dsi->encoder); -} - -static const struct component_ops mtk_dsi_component_ops = { - .bind = mtk_dsi_bind, - .unbind = mtk_dsi_unbind, -}; - static int mtk_dsi_probe(struct platform_device *pdev) { struct mtk_dsi *dsi; struct device *dev = &pdev->dev; - struct drm_panel *panel; struct resource *regs; int irq_num; int ret; @@ -1021,19 +1045,6 @@ static int mtk_dsi_probe(struct platform_device *pdev) return ret; } - ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, - &panel, &dsi->next_bridge); - if (ret) - goto err_unregister_host; - - if (panel) { - dsi->next_bridge = devm_drm_panel_bridge_add(dev, panel); - if (IS_ERR(dsi->next_bridge)) { - ret = PTR_ERR(dsi->next_bridge); - goto err_unregister_host; - } - } - dsi->driver_data = of_device_get_match_data(dev); dsi->engine_clk = devm_clk_get(dev, "engine"); @@ -1098,14 +1109,6 @@ static int mtk_dsi_probe(struct platform_device *pdev) dsi->bridge.of_node = dev->of_node; dsi->bridge.type = DRM_MODE_CONNECTOR_DSI; - drm_bridge_add(&dsi->bridge); - - ret = component_add(&pdev->dev, &mtk_dsi_component_ops); - if (ret) { - dev_err(&pdev->dev, "failed to add component: %d\n", ret); - goto err_unregister_host; - } - return 0; err_unregister_host: @@ -1118,8 +1121,6 @@ static int mtk_dsi_remove(struct platform_device *pdev) struct mtk_dsi *dsi = platform_get_drvdata(pdev); mtk_output_dsi_disable(dsi); - drm_bridge_remove(&dsi->bridge); - component_del(&pdev->dev, &mtk_dsi_component_ops); mipi_dsi_host_unregister(&dsi->host); return 0; -- cgit v1.2.3 From 8e1741c658996a16bd096e077dae0da2460a997f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 10 Feb 2022 13:33:43 +0100 Subject: ALSA: memalloc: Fix dma_need_sync() checks dma_need_sync() checks each DMA address. Fix the incorrect usages for non-contiguous and non-coherent page allocations. Fortunately, there are no actual call sites that need manual syncs yet. Fixes: a25684a95646 ("ALSA: memalloc: Support for non-contiguous page allocation") Fixes: 73325f60e2ed ("ALSA: memalloc: Support for non-coherent page allocation") Cc: Reported-by: Ezequiel Garcia Link: https://lore.kernel.org/r/20220210123344.8756-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/memalloc.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index d1fcd1d5adae..f98f694e3ce4 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -511,7 +511,8 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) DEFAULT_GFP, 0); if (!sgt) return NULL; - dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->dev.dir); + dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, + sg_dma_address(sgt->sgl)); p = dma_vmap_noncontiguous(dmab->dev.dev, size, sgt); if (p) dmab->private_data = sgt; @@ -671,9 +672,13 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = { */ static void *snd_dma_noncoherent_alloc(struct snd_dma_buffer *dmab, size_t size) { - dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->dev.dir); - return dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr, - dmab->dev.dir, DEFAULT_GFP); + void *p; + + p = dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr, + dmab->dev.dir, DEFAULT_GFP); + if (p) + dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->addr); + return p; } static void snd_dma_noncoherent_free(struct snd_dma_buffer *dmab) -- cgit v1.2.3 From 3e16dc50d77dc3494275a241fac250c94bf45206 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 10 Feb 2022 13:33:44 +0100 Subject: ALSA: memalloc: invalidate SG pages before sync It seems that calling invalidate_kernel_vmap_range() is more correct to be called before dma_sync_*(), judging from the other thread: https://lore.kernel.org/all/20220111085958.GA22795@lst.de/ Although this won't matter much in practice, let's fix the call order for consistency. Fixes: a25684a95646 ("ALSA: memalloc: Support for non-contiguous page allocation") Reported-by: Ezequiel Garcia Cc: Link: https://lore.kernel.org/r/20220210123344.8756-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/memalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index f98f694e3ce4..6fd763d4d15b 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -541,9 +541,9 @@ static void snd_dma_noncontig_sync(struct snd_dma_buffer *dmab, if (mode == SNDRV_DMA_SYNC_CPU) { if (dmab->dev.dir == DMA_TO_DEVICE) return; + invalidate_kernel_vmap_range(dmab->area, dmab->bytes); dma_sync_sgtable_for_cpu(dmab->dev.dev, dmab->private_data, dmab->dev.dir); - invalidate_kernel_vmap_range(dmab->area, dmab->bytes); } else { if (dmab->dev.dir == DMA_FROM_DEVICE) return; -- cgit v1.2.3 From c8d251f51ee61df06ee0e419348d8c9160bbfb86 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 9 Feb 2022 15:25:20 -0800 Subject: ASoC: qcom: Actually clear DMA interrupt register for HDMI In commit da0363f7bfd3 ("ASoC: qcom: Fix for DMA interrupt clear reg overwriting") we changed regmap_write() to regmap_update_bits() so that we can avoid overwriting bits that we didn't intend to modify. Unfortunately this change breaks the case where a register is writable but not readable, which is exactly how the HDMI irq clear register is designed (grep around LPASS_HDMITX_APP_IRQCLEAR_REG to see how it's write only). That's because regmap_update_bits() tries to read the register from the hardware and if it isn't readable it looks in the regmap cache to see what was written there last time to compare against what we want to write there. Eventually, we're unable to modify this register at all because the bits that we're trying to set are already set in the cache. This is doubly bad for the irq clear register because you have to write the bit to clear an interrupt. Given the irq is level triggered, we see an interrupt storm upon plugging in an HDMI cable and starting audio playback. The irq storm is so great that performance degrades significantly, leading to CPU soft lockups. Fix it by using regmap_write_bits() so that we really do write the bits in the clear register that we want to. This brings the number of irqs handled by lpass_dma_interrupt_handler() down from ~150k/sec to ~10/sec. Fixes: da0363f7bfd3 ("ASoC: qcom: Fix for DMA interrupt clear reg overwriting") Cc: Srinivasa Rao Mandadapu Cc: Srinivas Kandagatla Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20220209232520.4017634-1-swboyd@chromium.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-platform.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index a59e9d20cb46..4b1773c1fb95 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -524,7 +524,7 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, return -EINVAL; } - ret = regmap_update_bits(map, reg_irqclr, val_irqclr, val_irqclr); + ret = regmap_write_bits(map, reg_irqclr, val_irqclr, val_irqclr); if (ret) { dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", ret); return ret; @@ -665,7 +665,7 @@ static irqreturn_t lpass_dma_interrupt_handler( return -EINVAL; } if (interrupts & LPAIF_IRQ_PER(chan)) { - rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_PER(chan) | val)); + rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_PER(chan) | val)); if (rv) { dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", rv); @@ -676,7 +676,7 @@ static irqreturn_t lpass_dma_interrupt_handler( } if (interrupts & LPAIF_IRQ_XRUN(chan)) { - rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_XRUN(chan) | val)); + rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_XRUN(chan) | val)); if (rv) { dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", rv); @@ -688,7 +688,7 @@ static irqreturn_t lpass_dma_interrupt_handler( } if (interrupts & LPAIF_IRQ_ERR(chan)) { - rv = regmap_update_bits(map, reg, mask, (LPAIF_IRQ_ERR(chan) | val)); + rv = regmap_write_bits(map, reg, mask, (LPAIF_IRQ_ERR(chan) | val)); if (rv) { dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", rv); -- cgit v1.2.3 From a887f9c7a4d37a8e874ba8415a42a92a1b5139fc Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 10 Feb 2022 17:20:51 +0000 Subject: ASoC: wm_adsp: Correct control read size when parsing compressed buffer When parsing the compressed stream the whole buffer descriptor is now read in a single cs_dsp_coeff_read_ctrl; on older firmwares this descriptor is just 4 bytes but on more modern firmwares it is 24 bytes. The current code reads the full 24 bytes regardless, this was working but reading junk for the last 20 bytes. However commit f444da38ac92 ("firmware: cs_dsp: Add offset to cs_dsp read/write") added a size check into cs_dsp_coeff_read_ctrl, causing the older firmwares to now return an error. Update the code to only read the amount of data appropriate for the firmware loaded. Fixes: 04ae08596737 ("ASoC: wm_adsp: Switch to using wm_coeff_read_ctrl for compressed buffers") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20220210172053.22782-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index f3672e3d1703..0582585236a2 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1441,7 +1441,8 @@ static int wm_adsp_buffer_parse_coeff(struct cs_dsp_coeff_ctl *cs_ctl) int ret, i; for (i = 0; i < 5; ++i) { - ret = cs_dsp_coeff_read_ctrl(cs_ctl, 0, &coeff_v1, sizeof(coeff_v1)); + ret = cs_dsp_coeff_read_ctrl(cs_ctl, 0, &coeff_v1, + min(cs_ctl->len, sizeof(coeff_v1))); if (ret < 0) return ret; -- cgit v1.2.3 From d1c56bfdaca465bd1d0e913053a9c5cafe8b6a6c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 3 Feb 2022 14:14:05 +0100 Subject: tests: fix idmapped mount_setattr test The test treated zero as a successful run when it really should treat non-zero as a successful run. A mount's idmapping can't change once it has been attached to the filesystem. Link: https://lore.kernel.org/r/20220203131411.3093040-2-brauner@kernel.org Fixes: 01eadc8dd96d ("tests: add mount_setattr() selftests") Cc: Seth Forshee Cc: Christoph Hellwig Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- tools/testing/selftests/mount_setattr/mount_setattr_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index f31205f04ee0..8c5fea68ae67 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -1236,7 +1236,7 @@ static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long } /** - * Validate that an attached mount in our mount namespace can be idmapped. + * Validate that an attached mount in our mount namespace cannot be idmapped. * (The kernel enforces that the mount's mount namespace and the caller's mount * namespace match.) */ @@ -1259,7 +1259,7 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) attr.userns_fd = get_userns_fd(0, 10000, 10000); ASSERT_GE(attr.userns_fd, 0); - ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } -- cgit v1.2.3 From 97acd701185b13825f8ec7882345040415d46762 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 3 Feb 2022 14:14:06 +0100 Subject: MAINTAINERS: add entry for idmapped mounts I'd like to continue maintaining the work that was done around idmapped, make sure that I'm Cced on new patches and work that impacts the infrastructure. Link: https://lore.kernel.org/r/20220203131411.3093040-3-brauner@kernel.org Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index f41088418aae..0496b973bb87 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9253,6 +9253,15 @@ S: Maintained W: https://github.com/o2genum/ideapad-slidebar F: drivers/input/misc/ideapad_slidebar.c +IDMAPPED MOUNTS +M: Christian Brauner +L: linux-fsdevel@vger.kernel.org +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git +F: Documentation/filesystems/idmappings.rst +F: tools/testing/selftests/mount_setattr/ +F: include/linux/mnt_idmapping.h + IDT VersaClock 5 CLOCK DRIVER M: Luca Ceresoli S: Maintained -- cgit v1.2.3 From ddc204b517e60ae64db34f9832dc41dafa77c751 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 8 Feb 2022 11:39:12 -0500 Subject: copy_process(): Move fd_install() out of sighand->siglock critical section I was made aware of the following lockdep splat: [ 2516.308763] ===================================================== [ 2516.309085] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected [ 2516.309433] 5.14.0-51.el9.aarch64+debug #1 Not tainted [ 2516.309703] ----------------------------------------------------- [ 2516.310149] stress-ng/153663 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: [ 2516.310512] ffff0000e422b198 (&newf->file_lock){+.+.}-{2:2}, at: fd_install+0x368/0x4f0 [ 2516.310944] and this task is already holding: [ 2516.311248] ffff0000c08140d8 (&sighand->siglock){-.-.}-{2:2}, at: copy_process+0x1e2c/0x3e80 [ 2516.311804] which would create a new lock dependency: [ 2516.312066] (&sighand->siglock){-.-.}-{2:2} -> (&newf->file_lock){+.+.}-{2:2} [ 2516.312446] but this new dependency connects a HARDIRQ-irq-safe lock: [ 2516.312983] (&sighand->siglock){-.-.}-{2:2} : [ 2516.330700] Possible interrupt unsafe locking scenario: [ 2516.331075] CPU0 CPU1 [ 2516.331328] ---- ---- [ 2516.331580] lock(&newf->file_lock); [ 2516.331790] local_irq_disable(); [ 2516.332231] lock(&sighand->siglock); [ 2516.332579] lock(&newf->file_lock); [ 2516.332922] [ 2516.333069] lock(&sighand->siglock); [ 2516.333291] *** DEADLOCK *** [ 2516.389845] stack backtrace: [ 2516.390101] CPU: 3 PID: 153663 Comm: stress-ng Kdump: loaded Not tainted 5.14.0-51.el9.aarch64+debug #1 [ 2516.390756] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 2516.391155] Call trace: [ 2516.391302] dump_backtrace+0x0/0x3e0 [ 2516.391518] show_stack+0x24/0x30 [ 2516.391717] dump_stack_lvl+0x9c/0xd8 [ 2516.391938] dump_stack+0x1c/0x38 [ 2516.392247] print_bad_irq_dependency+0x620/0x710 [ 2516.392525] check_irq_usage+0x4fc/0x86c [ 2516.392756] check_prev_add+0x180/0x1d90 [ 2516.392988] validate_chain+0x8e0/0xee0 [ 2516.393215] __lock_acquire+0x97c/0x1e40 [ 2516.393449] lock_acquire.part.0+0x240/0x570 [ 2516.393814] lock_acquire+0x90/0xb4 [ 2516.394021] _raw_spin_lock+0xe8/0x154 [ 2516.394244] fd_install+0x368/0x4f0 [ 2516.394451] copy_process+0x1f5c/0x3e80 [ 2516.394678] kernel_clone+0x134/0x660 [ 2516.394895] __do_sys_clone3+0x130/0x1f4 [ 2516.395128] __arm64_sys_clone3+0x5c/0x7c [ 2516.395478] invoke_syscall.constprop.0+0x78/0x1f0 [ 2516.395762] el0_svc_common.constprop.0+0x22c/0x2c4 [ 2516.396050] do_el0_svc+0xb0/0x10c [ 2516.396252] el0_svc+0x24/0x34 [ 2516.396436] el0t_64_sync_handler+0xa4/0x12c [ 2516.396688] el0t_64_sync+0x198/0x19c [ 2517.491197] NET: Registered PF_ATMPVC protocol family [ 2517.491524] NET: Registered PF_ATMSVC protocol family [ 2591.991877] sched: RT throttling activated One way to solve this problem is to move the fd_install() call out of the sighand->siglock critical section. Before commit 6fd2fe494b17 ("copy_process(): don't use ksys_close() on cleanups"), the pidfd installation was done without holding both the task_list lock and the sighand->siglock. Obviously, holding these two locks are not really needed to protect the fd_install() call. So move the fd_install() call down to after the releases of both locks. Link: https://lore.kernel.org/r/20220208163912.1084752-1-longman@redhat.com Fixes: 6fd2fe494b17 ("copy_process(): don't use ksys_close() on cleanups") Reviewed-by: "Eric W. Biederman" Signed-off-by: Waiman Long Signed-off-by: Christian Brauner --- kernel/fork.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index d75a528f7b21..007af7fb47c7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2323,10 +2323,6 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } - /* past the last point of failure */ - if (pidfile) - fd_install(pidfd, pidfile); - init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@ -2375,6 +2371,9 @@ static __latent_entropy struct task_struct *copy_process( syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); + if (pidfile) + fd_install(pidfd, pidfile); + proc_fork_connector(p); sched_post_fork(p, args); cgroup_post_fork(p, args); -- cgit v1.2.3 From a12821d5e012a42673f6fe521971f193441d8aa4 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Fri, 11 Feb 2022 10:34:25 +0100 Subject: block: Add handling for zone append command in blk_complete_request Zone append command needs special handling to update the bi_sector field in the bio struct with the actual position of the data in the device. It is stored in __sector field of the request struct. Fixes: 5581a5ddfe8d ("block: add completion handler for fast path") Signed-off-by: Pankaj Raghav Reviewed-by: Christoph Hellwig Tested-by: Adam Manzanares Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220211093425.43262-2-p.raghav@samsung.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 1adfe4824ef5..d69ca91fbc8b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -736,6 +736,10 @@ static void blk_complete_request(struct request *req) /* Completion has already been traced */ bio_clear_flag(bio, BIO_TRACE_COMPLETION); + + if (req_op(req) == REQ_OP_ZONE_APPEND) + bio->bi_iter.bi_sector = req->__sector; + if (!is_flush) bio_endio(bio); bio = next; -- cgit v1.2.3 From 59f39bfa6553d598cb22f694d45e89547f420d85 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 13 Oct 2021 10:36:54 -0400 Subject: drm/cma-helper: Set VM_DONTEXPAND for mmap drm_gem_cma_mmap() cannot assume every implementation of dma_mmap_wc() will end up calling remap_pfn_range() (which happens to set the relevant vma flag, among others), so in order to make sure expectations around VM_DONTEXPAND are met, let it explicitly set the flag like most other GEM mmap implementations do. This avoids repeated warnings on a small minority of systems where the display is behind an IOMMU, and has a simple driver which does not override drm_gem_cma_default_funcs. Arm hdlcd is an in-tree affected driver. Out-of-tree, the Apple DCP driver is affected; this fix is required for DCP to be mainlined. [Alyssa: Update commit message.] Fixes: c40069cb7bd6 ("drm: add mmap() to drm_gem_object_funcs") Acked-by: Daniel Vetter Signed-off-by: Robin Murphy Signed-off-by: Alyssa Rosenzweig Link: https://patchwork.freedesktop.org/patch/msgid/20211013143654.39031-1-alyssa@rosenzweig.io --- drivers/gpu/drm/drm_gem_cma_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index cefd0cbf9deb..dc275c466c9c 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -512,6 +512,7 @@ int drm_gem_cma_mmap(struct drm_gem_cma_object *cma_obj, struct vm_area_struct * */ vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node); vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_DONTEXPAND; if (cma_obj->map_noncoherent) { vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); -- cgit v1.2.3 From a7e793a867ae312cecdeb6f06cceff98263e75dd Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Thu, 10 Feb 2022 22:13:23 +0500 Subject: selftests/exec: Add non-regular to TEST_GEN_PROGS non-regular file needs to be compiled and then copied to the output directory. Remove it from TEST_PROGS and add it to TEST_GEN_PROGS. This removes error thrown by rsync when non-regular object isn't found: rsync: [sender] link_stat "/linux/tools/testing/selftests/exec/non-regular" failed: No such file or directory (2) rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1333) [sender=3.2.3] Fixes: 0f71241a8e32 ("selftests/exec: add file type errno tests") Reported-by: "kernelci.org bot" Signed-off-by: Muhammad Usama Anjum Reviewed-by: Shuah Khan Reviewed-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/exec/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 12c5e27d32c1..2d7fca446c7f 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -3,8 +3,8 @@ CFLAGS = -Wall CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE -TEST_PROGS := binfmt_script non-regular -TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 +TEST_PROGS := binfmt_script +TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile -- cgit v1.2.3 From a0d48505a1d68e27220369e2dd1e3573a2f362d2 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 3 Feb 2022 18:47:00 +0200 Subject: i2c: qcom-cci: don't delete an unregistered adapter If i2c_add_adapter() fails to add an I2C adapter found on QCOM CCI controller, on error path i2c_del_adapter() is still called. Fortunately there is a sanity check in the I2C core, so the only visible implication is a printed debug level message: i2c-core: attempting to delete unregistered adapter [Qualcomm-CCI] Nevertheless it would be reasonable to correct the probe error path. Fixes: e517526195de ("i2c: Add Qualcomm CCI I2C driver") Signed-off-by: Vladimir Zapolskiy Reviewed-by: Robert Foss Reviewed-by: Bjorn Andersson Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-qcom-cci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index c1de8eb66169..fd4260d18577 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -655,7 +655,7 @@ static int cci_probe(struct platform_device *pdev) return 0; error_i2c: - for (; i >= 0; i--) { + for (--i ; i >= 0; i--) { if (cci->master[i].cci) i2c_del_adapter(&cci->master[i].adap); } -- cgit v1.2.3 From 02a4a69667a2ad32f3b52ca906f19628fbdd8a01 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 3 Feb 2022 18:47:03 +0200 Subject: i2c: qcom-cci: don't put a device tree node before i2c_add_adapter() There is a minor chance for a race, if a pointer to an i2c-bus subnode is stored and then reused after releasing its reference, and it would be sufficient to get one more reference under a loop over children subnodes. Fixes: e517526195de ("i2c: Add Qualcomm CCI I2C driver") Signed-off-by: Vladimir Zapolskiy Reviewed-by: Robert Foss Reviewed-by: Bjorn Andersson Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-qcom-cci.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index fd4260d18577..cf54f1cb4c57 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -558,7 +558,7 @@ static int cci_probe(struct platform_device *pdev) cci->master[idx].adap.quirks = &cci->data->quirks; cci->master[idx].adap.algo = &cci_algo; cci->master[idx].adap.dev.parent = dev; - cci->master[idx].adap.dev.of_node = child; + cci->master[idx].adap.dev.of_node = of_node_get(child); cci->master[idx].master = idx; cci->master[idx].cci = cci; @@ -643,8 +643,10 @@ static int cci_probe(struct platform_device *pdev) continue; ret = i2c_add_adapter(&cci->master[i].adap); - if (ret < 0) + if (ret < 0) { + of_node_put(cci->master[i].adap.dev.of_node); goto error_i2c; + } } pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC); @@ -656,8 +658,10 @@ static int cci_probe(struct platform_device *pdev) error_i2c: for (--i ; i >= 0; i--) { - if (cci->master[i].cci) + if (cci->master[i].cci) { i2c_del_adapter(&cci->master[i].adap); + of_node_put(cci->master[i].adap.dev.of_node); + } } error: disable_irq(cci->irq); @@ -673,8 +677,10 @@ static int cci_remove(struct platform_device *pdev) int i; for (i = 0; i < cci->data->num_masters; i++) { - if (cci->master[i].cci) + if (cci->master[i].cci) { i2c_del_adapter(&cci->master[i].adap); + of_node_put(cci->master[i].adap.dev.of_node); + } cci_halt(cci, i); } -- cgit v1.2.3 From f444578d727a0ca4a72b19cd4a1d7da9f1fb99fe Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 10 Jan 2022 16:50:07 +0100 Subject: power: supply: bq256xx: Handle OOM correctly Since we now return a pointer to an allocated object we need to account for memory allocation failure in a separate error path. Fixes: 25fd330370ac ("power: supply_core: Pass pointer to battery info") Reported-by: Dan Carpenter Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq256xx_charger.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/power/supply/bq256xx_charger.c b/drivers/power/supply/bq256xx_charger.c index b274942dc46a..01ad84fd147c 100644 --- a/drivers/power/supply/bq256xx_charger.c +++ b/drivers/power/supply/bq256xx_charger.c @@ -1523,6 +1523,9 @@ static int bq256xx_hw_init(struct bq256xx_device *bq) BQ256XX_WDT_BIT_SHIFT); ret = power_supply_get_battery_info(bq->charger, &bat_info); + if (ret == -ENOMEM) + return ret; + if (ret) { dev_warn(bq->dev, "battery info missing, default values will be applied\n"); -- cgit v1.2.3 From f10f582d28220f50099d3f561116256267821429 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 8 Feb 2022 12:54:48 -0600 Subject: scsi: qedi: Fix ABBA deadlock in qedi_process_tmf_resp() and qedi_process_cmd_cleanup_resp() This fixes a deadlock added with commit b40f3894e39e ("scsi: qedi: Complete TMF works before disconnect") Bug description from Jia-Ju Bai: qedi_process_tmf_resp() spin_lock(&session->back_lock); --> Line 201 (Lock A) spin_lock(&qedi_conn->tmf_work_lock); --> Line 230 (Lock B) qedi_process_cmd_cleanup_resp() spin_lock_bh(&qedi_conn->tmf_work_lock); --> Line 752 (Lock B) spin_lock_bh(&conn->session->back_lock); --> Line 784 (Lock A) When qedi_process_tmf_resp() and qedi_process_cmd_cleanup_resp() are concurrently executed, the deadlock can occur. This patch fixes the deadlock by not holding the tmf_work_lock in qedi_process_cmd_cleanup_resp while holding the back_lock. The tmf_work_lock is only needed while we remove the tmf_work from the work_list. Link: https://lore.kernel.org/r/20220208185448.6206-1-michael.christie@oracle.com Fixes: b40f3894e39e ("scsi: qedi: Complete TMF works before disconnect") Cc: Manish Rangankar Cc: Nilesh Javali Reported-by: TOTE Robot Reported-by: Jia-Ju Bai Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_fw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index 5916ed7662d5..4eb89aa4a39d 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -771,11 +771,10 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, qedi_cmd->list_tmf_work = NULL; } } + spin_unlock_bh(&qedi_conn->tmf_work_lock); - if (!found) { - spin_unlock_bh(&qedi_conn->tmf_work_lock); + if (!found) goto check_cleanup_reqs; - } QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n", @@ -806,7 +805,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, qedi_cmd->state = CLEANUP_RECV; unlock: spin_unlock_bh(&conn->session->back_lock); - spin_unlock_bh(&qedi_conn->tmf_work_lock); wake_up_interruptible(&qedi_conn->wait_queue); return; -- cgit v1.2.3 From 06582bc86d7f48d35cd044098ca1e246e8c7c52e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 26 Jan 2022 11:58:30 +0800 Subject: block: loop:use kstatfs.f_bsize of backing file to set discard granularity If backing file's filesystem has implemented ->fallocate(), we think the loop device can support discard, then pass sb->s_blocksize as discard_granularity. However, some underlying FS, such as overlayfs, doesn't set sb->s_blocksize, and causes discard_granularity to be set as zero, then the warning in __blkdev_issue_discard() is triggered. Christoph suggested to pass kstatfs.f_bsize as discard granularity, and this way is fine because kstatfs.f_bsize means 'Optimal transfer block size', which still matches with definition of discard granularity. So fix the issue by setting discard_granularity as kstatfs.f_bsize if it is available, otherwise claims discard isn't supported. Cc: Christoph Hellwig Cc: Vivek Goyal Reported-by: Pei Zhang Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220126035830.296465-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 150012ffb387..19fe19eaa50e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -79,6 +79,7 @@ #include #include #include +#include #include "loop.h" @@ -774,8 +775,13 @@ static void loop_config_discard(struct loop_device *lo) granularity = 0; } else { + struct kstatfs sbuf; + max_discard_sectors = UINT_MAX >> 9; - granularity = inode->i_sb->s_blocksize; + if (!vfs_statfs(&file->f_path, &sbuf)) + granularity = sbuf.f_bsize; + else + max_discard_sectors = 0; } if (max_discard_sectors) { -- cgit v1.2.3 From 28df029d53a2fd80c1b8674d47895648ad26dcfb Mon Sep 17 00:00:00 2001 From: Cheng Jui Wang Date: Thu, 10 Feb 2022 18:50:11 +0800 Subject: lockdep: Correct lock_classes index mapping A kernel exception was hit when trying to dump /proc/lockdep_chains after lockdep report "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!": Unable to handle kernel paging request at virtual address 00054005450e05c3 ... 00054005450e05c3] address between user and kernel address ranges ... pc : [0xffffffece769b3a8] string+0x50/0x10c lr : [0xffffffece769ac88] vsnprintf+0x468/0x69c ... Call trace: string+0x50/0x10c vsnprintf+0x468/0x69c seq_printf+0x8c/0xd8 print_name+0x64/0xf4 lc_show+0xb8/0x128 seq_read_iter+0x3cc/0x5fc proc_reg_read_iter+0xdc/0x1d4 The cause of the problem is the function lock_chain_get_class() will shift lock_classes index by 1, but the index don't need to be shifted anymore since commit 01bb6f0af992 ("locking/lockdep: Change the range of class_idx in held_lock struct") already change the index to start from 0. The lock_classes[-1] located at chain_hlocks array. When printing lock_classes[-1] after the chain_hlocks entries are modified, the exception happened. The output of lockdep_chains are incorrect due to this problem too. Fixes: f611e8cf98ec ("lockdep: Take read/write status in consideration when generate chainkey") Signed-off-by: Cheng Jui Wang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/20220210105011.21712-1-cheng-jui.wang@mediatek.com --- kernel/locking/lockdep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4a882f83aeb9..f8a0212189ca 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3462,7 +3462,7 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i) u16 chain_hlock = chain_hlocks[chain->base + i]; unsigned int class_idx = chain_hlock_class_idx(chain_hlock); - return lock_classes + class_idx - 1; + return lock_classes + class_idx; } /* @@ -3530,7 +3530,7 @@ static void print_chain_keys_chain(struct lock_chain *chain) hlock_id = chain_hlocks[chain->base + i]; chain_key = print_chain_key_iteration(hlock_id, chain_key); - print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id) - 1); + print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id)); printk("\n"); } } -- cgit v1.2.3 From 9405b5f8b20c2bfa6523a555279a0379640dc136 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 12 Feb 2022 01:54:14 -0600 Subject: smb3: fix snapshot mount option The conversion to the new API broke the snapshot mount option due to 32 vs. 64 bit type mismatch Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Cc: stable@vger.kernel.org # 5.11+ Reported-by: Acked-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/fs_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 7ec35f3f0a5f..a92e9eec521f 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -149,7 +149,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_u32("echo_interval", Opt_echo_interval), fsparam_u32("max_credits", Opt_max_credits), fsparam_u32("handletimeout", Opt_handletimeout), - fsparam_u32("snapshot", Opt_snapshot), + fsparam_u64("snapshot", Opt_snapshot), fsparam_u32("max_channels", Opt_max_channels), /* Mount options which take string value */ @@ -1078,7 +1078,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->echo_interval = result.uint_32; break; case Opt_snapshot: - ctx->snapshot_time = result.uint_32; + ctx->snapshot_time = result.uint_64; break; case Opt_max_credits: if (result.uint_32 < 20 || result.uint_32 > 60000) { -- cgit v1.2.3 From dd5a927e411836eaef44eb9b00fece615e82e242 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 3 Jan 2022 16:50:25 +0200 Subject: cifs: fix set of group SID via NTSD xattrs 'setcifsacl -g ' silently fails to set the group SID on server. Actually, the bug existed since commit 438471b67963 ("CIFS: Add support for setting owner info, dos attributes, and create time"), but this fix will not apply cleanly to kernel versions <= v5.10. Fixes: 3970acf7ddb9 ("SMB3: Add support for getting and setting SACLs") Cc: stable@vger.kernel.org # 5.11+ Signed-off-by: Amir Goldstein Signed-off-by: Steve French --- fs/cifs/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 7d8b72d67c80..9d486fbbfbbd 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -175,11 +175,13 @@ static int cifs_xattr_set(const struct xattr_handler *handler, switch (handler->flags) { case XATTR_CIFS_NTSD_FULL: aclflags = (CIFS_ACL_OWNER | + CIFS_ACL_GROUP | CIFS_ACL_DACL | CIFS_ACL_SACL); break; case XATTR_CIFS_NTSD: aclflags = (CIFS_ACL_OWNER | + CIFS_ACL_GROUP | CIFS_ACL_DACL); break; case XATTR_CIFS_ACL: -- cgit v1.2.3 From 26d3dadebbcbddfaf1d9caad42527a28a0ed28d8 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Sat, 12 Feb 2022 08:16:20 +1000 Subject: cifs: do not use uninitialized data in the owner/group sid When idsfromsid is used we create a special SID for owner/group. This structure must be initialized or else the first 5 bytes of the Authority field of the SID will contain uninitialized data and thus not be a valid SID. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index ee3aab3dd4ac..5df21d63dd04 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1297,7 +1297,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, if (uid_valid(uid)) { /* chown */ uid_t id; - nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid), + nowner_sid_ptr = kzalloc(sizeof(struct cifs_sid), GFP_KERNEL); if (!nowner_sid_ptr) { rc = -ENOMEM; @@ -1326,7 +1326,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, } if (gid_valid(gid)) { /* chgrp */ gid_t id; - ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid), + ngroup_sid_ptr = kzalloc(sizeof(struct cifs_sid), GFP_KERNEL); if (!ngroup_sid_ptr) { rc = -ENOMEM; -- cgit v1.2.3 From 3d6cc9898efdfb062efb74dc18cfc700e082f5d5 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 11 Feb 2022 02:59:15 +1000 Subject: cifs: fix double free race when mount fails in cifs_get_root() When cifs_get_root() fails during cifs_smb3_do_mount() we call deactivate_locked_super() which eventually will call delayed_free() which will free the context. In this situation we should not proceed to enter the out: section in cifs_smb3_do_mount() and free the same resources a second time. [Thu Feb 10 12:59:06 2022] BUG: KASAN: use-after-free in rcu_cblist_dequeue+0x32/0x60 [Thu Feb 10 12:59:06 2022] Read of size 8 at addr ffff888364f4d110 by task swapper/1/0 [Thu Feb 10 12:59:06 2022] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G OE 5.17.0-rc3+ #4 [Thu Feb 10 12:59:06 2022] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.0 12/17/2019 [Thu Feb 10 12:59:06 2022] Call Trace: [Thu Feb 10 12:59:06 2022] [Thu Feb 10 12:59:06 2022] dump_stack_lvl+0x5d/0x78 [Thu Feb 10 12:59:06 2022] print_address_description.constprop.0+0x24/0x150 [Thu Feb 10 12:59:06 2022] ? rcu_cblist_dequeue+0x32/0x60 [Thu Feb 10 12:59:06 2022] kasan_report.cold+0x7d/0x117 [Thu Feb 10 12:59:06 2022] ? rcu_cblist_dequeue+0x32/0x60 [Thu Feb 10 12:59:06 2022] __asan_load8+0x86/0xa0 [Thu Feb 10 12:59:06 2022] rcu_cblist_dequeue+0x32/0x60 [Thu Feb 10 12:59:06 2022] rcu_core+0x547/0xca0 [Thu Feb 10 12:59:06 2022] ? call_rcu+0x3c0/0x3c0 [Thu Feb 10 12:59:06 2022] ? __this_cpu_preempt_check+0x13/0x20 [Thu Feb 10 12:59:06 2022] ? lock_is_held_type+0xea/0x140 [Thu Feb 10 12:59:06 2022] rcu_core_si+0xe/0x10 [Thu Feb 10 12:59:06 2022] __do_softirq+0x1d4/0x67b [Thu Feb 10 12:59:06 2022] __irq_exit_rcu+0x100/0x150 [Thu Feb 10 12:59:06 2022] irq_exit_rcu+0xe/0x30 [Thu Feb 10 12:59:06 2022] sysvec_hyperv_stimer0+0x9d/0xc0 ... [Thu Feb 10 12:59:07 2022] Freed by task 58179: [Thu Feb 10 12:59:07 2022] kasan_save_stack+0x26/0x50 [Thu Feb 10 12:59:07 2022] kasan_set_track+0x25/0x30 [Thu Feb 10 12:59:07 2022] kasan_set_free_info+0x24/0x40 [Thu Feb 10 12:59:07 2022] ____kasan_slab_free+0x137/0x170 [Thu Feb 10 12:59:07 2022] __kasan_slab_free+0x12/0x20 [Thu Feb 10 12:59:07 2022] slab_free_freelist_hook+0xb3/0x1d0 [Thu Feb 10 12:59:07 2022] kfree+0xcd/0x520 [Thu Feb 10 12:59:07 2022] cifs_smb3_do_mount+0x149/0xbe0 [cifs] [Thu Feb 10 12:59:07 2022] smb3_get_tree+0x1a0/0x2e0 [cifs] [Thu Feb 10 12:59:07 2022] vfs_get_tree+0x52/0x140 [Thu Feb 10 12:59:07 2022] path_mount+0x635/0x10c0 [Thu Feb 10 12:59:07 2022] __x64_sys_mount+0x1bf/0x210 [Thu Feb 10 12:59:07 2022] do_syscall_64+0x5c/0xc0 [Thu Feb 10 12:59:07 2022] entry_SYSCALL_64_after_hwframe+0x44/0xae [Thu Feb 10 12:59:07 2022] Last potentially related work creation: [Thu Feb 10 12:59:07 2022] kasan_save_stack+0x26/0x50 [Thu Feb 10 12:59:07 2022] __kasan_record_aux_stack+0xb6/0xc0 [Thu Feb 10 12:59:07 2022] kasan_record_aux_stack_noalloc+0xb/0x10 [Thu Feb 10 12:59:07 2022] call_rcu+0x76/0x3c0 [Thu Feb 10 12:59:07 2022] cifs_umount+0xce/0xe0 [cifs] [Thu Feb 10 12:59:07 2022] cifs_kill_sb+0xc8/0xe0 [cifs] [Thu Feb 10 12:59:07 2022] deactivate_locked_super+0x5d/0xd0 [Thu Feb 10 12:59:07 2022] cifs_smb3_do_mount+0xab9/0xbe0 [cifs] [Thu Feb 10 12:59:07 2022] smb3_get_tree+0x1a0/0x2e0 [cifs] [Thu Feb 10 12:59:07 2022] vfs_get_tree+0x52/0x140 [Thu Feb 10 12:59:07 2022] path_mount+0x635/0x10c0 [Thu Feb 10 12:59:07 2022] __x64_sys_mount+0x1bf/0x210 [Thu Feb 10 12:59:07 2022] do_syscall_64+0x5c/0xc0 [Thu Feb 10 12:59:07 2022] entry_SYSCALL_64_after_hwframe+0x44/0xae Reported-by: Shyam Prasad N Reviewed-by: Shyam Prasad N Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 199edac0cb59..082c21478686 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -919,6 +919,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, out_super: deactivate_locked_super(sb); + return root; out: if (cifs_sb) { kfree(cifs_sb->prepath); -- cgit v1.2.3 From 2874b7911132f6975e668f6849c8ac93bc4e1f35 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 12 Feb 2022 00:44:11 +0100 Subject: netfilter: xt_socket: missing ifdef CONFIG_IP6_NF_IPTABLES dependency nf_defrag_ipv6_disable() requires CONFIG_IP6_NF_IPTABLES. Fixes: 75063c9294fb ("netfilter: xt_socket: fix a typo in socket_mt_destroy()") Reported-by: kernel test robot Reviewed-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 662e5eb1cc39..7013f55f05d1 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -220,8 +220,10 @@ static void socket_mt_destroy(const struct xt_mtdtor_param *par) { if (par->family == NFPROTO_IPV4) nf_defrag_ipv4_disable(par->net); +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) else if (par->family == NFPROTO_IPV6) nf_defrag_ipv6_disable(par->net); +#endif } static struct xt_match socket_mt_reg[] __read_mostly = { -- cgit v1.2.3 From 538f4f022a4612f969d5324ee227403c9f8b1d72 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 3 Feb 2022 14:14:07 +0100 Subject: fs: add kernel doc for mnt_{hold,unhold}_writers() When I introduced mnt_{hold,unhold}_writers() in commit fbdc2f6c40f6 ("fs: split out functions to hold writers") I did not add kernel doc for them. Fix this and introduce proper documentation. Link: https://lore.kernel.org/r/20220203131411.3093040-4-brauner@kernel.org Fixes: fbdc2f6c40f6 ("fs: split out functions to hold writers") Cc: Seth Forshee Cc: Christoph Hellwig Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/namespace.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 40b994a29e90..de6fae84f1a1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -469,6 +469,24 @@ void mnt_drop_write_file(struct file *file) } EXPORT_SYMBOL(mnt_drop_write_file); +/** + * mnt_hold_writers - prevent write access to the given mount + * @mnt: mnt to prevent write access to + * + * Prevents write access to @mnt if there are no active writers for @mnt. + * This function needs to be called and return successfully before changing + * properties of @mnt that need to remain stable for callers with write access + * to @mnt. + * + * After this functions has been called successfully callers must pair it with + * a call to mnt_unhold_writers() in order to stop preventing write access to + * @mnt. + * + * Context: This function expects lock_mount_hash() to be held serializing + * setting MNT_WRITE_HOLD. + * Return: On success 0 is returned. + * On error, -EBUSY is returned. + */ static inline int mnt_hold_writers(struct mount *mnt) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -500,6 +518,18 @@ static inline int mnt_hold_writers(struct mount *mnt) return 0; } +/** + * mnt_unhold_writers - stop preventing write access to the given mount + * @mnt: mnt to stop preventing write access to + * + * Stop preventing write access to @mnt allowing callers to gain write access + * to @mnt again. + * + * This function can only be called after a successful call to + * mnt_hold_writers(). + * + * Context: This function expects lock_mount_hash() to be held. + */ static inline void mnt_unhold_writers(struct mount *mnt) { /* -- cgit v1.2.3 From 19d20c7a29bf2e46ff1ab8e8c4fcd2da8a4f38e2 Mon Sep 17 00:00:00 2001 From: Matteo Martelli Date: Fri, 11 Feb 2022 23:49:13 +0100 Subject: ALSA: usb-audio: revert to IMPLICIT_FB_FIXED_DEV for M-Audio FastTrack Ultra Commit 83b7dcbc51c930fc2079ab6c6fc9d719768321f1 introduced a generic implicit feedback parser, which fails to execute for M-Audio FastTrack Ultra sound cards. The issue is with the ENDPOINT_SYNCTYPE check in add_generic_implicit_fb() where the SYNCTYPE is ADAPTIVE instead of ASYNC. The reason is that the sync type of the FastTrack output endpoints are set to adaptive in the quirks table since commit 65f04443c96dbda11b8fff21d6390e082846aa3c. Fixes: 83b7dcbc51c9 ("ALSA: usb-audio: Add generic implicit fb parsing") Signed-off-by: Matteo Martelli Cc: Link: https://lore.kernel.org/r/20220211224913.20683-2-matteomartelli3@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/implicit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 70319c822c10..2d444ec74202 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -47,13 +47,13 @@ struct snd_usb_implicit_fb_match { static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { /* Generic matching */ IMPLICIT_FB_GENERIC_DEV(0x0499, 0x1509), /* Steinberg UR22 */ - IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2080), /* M-Audio FastTrack Ultra */ - IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2081), /* M-Audio FastTrack Ultra */ IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2030), /* M-Audio Fast Track C400 */ IMPLICIT_FB_GENERIC_DEV(0x0763, 0x2031), /* M-Audio Fast Track C600 */ /* Fixed EP */ /* FIXME: check the availability of generic matching */ + IMPLICIT_FB_FIXED_DEV(0x0763, 0x2080, 0x81, 2), /* M-Audio FastTrack Ultra */ + IMPLICIT_FB_FIXED_DEV(0x0763, 0x2081, 0x81, 2), /* M-Audio FastTrack Ultra */ IMPLICIT_FB_FIXED_DEV(0x2466, 0x8010, 0x81, 2), /* Fractal Audio Axe-Fx III */ IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0001, 0x81, 2), /* Solid State Logic SSL2 */ IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0002, 0x81, 2), /* Solid State Logic SSL2+ */ -- cgit v1.2.3 From c07f2c7b45413a9e50ba78630fda04ecfa17b4f2 Mon Sep 17 00:00:00 2001 From: Yu Huang Date: Sun, 13 Feb 2022 00:08:33 +0800 Subject: ALSA: hda/realtek: Add quirk for Legion Y9000X 2019 Legion Y9000X 2019 has the same speaker with Y9000X 2020, but with a different quirk address. Add one quirk entry to make the speaker work on Y9000X 2019 too. Signed-off-by: Yu Huang Cc: Link: https://lore.kernel.org/r/20220212160835.165065-1-diwang90@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8315bf7d4c38..9473fb76ff19 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9170,6 +9170,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x383d, "Legion Y9000X 2019", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP), SND_PCI_QUIRK(0x17aa, 0x3847, "Legion 7 16ACHG6", ALC287_FIXUP_LEGION_16ACHG6), SND_PCI_QUIRK(0x17aa, 0x384a, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), -- cgit v1.2.3 From 698bef8ff5d2edea5d1c9d6e5adf1bfed1e8a106 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 7 Feb 2022 15:26:59 +0200 Subject: drm/i915: Fix dbuf slice config lookup Apparently I totally fumbled the loop condition when I removed the ARRAY_SIZE() stuff from the dbuf slice config lookup. Comparing the loop index with the active_pipes bitmask is utter nonsense, what we want to do is check to see if the mask is zero or not. Note that the code actually ended up working correctly despite the fumble, up until commit eef173954432 ("drm/i915: Allow !join_mbus cases for adlp+ dbuf configuration") when things broke for real. Cc: stable@vger.kernel.org Fixes: 05e8155afe35 ("drm/i915: Use a sentinel to terminate the dbuf slice arrays") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220207132700.481-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit a28fde308c3c1c174249ff9559b57f24e6850086) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3edba7fd0c49..da6ce24a42b2 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4870,7 +4870,7 @@ static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus, { int i; - for (i = 0; i < dbuf_slices[i].active_pipes; i++) { + for (i = 0; dbuf_slices[i].active_pipes != 0; i++) { if (dbuf_slices[i].active_pipes == active_pipes && dbuf_slices[i].join_mbus == join_mbus) return dbuf_slices[i].dbuf_mask[pipe]; -- cgit v1.2.3 From 8d9d2a723d64b650f2e6423024ccb4a33f0cdc40 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 7 Feb 2022 15:27:00 +0200 Subject: drm/i915: Fix mbus join config lookup The bogus loop from compute_dbuf_slices() was copied into check_mbus_joined() as well. So this lookup is wrong as well. Fix it. Cc: stable@vger.kernel.org Fixes: f4dc00863226 ("drm/i915/adl_p: MBUS programming") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220207132700.481-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 053f2b85631316a9226f6340c1c0fd95634f7a5b) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index da6ce24a42b2..32bc155f5dc0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4853,7 +4853,7 @@ static bool check_mbus_joined(u8 active_pipes, { int i; - for (i = 0; i < dbuf_slices[i].active_pipes; i++) { + for (i = 0; dbuf_slices[i].active_pipes != 0; i++) { if (dbuf_slices[i].active_pipes == active_pipes) return dbuf_slices[i].join_mbus; } -- cgit v1.2.3 From 0bdc0a0699929c814a8aecd55d2accb8c11beae2 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 9 Feb 2022 11:16:52 +0000 Subject: drm/i915/ttm: tweak priority hint selection For some reason we are selecting PRIO_HAS_PAGES when we don't have mm.pages, and vice versa. v2(Thomas): - Add missing fixes tag Fixes: 213d50927763 ("drm/i915/ttm: Introduce a TTM i915 gem object backend") Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20220209111652.468762-1-matthew.auld@intel.com (cherry picked from commit ba2c5d15022a565da187d90e2fe44768e33e5034) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index de3fe79b665a..1f880c8c66e7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -842,11 +842,9 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) } else if (obj->mm.madv != I915_MADV_WILLNEED) { bo->priority = I915_TTM_PRIO_PURGE; } else if (!i915_gem_object_has_pages(obj)) { - if (bo->priority < I915_TTM_PRIO_HAS_PAGES) - bo->priority = I915_TTM_PRIO_HAS_PAGES; + bo->priority = I915_TTM_PRIO_NO_PAGES; } else { - if (bo->priority > I915_TTM_PRIO_NO_PAGES) - bo->priority = I915_TTM_PRIO_NO_PAGES; + bo->priority = I915_TTM_PRIO_HAS_PAGES; } ttm_bo_move_to_lru_tail(bo, bo->resource, NULL); -- cgit v1.2.3 From ea958422291de248b9e2eaaeea36004e84b64043 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 10 Feb 2022 12:36:42 +0200 Subject: drm/i915/opregion: check port number bounds for SWSCI display power state The mapping from enum port to whatever port numbering scheme is used by the SWSCI Display Power State Notification is odd, and the memory of it has faded. In any case, the parameter only has space for ports numbered [0..4], and UBSAN reports bit shift beyond it when the platform has port F or more. Since the SWSCI functionality is supposed to be obsolete for new platforms (i.e. ones that might have port F or more), just bail out early if the mapped and mangled port number is beyond what the Display Power State Notification can support. Fixes: 9c4b0a683193 ("drm/i915: add opregion function to notify bios of encoder enable/disable") Cc: # v3.13+ Cc: Ville Syrjälä Cc: Lucas De Marchi Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4800 Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/cc363f42d6b5a5932b6d218fefcc8bdfb15dbbe5.1644489329.git.jani.nikula@intel.com (cherry picked from commit 24a644ebbfd3b13cda702f98907f9dd123e34bf9) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_opregion.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 0065111593a6..4a2662838cd8 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -360,6 +360,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, port++; } + /* + * The port numbering and mapping here is bizarre. The now-obsolete + * swsci spec supports ports numbered [0..4]. Port E is handled as a + * special case, but port F and beyond are not. The functionality is + * supposed to be obsolete for new platforms. Just bail out if the port + * number is out of bounds after mapping. + */ + if (port > 4) { + drm_dbg_kms(&dev_priv->drm, + "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", + intel_encoder->base.base.id, intel_encoder->base.name, + port_name(intel_encoder->port), port); + return -EINVAL; + } + if (!enable) parm |= 4 << 8; -- cgit v1.2.3 From 26ace8793aaefbcd0d6bb664573ded35c69cd6ef Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 10 Feb 2022 12:31:07 +0200 Subject: drm/i915/fbc: Fix the plane end Y offset check We lost the required >>16 when I refactored the FBC plane state checks. Bring it back so the check does what it's supposed to. Cc: Mika Kahola Fixes: 2e6c99f88679 ("drm/i915/fbc: Nuke lots of crap from intel_fbc_state_cache") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220210103107.24492-1-ville.syrjala@linux.intel.com Reviewed-by: Mika Kahola (cherry picked from commit f7bc440bc79ae5dcf648b90209910ea8dba6ef0c) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_fbc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 160fd2bdafe5..957feeccff3f 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1115,7 +1115,8 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, /* Wa_22010751166: icl, ehl, tgl, dg1, rkl */ if (DISPLAY_VER(i915) >= 11 && - (plane_state->view.color_plane[0].y + drm_rect_height(&plane_state->uapi.src)) & 3) { + (plane_state->view.color_plane[0].y + + (drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) { plane_state->no_fbc_reason = "plane end Y offset misaligned"; return false; } -- cgit v1.2.3 From 6317f7449348a897483a2b4841f7a9190745c81b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Feb 2022 11:00:19 +0100 Subject: ALSA: hda: Fix regression on forced probe mask option The forced probe mask via probe_mask 0x100 bit doesn't work any longer as expected since the bus init code was moved and it's clearing the codec_mask value that was set beforehand. This patch fixes the long-time regression by moving the check_probe_mask() call. Fixes: a41d122449be ("ALSA: hda - Embed bus into controller object") Reported-by: dmummenschanz@web.de Cc: Link: https://lore.kernel.org/r/trinity-f018660b-95c9-442b-a2a8-c92a56eb07ed-1644345967148@3c-app-webde-bap22 Link: https://lore.kernel.org/r/20220214100020.8870-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4b0338c4c543..18b795220b52 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1798,8 +1798,6 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, assign_position_fix(chip, check_position_fix(chip, position_fix[dev])); - check_probe_mask(chip, dev); - if (single_cmd < 0) /* allow fallback to single_cmd at errors */ chip->fallback_to_single_cmd = 1; else /* explicitly set to single_cmd or not */ @@ -1825,6 +1823,8 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, chip->bus.core.needs_damn_long_delay = 1; } + check_probe_mask(chip, dev); + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); if (err < 0) { dev_err(card->dev, "Error creating device [card]!\n"); -- cgit v1.2.3 From dd8e5b161d7fb9cefa1f1d6e35a39b9e1563c8d3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Feb 2022 11:00:20 +0100 Subject: ALSA: hda: Fix missing codec probe on Shenker Dock 15 By some unknown reason, BIOS on Shenker Dock 15 doesn't set up the codec mask properly for the onboard audio. Let's set the forced codec mask to enable the codec discovery. Reported-by: dmummenschanz@web.de Cc: Link: https://lore.kernel.org/r/trinity-f018660b-95c9-442b-a2a8-c92a56eb07ed-1644345967148@3c-app-webde-bap22 Link: https://lore.kernel.org/r/20220214100020.8870-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 18b795220b52..917ad9d375b1 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1615,6 +1615,7 @@ static const struct snd_pci_quirk probe_mask_list[] = { /* forced codec slots */ SND_PCI_QUIRK(0x1043, 0x1262, "ASUS W5Fm", 0x103), SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103), + SND_PCI_QUIRK(0x1558, 0x0351, "Schenker Dock 15", 0x105), /* WinFast VP200 H (Teradici) user reported broken communication */ SND_PCI_QUIRK(0x3a21, 0x040d, "WinFast VP200 H", 0x101), {} -- cgit v1.2.3 From 9a5adeb28b77416446658e75bdef3bbe5fb92a83 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Feb 2022 13:57:11 +0100 Subject: ALSA: usb-audio: Don't abort resume upon errors The default mixer resume code treats the errors at restoring the modified mixer items as a fatal error, and it returns back to the caller. This ends up in the resume failure, and the device will be come unavailable, although basically those errors are intermittent and can be safely ignored. The problem itself has been present from the beginning, but it didn't hit usually because the code tries to resume only the modified items. But now with the recent commit to forcibly initialize each item at the probe time, the problem surfaced more often, hence it appears as a regression. This patch fixes the regression simply by ignoring the errors at resume. Fixes: b96681bd5827 ("ALSA: usb-audio: Initialize every feature unit once at probe time") Cc: BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215561 Link: https://lore.kernel.org/r/20220214125711.20531-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 630766ba259f..a5641956ef10 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -3678,17 +3678,14 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list) err = snd_usb_set_cur_mix_value(cval, c + 1, idx, cval->cache_val[idx]); if (err < 0) - return err; + break; } idx++; } } else { /* master */ - if (cval->cached) { - err = snd_usb_set_cur_mix_value(cval, 0, 0, *cval->cache_val); - if (err < 0) - return err; - } + if (cval->cached) + snd_usb_set_cur_mix_value(cval, 0, 0, *cval->cache_val); } return 0; -- cgit v1.2.3 From 2a845837e3d0ddaed493b4c5c4643d7f0542804d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Feb 2022 14:04:10 +0100 Subject: ALSA: hda/realtek: Fix deadlock by COEF mutex The recently introduced coef_mutex for Realtek codec seems causing a deadlock when the relevant code is invoked from the power-off state; then the HD-audio core tries to power-up internally, and this kicks off the codec runtime PM code that tries to take the same coef_mutex. In order to avoid the deadlock, do the temporary power up/down around the coef_mutex acquisition and release. This assures that the power-up sequence runs before the mutex, hence no re-entrance will happen. Fixes: b837a9f5ab3b ("ALSA: hda: realtek: Fix race at concurrent COEF updates") Reported-and-tested-by: Julian Wollrath Cc: Link: https://lore.kernel.org/r/20220214132838.4db10fca@schienar Link: https://lore.kernel.org/r/20220214130410.21230-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9473fb76ff19..3a42457984e9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -138,6 +138,22 @@ struct alc_spec { * COEF access helper functions */ +static void coef_mutex_lock(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + snd_hda_power_up_pm(codec); + mutex_lock(&spec->coef_mutex); +} + +static void coef_mutex_unlock(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + mutex_unlock(&spec->coef_mutex); + snd_hda_power_down_pm(codec); +} + static int __alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, unsigned int coef_idx) { @@ -151,12 +167,11 @@ static int __alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, unsigned int coef_idx) { - struct alc_spec *spec = codec->spec; unsigned int val; - mutex_lock(&spec->coef_mutex); + coef_mutex_lock(codec); val = __alc_read_coefex_idx(codec, nid, coef_idx); - mutex_unlock(&spec->coef_mutex); + coef_mutex_unlock(codec); return val; } @@ -173,11 +188,9 @@ static void __alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid, static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid, unsigned int coef_idx, unsigned int coef_val) { - struct alc_spec *spec = codec->spec; - - mutex_lock(&spec->coef_mutex); + coef_mutex_lock(codec); __alc_write_coefex_idx(codec, nid, coef_idx, coef_val); - mutex_unlock(&spec->coef_mutex); + coef_mutex_unlock(codec); } #define alc_write_coef_idx(codec, coef_idx, coef_val) \ @@ -198,11 +211,9 @@ static void alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid, unsigned int coef_idx, unsigned int mask, unsigned int bits_set) { - struct alc_spec *spec = codec->spec; - - mutex_lock(&spec->coef_mutex); + coef_mutex_lock(codec); __alc_update_coefex_idx(codec, nid, coef_idx, mask, bits_set); - mutex_unlock(&spec->coef_mutex); + coef_mutex_unlock(codec); } #define alc_update_coef_idx(codec, coef_idx, mask, bits_set) \ @@ -235,9 +246,7 @@ struct coef_fw { static void alc_process_coef_fw(struct hda_codec *codec, const struct coef_fw *fw) { - struct alc_spec *spec = codec->spec; - - mutex_lock(&spec->coef_mutex); + coef_mutex_lock(codec); for (; fw->nid; fw++) { if (fw->mask == (unsigned short)-1) __alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val); @@ -245,7 +254,7 @@ static void alc_process_coef_fw(struct hda_codec *codec, __alc_update_coefex_idx(codec, fw->nid, fw->idx, fw->mask, fw->val); } - mutex_unlock(&spec->coef_mutex); + coef_mutex_unlock(codec); } /* -- cgit v1.2.3 From c49ae619905eebd3f54598a84e4cd2bd58ba8fe9 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Mon, 14 Feb 2022 12:02:28 +0100 Subject: PCI: mvebu: Fix device enumeration regression Jan reported that on Turris Omnia (Armada 385), no PCIe devices were detected after upgrading from v5.16.1 to v5.16.3 and identified the cause as the backport of 91a8d79fc797 ("PCI: mvebu: Fix configuring secondary bus of PCIe Root Port via emulated bridge"), which appeared in v5.17-rc1. 91a8d79fc797 was incorrectly applied from mailing list patch [1] to the linux git repository [2] probably due to resolving merge conflicts incorrectly. Fix it now. [1] https://lore.kernel.org/r/20211125124605.25915-12-pali@kernel.org [2] https://git.kernel.org/linus/91a8d79fc797 [bhelgaas: commit log] BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215540 Fixes: 91a8d79fc797 ("PCI: mvebu: Fix configuring secondary bus of PCIe Root Port via emulated bridge") Link: https://lore.kernel.org/r/20220214110228.25825-1-pali@kernel.org Link: https://lore.kernel.org/r/20220127234917.GA150851@bhelgaas Reported-by: Jan Palus Signed-off-by: Pali Rohár Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pci-mvebu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 71258ea3d35f..f8e82c5e2d87 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -1329,7 +1329,8 @@ static int mvebu_pcie_probe(struct platform_device *pdev) * indirectly via kernel emulated PCI bridge driver. */ mvebu_pcie_setup_hw(port); - mvebu_pcie_set_local_dev_nr(port, 0); + mvebu_pcie_set_local_dev_nr(port, 1); + mvebu_pcie_set_local_bus_nr(port, 0); } pcie->nports = i; -- cgit v1.2.3 From 0c6f4ebf8835d01866eb686d47578cde80097981 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 14 Feb 2022 08:40:52 +1000 Subject: cifs: modefromsids must add an ACE for authenticated users When we create a file with modefromsids we set an ACL that has one ACE for the magic modefromsid as well as a second ACE that grants full access to all authenticated users. When later we chante the mode on the file we strip away this, and other, ACE for authenticated users in set_chmod_dacl() and then just add back/update the modefromsid ACE. Thus leaving the file with a single ACE that is for the mode and no ACE to grant any user any rights to access the file. Fix this by always adding back also the modefromsid ACE so that we do not drop the rights to access the file. Signed-off-by: Ronnie Sahlberg Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 5df21d63dd04..bf861fef2f0c 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -949,6 +949,9 @@ static void populate_new_aces(char *nacl_base, pnntace = (struct cifs_ace *) (nacl_base + nsize); nsize += setup_special_mode_ACE(pnntace, nmode); num_aces++; + pnntace = (struct cifs_ace *) (nacl_base + nsize); + nsize += setup_authusers_ACE(pnntace); + num_aces++; goto set_size; } @@ -1613,7 +1616,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, nsecdesclen = secdesclen; if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */ if (mode_from_sid) - nsecdesclen += sizeof(struct cifs_ace); + nsecdesclen += 2 * sizeof(struct cifs_ace); else /* cifsacl */ nsecdesclen += 5 * sizeof(struct cifs_ace); } else { /* chown */ -- cgit v1.2.3 From 21bffcb76ee2fbafc7d5946cef10abc9df5cfff7 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Thu, 10 Feb 2022 12:30:49 -0800 Subject: selftests/seccomp: Fix seccomp failure by adding missing headers seccomp_bpf failed on tests 47 global.user_notification_filter_empty and 48 global.user_notification_filter_empty_threaded when it's tested on updated kernel but with old kernel headers. Because old kernel headers don't have definition of macro __NR_clone3 which is required for these two tests. Since under selftests/, we can install headers once for all tests (the default INSTALL_HDR_PATH is usr/include), fix it by adding usr/include to the list of directories to be searched. Use "-isystem" to indicate it's a system directory as the real kernel headers directories are. Signed-off-by: Sherry Yang Tested-by: Sherry Yang Reviewed-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/seccomp/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index 0ebfe8b0e147..585f7a0c10cb 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -Wl,-no-as-needed -Wall +CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/ LDFLAGS += -lpthread TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark -- cgit v1.2.3 From 4f6de676d94ee8ddfc2e7e7cd935fc7cb2feff3a Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Mon, 14 Feb 2022 18:56:43 +0100 Subject: arm64: Correct wrong label in macro __init_el2_gicv3 In commit: 114945d84a30a5fe ("arm64: Fix labels in el2_setup macros") We renamed a label from '1' to '.Lskip_gicv3_\@', but failed to update a branch to it, which now targets a later label also called '1'. The branch is taken rarely, when GICv3 is present but SRE is disabled at EL3, causing a boot-time crash. Update the caller to the new label name. Fixes: 114945d84a30 ("arm64: Fix labels in el2_setup macros") Cc: # 5.12.x Signed-off-by: Joakim Tjernlund Link: https://lore.kernel.org/r/20220214175643.21931-1-joakim.tjernlund@infinera.com Reviewed-by: Mark Rutland Reviewed-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/el2_setup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 3198acb2aad8..7f3c87f7a0ce 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -106,7 +106,7 @@ msr_s SYS_ICC_SRE_EL2, x0 isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, - tbz x0, #0, 1f // and check that it sticks + tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults .Lskip_gicv3_\@: .endm -- cgit v1.2.3 From 05c7b7a92cc87ff8d7fde189d0fade250697573c Mon Sep 17 00:00:00 2001 From: Zhang Qiao Date: Fri, 21 Jan 2022 18:12:10 +0800 Subject: cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug As previously discussed(https://lkml.org/lkml/2022/1/20/51), cpuset_attach() is affected with similar cpu hotplug race, as follow scenario: cpuset_attach() cpu hotplug --------------------------- ---------------------- down_write(cpuset_rwsem) guarantee_online_cpus() // (load cpus_attach) sched_cpu_deactivate set_cpu_active() // will change cpu_active_mask set_cpus_allowed_ptr(cpus_attach) __set_cpus_allowed_ptr_locked() // (if the intersection of cpus_attach and cpu_active_mask is empty, will return -EINVAL) up_write(cpuset_rwsem) To avoid races such as described above, protect cpuset_attach() call with cpu_hotplug_lock. Fixes: be367d099270 ("cgroups: let ss->can_attach and ss->attach do whole threadgroups at a time") Cc: stable@vger.kernel.org # v2.6.32+ Reported-by: Zhao Gongyi Signed-off-by: Zhang Qiao Acked-by: Waiman Long Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 4c7254e8f49a..97c53f3cc917 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2289,6 +2289,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); + cpus_read_lock(); percpu_down_write(&cpuset_rwsem); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); @@ -2342,6 +2343,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) wake_up(&cpuset_attach_wq); percpu_up_write(&cpuset_rwsem); + cpus_read_unlock(); } /* The various types of files and directories in a cpuset file system */ -- cgit v1.2.3 From 9d047bf68fe8cdb4086deaf4edd119731a9481ed Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Feb 2022 12:14:44 -0500 Subject: NFS: Remove an incorrect revalidation in nfs4_update_changeattr_locked() In nfs4_update_changeattr_locked(), we don't need to set the NFS_INO_REVAL_PAGECACHE flag, because we already know the value of the change attribute, and we're already flagging the size. In fact, this forces us to revalidate the change attribute a second time for no good reason. This extra flag appears to have been introduced as part of the xattr feature, when update_changeattr_locked() was converted for use by the xattr code. Fixes: 1b523ca972ed ("nfs: modify update_changeattr to deal with regular files") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f5020828ab65..0e0db6c27619 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1229,8 +1229,7 @@ nfs4_update_changeattr_locked(struct inode *inode, NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | - NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR | - NFS_INO_REVAL_PAGECACHE; + NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR; nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); } nfsi->attrtimeo_timestamp = jiffies; -- cgit v1.2.3 From e0caaf75d443e02e55e146fd75fe2efc8aed5540 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Feb 2022 13:38:23 -0500 Subject: NFS: LOOKUP_DIRECTORY is also ok with symlinks Commit ac795161c936 (NFSv4: Handle case where the lookup of a directory fails) [1], part of Linux since 5.17-rc2, introduced a regression, where a symbolic link on an NFS mount to a directory on another NFS does not resolve(?) the first time it is accessed: Reported-by: Paul Menzel Fixes: ac795161c936 ("NFSv4: Handle case where the lookup of a directory fails") Signed-off-by: Trond Myklebust Tested-by: Donald Buczek Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7bc7cf6b26f0..75cb1cbe4cde 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2010,14 +2010,14 @@ no_open: if (!res) { inode = d_inode(dentry); if ((lookup_flags & LOOKUP_DIRECTORY) && inode && - !S_ISDIR(inode->i_mode)) + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) res = ERR_PTR(-ENOTDIR); else if (inode && S_ISREG(inode->i_mode)) res = ERR_PTR(-EOPENSTALE); } else if (!IS_ERR(res)) { inode = d_inode(res); if ((lookup_flags & LOOKUP_DIRECTORY) && inode && - !S_ISDIR(inode->i_mode)) { + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) { dput(res); res = ERR_PTR(-ENOTDIR); } else if (inode && S_ISREG(inode->i_mode)) { -- cgit v1.2.3 From 0136f5844b006e2286f873457c3fcba8c45a3735 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 18 Jan 2022 14:07:51 +0800 Subject: drm/amd/pm: correct UMD pstate clocks for Dimgrey Cavefish and Beige Goby Correct the UMD pstate profiling clocks for Dimgrey Cavefish and Beige Goby. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 26 +++++++++++++++++----- .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h | 8 +++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index a4207293158c..2320bd750876 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1238,21 +1238,37 @@ static int sienna_cichlid_populate_umd_state_clk(struct smu_context *smu) &dpm_context->dpm_tables.soc_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; + struct amdgpu_device *adev = smu->adev; pstate_table->gfxclk_pstate.min = gfx_table->min; pstate_table->gfxclk_pstate.peak = gfx_table->max; - if (gfx_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK) - pstate_table->gfxclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK; pstate_table->uclk_pstate.min = mem_table->min; pstate_table->uclk_pstate.peak = mem_table->max; - if (mem_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK) - pstate_table->uclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK; pstate_table->socclk_pstate.min = soc_table->min; pstate_table->socclk_pstate.peak = soc_table->max; - if (soc_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK) + + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + pstate_table->gfxclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK; + pstate_table->uclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK; pstate_table->socclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK; + break; + case CHIP_DIMGREY_CAVEFISH: + pstate_table->gfxclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_GFXCLK; + pstate_table->uclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_MEMCLK; + pstate_table->socclk_pstate.standard = DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_SOCCLK; + break; + case CHIP_BEIGE_GOBY: + pstate_table->gfxclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_GFXCLK; + pstate_table->uclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_MEMCLK; + pstate_table->socclk_pstate.standard = BEIGE_GOBY_UMD_PSTATE_PROFILING_SOCCLK; + break; + default: + break; + } return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h index 38cd0ece24f6..42f705c7a36f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h @@ -33,6 +33,14 @@ typedef enum { #define SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK 960 #define SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK 1000 +#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_GFXCLK 1950 +#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_SOCCLK 960 +#define DIMGREY_CAVEFISH_UMD_PSTATE_PROFILING_MEMCLK 676 + +#define BEIGE_GOBY_UMD_PSTATE_PROFILING_GFXCLK 2200 +#define BEIGE_GOBY_UMD_PSTATE_PROFILING_SOCCLK 960 +#define BEIGE_GOBY_UMD_PSTATE_PROFILING_MEMCLK 1000 + extern void sienna_cichlid_set_ppt_funcs(struct smu_context *smu); #endif -- cgit v1.2.3 From f8f4e2a518347063179def4e64580b2d28233d03 Mon Sep 17 00:00:00 2001 From: Rajib Mahapatra Date: Thu, 10 Feb 2022 18:46:40 +0530 Subject: drm/amdgpu: skipping SDMA hw_init and hw_fini for S0ix. [Why] SDMA ring buffer test failed if suspend is aborted during S0i3 resume. [How] If suspend is aborted for some reason during S0i3 resume cycle, it follows SDMA ring test failing and errors in amdgpu resume. For RN/CZN/Picasso, SMU saves and restores SDMA registers during S0ix cycle. So, skipping SDMA suspend and resume from driver solves the issue. This time, the system is able to resume gracefully even the suspend is aborted. Reviewed-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Rajib Mahapatra Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e8e4749e9c79..f0638db57111 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2057,6 +2057,10 @@ static int sdma_v4_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* SMU saves SDMA state for us */ + if (adev->in_s0ix) + return 0; + return sdma_v4_0_hw_fini(adev); } @@ -2064,6 +2068,10 @@ static int sdma_v4_0_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* SMU restores SDMA state for us */ + if (adev->in_s0ix) + return 0; + return sdma_v4_0_hw_init(adev); } -- cgit v1.2.3 From 9c4f59ea3f865693150edf0c91d1cc6b451360dd Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Fri, 11 Feb 2022 17:58:08 +0800 Subject: drm/amd/pm: correct the sequence of sending gpu reset msg the 2nd parameter should be smu msg type rather than asic msg index. Fixes: 7d38d9dc4ecc ("drm/amdgpu: add mode2 reset support for yellow carp") Signed-off-by: Yifan Zhang Acked-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index caf1775d48ef..0bc84b709a93 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -282,14 +282,9 @@ static int yellow_carp_post_smu_init(struct smu_context *smu) static int yellow_carp_mode_reset(struct smu_context *smu, int type) { - int ret = 0, index = 0; - - index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, - SMU_MSG_GfxDeviceDriverReset); - if (index < 0) - return index == -EACCES ? 0 : index; + int ret = 0; - ret = smu_cmn_send_smc_msg_with_param(smu, (uint16_t)index, type, NULL); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, type, NULL); if (ret) dev_err(smu->adev->dev, "Failed to mode reset!\n"); -- cgit v1.2.3 From 364438fd629f7611a84c8e6d7de91659300f1502 Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Fri, 11 Feb 2022 14:57:39 -0500 Subject: drm/radeon: Fix backlight control on iMac 12,1 The iMac 12,1 does not use the gmux driver for backlight, so the radeon backlight device is needed to set the brightness. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1838 Signed-off-by: Nicholas Bishop Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_encoders.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 0fce73b9a646..70bd84b7ef2b 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -198,7 +198,8 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, * so don't register a backlight device */ if ((rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) && - (rdev->pdev->device == 0x6741)) + (rdev->pdev->device == 0x6741) && + !dmi_match(DMI_PRODUCT_NAME, "iMac12,1")) return; if (!radeon_encoder->enc_priv) -- cgit v1.2.3 From 12f4a665cc3568328728e46c3162873b5b69cd27 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 31 Jan 2022 14:26:20 +0100 Subject: RISC-V: Fix hartid mask handling for hartid 31 and up Jessica reports that using "1 << hartid" causes undefined behavior for hartid 31 and up. Fix this by using the BIT() helper instead of an explicit shift. Reported-by: Jessica Clarke Fixes: 26fb751ca37846c9 ("RISC-V: Do not use cpumask data structure for hartid bitmap") Signed-off-by: Geert Uytterhoeven Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sbi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f72527fcb347..f93bc75f2bc4 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -5,6 +5,7 @@ * Copyright (c) 2020 Western Digital Corporation or its affiliates. */ +#include #include #include #include @@ -85,7 +86,7 @@ static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mas pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n"); break; } - hmask |= 1 << hartid; + hmask |= BIT(hartid); } return hmask; @@ -268,7 +269,7 @@ static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) } if (!hmask) hbase = hartid; - hmask |= 1UL << (hartid - hbase); + hmask |= BIT(hartid - hbase); } if (hmask) { @@ -362,7 +363,7 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, } if (!hmask) hbase = hartid; - hmask |= 1UL << (hartid - hbase); + hmask |= BIT(hartid - hbase); } if (hmask) { -- cgit v1.2.3 From 2b35d5b7d13062b805aa82dc53812a5f56249287 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 31 Jan 2022 14:26:21 +0100 Subject: RISC-V: Fix handling of empty cpu masks The cpumask rework slightly changed the behavior of the code. Fix this by treating an empty cpumask as meaning all online CPUs. Extracted from a patch by Atish Patra . Reported-by: Jessica Clarke Fixes: 26fb751ca37846c9 ("RISC-V: Do not use cpumask data structure for hartid bitmap") Signed-off-by: Geert Uytterhoeven Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sbi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f93bc75f2bc4..22444cfcd56c 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -161,7 +161,7 @@ static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) { unsigned long hart_mask; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); @@ -177,7 +177,7 @@ static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, int result = 0; unsigned long hart_mask; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); @@ -254,7 +254,7 @@ static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) struct sbiret ret = {0}; int result; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; for_each_cpu(cpuid, cpu_mask) { @@ -348,7 +348,7 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, unsigned long hartid, cpuid, hmask = 0, hbase = 0; int result; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; for_each_cpu(cpuid, cpu_mask) { -- cgit v1.2.3 From 5feef64f4c67068c49f5409d43c67cabf2327f66 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 31 Jan 2022 14:26:22 +0100 Subject: RISC-V: Fix IPI/RFENCE hmask on non-monotonic hartid ordering If the boot CPU does not have the lowest hartid, "hartid - hbase" can become negative, leading to an incorrect hmask, causing userspace to crash with SEGV. This is observed on e.g. Starlight Beta, where cpuid 1 maps to hartid 0, and cpuid 0 maps to hartid 1. Fix this by detecting this case, and shifting the accumulated mask and updating hbase, if possible. Fixes: 26fb751ca37846c9 ("RISC-V: Do not use cpumask data structure for hartid bitmap") Signed-off-by: Geert Uytterhoeven Reviewed-by: Atish Patra Tested-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sbi.c | 57 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 22444cfcd56c..775d3322b422 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -250,7 +250,7 @@ static void __sbi_set_timer_v02(uint64_t stime_value) static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) { - unsigned long hartid, cpuid, hmask = 0, hbase = 0; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; struct sbiret ret = {0}; int result; @@ -259,16 +259,27 @@ static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) for_each_cpu(cpuid, cpu_mask) { hartid = cpuid_to_hartid_map(cpuid); - if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { - ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, - hmask, hbase, 0, 0, 0, 0); - if (ret.error) - goto ecall_failed; - hmask = 0; - hbase = 0; + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + ret = sbi_ecall(SBI_EXT_IPI, + SBI_EXT_IPI_SEND_IPI, hmask, + hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask) + if (!hmask) { hbase = hartid; + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } hmask |= BIT(hartid - hbase); } @@ -345,7 +356,7 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { - unsigned long hartid, cpuid, hmask = 0, hbase = 0; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; int result; if (!cpu_mask || cpumask_empty(cpu_mask)) @@ -353,16 +364,26 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, for_each_cpu(cpuid, cpu_mask) { hartid = cpuid_to_hartid_map(cpuid); - if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { - result = __sbi_rfence_v02_call(fid, hmask, hbase, - start, size, arg4, arg5); - if (result) - return result; - hmask = 0; - hbase = 0; + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + result = __sbi_rfence_v02_call(fid, hmask, + hbase, start, size, arg4, arg5); + if (result) + return result; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask) + if (!hmask) { hbase = hartid; + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } hmask |= BIT(hartid - hbase); } -- cgit v1.2.3 From 6fec1ab67f8d60704cc7de64abcfd389ab131542 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 14 Feb 2022 09:36:57 +0100 Subject: selftests/ftrace: Do not trace do_softirq because of PREEMPT_RT The PREEMPT_RT patchset does not use do_softirq() function thus trying to filter for do_softirq fails for such kernel: echo do_softirq ftracetest: 81: echo: echo: I/O error Choose some other visible function for the test. The function does not have to be actually executed during the test, because it is only testing filter API interface. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Shuah Khan Acked-by: Sebastian Andrzej Siewior Reviewed-by: Steven Rostedt (Google) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index e96e279e0533..25432b8cd5bd 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -19,7 +19,7 @@ fail() { # mesg FILTER=set_ftrace_filter FUNC1="schedule" -FUNC2="do_softirq" +FUNC2="scheduler_tick" ALL_FUNCS="#### all functions enabled ####" -- cgit v1.2.3 From 7f4c5a26f735dea4bbc0eb8eb9da99cda95a8563 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 12 Feb 2022 08:31:20 -0800 Subject: scsi: lpfc: Fix pt2pt NVMe PRLI reject LOGO loop When connected point to point, the driver does not know the FC4's supported by the other end. In Fabrics, it can query the nameserver. Thus the driver must send PRLIs for the FC4s it supports and enable support based on the acc(ept) or rej(ect) of the respective FC4 PRLI. Currently the driver supports SCSI and NVMe PRLIs. Unfortunately, although the behavior is per standard, many devices have come to expect only SCSI PRLIs. In this particular example, the NVMe PRLI is properly RJT'd but the target decided that it must LOGO after seeing the unexpected NVMe PRLI. The LOGO causes the sequence to restart and login is now in an infinite failure loop. Fix the problem by having the driver, on a pt2pt link, remember NVMe PRLI accept or reject status across logout as long as the link stays "up". When retrying login, if the prior NVMe PRLI was rejected, it will not be sent on the next login. Link: https://lore.kernel.org/r/20220212163120.15385-1-jsmart2021@gmail.com Cc: # v5.4+ Reviewed-by: Ewan D. Milne Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_attr.c | 3 +++ drivers/scsi/lpfc/lpfc_els.c | 20 +++++++++++++++++++- drivers/scsi/lpfc/lpfc_nportdisc.c | 5 +++-- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index a1e0a106c132..98cabe09c040 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -592,6 +592,7 @@ struct lpfc_vport { #define FC_VPORT_LOGO_RCVD 0x200 /* LOGO received on vport */ #define FC_RSCN_DISCOVERY 0x400 /* Auth all devices after RSCN */ #define FC_LOGO_RCVD_DID_CHNG 0x800 /* FDISC on phys port detect DID chng*/ +#define FC_PT2PT_NO_NVME 0x1000 /* Don't send NVME PRLI */ #define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */ #define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */ #define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */ diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index bac78fbce8d6..fa8415259cb8 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1315,6 +1315,9 @@ lpfc_issue_lip(struct Scsi_Host *shost) pmboxq->u.mb.mbxCommand = MBX_DOWN_LINK; pmboxq->u.mb.mbxOwner = OWN_HOST; + if ((vport->fc_flag & FC_PT2PT) && (vport->fc_flag & FC_PT2PT_NO_NVME)) + vport->fc_flag &= ~FC_PT2PT_NO_NVME; + mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2); if ((mbxstatus == MBX_SUCCESS) && diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index db5ccae1b63d..f936833c9909 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1072,7 +1072,8 @@ stop_rr_fcf_flogi: /* FLOGI failed, so there is no fabric */ spin_lock_irq(shost->host_lock); - vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP | + FC_PT2PT_NO_NVME); spin_unlock_irq(shost->host_lock); /* If private loop, then allow max outstanding els to be @@ -4607,6 +4608,23 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Added for Vendor specifc support * Just keep retrying for these Rsn / Exp codes */ + if ((vport->fc_flag & FC_PT2PT) && + cmd == ELS_CMD_NVMEPRLI) { + switch (stat.un.b.lsRjtRsnCode) { + case LSRJT_UNABLE_TPC: + case LSRJT_INVALID_CMD: + case LSRJT_LOGICAL_ERR: + case LSRJT_CMD_UNSUPPORTED: + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "0168 NVME PRLI LS_RJT " + "reason %x port doesn't " + "support NVME, disabling NVME\n", + stat.un.b.lsRjtRsnCode); + retry = 0; + vport->fc_flag |= FC_PT2PT_NO_NVME; + goto out_retry; + } + } switch (stat.un.b.lsRjtRsnCode) { case LSRJT_UNABLE_TPC: /* The driver has a VALID PLOGI but the rport has diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 7d717a4ac14d..fdf5e777bf11 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1961,8 +1961,9 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, * is configured try it. */ ndlp->nlp_fc4_type |= NLP_FC4_FCP; - if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if ((!(vport->fc_flag & FC_PT2PT_NO_NVME)) && + (vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH || + vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { ndlp->nlp_fc4_type |= NLP_FC4_NVME; /* We need to update the localport also */ lpfc_nvme_update_localport(vport); -- cgit v1.2.3 From 10af115646171afc0217177d6eae92917b785897 Mon Sep 17 00:00:00 2001 From: Jinyoung Choi Date: Mon, 14 Feb 2022 19:33:52 +0900 Subject: scsi: ufs: core: Fix divide by zero in ufshcd_map_queues() Before calling blk_mq_map_queues(), the mq_map and nr_queues belonging to struct blk_mq_queue_map must have a valid value. If nr_queues is set to 0, the system may encounter a divide by zero depending on the type of architecture. blk_mq_map_queues() -> queue_index() Link: https://lore.kernel.org/r/1891546521.01644873481638.JavaMail.epsvc@epcpadp4 Reviewed-by: Bart Van Assche Signed-off-by: Jinyoung Choi Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 50b12d60dc1b..9349557b8a01 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2681,7 +2681,7 @@ static int ufshcd_map_queues(struct Scsi_Host *shost) break; case HCTX_TYPE_READ: map->nr_queues = 0; - break; + continue; default: WARN_ON_ONCE(true); } -- cgit v1.2.3 From 3c62fd3406e0b2277c76a6984d3979c7f3f1d129 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 5 Feb 2022 07:58:44 +0100 Subject: dmaengine: ptdma: Fix the error handling path in pt_core_init() In order to free resources correctly in the error handling path of pt_core_init(), 2 goto's have to be switched. Otherwise, some resources will leak and we will try to release things that have not been allocated yet. Also move a dev_err() to a place where it is more meaningful. Fixes: fa5d823b16a9 ("dmaengine: ptdma: Initial driver for the AMD PTDMA") Signed-off-by: Christophe JAILLET Acked-by: Sanjay R Mehta Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/41a963a35173f89c874f5c44df5530dc09fea8da.1644044244.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/ptdma/ptdma-dev.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/ptdma/ptdma-dev.c index 8a6bf291a73f..daafea5bc35d 100644 --- a/drivers/dma/ptdma/ptdma-dev.c +++ b/drivers/dma/ptdma/ptdma-dev.c @@ -207,7 +207,7 @@ int pt_core_init(struct pt_device *pt) if (!cmd_q->qbase) { dev_err(dev, "unable to allocate command queue\n"); ret = -ENOMEM; - goto e_dma_alloc; + goto e_destroy_pool; } cmd_q->qidx = 0; @@ -229,8 +229,10 @@ int pt_core_init(struct pt_device *pt) /* Request an irq */ ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, dev_name(pt->dev), pt); - if (ret) - goto e_pool; + if (ret) { + dev_err(dev, "unable to allocate an IRQ\n"); + goto e_free_dma; + } /* Update the device registers with queue information. */ cmd_q->qcontrol &= ~CMD_Q_SIZE; @@ -250,21 +252,20 @@ int pt_core_init(struct pt_device *pt) /* Register the DMA engine support */ ret = pt_dmaengine_register(pt); if (ret) - goto e_dmaengine; + goto e_free_irq; /* Set up debugfs entries */ ptdma_debugfs_setup(pt); return 0; -e_dmaengine: +e_free_irq: free_irq(pt->pt_irq, pt); -e_dma_alloc: +e_free_dma: dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, cmd_q->qbase_dma); -e_pool: - dev_err(dev, "unable to allocate an IRQ\n"); +e_destroy_pool: dma_pool_destroy(pt->cmd_q.dma_pool); return ret; -- cgit v1.2.3 From aa7accb7f91c4c2c98bfdde62446d96ecc1ef2c6 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 7 Jan 2022 10:40:47 +0800 Subject: dmaengine: at_xdmac: Fix missing unlock in at_xdmac_tasklet() Add the missing unlock before return from at_xdmac_tasklet(). Fixes: e77e561925df ("dmaengine: at_xdmac: Fix race over irq_status") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20220107024047.1051915-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index a1da2b4b6d73..1476156af74b 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1681,8 +1681,10 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) __func__, atchan->irq_status); if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) && - !(atchan->irq_status & error_mask)) + !(atchan->irq_status & error_mask)) { + spin_unlock_irq(&atchan->lock); return; + } if (atchan->irq_status & error_mask) at_xdmac_handle_error(atchan); -- cgit v1.2.3 From 2d21543efe332cd8c8f212fb7d365bc8b0690bfa Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 6 Jan 2022 11:09:39 +0800 Subject: dmaengine: sh: rcar-dmac: Check for error num after setting mask Because of the possible failure of the dma_supported(), the dma_set_mask_and_coherent() may return error num. Therefore, it should be better to check it and return the error if fails. Fixes: dc312349e875 ("dmaengine: rcar-dmac: Widen DMA mask to 40 bits") Signed-off-by: Jiasheng Jiang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220106030939.2644320-1-jiasheng@iscas.ac.cn Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 481f45c77ce1..4a34ddd9c1c6 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1869,7 +1869,9 @@ static int rcar_dmac_probe(struct platform_device *pdev) dmac->dev = &pdev->dev; platform_set_drvdata(pdev, dmac); dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); - dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; ret = rcar_dmac_parse_of(&pdev->dev, dmac); if (ret < 0) -- cgit v1.2.3 From e831c7aba950f3ae94002b10321279654525e5ec Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sat, 8 Jan 2022 08:53:36 +0000 Subject: dmaengine: stm32-dmamux: Fix PM disable depth imbalance in stm32_dmamux_probe The pm_runtime_enable will increase power disable depth. If the probe fails, we should use pm_runtime_disable() to balance pm_runtime_enable(). Fixes: 4f3ceca254e0 ("dmaengine: stm32-dmamux: Add PM Runtime support") Signed-off-by: Miaoqian Lin Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/20220108085336.11992-1-linmq006@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dmamux.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c index a42164389ebc..d5d55732adba 100644 --- a/drivers/dma/stm32-dmamux.c +++ b/drivers/dma/stm32-dmamux.c @@ -292,10 +292,12 @@ static int stm32_dmamux_probe(struct platform_device *pdev) ret = of_dma_router_register(node, stm32_dmamux_route_allocate, &stm32_dmamux->dmarouter); if (ret) - goto err_clk; + goto pm_disable; return 0; +pm_disable: + pm_runtime_disable(&pdev->dev); err_clk: clk_disable_unprepare(stm32_dmamux->clk); -- cgit v1.2.3 From da2ad87fba0891576aadda9161b8505fde81a84d Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 11 Jan 2022 09:12:39 +0800 Subject: dmaengine: sh: rcar-dmac: Check for error num after dma_set_max_seg_size As the possible failure of the dma_set_max_seg_size(), it should be better to check the return value of the dma_set_max_seg_size(). Fixes: 97d49c59e219 ("dmaengine: rcar-dmac: set scatter/gather max segment size") Reported-by: Geert Uytterhoeven Signed-off-by: Jiasheng Jiang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220111011239.452837-1-jiasheng@iscas.ac.cn Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 4a34ddd9c1c6..13d12d660cc2 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1868,7 +1868,10 @@ static int rcar_dmac_probe(struct platform_device *pdev) dmac->dev = &pdev->dev; platform_set_drvdata(pdev, dmac); - dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); + ret = dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); + if (ret) + return ret; + ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); if (ret) return ret; -- cgit v1.2.3 From 455896c53d5b803733ddd84e1bf8a430644439b6 Mon Sep 17 00:00:00 2001 From: Yongzhi Liu Date: Sat, 15 Jan 2022 21:34:56 -0800 Subject: dmaengine: shdma: Fix runtime PM imbalance on error pm_runtime_get_() increments the runtime PM usage counter even when it returns an error code, thus a matching decrement is needed on the error handling path to keep the counter balanced. Signed-off-by: Yongzhi Liu Link: https://lore.kernel.org/r/1642311296-87020-1-git-send-email-lyz_cs@pku.edu.cn Signed-off-by: Vinod Koul --- drivers/dma/sh/shdma-base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index 158e5e7defae..b26ed690f03c 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -115,8 +115,10 @@ static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx) ret = pm_runtime_get(schan->dev); spin_unlock_irq(&schan->chan_lock); - if (ret < 0) + if (ret < 0) { dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret); + pm_runtime_put(schan->dev); + } pm_runtime_barrier(schan->dev); -- cgit v1.2.3 From 0b0dcb3882c8f08bdeafa03adb4487e104d26050 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 12 Feb 2022 20:45:48 +0100 Subject: i2c: cadence: allow COMPILE_TEST Driver builds fine with COMPILE_TEST. Enable it for wider test coverage and easier maintenance. Signed-off-by: Wolfram Sang Acked-by: Michal Simek --- drivers/i2c/busses/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 42da31c1ab70..bad2fadc94a3 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -488,7 +488,7 @@ config I2C_BRCMSTB config I2C_CADENCE tristate "Cadence I2C Controller" - depends on ARCH_ZYNQ || ARM64 || XTENSA + depends on ARCH_ZYNQ || ARM64 || XTENSA || COMPILE_TEST help Say yes here to select Cadence I2C Host Controller. This controller is e.g. used by Xilinx Zynq. -- cgit v1.2.3 From 2ce4462f2724d1b3cedccea441c6d18bb360629a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 12 Feb 2022 20:46:57 +0100 Subject: i2c: imx: allow COMPILE_TEST Driver builds fine with COMPILE_TEST. Enable it for wider test coverage and easier maintenance. Signed-off-by: Wolfram Sang Acked-by: Oleksij Rempel --- drivers/i2c/busses/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index bad2fadc94a3..255fd4198433 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -680,7 +680,7 @@ config I2C_IMG config I2C_IMX tristate "IMX I2C interface" - depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE + depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE || COMPILE_TEST select I2C_SLAVE help Say Y here if you want to use the IIC bus controller on -- cgit v1.2.3 From 5de717974005fcad2502281e9f82e139ca91f4bb Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 12 Feb 2022 20:47:07 +0100 Subject: i2c: qup: allow COMPILE_TEST Driver builds fine with COMPILE_TEST. Enable it for wider test coverage and easier maintenance. Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 255fd4198433..8a6c6ee28556 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -935,7 +935,7 @@ config I2C_QCOM_GENI config I2C_QUP tristate "Qualcomm QUP based I2C controller" - depends on ARCH_QCOM + depends on ARCH_QCOM || COMPILE_TEST help If you say yes to this option, support will be included for the built-in I2C interface on the Qualcomm SoCs. -- cgit v1.2.3 From fe663df7825811358531dc2e8a52d9eaa5e3515e Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 11 Feb 2022 01:51:13 +0100 Subject: powerpc/lib/sstep: fix 'ptesync' build error Building tinyconfig with gcc (Debian 11.2.0-16) and assembler (Debian 2.37.90.20220207) the following build error shows up: {standard input}: Assembler messages: {standard input}:2088: Error: unrecognized opcode: `ptesync' make[3]: *** [/builds/linux/scripts/Makefile.build:287: arch/powerpc/lib/sstep.o] Error 1 Add the 'ifdef CONFIG_PPC64' around the 'ptesync' in function 'emulate_update_regs()' to like it is in 'analyse_instr()'. Since it looks like it got dropped inadvertently by commit 3cdfcbfd32b9 ("powerpc: Change analyse_instr so it doesn't modify *regs"). A key detail is that analyse_instr() will never recognise lwsync or ptesync on 32-bit (because of the existing ifdef), and as a result emulate_update_regs() should never be called with an op specifying either of those on 32-bit. So removing them from emulate_update_regs() should be a nop in terms of runtime behaviour. Fixes: 3cdfcbfd32b9 ("powerpc: Change analyse_instr so it doesn't modify *regs") Cc: stable@vger.kernel.org # v4.14+ Suggested-by: Arnd Bergmann Signed-off-by: Anders Roxell [mpe: Add last paragraph of change log mentioning analyse_instr() details] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220211005113.1361436-1-anders.roxell@linaro.org --- arch/powerpc/lib/sstep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a94b0cd0bdc5..bd3734d5be89 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3264,12 +3264,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) case BARRIER_EIEIO: eieio(); break; +#ifdef CONFIG_PPC64 case BARRIER_LWSYNC: asm volatile("lwsync" : : : "memory"); break; case BARRIER_PTESYNC: asm volatile("ptesync" : : : "memory"); break; +#endif } break; -- cgit v1.2.3 From f240762f88b4b1b58561939ffd44837759756477 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Feb 2022 20:10:03 -0800 Subject: io_uring: add a schedule point in io_add_buffers() Looping ~65535 times doing kmalloc() calls can trigger soft lockups, especially with DEBUG features (like KASAN). [ 253.536212] watchdog: BUG: soft lockup - CPU#64 stuck for 26s! [b219417889:12575] [ 253.544433] Modules linked in: vfat fat i2c_mux_pca954x i2c_mux spidev cdc_acm xhci_pci xhci_hcd sha3_generic gq(O) [ 253.544451] CPU: 64 PID: 12575 Comm: b219417889 Tainted: G S O 5.17.0-smp-DEV #801 [ 253.544457] RIP: 0010:kernel_text_address (./include/asm-generic/sections.h:192 ./include/linux/kallsyms.h:29 kernel/extable.c:67 kernel/extable.c:98) [ 253.544464] Code: 0f 93 c0 48 c7 c1 e0 63 d7 a4 48 39 cb 0f 92 c1 20 c1 0f b6 c1 5b 5d c3 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 53 48 89 fb <48> c7 c0 00 00 80 a0 41 be 01 00 00 00 48 39 c7 72 0c 48 c7 c0 40 [ 253.544468] RSP: 0018:ffff8882d8baf4c0 EFLAGS: 00000246 [ 253.544471] RAX: 1ffff1105b175e00 RBX: ffffffffa13ef09a RCX: 00000000a13ef001 [ 253.544474] RDX: ffffffffa13ef09a RSI: ffff8882d8baf558 RDI: ffffffffa13ef09a [ 253.544476] RBP: ffff8882d8baf4d8 R08: ffff8882d8baf5e0 R09: 0000000000000004 [ 253.544479] R10: ffff8882d8baf5e8 R11: ffffffffa0d59a50 R12: ffff8882eab20380 [ 253.544481] R13: ffffffffa0d59a50 R14: dffffc0000000000 R15: 1ffff1105b175eb0 [ 253.544483] FS: 00000000016d3380(0000) GS:ffff88af48c00000(0000) knlGS:0000000000000000 [ 253.544486] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 253.544488] CR2: 00000000004af0f0 CR3: 00000002eabfa004 CR4: 00000000003706e0 [ 253.544491] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 253.544492] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 253.544494] Call Trace: [ 253.544496] [ 253.544498] ? io_queue_sqe (fs/io_uring.c:7143) [ 253.544505] __kernel_text_address (kernel/extable.c:78) [ 253.544508] unwind_get_return_address (arch/x86/kernel/unwind_frame.c:19) [ 253.544514] arch_stack_walk (arch/x86/kernel/stacktrace.c:27) [ 253.544517] ? io_queue_sqe (fs/io_uring.c:7143) [ 253.544521] stack_trace_save (kernel/stacktrace.c:123) [ 253.544527] ____kasan_kmalloc (mm/kasan/common.c:39 mm/kasan/common.c:45 mm/kasan/common.c:436 mm/kasan/common.c:515) [ 253.544531] ? ____kasan_kmalloc (mm/kasan/common.c:39 mm/kasan/common.c:45 mm/kasan/common.c:436 mm/kasan/common.c:515) [ 253.544533] ? __kasan_kmalloc (mm/kasan/common.c:524) [ 253.544535] ? kmem_cache_alloc_trace (./include/linux/kasan.h:270 mm/slab.c:3567) [ 253.544541] ? io_issue_sqe (fs/io_uring.c:4556 fs/io_uring.c:4589 fs/io_uring.c:6828) [ 253.544544] ? __io_queue_sqe (fs/io_uring.c:?) [ 253.544551] __kasan_kmalloc (mm/kasan/common.c:524) [ 253.544553] kmem_cache_alloc_trace (./include/linux/kasan.h:270 mm/slab.c:3567) [ 253.544556] ? io_issue_sqe (fs/io_uring.c:4556 fs/io_uring.c:4589 fs/io_uring.c:6828) [ 253.544560] io_issue_sqe (fs/io_uring.c:4556 fs/io_uring.c:4589 fs/io_uring.c:6828) [ 253.544564] ? __kasan_slab_alloc (mm/kasan/common.c:45 mm/kasan/common.c:436 mm/kasan/common.c:469) [ 253.544567] ? __kasan_slab_alloc (mm/kasan/common.c:39 mm/kasan/common.c:45 mm/kasan/common.c:436 mm/kasan/common.c:469) [ 253.544569] ? kmem_cache_alloc_bulk (mm/slab.h:732 mm/slab.c:3546) [ 253.544573] ? __io_alloc_req_refill (fs/io_uring.c:2078) [ 253.544578] ? io_submit_sqes (fs/io_uring.c:7441) [ 253.544581] ? __se_sys_io_uring_enter (fs/io_uring.c:10154 fs/io_uring.c:10096) [ 253.544584] ? __x64_sys_io_uring_enter (fs/io_uring.c:10096) [ 253.544587] ? do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) [ 253.544590] ? entry_SYSCALL_64_after_hwframe (??:?) [ 253.544596] __io_queue_sqe (fs/io_uring.c:?) [ 253.544600] io_queue_sqe (fs/io_uring.c:7143) [ 253.544603] io_submit_sqe (fs/io_uring.c:?) [ 253.544608] io_submit_sqes (fs/io_uring.c:?) [ 253.544612] __se_sys_io_uring_enter (fs/io_uring.c:10154 fs/io_uring.c:10096) [ 253.544616] __x64_sys_io_uring_enter (fs/io_uring.c:10096) [ 253.544619] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) [ 253.544623] entry_SYSCALL_64_after_hwframe (??:?) Fixes: ddf0322db79c ("io_uring: add IORING_OP_PROVIDE_BUFFERS") Signed-off-by: Eric Dumazet Cc: Jens Axboe Cc: Pavel Begunkov Cc: io-uring Reported-by: syzbot Link: https://lore.kernel.org/r/20220215041003.2394784-1-eric.dumazet@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 77b9c7e4793b..b928928617a4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4567,6 +4567,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) } else { list_add_tail(&buf->list, &(*head)->list); } + cond_resched(); } return i ? i : -ENOMEM; -- cgit v1.2.3 From f8efca92ae509c25e0a4bd5d0a86decea4f0c41e Mon Sep 17 00:00:00 2001 From: Eliav Farber Date: Thu, 13 Jan 2022 10:06:19 +0000 Subject: EDAC: Fix calculation of returned address and next offset in edac_align_ptr() Do alignment logic properly and use the "ptr" local variable for calculating the remainder of the alignment. This became an issue because struct edac_mc_layer has a size that is not zero modulo eight, and the next offset that was prepared for the private data was unaligned, causing an alignment exception. The patch in Fixes: which broke this actually wanted to "what we actually care about is the alignment of the actual pointer that's about to be returned." But it didn't check that alignment. Use the correct variable "ptr" for that. [ bp: Massage commit message. ] Fixes: 8447c4d15e35 ("edac: Do alignment logic properly in edac_align_ptr()") Signed-off-by: Eliav Farber Signed-off-by: Borislav Petkov Cc: Link: https://lore.kernel.org/r/20220113100622.12783-2-farbere@amazon.com --- drivers/edac/edac_mc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 9d9aabdec96b..f5677d81bd2d 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -215,7 +215,7 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems) else return (char *)ptr; - r = (unsigned long)p % align; + r = (unsigned long)ptr % align; if (r == 0) return (char *)ptr; -- cgit v1.2.3 From f98da1d66298882b1d2061051ea14ddc15c58884 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Feb 2022 10:54:23 -0800 Subject: ACPI: tables: Quiet ACPI table not found warning Paul reports that the ACPI core complains on every boot about a missing CEDT table. Unlike the standard NUMA tables (SRAT, MADT, and SLIT) that are critical to NUMA init, CEDT is only expected on CXL platforms. Given the notice is not actionable lower its severity to debug. Link: https://lore.kernel.org/r/55f5c077-061c-7e53-b02d-53dde1dd654f@molgen.mpg.de Fixes: fd49f99c1809 ("ACPI: NUMA: Add a node and memblk for each CFMWS not in SRAT") Reported-by: Paul Menzel Signed-off-by: Dan Williams Signed-off-by: Rafael J. Wysocki --- drivers/acpi/tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 0741a4933f62..34600b5b9d8e 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -400,7 +400,7 @@ int __init_or_acpilib acpi_table_parse_entries_array( acpi_get_table(id, instance, &table_header); if (!table_header) { - pr_warn("%4.4s not present\n", id); + pr_debug("%4.4s not present\n", id); return -ENODEV; } -- cgit v1.2.3 From de8aa31ac7c23af98fe24d1c1b43b065027d6af5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 15 Feb 2022 16:29:08 -0800 Subject: Input: zinitix - add new compatible strings This driver works just fine with the BT404 version of the touchscreen as well. Tested on the Samsung GT-I8160 (Codina) mobile phone. Add all the new variants from the binding document so people can easily test them, we believe most of them work more or less. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220214234033.1052681-1-linus.walleij@linaro.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/zinitix.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c index 7c82c4f5fa6b..129ebc810de8 100644 --- a/drivers/input/touchscreen/zinitix.c +++ b/drivers/input/touchscreen/zinitix.c @@ -571,8 +571,20 @@ static SIMPLE_DEV_PM_OPS(zinitix_pm_ops, zinitix_suspend, zinitix_resume); #ifdef CONFIG_OF static const struct of_device_id zinitix_of_match[] = { + { .compatible = "zinitix,bt402" }, + { .compatible = "zinitix,bt403" }, + { .compatible = "zinitix,bt404" }, + { .compatible = "zinitix,bt412" }, + { .compatible = "zinitix,bt413" }, + { .compatible = "zinitix,bt431" }, + { .compatible = "zinitix,bt432" }, + { .compatible = "zinitix,bt531" }, { .compatible = "zinitix,bt532" }, + { .compatible = "zinitix,bt538" }, { .compatible = "zinitix,bt541" }, + { .compatible = "zinitix,bt548" }, + { .compatible = "zinitix,bt554" }, + { .compatible = "zinitix,at100" }, { } }; MODULE_DEVICE_TABLE(of, zinitix_of_match); -- cgit v1.2.3 From 439cf34c8e0a8a33d8c15a31be1b7423426bc765 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 9 Feb 2022 11:19:27 +0200 Subject: drm/atomic: Don't pollute crtc_state->mode_blob with error pointers Make sure we don't assign an error pointer to crtc_state->mode_blob as that will break all kinds of places that assume either NULL or a valid pointer (eg. drm_property_blob_put()). Cc: stable@vger.kernel.org Reported-by: fuyufan Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220209091928.14766-1-ville.syrjala@linux.intel.com Acked-by: Maxime Ripard --- drivers/gpu/drm/drm_atomic_uapi.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 9781722519c3..54d62fdb4ef9 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -76,15 +76,17 @@ int drm_atomic_set_mode_for_crtc(struct drm_crtc_state *state, state->mode_blob = NULL; if (mode) { + struct drm_property_blob *blob; + drm_mode_convert_to_umode(&umode, mode); - state->mode_blob = - drm_property_create_blob(state->crtc->dev, - sizeof(umode), - &umode); - if (IS_ERR(state->mode_blob)) - return PTR_ERR(state->mode_blob); + blob = drm_property_create_blob(crtc->dev, + sizeof(umode), &umode); + if (IS_ERR(blob)) + return PTR_ERR(blob); drm_mode_copy(&state->mode, mode); + + state->mode_blob = blob; state->enable = true; drm_dbg_atomic(crtc->dev, "Set [MODE:%s] for [CRTC:%d:%s] state %p\n", -- cgit v1.2.3 From bfe55a1f7fd6bfede16078bf04c6250fbca11588 Mon Sep 17 00:00:00 2001 From: Woody Suwalski Date: Wed, 9 Feb 2022 16:05:09 -0500 Subject: ACPI: processor: idle: fix lockup regression on 32-bit ThinkPad T40 Add and ACPI idle power level limit for 32-bit ThinkPad T40. There is a regression on T40 introduced by commit d6b88ce2, starting with kernel 5.16: commit d6b88ce2eb9d2698eb24451eb92c0a1649b17bb1 Author: Richard Gong Date:   Wed Sep 22 08:31:16 2021 -0500 ACPI: processor idle: Allow playing dead in C3 state The above patch is trying to enter C3 state during init, what is causing a T40 system freeze. I have not found a similar issue on any other of my 32-bit machines. The fix is to add another exception to the processor_power_dmi_table[] list. As a result the dmesg shows as expected: [2.155398] ACPI: IBM ThinkPad T40 detected - limiting to C2 max_cstate. Override with "processor.max_cstate=9" [2.155404] ACPI: processor limited to max C-state 2 The fix is trivial and affects only vintage T40 systems. Fixes: d6b88ce2eb9d ("CPI: processor idle: Allow playing dead in C3 state") Signed-off-by: Woody Suwalski Reviewed-by: Hans de Goede Cc: 5.16+ # 5.16+ [ rjw: New subject ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 86560a28751b..f8e9fa82cb9b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -96,6 +96,11 @@ static const struct dmi_system_id processor_power_dmi_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")}, (void *)1}, + /* T40 can not handle C3 idle state */ + { set_max_cstate, "IBM ThinkPad T40", { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "23737CU")}, + (void *)2}, {}, }; -- cgit v1.2.3 From d19e0183a88306acda07f4a01fedeeffe2a2a06b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Feb 2022 18:05:18 -0500 Subject: NFS: Do not report writeback errors in nfs_getattr() The result of the writeback, whether it is an ENOSPC or an EIO, or anything else, does not inhibit the NFS client from reporting the correct file timestamps. Fixes: 79566ef018f5 ("NFS: Getattr doesn't require data sync semantics") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a918c3a834b6..d96baa4450e3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -853,12 +853,9 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, } /* Flush out writes to the server in order to update c/mtime. */ - if ((request_mask & (STATX_CTIME|STATX_MTIME)) && - S_ISREG(inode->i_mode)) { - err = filemap_write_and_wait(inode->i_mapping); - if (err) - goto out; - } + if ((request_mask & (STATX_CTIME | STATX_MTIME)) && + S_ISREG(inode->i_mode)) + filemap_write_and_wait(inode->i_mapping); /* * We may force a getattr if the user cares about atime. -- cgit v1.2.3 From 53923e0fe2098f90f339510aeaa0e1413ae99a16 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 16 Feb 2022 13:23:53 -0600 Subject: cifs: fix confusing unneeded warning message on smb2.1 and earlier When mounting with SMB2.1 or earlier, even with nomultichannel, we log the confusing warning message: "CIFS: VFS: multichannel is not supported on this protocol version, use 3.0 or above" Fix this so that we don't log this unless they really are trying to mount with multichannel. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215608 Reported-by: Kim Scarborough Cc: stable@vger.kernel.org # 5.11+ Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/sess.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 5723d50340e5..32f478c7a66d 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -127,11 +127,6 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) struct cifs_server_iface *ifaces = NULL; size_t iface_count; - if (ses->server->dialect < SMB30_PROT_ID) { - cifs_dbg(VFS, "multichannel is not supported on this protocol version, use 3.0 or above\n"); - return 0; - } - spin_lock(&ses->chan_lock); new_chan_count = old_chan_count = ses->chan_count; @@ -145,6 +140,12 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) return 0; } + if (ses->server->dialect < SMB30_PROT_ID) { + spin_unlock(&ses->chan_lock); + cifs_dbg(VFS, "multichannel is not supported on this protocol version, use 3.0 or above\n"); + return 0; + } + if (!(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { ses->chan_max = 1; spin_unlock(&ses->chan_lock); -- cgit v1.2.3 From acd289e04a0a1f52bea7ff1129b365626059e3c2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Feb 2022 14:27:54 +0100 Subject: ALSA: hda: Set max DMA segment size The recent code refactoring to use the standard DMA helper requires the max DMA segment size setup for SG list management. Without it, the kernel may spew warnings when a large buffer is allocated. This patch sets up dma_set_max_seg_size() for avoiding spurious warnings. Fixes: 2c95b92ecd92 ("ALSA: memalloc: Unify x86 SG-buffer handling (take#3)") Cc: BugLink: https://github.com/thesofproject/linux/issues/3430 Link: https://lore.kernel.org/r/20220215132756.31236-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 917ad9d375b1..572ff0d1fafe 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1941,6 +1941,7 @@ static int azx_first_init(struct azx *chip) dma_bits = 32; if (dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(dma_bits))) dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32)); + dma_set_max_seg_size(&pci->dev, UINT_MAX); /* read number of streams from GCAP register instead of using * hardcoded value -- cgit v1.2.3 From 8872fc0d04592925b74ad9ab1b5686f4e016befe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Feb 2022 14:27:55 +0100 Subject: ASoC: SOF: hda: Set max DMA segment size The recent code refactoring to use the standard DMA helper requires the max DMA segment size setup for SG list management. Without it, the kernel may spew warnings when a large buffer is allocated. This patch sets up dma_set_max_seg_size() for avoiding spurious warnings. Fixes: 2c95b92ecd92 ("ALSA: memalloc: Unify x86 SG-buffer handling (take#3)") Acked-by: Mark Brown Cc: BugLink: https://github.com/thesofproject/linux/issues/3430 Link: https://lore.kernel.org/r/20220215132756.31236-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/soc/sof/intel/hda.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index c8fb082209ce..1385695d7745 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -956,6 +956,7 @@ int hda_dsp_probe(struct snd_sof_dev *sdev) dev_dbg(sdev->dev, "DMA mask is 32 bit\n"); dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32)); } + dma_set_max_seg_size(&pci->dev, UINT_MAX); /* init streams */ ret = hda_dsp_stream_init(sdev); -- cgit v1.2.3 From c22a8086b384025ab97ce07465420a219697d3f2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Feb 2022 14:27:56 +0100 Subject: ASoC: intel: skylake: Set max DMA segment size The recent code refactoring to use the standard DMA helper requires the max DMA segment size setup for SG list management. Without it, the kernel may spew warnings when a large buffer is allocated. This patch sets up dma_set_max_seg_size() for avoiding spurious warnings. Fixes: 2c95b92ecd92 ("ALSA: memalloc: Unify x86 SG-buffer handling (take#3)") Acked-by: Cezary Rojewski Acked-by: Mark Brown Cc: BugLink: https://github.com/thesofproject/linux/issues/3430 Link: https://lore.kernel.org/r/20220215132756.31236-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/soc/intel/skylake/skl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index 148ddf4cace0..aeca58246fc7 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -952,6 +952,7 @@ static int skl_first_init(struct hdac_bus *bus) /* allow 64bit DMA address if supported by H/W */ if (dma_set_mask_and_coherent(bus->dev, DMA_BIT_MASK(64))) dma_set_mask_and_coherent(bus->dev, DMA_BIT_MASK(32)); + dma_set_max_seg_size(bus->dev, UINT_MAX); /* initialize streams */ snd_hdac_ext_stream_init_all -- cgit v1.2.3 From cc8f7fe1f5eab010191aa4570f27641876fa1267 Mon Sep 17 00:00:00 2001 From: Haimin Zhang Date: Wed, 16 Feb 2022 16:40:38 +0800 Subject: block-map: add __GFP_ZERO flag for alloc_page in function bio_copy_kern Add __GFP_ZERO flag for alloc_page in function bio_copy_kern to initialize the buffer of a bio. Signed-off-by: Haimin Zhang Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220216084038.15635-1-tcs.kernel@gmail.com Signed-off-by: Jens Axboe --- block/blk-map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-map.c b/block/blk-map.c index 4526adde0156..c7f71d83eff1 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -446,7 +446,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data, if (bytes > len) bytes = len; - page = alloc_page(GFP_NOIO | gfp_mask); + page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask); if (!page) goto cleanup; -- cgit v1.2.3 From 7a5428dcb7902700b830e912feee4e845df7c019 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Feb 2022 08:52:31 +0100 Subject: block: fix surprise removal for drivers calling blk_set_queue_dying Various block drivers call blk_set_queue_dying to mark a disk as dead due to surprise removal events, but since commit 8e141f9eb803 that doesn't work given that the GD_DEAD flag needs to be set to stop I/O. Replace the driver calls to blk_set_queue_dying with a new (and properly documented) blk_mark_disk_dead API, and fold blk_set_queue_dying into the only remaining caller. Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk") Reported-by: Markus Blöchl Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20220217075231.1140-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 10 ++-------- block/genhd.c | 14 ++++++++++++++ drivers/block/mtip32xx/mtip32xx.c | 2 +- drivers/block/rbd.c | 2 +- drivers/block/xen-blkfront.c | 2 +- drivers/md/dm.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/multipath.c | 2 +- include/linux/blkdev.h | 3 ++- 9 files changed, 24 insertions(+), 15 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index d93e3bb9a769..1039515c99d6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -284,13 +284,6 @@ void blk_queue_start_drain(struct request_queue *q) wake_up_all(&q->mq_freeze_wq); } -void blk_set_queue_dying(struct request_queue *q) -{ - blk_queue_flag_set(QUEUE_FLAG_DYING, q); - blk_queue_start_drain(q); -} -EXPORT_SYMBOL_GPL(blk_set_queue_dying); - /** * blk_cleanup_queue - shutdown a request queue * @q: request queue to shutdown @@ -308,7 +301,8 @@ void blk_cleanup_queue(struct request_queue *q) WARN_ON_ONCE(blk_queue_registered(q)); /* mark @q DYING, no new request or merges will be allowed afterwards */ - blk_set_queue_dying(q); + blk_queue_flag_set(QUEUE_FLAG_DYING, q); + blk_queue_start_drain(q); blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); diff --git a/block/genhd.c b/block/genhd.c index 626c8406f21a..9eca1f7d35c9 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -548,6 +548,20 @@ out_free_ext_minor: } EXPORT_SYMBOL(device_add_disk); +/** + * blk_mark_disk_dead - mark a disk as dead + * @disk: disk to mark as dead + * + * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O + * to this disk. + */ +void blk_mark_disk_dead(struct gendisk *disk) +{ + set_bit(GD_DEAD, &disk->state); + blk_queue_start_drain(disk->queue); +} +EXPORT_SYMBOL_GPL(blk_mark_disk_dead); + /** * del_gendisk - remove the gendisk * @disk: the struct gendisk to remove diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index e6005c232328..2b588b62cbbb 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4112,7 +4112,7 @@ static void mtip_pci_remove(struct pci_dev *pdev) "Completion workers still active!\n"); } - blk_set_queue_dying(dd->queue); + blk_mark_disk_dead(dd->disk); set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); /* Clean up the block layer. */ diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4203cdab8abf..b844432bad20 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -7185,7 +7185,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus, * IO to complete/fail. */ blk_mq_freeze_queue(rbd_dev->disk->queue); - blk_set_queue_dying(rbd_dev->disk->queue); + blk_mark_disk_dead(rbd_dev->disk); } del_gendisk(rbd_dev->disk); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ccd0dd0c6b83..ca71a0585333 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2126,7 +2126,7 @@ static void blkfront_closing(struct blkfront_info *info) /* No more blkif_request(). */ blk_mq_stop_hw_queues(info->rq); - blk_set_queue_dying(info->rq); + blk_mark_disk_dead(info->gd); set_capacity(info->gd, 0); for_each_rinfo(info, rinfo, i) { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index dcbd6d201619..997ace47bbd5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2077,7 +2077,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait) set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); - blk_set_queue_dying(md->queue); + blk_mark_disk_dead(md->disk); /* * Take suspend_lock so that presuspend and postsuspend methods diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 79005ea1a33e..469f23186159 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4574,7 +4574,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns) if (test_and_set_bit(NVME_NS_DEAD, &ns->flags)) return; - blk_set_queue_dying(ns->queue); + blk_mark_disk_dead(ns->disk); nvme_start_ns_queue(ns); set_capacity_and_notify(ns->disk, 0); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index f8bf6606eb2f..ff775235534c 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -848,7 +848,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) return; - blk_set_queue_dying(head->disk->queue); + blk_mark_disk_dead(head->disk); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); flush_work(&head->requeue_work); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f35aea98bc35..16b47035e4b0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -748,7 +748,8 @@ extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, bool __must_check blk_get_queue(struct request_queue *); extern void blk_put_queue(struct request_queue *); -extern void blk_set_queue_dying(struct request_queue *); + +void blk_mark_disk_dead(struct gendisk *disk); #ifdef CONFIG_BLOCK /* -- cgit v1.2.3 From e92bc4cd34de2ce454bdea8cd198b8067ee4e123 Mon Sep 17 00:00:00 2001 From: Laibin Qiu Date: Sat, 22 Jan 2022 19:10:45 +0800 Subject: block/wbt: fix negative inflight counter when remove scsi device Now that we disable wbt by set WBT_STATE_OFF_DEFAULT in wbt_disable_default() when switch elevator to bfq. And when we remove scsi device, wbt will be enabled by wbt_enable_default. If it become false positive between wbt_wait() and wbt_track() when submit write request. The following is the scenario that triggered the problem. T1 T2 T3 elevator_switch_mq bfq_init_queue wbt_disable_default <= Set rwb->enable_state (OFF) Submit_bio blk_mq_make_request rq_qos_throttle <= rwb->enable_state (OFF) scsi_remove_device sd_remove del_gendisk blk_unregister_queue elv_unregister_queue wbt_enable_default <= Set rwb->enable_state (ON) q_qos_track <= rwb->enable_state (ON) ^^^^^^ this request will mark WBT_TRACKED without inflight add and will lead to drop rqw->inflight to -1 in wbt_done() which will trigger IO hung. Fix this by move wbt_enable_default() from elv_unregister to bfq_exit_queue(). Only re-enable wbt when bfq exit. Fixes: 76a8040817b4b ("blk-wbt: make sure throttle is enabled properly") Remove oneline stale comment, and kill one oneshot local variable. Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/linux-block/20211214133103.551813-1-qiulaibin@huawei.com/ Signed-off-by: Laibin Qiu Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 ++ block/elevator.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0c612a911696..36a66e97e3c2 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7018,6 +7018,8 @@ static void bfq_exit_queue(struct elevator_queue *e) spin_unlock_irq(&bfqd->lock); #endif + wbt_enable_default(bfqd->queue); + kfree(bfqd); } diff --git a/block/elevator.c b/block/elevator.c index ec98aed39c4f..482df2a350fc 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -525,8 +525,6 @@ void elv_unregister_queue(struct request_queue *q) kobject_del(&e->kobj); e->registered = 0; - /* Re-enable throttling in case elevator disabled it */ - wbt_enable_default(q); } } -- cgit v1.2.3 From c16bdeb5a39ffa3f32b32f812831a2092d2a3061 Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Fri, 11 Feb 2022 13:57:44 -0600 Subject: rlimit: Fix RLIMIT_NPROC enforcement failure caused by capability calls in set_user Solar Designer wrote: > I'm not aware of anyone actually running into this issue and reporting > it. The systems that I personally know use suexec along with rlimits > still run older/distro kernels, so would not yet be affected. > > So my mention was based on my understanding of how suexec works, and > code review. Specifically, Apache httpd has the setting RLimitNPROC, > which makes it set RLIMIT_NPROC: > > https://httpd.apache.org/docs/2.4/mod/core.html#rlimitnproc > > The above documentation for it includes: > > "This applies to processes forked from Apache httpd children servicing > requests, not the Apache httpd children themselves. This includes CGI > scripts and SSI exec commands, but not any processes forked from the > Apache httpd parent, such as piped logs." > > In code, there are: > > ./modules/generators/mod_cgid.c: ( (cgid_req.limits.limit_nproc_set) && ((rc = apr_procattr_limit_set(procattr, APR_LIMIT_NPROC, > ./modules/generators/mod_cgi.c: ((rc = apr_procattr_limit_set(procattr, APR_LIMIT_NPROC, > ./modules/filters/mod_ext_filter.c: rv = apr_procattr_limit_set(procattr, APR_LIMIT_NPROC, conf->limit_nproc); > > For example, in mod_cgi.c this is in run_cgi_child(). > > I think this means an httpd child sets RLIMIT_NPROC shortly before it > execs suexec, which is a SUID root program. suexec then switches to the > target user and execs the CGI script. > > Before 2863643fb8b9, the setuid() in suexec would set the flag, and the > target user's process count would be checked against RLIMIT_NPROC on > execve(). After 2863643fb8b9, the setuid() in suexec wouldn't set the > flag because setuid() is (naturally) called when the process is still > running as root (thus, has those limits bypass capabilities), and > accordingly execve() would not check the target user's process count > against RLIMIT_NPROC. In commit 2863643fb8b9 ("set_user: add capability check when rlimit(RLIMIT_NPROC) exceeds") capable calls were added to set_user to make it more consistent with fork. Unfortunately because of call site differences those capable calls were checking the credentials of the user before set*id() instead of after set*id(). This breaks enforcement of RLIMIT_NPROC for applications that set the rlimit and then call set*id() while holding a full set of capabilities. The capabilities are only changed in the new credential in security_task_fix_setuid(). The code in apache suexec appears to follow this pattern. Commit 909cc4ae86f3 ("[PATCH] Fix two bugs with process limits (RLIMIT_NPROC)") where this check was added describes the targes of this capability check as: 2/ When a root-owned process (e.g. cgiwrap) sets up process limits and then calls setuid, the setuid should fail if the user would then be running more than rlim_cur[RLIMIT_NPROC] processes, but it doesn't. This patch adds an appropriate test. With this patch, and per-user process limit imposed in cgiwrap really works. So the original use case of this check also appears to match the broken pattern. Restore the enforcement of RLIMIT_NPROC by removing the bad capable checks added in set_user. This unfortunately restores the inconsistent state the code has been in for the last 11 years, but dealing with the inconsistencies looks like a larger problem. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20210907213042.GA22626@openwall.com/ Link: https://lkml.kernel.org/r/20220212221412.GA29214@openwall.com Link: https://lkml.kernel.org/r/20220216155832.680775-1-ebiederm@xmission.com Fixes: 2863643fb8b9 ("set_user: add capability check when rlimit(RLIMIT_NPROC) exceeds") History-Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Reviewed-by: Solar Designer Signed-off-by: "Eric W. Biederman" --- kernel/sys.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index ecc4cf019242..8dd938a3d2bf 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -480,8 +480,7 @@ static int set_user(struct cred *new) * failure to the execve() stage. */ if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) && - new_user != INIT_USER && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) + new_user != INIT_USER) current->flags |= PF_NPROC_EXCEEDED; else current->flags &= ~PF_NPROC_EXCEEDED; -- cgit v1.2.3 From 8f2f9c4d82f24f172ae439e5035fc1e0e4c229dd Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Wed, 9 Feb 2022 20:03:19 -0600 Subject: ucounts: Enforce RLIMIT_NPROC not RLIMIT_NPROC+1 Michal Koutný wrote: > It was reported that v5.14 behaves differently when enforcing > RLIMIT_NPROC limit, namely, it allows one more task than previously. > This is consequence of the commit 21d1c5e386bc ("Reimplement > RLIMIT_NPROC on top of ucounts") that missed the sharpness of > equality in the forking path. This can be fixed either by fixing the test or by moving the increment to be before the test. Fix it my moving copy_creds which contains the increment before is_ucounts_overlimit. In the case of CLONE_NEWUSER the ucounts in the task_cred changes. The function is_ucounts_overlimit needs to use the final version of the ucounts for the new process. Which means moving the is_ucounts_overlimit test after copy_creds is necessary. Both the test in fork and the test in set_user were semantically changed when the code moved to ucounts. The change of the test in fork was bad because it was before the increment. The test in set_user was wrong and the change to ucounts fixed it. So this fix only restores the old behavior in one lcation not two. Link: https://lkml.kernel.org/r/20220204181144.24462-1-mkoutny@suse.com Link: https://lkml.kernel.org/r/20220216155832.680775-2-ebiederm@xmission.com Cc: stable@vger.kernel.org Reported-by: Michal Koutný Reviewed-by: Michal Koutný Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") Signed-off-by: "Eric W. Biederman" --- kernel/fork.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index d75a528f7b21..17d8a8c85e3b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2021,18 +2021,18 @@ static __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + retval = copy_creds(p, clone_flags); + if (retval < 0) + goto bad_fork_free; + retval = -EAGAIN; if (is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) - goto bad_fork_free; + goto bad_fork_cleanup_count; } current->flags &= ~PF_NPROC_EXCEEDED; - retval = copy_creds(p, clone_flags); - if (retval < 0) - goto bad_fork_free; - /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there -- cgit v1.2.3 From a55d07294f1e9b576093bdfa95422f8119941e83 Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Wed, 9 Feb 2022 16:22:20 -0600 Subject: ucounts: Base set_cred_ucounts changes on the real user Michal Koutný wrote: > Tasks are associated to multiple users at once. Historically and as per > setrlimit(2) RLIMIT_NPROC is enforce based on real user ID. > > The commit 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") > made the accounting structure "indexed" by euid and hence potentially > account tasks differently. > > The effective user ID may be different e.g. for setuid programs but > those are exec'd into already existing task (i.e. below limit), so > different accounting is moot. > > Some special setresuid(2) users may notice the difference, justifying > this fix. I looked at cred->ucount and it is only used for rlimit operations that were previously stored in cred->user. Making the fact cred->ucount can refer to a different user from cred->user a bug, affecting all uses of cred->ulimit not just RLIMIT_NPROC. Fix set_cred_ucounts to always use the real uid not the effective uid. Further simplify set_cred_ucounts by noticing that set_cred_ucounts somehow retained a draft version of the check to see if alloc_ucounts was needed that checks the new->user and new->user_ns against the current_real_cred(). Remove that draft version of the check. All that matters for setting the cred->ucounts are the user_ns and uid fields in the cred. Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20220207121800.5079-4-mkoutny@suse.com Link: https://lkml.kernel.org/r/20220216155832.680775-3-ebiederm@xmission.com Reported-by: Michal Koutný Reviewed-by: Michal Koutný Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") Signed-off-by: "Eric W. Biederman" --- kernel/cred.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/cred.c b/kernel/cred.c index 473d17c431f3..933155c96922 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -665,21 +665,16 @@ EXPORT_SYMBOL(cred_fscmp); int set_cred_ucounts(struct cred *new) { - struct task_struct *task = current; - const struct cred *old = task->real_cred; struct ucounts *new_ucounts, *old_ucounts = new->ucounts; - if (new->user == old->user && new->user_ns == old->user_ns) - return 0; - /* * This optimization is needed because alloc_ucounts() uses locks * for table lookups. */ - if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid)) + if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->uid)) return 0; - if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid))) + if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid))) return -EAGAIN; new->ucounts = new_ucounts; -- cgit v1.2.3 From c923a8e7edb010da67424077cbf1a6f1396ebd2e Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Mon, 14 Feb 2022 09:40:25 -0600 Subject: ucounts: Move RLIMIT_NPROC handling after set_user During set*id() which cred->ucounts to charge the the current process to is not known until after set_cred_ucounts. So move the RLIMIT_NPROC checking into a new helper flag_nproc_exceeded and call flag_nproc_exceeded after set_cred_ucounts. This is very much an arbitrary subset of the places where we currently change the RLIMIT_NPROC accounting, designed to preserve the existing logic. Fixing the existing logic will be the subject of another series of changes. Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20220216155832.680775-4-ebiederm@xmission.com Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") Signed-off-by: "Eric W. Biederman" --- kernel/sys.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 8dd938a3d2bf..97dc9e5d6bf9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -472,6 +472,16 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; + free_uid(new->user); + new->user = new_user; + return 0; +} + +static void flag_nproc_exceeded(struct cred *new) +{ + if (new->ucounts == current_ucounts()) + return; + /* * We don't fail in case of NPROC limit excess here because too many * poorly written programs don't check set*uid() return code, assuming @@ -480,14 +490,10 @@ static int set_user(struct cred *new) * failure to the execve() stage. */ if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) && - new_user != INIT_USER) + new->user != INIT_USER) current->flags |= PF_NPROC_EXCEEDED; else current->flags &= ~PF_NPROC_EXCEEDED; - - free_uid(new->user); - new->user = new_user; - return 0; } /* @@ -562,6 +568,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid) if (retval < 0) goto error; + flag_nproc_exceeded(new); return commit_creds(new); error: @@ -624,6 +631,7 @@ long __sys_setuid(uid_t uid) if (retval < 0) goto error; + flag_nproc_exceeded(new); return commit_creds(new); error: @@ -703,6 +711,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) if (retval < 0) goto error; + flag_nproc_exceeded(new); return commit_creds(new); error: -- cgit v1.2.3 From 0cbae9e24fa7d6c6e9f828562f084da82217a0c5 Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Wed, 9 Feb 2022 18:09:41 -0600 Subject: ucounts: Handle wrapping in is_ucounts_overlimit While examining is_ucounts_overlimit and reading the various messages I realized that is_ucounts_overlimit fails to deal with counts that may have wrapped. Being wrapped should be a transitory state for counts and they should never be wrapped for long, but it can happen so handle it. Cc: stable@vger.kernel.org Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") Link: https://lkml.kernel.org/r/20220216155832.680775-5-ebiederm@xmission.com Reviewed-by: Shuah Khan Signed-off-by: "Eric W. Biederman" --- kernel/ucount.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 65b597431c86..06ea04d44685 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -350,7 +350,8 @@ bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsign if (rlimit > LONG_MAX) max = LONG_MAX; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - if (get_ucounts_value(iter, type) > max) + long val = get_ucounts_value(iter, type); + if (val < 0 || val > max) return true; max = READ_ONCE(iter->ns->ucount_max[type]); } -- cgit v1.2.3 From e5733d8c89c3b57c8fcd40b8acf508388fabaa42 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 2 Feb 2022 11:41:12 -0800 Subject: x86/sgx: Fix missing poison handling in reclaimer The SGX reclaimer code lacks page poison handling in its main free path. This can lead to avoidable machine checks if a poisoned page is freed and reallocated instead of being isolated. A troublesome scenario is: 1. Machine check (#MC) occurs (asynchronous, !MF_ACTION_REQUIRED) 2. arch_memory_failure() is eventually called 3. (SGX) page->poison set to 1 4. Page is reclaimed 5. Page added to normal free lists by sgx_reclaim_pages() ^ This is the bug (poison pages should be isolated on the sgx_poison_page_list instead) 6. Page is reallocated by some innocent enclave, a second (synchronous) in-kernel #MC is induced, probably during EADD instruction. ^ This is the fallout from the bug (6) is unfortunate and can be avoided by replacing the open coded enclave page freeing code in the reclaimer with sgx_free_epc_page() to obtain support for poison page handling that includes placing the poisoned page on the correct list. Fixes: d6d261bded8a ("x86/sgx: Add new sgx_epc_page flag bit to mark free pages") Fixes: 992801ae9243 ("x86/sgx: Initial poison handling for dirty and free pages") Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Link: https://lkml.kernel.org/r/dcc95eb2aaefb042527ac50d0a50738c7c160dac.1643830353.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/sgx/main.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 4b41efc9e367..8e4bc6453d26 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -344,10 +344,8 @@ static void sgx_reclaim_pages(void) { struct sgx_epc_page *chunk[SGX_NR_TO_SCAN]; struct sgx_backing backing[SGX_NR_TO_SCAN]; - struct sgx_epc_section *section; struct sgx_encl_page *encl_page; struct sgx_epc_page *epc_page; - struct sgx_numa_node *node; pgoff_t page_index; int cnt = 0; int ret; @@ -418,13 +416,7 @@ skip: kref_put(&encl_page->encl->refcount, sgx_encl_release); epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; - section = &sgx_epc_sections[epc_page->section]; - node = section->node; - - spin_lock(&node->lock); - list_add_tail(&epc_page->list, &node->free_page_list); - spin_unlock(&node->lock); - atomic_long_inc(&sgx_nr_free_pages); + sgx_free_epc_page(epc_page); } } -- cgit v1.2.3 From 132507ed04ce0c5559be04dd378fec4f3bbc00e8 Mon Sep 17 00:00:00 2001 From: Nikhil Gupta Date: Fri, 28 Jan 2022 09:53:21 +0530 Subject: of/fdt: move elfcorehdr reservation early for crash dump kernel elfcorehdr_addr is fixed address passed to Second kernel which may be conflicted with potential reserved memory in Second kernel,so fdt_reserve_elfcorehdr() ahead of fdt_init_reserved_mem() can relieve this situation. Signed-off-by: Nikhil Gupta Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220128042321.15228-1-nikhil.gupta@nxp.com --- drivers/of/fdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index ad85ff6474ff..ec315b060cd5 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -648,8 +648,8 @@ void __init early_init_fdt_scan_reserved_mem(void) } fdt_scan_reserved_mem(); - fdt_init_reserved_mem(); fdt_reserve_elfcorehdr(); + fdt_init_reserved_mem(); } /** -- cgit v1.2.3 From 834cea3a252ed4847db076a769ad9efe06afe2d5 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 15 Feb 2022 08:27:35 +0100 Subject: i2c: brcmstb: fix support for DSL and CM variants DSL and CM (Cable Modem) support 8 B max transfer size and have a custom DT binding for that reason. This driver was checking for a wrong "compatible" however which resulted in an incorrect setup. Fixes: e2e5a2c61837 ("i2c: brcmstb: Adding support for CM and DSL SoCs") Signed-off-by: Rafał Miłecki Acked-by: Florian Fainelli Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-brcmstb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index 490ee3962645..b00f35c0b066 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -673,7 +673,7 @@ static int brcmstb_i2c_probe(struct platform_device *pdev) /* set the data in/out register size for compatible SoCs */ if (of_device_is_compatible(dev->device->of_node, - "brcmstb,brcmper-i2c")) + "brcm,brcmper-i2c")) dev->data_regsz = sizeof(u8); else dev->data_regsz = sizeof(u32); -- cgit v1.2.3 From 44cad52cc14ae10062f142ec16ede489bccf4469 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 14 Feb 2022 13:05:49 +0100 Subject: x86/ptrace: Fix xfpregs_set()'s incorrect xmm clearing xfpregs_set() handles 32-bit REGSET_XFP and 64-bit REGSET_FP. The actual code treats these regsets as modern FX state (i.e. the beginning part of XSTATE). The declarations of the regsets thought they were the legacy i387 format. The code thought they were the 32-bit (no xmm8..15) variant of XSTATE and, for good measure, made the high bits disappear by zeroing the wrong part of the buffer. The latter broke ptrace, and everything else confused anyone trying to understand the code. In particular, the nonsense definitions of the regsets confused me when I wrote this code. Clean this all up. Change the declarations to match reality (which shouldn't change the generated code, let alone the ABI) and fix xfpregs_set() to clear the correct bits and to only do so for 32-bit callers. Fixes: 6164331d15f7 ("x86/fpu: Rewrite xfpregs_set()") Reported-by: Luís Ferreira Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=215524 Link: https://lore.kernel.org/r/YgpFnZpF01WwR8wU@zn.tnic --- arch/x86/kernel/fpu/regset.c | 9 ++++----- arch/x86/kernel/ptrace.c | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 437d7c930c0b..75ffaef8c299 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -91,11 +91,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; - struct user32_fxsr_struct newstate; + struct fxregs_state newstate; int ret; - BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state)); - if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; @@ -116,9 +114,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, /* Copy the state */ memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate)); - /* Clear xmm8..15 */ + /* Clear xmm8..15 for 32-bit callers */ BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16); - memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16); + if (in_ia32_syscall()) + memset(&fpu->fpstate->regs.fxsave.xmm_space[8*4], 0, 8 * 16); /* Mark FP and SSE as in use when XSAVE is enabled */ if (use_xsave()) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 6d2244c94799..8d2f2f995539 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1224,7 +1224,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, - .n = sizeof(struct user_i387_struct) / sizeof(long), + .n = sizeof(struct fxregs_state) / sizeof(long), .size = sizeof(long), .align = sizeof(long), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, @@ -1271,7 +1271,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, - .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), + .n = sizeof(struct fxregs_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, -- cgit v1.2.3 From efe4186e6a1b54bf38b9e05450d43b0da1fd7739 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 17 Feb 2022 09:43:03 +0800 Subject: drivers: hamradio: 6pack: fix UAF bug caused by mod_timer() When a 6pack device is detaching, the sixpack_close() will act to cleanup necessary resources. Although del_timer_sync() in sixpack_close() won't return if there is an active timer, one could use mod_timer() in sp_xmit_on_air() to wake up timer again by calling userspace syscall such as ax25_sendmsg(), ax25_connect() and ax25_ioctl(). This unexpected waked handler, sp_xmit_on_air(), realizes nothing about the undergoing cleanup and may still call pty_write() to use driver layer resources that have already been released. One of the possible race conditions is shown below: (USE) | (FREE) ax25_sendmsg() | ax25_queue_xmit() | ... | sp_xmit() | sp_encaps() | sixpack_close() sp_xmit_on_air() | del_timer_sync(&sp->tx_t) mod_timer(&sp->tx_t,...) | ... | unregister_netdev() | ... (wait a while) | tty_release() | tty_release_struct() | release_tty() sp_xmit_on_air() | tty_kref_put(tty_struct) //FREE pty_write(tty_struct) //USE | ... The corresponding fail log is shown below: =============================================================== BUG: KASAN: use-after-free in __run_timers.part.0+0x170/0x470 Write of size 8 at addr ffff88800a652ab8 by task swapper/2/0 ... Call Trace: ... queue_work_on+0x3f/0x50 pty_write+0xcd/0xe0pty_write+0xcd/0xe0 sp_xmit_on_air+0xb2/0x1f0 call_timer_fn+0x28/0x150 __run_timers.part.0+0x3c2/0x470 run_timer_softirq+0x3b/0x80 __do_softirq+0xf1/0x380 ... This patch reorders the del_timer_sync() after the unregister_netdev() to avoid UAF bugs. Because the unregister_netdev() is well synchronized, it flushs out any pending queues, waits the refcount of net_device decreases to zero and removes net_device from kernel. There is not any running routines after executing unregister_netdev(). Therefore, we could not arouse timer from userspace again. Signed-off-by: Duoming Zhou Reviewed-by: Lin Ma Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index b1fc153125d9..45c3c4a1101b 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -668,11 +668,11 @@ static void sixpack_close(struct tty_struct *tty) */ netif_stop_queue(sp->dev); + unregister_netdev(sp->dev); + del_timer_sync(&sp->tx_t); del_timer_sync(&sp->resync_t); - unregister_netdev(sp->dev); - /* Free all 6pack frame buffers after unreg. */ kfree(sp->rbuff); kfree(sp->xbuff); -- cgit v1.2.3 From 4224cfd7fb6523f7a9d1c8bb91bb5df1e38eb624 Mon Sep 17 00:00:00 2001 From: suresh kumar Date: Thu, 17 Feb 2022 07:25:18 +0530 Subject: net-sysfs: add check for netdevice being present to speed_show When bringing down the netdevice or system shutdown, a panic can be triggered while accessing the sysfs path because the device is already removed. [ 755.549084] mlx5_core 0000:12:00.1: Shutdown was called [ 756.404455] mlx5_core 0000:12:00.0: Shutdown was called ... [ 757.937260] BUG: unable to handle kernel NULL pointer dereference at (null) [ 758.031397] IP: [] dma_pool_alloc+0x1ab/0x280 crash> bt ... PID: 12649 TASK: ffff8924108f2100 CPU: 1 COMMAND: "amsd" ... #9 [ffff89240e1a38b0] page_fault at ffffffff8f38c778 [exception RIP: dma_pool_alloc+0x1ab] RIP: ffffffff8ee11acb RSP: ffff89240e1a3968 RFLAGS: 00010046 RAX: 0000000000000246 RBX: ffff89243d874100 RCX: 0000000000001000 RDX: 0000000000000000 RSI: 0000000000000246 RDI: ffff89243d874090 RBP: ffff89240e1a39c0 R8: 000000000001f080 R9: ffff8905ffc03c00 R10: ffffffffc04680d4 R11: ffffffff8edde9fd R12: 00000000000080d0 R13: ffff89243d874090 R14: ffff89243d874080 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #10 [ffff89240e1a39c8] mlx5_alloc_cmd_msg at ffffffffc04680f3 [mlx5_core] #11 [ffff89240e1a3a18] cmd_exec at ffffffffc046ad62 [mlx5_core] #12 [ffff89240e1a3ab8] mlx5_cmd_exec at ffffffffc046b4fb [mlx5_core] #13 [ffff89240e1a3ae8] mlx5_core_access_reg at ffffffffc0475434 [mlx5_core] #14 [ffff89240e1a3b40] mlx5e_get_fec_caps at ffffffffc04a7348 [mlx5_core] #15 [ffff89240e1a3bb0] get_fec_supported_advertised at ffffffffc04992bf [mlx5_core] #16 [ffff89240e1a3c08] mlx5e_get_link_ksettings at ffffffffc049ab36 [mlx5_core] #17 [ffff89240e1a3ce8] __ethtool_get_link_ksettings at ffffffff8f25db46 #18 [ffff89240e1a3d48] speed_show at ffffffff8f277208 #19 [ffff89240e1a3dd8] dev_attr_show at ffffffff8f0b70e3 #20 [ffff89240e1a3df8] sysfs_kf_seq_show at ffffffff8eedbedf #21 [ffff89240e1a3e18] kernfs_seq_show at ffffffff8eeda596 #22 [ffff89240e1a3e28] seq_read at ffffffff8ee76d10 #23 [ffff89240e1a3e98] kernfs_fop_read at ffffffff8eedaef5 #24 [ffff89240e1a3ed8] vfs_read at ffffffff8ee4e3ff #25 [ffff89240e1a3f08] sys_read at ffffffff8ee4f27f #26 [ffff89240e1a3f50] system_call_fastpath at ffffffff8f395f92 crash> net_device.state ffff89443b0c0000 state = 0x5 (__LINK_STATE_START| __LINK_STATE_NOCARRIER) To prevent this scenario, we also make sure that the netdevice is present. Signed-off-by: suresh kumar Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 53ea262ecafd..fbddf966206b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -213,7 +213,7 @@ static ssize_t speed_show(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev)) { + if (netif_running(netdev) && netif_device_present(netdev)) { struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) -- cgit v1.2.3 From 2f131de361f6d0eaff17db26efdb844c178432f8 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 17 Feb 2022 11:30:48 +0200 Subject: net/sched: act_ct: Fix flow table lookup after ct clear or switching zones Flow table lookup is skipped if packet either went through ct clear action (which set the IP_CT_UNTRACKED flag on the packet), or while switching zones and there is already a connection associated with the packet. This will result in no SW offload of the connection, and the and connection not being removed from flow table with TCP teardown (fin/rst packet). To fix the above, remove these unneccary checks in flow table lookup. Fixes: 46475bb20f4b ("net/sched: act_ct: Software offload of established flows") Signed-off-by: Paul Blakey Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sched/act_ct.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f99247fc6468..33e70d60f0bf 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -533,11 +533,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct nf_conn *ct; u8 dir; - /* Previously seen or loopback */ - ct = nf_ct_get(skb, &ctinfo); - if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) - return false; - switch (family) { case NFPROTO_IPV4: if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph)) -- cgit v1.2.3 From e9da0b56fe27206b49f39805f7dcda8a89379062 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 17 Feb 2022 14:10:44 +0100 Subject: sr9700: sanity check for packet length A malicious device can leak heap data to user space providing bogus frame lengths. Introduce a sanity check. Signed-off-by: Oliver Neukum Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/usb/sr9700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index b658510cc9a4..5a53e63d33a6 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -413,7 +413,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* ignore the CRC length */ len = (skb->data[1] | (skb->data[2] << 8)) - 4; - if (len > ETH_FRAME_LEN) + if (len > ETH_FRAME_LEN || len > skb->len) return 0; /* the last packet of current skb */ -- cgit v1.2.3 From a1cdec57e03a1352e92fbbe7974039dda4efcec0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Feb 2022 09:05:02 -0800 Subject: net-timestamp: convert sk->sk_tskey to atomic_t UDP sendmsg() can be lockless, this is causing all kinds of data races. This patch converts sk->sk_tskey to remove one of these races. BUG: KCSAN: data-race in __ip_append_data / __ip_append_data read to 0xffff8881035d4b6c of 4 bytes by task 8877 on cpu 1: __ip_append_data+0x1c1/0x1de0 net/ipv4/ip_output.c:994 ip_make_skb+0x13f/0x2d0 net/ipv4/ip_output.c:1636 udp_sendmsg+0x12bd/0x14c0 net/ipv4/udp.c:1249 inet_sendmsg+0x5f/0x80 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2413 ___sys_sendmsg net/socket.c:2467 [inline] __sys_sendmmsg+0x267/0x4c0 net/socket.c:2553 __do_sys_sendmmsg net/socket.c:2582 [inline] __se_sys_sendmmsg net/socket.c:2579 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae write to 0xffff8881035d4b6c of 4 bytes by task 8880 on cpu 0: __ip_append_data+0x1d8/0x1de0 net/ipv4/ip_output.c:994 ip_make_skb+0x13f/0x2d0 net/ipv4/ip_output.c:1636 udp_sendmsg+0x12bd/0x14c0 net/ipv4/udp.c:1249 inet_sendmsg+0x5f/0x80 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2413 ___sys_sendmsg net/socket.c:2467 [inline] __sys_sendmmsg+0x267/0x4c0 net/socket.c:2553 __do_sys_sendmmsg net/socket.c:2582 [inline] __se_sys_sendmmsg net/socket.c:2579 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x0000054d -> 0x0000054e Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 8880 Comm: syz-executor.5 Not tainted 5.17.0-rc2-syzkaller-00167-gdcb85f85fa6f-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 09c2d251b707 ("net-timestamp: add key to disambiguate concurrent datagrams") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- net/can/j1939/transport.c | 2 +- net/core/skbuff.c | 2 +- net/core/sock.c | 4 ++-- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index ff9b508d9c5f..50aecd28b355 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -507,7 +507,7 @@ struct sock { #endif u16 sk_tsflags; u8 sk_shutdown; - u32 sk_tskey; + atomic_t sk_tskey; atomic_t sk_zckey; u8 sk_clockid; @@ -2667,7 +2667,7 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, __sock_tx_timestamp(tsflags, tx_flags); if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) - *tskey = sk->sk_tskey++; + *tskey = atomic_inc_return(&sk->sk_tskey) - 1; } if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index a271688780a2..307ee1174a6e 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -2006,7 +2006,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv, /* set the end-packet for broadcast */ session->pkt.last = session->pkt.total; - skcb->tskey = session->sk->sk_tskey++; + skcb->tskey = atomic_inc_return(&session->sk->sk_tskey) - 1; session->tskey = skcb->tskey; return session; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9d0388bed0c1..6a15ce3eb1d3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4730,7 +4730,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk_is_tcp(sk)) - serr->ee.ee_data -= sk->sk_tskey; + serr->ee.ee_data -= atomic_read(&sk->sk_tskey); } err = sock_queue_err_skb(sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index 4ff806d71921..6eb174805bf0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -879,9 +879,9 @@ int sock_set_timestamping(struct sock *sk, int optname, if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return -EINVAL; - sk->sk_tskey = tcp_sk(sk)->snd_una; + atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una); } else { - sk->sk_tskey = 0; + atomic_set(&sk->sk_tskey, 0); } } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 139cec29ed06..7911916a480b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -991,7 +991,7 @@ static int __ip_append_data(struct sock *sk, if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) - tskey = sk->sk_tskey++; + tskey = atomic_inc_return(&sk->sk_tskey) - 1; hh_len = LL_RESERVED_SPACE(rt->dst.dev); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2995f8d89e7e..304a295de84f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1465,7 +1465,7 @@ static int __ip6_append_data(struct sock *sk, if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) - tskey = sk->sk_tskey++; + tskey = atomic_inc_return(&sk->sk_tskey) - 1; hh_len = LL_RESERVED_SPACE(rt->dst.dev); -- cgit v1.2.3 From b352c3465bb808ab700d03f5bac2f7a6f37c5350 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Fri, 18 Feb 2022 10:19:39 +0800 Subject: net: ll_temac: check the return value of devm_kmalloc() devm_kmalloc() returns a pointer to allocated memory on success, NULL on failure. While lp->indirect_lock is allocated by devm_kmalloc() without proper check. It is better to check the value of it to prevent potential wrong memory access. Fixes: f14f5c11f051 ("net: ll_temac: Support indirect_mutex share within TEMAC IP") Signed-off-by: Xiaoke Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/ll_temac_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index b900ab5aef2a..64c7e26c3b75 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1433,6 +1433,8 @@ static int temac_probe(struct platform_device *pdev) lp->indirect_lock = devm_kmalloc(&pdev->dev, sizeof(*lp->indirect_lock), GFP_KERNEL); + if (!lp->indirect_lock) + return -ENOMEM; spin_lock_init(lp->indirect_lock); } -- cgit v1.2.3 From b70bc066d77b460a63a8c3fb2ea0d811ce862a83 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 17 Dec 2021 12:36:25 +0100 Subject: ice: Match on all profiles in slow-path In switchdev mode, slow-path rules need to match all protocols, in order to correctly redirect unfiltered or missed packets to the uplink. To set this up for the virtual function to uplink flow, the rule that redirects packets to the control VSI must have the tunnel type set to ICE_SW_TUN_AND_NON_TUN. As a result of that new tunnel type being set, ice_get_compat_fv_bitmap will select ICE_PROF_ALL. At that point all profiles would be selected for this rule, resulting in the desired behavior. Without this change slow-path would not work with tunnel protocols. Fixes: 8b032a55c1bd ("ice: low level support for tunnels") Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 1 + drivers/net/ethernet/intel/ice/ice_protocol_type.h | 1 + drivers/net/ethernet/intel/ice/ice_switch.c | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 864692b157b6..73edc24d81d5 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -44,6 +44,7 @@ ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac) ctrl_vsi->rxq_map[vf->vf_id]; rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE; rule_info.flags_info.act_valid = true; + rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN; err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, vf->repr->mac_rule); diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index dc1b0e9e6df5..695b6dd61dc2 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -47,6 +47,7 @@ enum ice_protocol_type { enum ice_sw_tunnel_type { ICE_NON_TUN = 0, + ICE_SW_TUN_AND_NON_TUN, ICE_SW_TUN_VXLAN, ICE_SW_TUN_GENEVE, ICE_SW_TUN_NVGRE, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 11ae0bee3590..475ec2afa210 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4537,6 +4537,7 @@ ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo, case ICE_SW_TUN_NVGRE: prof_type = ICE_PROF_TUN_GRE; break; + case ICE_SW_TUN_AND_NON_TUN: default: prof_type = ICE_PROF_ALL; break; @@ -5305,7 +5306,8 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (status) goto err_ice_add_adv_rule; - if (rinfo->tun_type != ICE_NON_TUN) { + if (rinfo->tun_type != ICE_NON_TUN && + rinfo->tun_type != ICE_SW_TUN_AND_NON_TUN) { status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, s_rule->pdata.lkup_tx_rx.hdr, pkt_offsets); -- cgit v1.2.3 From 932645c298c41aad64ef13016ff4c2034eef5aed Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 3 Jan 2022 07:41:21 +0100 Subject: ice: fix setting l4 port flag when adding filter Accidentally filter flag for none encapsulated l4 port field is always set. Even if user wants to add encapsulated l4 port field. Remove this unnecessary flag setting. Fixes: 9e300987d4a81 ("ice: VXLAN and Geneve TC support") Signed-off-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index e8aab664270a..65cf32eb4046 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -709,7 +709,7 @@ ice_tc_set_port(struct flow_match_ports match, fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT; else fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT; - fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT; + headers->l4_key.dst_port = match.key->dst; headers->l4_mask.dst_port = match.mask->dst; } @@ -718,7 +718,7 @@ ice_tc_set_port(struct flow_match_ports match, fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT; else fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT; - fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT; + headers->l4_key.src_port = match.key->src; headers->l4_mask.src_port = match.mask->src; } -- cgit v1.2.3 From fadead80fe4c033b5e514fcbadd20b55c4494112 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 7 Feb 2022 10:23:29 -0800 Subject: ice: fix concurrent reset and removal of VFs Commit c503e63200c6 ("ice: Stop processing VF messages during teardown") introduced a driver state flag, ICE_VF_DEINIT_IN_PROGRESS, which is intended to prevent some issues with concurrently handling messages from VFs while tearing down the VFs. This change was motivated by crashes caused while tearing down and bringing up VFs in rapid succession. It turns out that the fix actually introduces issues with the VF driver caused because the PF no longer responds to any messages sent by the VF during its .remove routine. This results in the VF potentially removing its DMA memory before the PF has shut down the device queues. Additionally, the fix doesn't actually resolve concurrency issues within the ice driver. It is possible for a VF to initiate a reset just prior to the ice driver removing VFs. This can result in the remove task concurrently operating while the VF is being reset. This results in similar memory corruption and panics purportedly fixed by that commit. Fix this concurrency at its root by protecting both the reset and removal flows using the existing VF cfg_lock. This ensures that we cannot remove the VF while any outstanding critical tasks such as a virtchnl message or a reset are occurring. This locking change also fixes the root cause originally fixed by commit c503e63200c6 ("ice: Stop processing VF messages during teardown"), so we can simply revert it. Note that I kept these two changes together because simply reverting the original commit alone would leave the driver vulnerable to worse race conditions. Fixes: c503e63200c6 ("ice: Stop processing VF messages during teardown") Signed-off-by: Jacob Keller Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 - drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 42 ++++++++++++++---------- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index a9fa701aaa95..473b1f6be9de 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -280,7 +280,6 @@ enum ice_pf_state { ICE_VFLR_EVENT_PENDING, ICE_FLTR_OVERFLOW_PROMISC, ICE_VF_DIS, - ICE_VF_DEINIT_IN_PROGRESS, ICE_CFG_BUSY, ICE_SERVICE_SCHED, ICE_SERVICE_DIS, diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 17a9bb461dc3..f3c346e13b7a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1799,7 +1799,9 @@ static void ice_handle_mdd_event(struct ice_pf *pf) * reset, so print the event prior to reset. */ ice_print_vf_rx_mdd_event(vf); + mutex_lock(&pf->vf[i].cfg_lock); ice_reset_vf(&pf->vf[i], false); + mutex_unlock(&pf->vf[i].cfg_lock); } } } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 39b80124d282..408f78e3eb13 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -500,8 +500,6 @@ void ice_free_vfs(struct ice_pf *pf) struct ice_hw *hw = &pf->hw; unsigned int tmp, i; - set_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state); - if (!pf->vf) return; @@ -519,22 +517,26 @@ void ice_free_vfs(struct ice_pf *pf) else dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); - /* Avoid wait time by stopping all VFs at the same time */ - ice_for_each_vf(pf, i) - ice_dis_vf_qs(&pf->vf[i]); - tmp = pf->num_alloc_vfs; pf->num_qps_per_vf = 0; pf->num_alloc_vfs = 0; for (i = 0; i < tmp; i++) { - if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) { + struct ice_vf *vf = &pf->vf[i]; + + mutex_lock(&vf->cfg_lock); + + ice_dis_vf_qs(vf); + + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { /* disable VF qp mappings and set VF disable state */ - ice_dis_vf_mappings(&pf->vf[i]); - set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states); - ice_free_vf_res(&pf->vf[i]); + ice_dis_vf_mappings(vf); + set_bit(ICE_VF_STATE_DIS, vf->vf_states); + ice_free_vf_res(vf); } - mutex_destroy(&pf->vf[i].cfg_lock); + mutex_unlock(&vf->cfg_lock); + + mutex_destroy(&vf->cfg_lock); } if (ice_sriov_free_msix_res(pf)) @@ -570,7 +572,6 @@ void ice_free_vfs(struct ice_pf *pf) i); clear_bit(ICE_VF_DIS, pf->state); - clear_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state); clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); } @@ -1498,6 +1499,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) ice_for_each_vf(pf, v) { vf = &pf->vf[v]; + mutex_lock(&vf->cfg_lock); + vf->driver_caps = 0; ice_vc_set_default_allowlist(vf); @@ -1512,6 +1515,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) ice_vf_pre_vsi_rebuild(vf); ice_vf_rebuild_vsi(vf); ice_vf_post_vsi_rebuild(vf); + + mutex_unlock(&vf->cfg_lock); } if (ice_is_eswitch_mode_switchdev(pf)) @@ -1562,6 +1567,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) u32 reg; int i; + lockdep_assert_held(&vf->cfg_lock); + dev = ice_pf_to_dev(pf); if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { @@ -2061,9 +2068,12 @@ void ice_process_vflr_event(struct ice_pf *pf) bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32; /* read GLGEN_VFLRSTAT register to find out the flr VFs */ reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx)); - if (reg & BIT(bit_idx)) + if (reg & BIT(bit_idx)) { /* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */ + mutex_lock(&vf->cfg_lock); ice_reset_vf(vf, true); + mutex_unlock(&vf->cfg_lock); + } } } @@ -2140,7 +2150,9 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) if (!vf) return; + mutex_lock(&vf->cfg_lock); ice_vc_reset_vf(vf); + mutex_unlock(&vf->cfg_lock); } /** @@ -4625,10 +4637,6 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) struct device *dev; int err = 0; - /* if de-init is underway, don't process messages from VF */ - if (test_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state)) - return; - dev = ice_pf_to_dev(pf); if (ice_validate_vf_id(pf, vf_id)) { err = -EINVAL; -- cgit v1.2.3 From ed22d9c8d128293fc7b0b086c7d3654bcb99a8dd Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 14 Feb 2022 06:33:27 -0800 Subject: ice: check the return of ice_ptp_gettimex64 Clang static analysis reports this issue time64.h:69:50: warning: The left operand of '+' is a garbage value set_normalized_timespec64(&ts_delta, lhs.tv_sec + rhs.tv_sec, ~~~~~~~~~~ ^ In ice_ptp_adjtime_nonatomic(), the timespec64 variable 'now' is set by ice_ptp_gettimex64(). This function can fail with -EBUSY, so 'now' can have a gargbage value. So check the return. Fixes: 06c16d89d2cb ("ice: register 1588 PTP clock device object for E810 devices") Signed-off-by: Tom Rix Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index ae291d442539..000c39d163a2 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1533,9 +1533,12 @@ exit: static int ice_ptp_adjtime_nonatomic(struct ptp_clock_info *info, s64 delta) { struct timespec64 now, then; + int ret; then = ns_to_timespec64(delta); - ice_ptp_gettimex64(info, &now, NULL); + ret = ice_ptp_gettimex64(info, &now, NULL); + if (ret) + return ret; now = timespec64_add(now, then); return ice_ptp_settime64(info, (const struct timespec64 *)&now); -- cgit v1.2.3 From 5950bdc88dd1d158f2845fdff8fb1de86476806c Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 14 Feb 2022 07:40:43 -0800 Subject: ice: initialize local variable 'tlv' Clang static analysis reports this issues ice_common.c:5008:21: warning: The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage ldo->phy_type_low |= ((u64)buf << (i * 16)); ~~~~~~~~~~~~~~~~~ ^ When called from ice_cfg_phy_fec() ldo is the uninitialized local variable tlv. So initialize. Fixes: ea78ce4dab05 ("ice: add link lenient and default override support") Signed-off-by: Tom Rix Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index a6d7d3eff186..e2af99a763ed 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -3340,7 +3340,7 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) && !ice_fw_supports_report_dflt_cfg(hw)) { - struct ice_link_default_override_tlv tlv; + struct ice_link_default_override_tlv tlv = { 0 }; status = ice_get_link_default_override(&tlv, pi); if (status) -- cgit v1.2.3 From 7b1f781f2d2460693f43d5f764198df558e3494b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 15 Feb 2022 13:32:26 -0800 Subject: Input: psmouse - set up dependency between PS/2 and SMBus companions When we switch from emulated PS/2 to native (RMI4 or Elan) protocols, we create SMBus companion devices that are attached to I2C/SMBus controllers. However, when suspending and resuming, we also need to make sure that we take into account the PS/2 device they are associated with, so that PS/2 device is suspended after the companion and resumed before it, otherwise companions will not work properly. Before I2C devices were marked for asynchronous suspend/resume, this ordering happened naturally, but now we need to enforce it by establishing device links, with PS/2 devices being suppliers and SMBus companions being consumers. Fixes: 172d931910e1 ("i2c: enable async suspend/resume on i2c client devices") Reported-and-tested-by: Hugh Dickins Tested-by: Jarkko Nikula Link: https://lore.kernel.org/r/89456fcd-a113-4c82-4b10-a9bcaefac68f@google.com Link: https://lore.kernel.org/r/YgwQN8ynO88CPMju@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/psmouse-smbus.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/input/mouse/psmouse-smbus.c b/drivers/input/mouse/psmouse-smbus.c index a472489ccbad..164f6c757f6b 100644 --- a/drivers/input/mouse/psmouse-smbus.c +++ b/drivers/input/mouse/psmouse-smbus.c @@ -75,6 +75,8 @@ static void psmouse_smbus_detach_i2c_client(struct i2c_client *client) "Marking SMBus companion %s as gone\n", dev_name(&smbdev->client->dev)); smbdev->dead = true; + device_link_remove(&smbdev->client->dev, + &smbdev->psmouse->ps2dev.serio->dev); serio_rescan(smbdev->psmouse->ps2dev.serio); } else { list_del(&smbdev->node); @@ -174,6 +176,8 @@ static void psmouse_smbus_disconnect(struct psmouse *psmouse) kfree(smbdev); } else { smbdev->dead = true; + device_link_remove(&smbdev->client->dev, + &psmouse->ps2dev.serio->dev); psmouse_dbg(smbdev->psmouse, "posting removal request for SMBus companion %s\n", dev_name(&smbdev->client->dev)); @@ -270,6 +274,12 @@ int psmouse_smbus_init(struct psmouse *psmouse, if (smbdev->client) { /* We have our companion device */ + if (!device_link_add(&smbdev->client->dev, + &psmouse->ps2dev.serio->dev, + DL_FLAG_STATELESS)) + psmouse_warn(psmouse, + "failed to set up link with iSMBus companion %s\n", + dev_name(&smbdev->client->dev)); return 0; } -- cgit v1.2.3 From 5a2aba71cd2610d3ed08867a1b1bf617cd8f89b8 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 14 Feb 2022 17:18:52 -0600 Subject: net: mvpp2: always set port pcs ops Booting a MACCHIATObin with 5.17, the system OOPs with a null pointer deref when the network is started. This is caused by the pcs->ops structure being null in mcpp2_acpi_start() when it tries to call pcs_config(). Hoisting the code which sets pcs_gmac.ops and pcs_xlg.ops, assuring they are always set, fixes the problem. The OOPs looks like: [ 18.687760] Unable to handle kernel access to user memory outside uaccess routines at virtual address 0000000000000010 [ 18.698561] Mem abort info: [ 18.698564] ESR = 0x96000004 [ 18.698567] EC = 0x25: DABT (current EL), IL = 32 bits [ 18.709821] SET = 0, FnV = 0 [ 18.714292] EA = 0, S1PTW = 0 [ 18.718833] FSC = 0x04: level 0 translation fault [ 18.725126] Data abort info: [ 18.729408] ISV = 0, ISS = 0x00000004 [ 18.734655] CM = 0, WnR = 0 [ 18.738933] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000111bbf000 [ 18.745409] [0000000000000010] pgd=0000000000000000, p4d=0000000000000000 [ 18.752235] Internal error: Oops: 96000004 [#1] SMP [ 18.757134] Modules linked in: rfkill ip_set nf_tables nfnetlink qrtr sunrpc vfat fat omap_rng fuse zram xfs crct10dif_ce mvpp2 ghash_ce sbsa_gwdt phylink xhci_plat_hcd ahci_plam [ 18.773481] CPU: 0 PID: 681 Comm: NetworkManager Not tainted 5.17.0-0.rc3.89.fc36.aarch64 #1 [ 18.781954] Hardware name: Marvell Armada 7k/8k Family Board /Armada 7k/8k Family Board , BIOS EDK II Jun 4 2019 [ 18.795222] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 18.802213] pc : mvpp2_start_dev+0x2b0/0x300 [mvpp2] [ 18.807208] lr : mvpp2_start_dev+0x298/0x300 [mvpp2] [ 18.812197] sp : ffff80000b4732c0 [ 18.815522] x29: ffff80000b4732c0 x28: 0000000000000000 x27: ffffccab38ae57f8 [ 18.822689] x26: ffff6eeb03065a10 x25: ffff80000b473a30 x24: ffff80000b4735b8 [ 18.829855] x23: 0000000000000000 x22: 00000000000001e0 x21: ffff6eeb07b6ab68 [ 18.837021] x20: ffff6eeb07b6ab30 x19: ffff6eeb07b6a9c0 x18: 0000000000000014 [ 18.844187] x17: 00000000f6232bfe x16: ffffccab899b1dc0 x15: 000000006a30f9fa [ 18.851353] x14: 000000003b77bd50 x13: 000006dc896f0e8e x12: 001bbbfccfd0d3a2 [ 18.858519] x11: 0000000000001528 x10: 0000000000001548 x9 : ffffccab38ad0fb0 [ 18.865685] x8 : ffff80000b473330 x7 : 0000000000000000 x6 : 0000000000000000 [ 18.872851] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff80000b4732f8 [ 18.880017] x2 : 000000000000001a x1 : 0000000000000002 x0 : ffff6eeb07b6ab68 [ 18.887183] Call trace: [ 18.889637] mvpp2_start_dev+0x2b0/0x300 [mvpp2] [ 18.894279] mvpp2_open+0x134/0x2b4 [mvpp2] [ 18.898483] __dev_open+0x128/0x1e4 [ 18.901988] __dev_change_flags+0x17c/0x1d0 [ 18.906187] dev_change_flags+0x30/0x70 [ 18.910038] do_setlink+0x278/0xa7c [ 18.913540] __rtnl_newlink+0x44c/0x7d0 [ 18.917391] rtnl_newlink+0x5c/0x8c [ 18.920892] rtnetlink_rcv_msg+0x254/0x314 [ 18.925006] netlink_rcv_skb+0x48/0x10c [ 18.928858] rtnetlink_rcv+0x24/0x30 [ 18.932449] netlink_unicast+0x290/0x2f4 [ 18.936386] netlink_sendmsg+0x1d0/0x41c [ 18.940323] sock_sendmsg+0x60/0x70 [ 18.943825] ____sys_sendmsg+0x248/0x260 [ 18.947762] ___sys_sendmsg+0x74/0xa0 [ 18.951438] __sys_sendmsg+0x64/0xcc [ 18.955027] __arm64_sys_sendmsg+0x30/0x40 [ 18.959140] invoke_syscall+0x50/0x120 [ 18.962906] el0_svc_common.constprop.0+0x4c/0xf4 [ 18.967629] do_el0_svc+0x30/0x9c [ 18.970958] el0_svc+0x28/0xb0 [ 18.974025] el0t_64_sync_handler+0x10c/0x140 [ 18.978400] el0t_64_sync+0x1a4/0x1a8 [ 18.982078] Code: 52800004 b9416262 aa1503e0 52800041 (f94008a5) [ 18.988196] ---[ end trace 0000000000000000 ]--- Fixes: cff056322372 ("net: mvpp2: use .mac_select_pcs() interface") Suggested-by: Russell King (Oracle) Signed-off-by: Jeremy Linton Reviewed-by: Marcin Wojtas Link: https://lore.kernel.org/r/20220214231852.3331430-1-jeremy.linton@arm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 7cdbf8b8bbf6..1a835b48791b 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6870,6 +6870,9 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE; dev->dev.of_node = port_node; + port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops; + port->pcs_xlg.ops = &mvpp2_phylink_xlg_pcs_ops; + if (!mvpp2_use_acpi_compat_mode(port_fwnode)) { port->phylink_config.dev = &dev->dev; port->phylink_config.type = PHYLINK_NETDEV; @@ -6940,9 +6943,6 @@ static int mvpp2_port_probe(struct platform_device *pdev, port->phylink_config.supported_interfaces); } - port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops; - port->pcs_xlg.ops = &mvpp2_phylink_xlg_pcs_ops; - phylink = phylink_create(&port->phylink_config, port_fwnode, phy_mode, &mvpp2_phylink_ops); if (IS_ERR(phylink)) { -- cgit v1.2.3 From ba88b5533728c54bdea68431988eff2d9a7a1237 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 18 Feb 2022 01:50:18 -0700 Subject: MAINTAINERS: rmnet: Update email addresses Switch to the quicinc.com ids. Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Link: https://lore.kernel.org/r/1645174218-32632-1-git-send-email-quic_subashab@quicinc.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6db79f3b209e..b57077b935ba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16077,8 +16077,8 @@ F: Documentation/devicetree/bindings/mtd/qcom,nandc.yaml F: drivers/mtd/nand/raw/qcom_nandc.c QUALCOMM RMNET DRIVER -M: Subash Abhinov Kasiviswanathan -M: Sean Tranchetti +M: Subash Abhinov Kasiviswanathan +M: Sean Tranchetti L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst -- cgit v1.2.3 From 3a14d0888eb4b0045884126acc69abfb7b87814d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 18 Feb 2022 14:15:35 +0100 Subject: nfp: flower: Fix a potential leak in nfp_tunnel_add_shared_mac() ida_simple_get() returns an id between min (0) and max (NFP_MAX_MAC_INDEX) inclusive. So NFP_MAX_MAC_INDEX (0xff) is a valid id. In order for the error handling path to work correctly, the 'invalid' value for 'ida_idx' should not be in the 0..NFP_MAX_MAC_INDEX range, inclusive. So set it to -1. Fixes: 20cce8865098 ("nfp: flower: enable MAC address sharing for offloadable devs") Signed-off-by: Christophe JAILLET Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20220218131535.100258-1-simon.horman@corigine.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 0a326e04e692..cb43651ea9ba 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -922,8 +922,8 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, int port, bool mod) { struct nfp_flower_priv *priv = app->priv; - int ida_idx = NFP_MAX_MAC_INDEX, err; struct nfp_tun_offloaded_mac *entry; + int ida_idx = -1, err; u16 nfp_mac_idx = 0; entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); @@ -997,7 +997,7 @@ err_remove_hash: err_free_entry: kfree(entry); err_free_ida: - if (ida_idx != NFP_MAX_MAC_INDEX) + if (ida_idx != -1) ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); return err; -- cgit v1.2.3 From b1e8206582f9d680cff7d04828708c8b6ab32957 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Feb 2022 10:16:57 +0100 Subject: sched: Fix yet more sched_fork() races Where commit 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group") fixed a fork race vs cgroup, it opened up a race vs syscalls by not placing the task on the runqueue before it gets exposed through the pidhash. Commit 13765de8148f ("sched/fair: Fix fault in reweight_entity") is trying to fix a single instance of this, instead fix the whole class of issues, effectively reverting this commit. Fixes: 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group") Reported-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Tested-by: Tadeusz Struk Tested-by: Zhang Qiao Tested-by: Dietmar Eggemann Link: https://lkml.kernel.org/r/YgoeCbwj5mbCR0qA@hirez.programming.kicks-ass.net --- include/linux/sched/task.h | 4 ++-- kernel/fork.c | 13 ++++++++++++- kernel/sched/core.c | 34 +++++++++++++++++++++------------- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index b9198a1b3a84..e84e54d1b490 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -54,8 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_post_fork(struct task_struct *p, - struct kernel_clone_args *kargs); +extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); diff --git a/kernel/fork.c b/kernel/fork.c index d75a528f7b21..c607d238fc23 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2266,6 +2266,17 @@ static __latent_entropy struct task_struct *copy_process( if (retval) goto bad_fork_put_pidfd; + /* + * Now that the cgroups are pinned, re-clone the parent cgroup and put + * the new task on the correct runqueue. All this *before* the task + * becomes visible. + * + * This isn't part of ->can_fork() because while the re-cloning is + * cgroup specific, it unconditionally needs to place the task on a + * runqueue. + */ + sched_cgroup_fork(p, args); + /* * From this point on we must avoid any synchronous user-space * communication until we take the tasklist-lock. In particular, we do @@ -2376,7 +2387,7 @@ static __latent_entropy struct task_struct *copy_process( write_unlock_irq(&tasklist_lock); proc_fork_connector(p); - sched_post_fork(p, args); + sched_post_fork(p); cgroup_post_fork(p, args); perf_event_fork(p); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fcf0c180617c..9745613d531c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1214,9 +1214,8 @@ int tg_nop(struct task_group *tg, void *data) } #endif -static void set_load_weight(struct task_struct *p) +static void set_load_weight(struct task_struct *p, bool update_load) { - bool update_load = !(READ_ONCE(p->__state) & TASK_NEW); int prio = p->static_prio - MAX_RT_PRIO; struct load_weight *load = &p->se.load; @@ -4407,7 +4406,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->static_prio = NICE_TO_PRIO(0); p->prio = p->normal_prio = p->static_prio; - set_load_weight(p); + set_load_weight(p, false); /* * We don't need the reset flag anymore after the fork. It has @@ -4425,6 +4424,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) init_entity_runnable_average(&p->se); + #ifdef CONFIG_SCHED_INFO if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); @@ -4440,18 +4440,23 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) return 0; } -void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) +void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) { unsigned long flags; -#ifdef CONFIG_CGROUP_SCHED - struct task_group *tg; -#endif + /* + * Because we're not yet on the pid-hash, p->pi_lock isn't strictly + * required yet, but lockdep gets upset if rules are violated. + */ raw_spin_lock_irqsave(&p->pi_lock, flags); #ifdef CONFIG_CGROUP_SCHED - tg = container_of(kargs->cset->subsys[cpu_cgrp_id], - struct task_group, css); - p->sched_task_group = autogroup_task_group(p, tg); + if (1) { + struct task_group *tg; + tg = container_of(kargs->cset->subsys[cpu_cgrp_id], + struct task_group, css); + tg = autogroup_task_group(p, tg); + p->sched_task_group = tg; + } #endif rseq_migrate(p); /* @@ -4462,7 +4467,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) if (p->sched_class->task_fork) p->sched_class->task_fork(p); raw_spin_unlock_irqrestore(&p->pi_lock, flags); +} +void sched_post_fork(struct task_struct *p) +{ uclamp_post_fork(p); } @@ -6922,7 +6930,7 @@ void set_user_nice(struct task_struct *p, long nice) put_prev_task(rq, p); p->static_prio = NICE_TO_PRIO(nice); - set_load_weight(p); + set_load_weight(p, true); old_prio = p->prio; p->prio = effective_prio(p); @@ -7213,7 +7221,7 @@ static void __setscheduler_params(struct task_struct *p, */ p->rt_priority = attr->sched_priority; p->normal_prio = normal_prio(p); - set_load_weight(p); + set_load_weight(p, true); } /* @@ -9446,7 +9454,7 @@ void __init sched_init(void) #endif } - set_load_weight(&init_task); + set_load_weight(&init_task, false); /* * The boot idle thread does lazy MMU switching as well: -- cgit v1.2.3 From 2428766e201565a5fa964d7461d9f6608eb04d7d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 18 Feb 2022 11:49:04 +0100 Subject: MAINTAINERS: remove duplicate entry for i2c-qcom-geni The driver is already covered in the ARM/QUALCOMM section. Also, Akash Asthana's email bounces meanwhile and Mukesh Savaliya has never responded to mails regarding this driver. Signed-off-by: Wolfram Sang Acked-by: Bjorn Andersson Signed-off-by: Wolfram Sang --- MAINTAINERS | 8 -------- 1 file changed, 8 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c914384..f0485f61295d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15972,14 +15972,6 @@ F: Documentation/devicetree/bindings/misc/qcom,fastrpc.txt F: drivers/misc/fastrpc.c F: include/uapi/misc/fastrpc.h -QUALCOMM GENERIC INTERFACE I2C DRIVER -M: Akash Asthana -M: Mukesh Savaliya -L: linux-i2c@vger.kernel.org -L: linux-arm-msm@vger.kernel.org -S: Supported -F: drivers/i2c/busses/i2c-qcom-geni.c - QUALCOMM HEXAGON ARCHITECTURE M: Brian Cain L: linux-hexagon@vger.kernel.org -- cgit v1.2.3 From 0cd33c5ffec12bd77a1c02db2469fac08f840939 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 18 Feb 2022 13:35:38 -0800 Subject: selftests: mptcp: fix diag instability Instead of waiting for an arbitrary amount of time for the MPTCP MP_CAPABLE handshake to complete, explicitly wait for the relevant socket to enter into the established status. Additionally let the data transfer application use the slowest transfer mode available (-r), to cope with very slow host, or high jitter caused by hosting VMs. Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/258 Reported-and-tested-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/diag.sh | 44 ++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 2674ba20d524..ff821025d309 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -71,6 +71,36 @@ chk_msk_remote_key_nr() __chk_nr "grep -c remote_key" $* } +# $1: ns, $2: port +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} + +wait_connected() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec ${listener_ns} grep -q " 0100007F:${port_hex} " /proc/net/tcp && break + sleep 0.1 + done +} trap cleanup EXIT ip netns add $ns @@ -81,15 +111,15 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10000 -l -t ${timeout_poll} \ 0.0.0.0 >/dev/null & -sleep 0.1 +wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -j -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} \ 127.0.0.1 >/dev/null & -sleep 0.1 +wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" @@ -101,13 +131,13 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \ 0.0.0.0 >/dev/null & -sleep 0.1 +wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -j -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} \ 127.0.0.1 >/dev/null & -sleep 0.1 +wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" flush_pids @@ -119,7 +149,7 @@ for I in `seq 1 $NR_CLIENTS`; do ./mptcp_connect -p $((I+10001)) -l -w 10 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done -sleep 0.1 +wait_local_port_listen $ns $((NR_CLIENTS + 10001)) for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ -- cgit v1.2.3 From 5b31dda736e31c58d1941c7349569c7452eafb6b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 18 Feb 2022 13:35:39 -0800 Subject: selftests: mptcp: improve 'fair usage on close' stability The mentioned test has to wait for a subflow creation failure. The current code looks for TCP sockets in TW state and sometimes misses the relevant event. Switch to a more stable check, looking for the associated mib counter. Fixes: 46e967d187ed ("selftests: mptcp: add tests for subflow creation failure") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/257 Reported-and-tested-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index c0801df15f54..10b3bd805ac6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -961,7 +961,7 @@ wait_for_tw() local ns=$1 while [ $time -lt $timeout_ms ]; do - local cnt=$(ip netns exec $ns ss -t state time-wait |wc -l) + local cnt=$(ip netns exec $ns nstat -as TcpAttemptFails | grep TcpAttemptFails | awk '{print $2}') [ "$cnt" = 1 ] && return 1 time=$((time + 100)) -- cgit v1.2.3 From 98247bc16a27cf8ead4c47ce9f15888be85841fc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 18 Feb 2022 13:35:40 -0800 Subject: mptcp: fix race in overlapping signal events After commit a88c9e496937 ("mptcp: do not block subflows creation on errors"), if a signal address races with a failing subflow creation, the subflow creation failure control path can trigger the selection of the next address to be announced while the current announced is still pending. The above will cause the unintended suppression of the ADD_ADDR announce. Fix the issue skipping the to-be-suppressed announce before it will mark an endpoint as already used. The relevant announce will be triggered again when the current one will complete. Fixes: a88c9e496937 ("mptcp: do not block subflows creation on errors") Reviewed-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 356f596e2032..82f82a513f5b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -546,6 +546,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.add_addr_signaled < add_addr_signal_max) { local = select_signal_address(pernet, msk); + /* due to racing events on both ends we can reach here while + * previous add address is still running: if we invoke now + * mptcp_pm_announce_addr(), that will fail and the + * corresponding id will be marked as used. + * Instead let the PM machinery reschedule us when the + * current address announce will be completed. + */ + if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) + return; + if (local) { if (mptcp_pm_alloc_anno_list(msk, local)) { __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); -- cgit v1.2.3 From 837cf45df163a3780bc04b555700231e95b31dc9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 18 Feb 2022 13:35:41 -0800 Subject: mptcp: fix race in incoming ADD_ADDR option processing If an MPTCP endpoint received multiple consecutive incoming ADD_ADDR options, mptcp_pm_add_addr_received() can overwrite the current remote address value after the PM lock is released in mptcp_pm_nl_add_addr_received() and before such address is echoed. Fix the issue caching the remote address value a little earlier and always using the cached value after releasing the PM lock. Fixes: f7efc7771eac ("mptcp: drop argument port from mptcp_pm_announce_addr") Reviewed-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 82f82a513f5b..4b5d795383cd 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -660,6 +660,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) unsigned int add_addr_accept_max; struct mptcp_addr_info remote; unsigned int subflows_max; + bool reset_port = false; int i, nr; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); @@ -669,15 +670,19 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) msk->pm.add_addr_accepted, add_addr_accept_max, msk->pm.remote.family); - if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote)) + remote = msk->pm.remote; + if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) goto add_addr_echo; + /* pick id 0 port, if none is provided the remote address */ + if (!remote.port) { + reset_port = true; + remote.port = sk->sk_dport; + } + /* connect to the specified remote address, using whatever * local address the routing configuration will pick. */ - remote = msk->pm.remote; - if (!remote.port) - remote.port = sk->sk_dport; nr = fill_local_addresses_vec(msk, addrs); msk->pm.add_addr_accepted++; @@ -690,8 +695,12 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) __mptcp_subflow_connect(sk, &addrs[i], &remote); spin_lock_bh(&msk->pm.lock); + /* be sure to echo exactly the received address */ + if (reset_port) + remote.port = 0; + add_addr_echo: - mptcp_pm_announce_addr(msk, &msk->pm.remote, true); + mptcp_pm_announce_addr(msk, &remote, true); mptcp_pm_nl_addr_send_ack(msk); } -- cgit v1.2.3 From f73c1194634506ab60af0debef04671fc431a435 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 18 Feb 2022 13:35:42 -0800 Subject: mptcp: add mibs counter for ignored incoming options The MPTCP in kernel path manager has some constraints on incoming addresses announce processing, so that in edge scenarios it can end-up dropping (ignoring) some of such announces. The above is not very limiting in practice since such scenarios are very uncommon and MPTCP will recover due to ADD_ADDR retransmissions. This patch adds a few MIB counters to account for such drop events to allow easier introspection of the critical scenarios. Fixes: f7efc7771eac ("mptcp: drop argument port from mptcp_pm_announce_addr") Reviewed-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/mib.c | 2 ++ net/mptcp/mib.h | 2 ++ net/mptcp/pm.c | 8 ++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 3240b72271a7..7558802a1435 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -35,12 +35,14 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR), SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD), + SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP), SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX), SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX), SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX), SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX), SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), + SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index ecd3d8b117e0..2966fcb6548b 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -28,12 +28,14 @@ enum linux_mptcp_mib_field { MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */ MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */ MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */ + MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */ MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */ MPTCP_MIB_JOINPORTSYNACKRX, /* Received a SYNACK MP_JOIN with a different port-number */ MPTCP_MIB_JOINPORTACKRX, /* Received an ACK MP_JOIN with a different port-number */ MPTCP_MIB_MISMATCHPORTSYNRX, /* Received a SYN MP_JOIN with a mismatched port-number */ MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */ MPTCP_MIB_RMADDR, /* Received RM_ADDR */ + MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 696b2c4613a7..7bea318ac5f2 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -213,6 +213,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->remote = *addr; + } else { + __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); } spin_unlock_bh(&pm->lock); @@ -253,8 +255,10 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, mptcp_event_addr_removed(msk, rm_list->ids[i]); spin_lock_bh(&pm->lock); - mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED); - pm->rm_list_rx = *rm_list; + if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED)) + pm->rm_list_rx = *rm_list; + else + __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP); spin_unlock_bh(&pm->lock); } -- cgit v1.2.3 From 6ef84b1517e08f6c2fc105b798a9d21bf4caa6cb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 18 Feb 2022 13:35:43 -0800 Subject: selftests: mptcp: more robust signal race test The in kernel MPTCP PM implementation can process a single incoming add address option at any given time. In the mentioned test the server can surpass such limit. Let the setup cope with that allowing a faster add_addr retransmission. Fixes: a88c9e496937 ("mptcp: do not block subflows creation on errors") Fixes: f7efc7771eac ("mptcp: drop argument port from mptcp_pm_announce_addr") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/254 Reported-and-tested-by: Matthieu Baerts Reviewed-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 10b3bd805ac6..0d6a71e7bb59 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -752,11 +752,17 @@ chk_add_nr() local mis_ack_nr=${8:-0} local count local dump_stats + local timeout + + timeout=`ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout` printf "%-39s %s" " " "add" - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'` + count=`ip netns exec $ns2 nstat -as MPTcpExtAddAddr | grep MPTcpExtAddAddr | awk '{print $2}'` [ -z "$count" ] && count=0 - if [ "$count" != "$add_nr" ]; then + + # if the test configured a short timeout tolerate greater then expected + # add addrs options, due to retransmissions + if [ "$count" != "$add_nr" ] && [ "$timeout" -gt 1 -o "$count" -lt "$add_nr" ]; then echo "[fail] got $count ADD_ADDR[s] expected $add_nr" ret=1 dump_stats=1 @@ -1158,7 +1164,10 @@ signal_address_tests() ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal - run_tests $ns1 $ns2 10.0.1.1 + + # the peer could possibly miss some addr notification, allow retransmission + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr "signal addresses race test" 3 3 3 # the server will not signal the address terminating -- cgit v1.2.3 From e35f885b357d47e04380a2056d1b2cc3e6f4f24b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 18 Feb 2022 13:35:44 -0800 Subject: selftests: mptcp: be more conservative with cookie MPJ limits Since commit 2843ff6f36db ("mptcp: remote addresses fullmesh"), an MPTCP client can attempt creating multiple MPJ subflow simultaneusly. In such scenario the server, when syncookies are enabled, could end-up accepting incoming MPJ syn even above the configured subflow limit, as the such limit can be enforced in a reliable way only after the subflow creation. In case of syncookie, only after the 3rd ack reception. As a consequence the related self-tests case sporadically fails, as it verify that the server always accept the expected number of MPJ syn. Address the issues relaxing the MPJ syn number constrain. Note that the check on the accepted number of MPJ 3rd ack still remains intact. Fixes: 2843ff6f36db ("mptcp: remote addresses fullmesh") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 0d6a71e7bb59..0c8a2a20b96c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -660,6 +660,7 @@ chk_join_nr() local ack_nr=$4 local count local dump_stats + local with_cookie printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn" count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'` @@ -673,12 +674,20 @@ chk_join_nr() fi echo -n " - synack" + with_cookie=`ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies` count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$syn_ack_nr" ]; then - echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" - ret=1 - dump_stats=1 + # simult connections exceeding the limit with cookie enabled could go up to + # synack validation as the conn limit can be enforced reliably only after + # the subflow creation + if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then + echo -n "[ ok ]" + else + echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" + ret=1 + dump_stats=1 + fi else echo -n "[ ok ]" fi -- cgit v1.2.3 From 5486f5bf790b5c664913076c3194b8f916a5c7ad Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Feb 2022 14:35:49 +0100 Subject: net: Force inlining of checksum functions in net/checksum.h All functions defined as static inline in net/checksum.h are meant to be inlined for performance reason. But since commit ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly") the compiler is allowed to uninline functions when it wants. Fair enough in the general case, but for tiny performance critical checksum helpers that's counter-productive. The problem mainly arises when selecting CONFIG_CC_OPTIMISE_FOR_SIZE, Those helpers being 'static inline' in header files you suddenly find them duplicated many times in the resulting vmlinux. Here is a typical exemple when building powerpc pmac32_defconfig with CONFIG_CC_OPTIMISE_FOR_SIZE. csum_sub() appears 4 times: c04a23cc : c04a23cc: 7c 84 20 f8 not r4,r4 c04a23d0: 7c 63 20 14 addc r3,r3,r4 c04a23d4: 7c 63 01 94 addze r3,r3 c04a23d8: 4e 80 00 20 blr ... c04a2ce8: 4b ff f6 e5 bl c04a23cc ... c04a2d2c: 4b ff f6 a1 bl c04a23cc ... c04a2d54: 4b ff f6 79 bl c04a23cc ... c04a754c : c04a754c: 7c 84 20 f8 not r4,r4 c04a7550: 7c 63 20 14 addc r3,r3,r4 c04a7554: 7c 63 01 94 addze r3,r3 c04a7558: 4e 80 00 20 blr ... c04ac930: 4b ff ac 1d bl c04a754c ... c04ad264: 4b ff a2 e9 bl c04a754c ... c04e3b08 : c04e3b08: 7c 84 20 f8 not r4,r4 c04e3b0c: 7c 63 20 14 addc r3,r3,r4 c04e3b10: 7c 63 01 94 addze r3,r3 c04e3b14: 4e 80 00 20 blr ... c04e5788: 4b ff e3 81 bl c04e3b08 ... c04e65c8: 4b ff d5 41 bl c04e3b08 ... c0512d34 : c0512d34: 7c 84 20 f8 not r4,r4 c0512d38: 7c 63 20 14 addc r3,r3,r4 c0512d3c: 7c 63 01 94 addze r3,r3 c0512d40: 4e 80 00 20 blr ... c0512dfc: 4b ff ff 39 bl c0512d34 ... c05138bc: 4b ff f4 79 bl c0512d34 ... Restore the expected behaviour by using __always_inline for all functions defined in net/checksum.h vmlinux size is even reduced by 256 bytes with this patch: text data bss dec hex filename 6980022 2515362 194384 9689768 93daa8 vmlinux.before 6979862 2515266 194384 9689512 93d9a8 vmlinux.now Fixes: ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly") Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Andrew Morton Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- include/net/checksum.h | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/include/net/checksum.h b/include/net/checksum.h index 5218041e5c8f..02d0c2d01014 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -22,7 +22,7 @@ #include #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER -static inline +static __always_inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len) { @@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst, #endif #ifndef HAVE_CSUM_COPY_USER -static __inline__ __wsum csum_and_copy_to_user +static __always_inline __wsum csum_and_copy_to_user (const void *src, void __user *dst, int len) { __wsum sum = csum_partial(src, len, ~0U); @@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user #endif #ifndef _HAVE_ARCH_CSUM_AND_COPY -static inline __wsum +static __always_inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) { memcpy(dst, src, len); @@ -54,7 +54,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len) #endif #ifndef HAVE_ARCH_CSUM_ADD -static inline __wsum csum_add(__wsum csum, __wsum addend) +static __always_inline __wsum csum_add(__wsum csum, __wsum addend) { u32 res = (__force u32)csum; res += (__force u32)addend; @@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) } #endif -static inline __wsum csum_sub(__wsum csum, __wsum addend) +static __always_inline __wsum csum_sub(__wsum csum, __wsum addend) { return csum_add(csum, ~addend); } -static inline __sum16 csum16_add(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend) { u16 res = (__force u16)csum; @@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend) return (__force __sum16)(res + (res < (__force u16)addend)); } -static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) { return csum16_add(csum, ~addend); } -static inline __wsum csum_shift(__wsum sum, int offset) +static __always_inline __wsum csum_shift(__wsum sum, int offset) { /* rotate sum to align it with a 16b boundary */ if (offset & 1) @@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset) return sum; } -static inline __wsum +static __always_inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset) { return csum_add(csum, csum_shift(csum2, offset)); } -static inline __wsum +static __always_inline __wsum csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) { return csum_block_add(csum, csum2, offset); } -static inline __wsum +static __always_inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { return csum_block_add(csum, ~csum2, offset); } -static inline __wsum csum_unfold(__sum16 n) +static __always_inline __wsum csum_unfold(__sum16 n) { return (__force __wsum)n; } -static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) +static __always_inline +__wsum csum_partial_ext(const void *buff, int len, __wsum sum) { return csum_partial(buff, len, sum); } #define CSUM_MANGLED_0 ((__force __sum16)0xffff) -static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) +static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) { *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); } -static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) +static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); @@ -136,7 +137,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) * m : old value of a 16bit field * m' : new value of a 16bit field */ -static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) +static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) { *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } @@ -150,16 +151,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr); -static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, - __be16 from, __be16 to, - bool pseudohdr) +static __always_inline +void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, + __be16 from, __be16 to, bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); } -static inline __wsum remcsum_adjust(void *ptr, __wsum csum, - int start, int offset) +static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum, + int start, int offset) { __sum16 *psum = (__sum16 *)(ptr + offset); __wsum delta; @@ -175,12 +176,12 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, return delta; } -static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) +static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta) { *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } -static inline __wsum wsum_negate(__wsum val) +static __always_inline __wsum wsum_negate(__wsum val) { return (__force __wsum)-((__force u32)val); } -- cgit v1.2.3 From 3d00827a90db6f79abc7cdc553887f89a2e0a184 Mon Sep 17 00:00:00 2001 From: Svenning Sørensen Date: Fri, 18 Feb 2022 11:27:01 +0000 Subject: net: dsa: microchip: fix bridging with more than two member ports Commit b3612ccdf284 ("net: dsa: microchip: implement multi-bridge support") plugged a packet leak between ports that were members of different bridges. Unfortunately, this broke another use case, namely that of more than two ports that are members of the same bridge. After that commit, when a port is added to a bridge, hardware bridging between other member ports of that bridge will be cleared, preventing packet exchange between them. Fix by ensuring that the Port VLAN Membership bitmap includes any existing ports in the bridge, not just the port being added. Fixes: b3612ccdf284 ("net: dsa: microchip: implement multi-bridge support") Signed-off-by: Svenning Sørensen Tested-by: Oleksij Rempel Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 55dbda04ea62..243f8ad6d06e 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -26,7 +26,7 @@ void ksz_update_port_member(struct ksz_device *dev, int port) struct dsa_switch *ds = dev->ds; u8 port_member = 0, cpu_port; const struct dsa_port *dp; - int i; + int i, j; if (!dsa_is_user_port(ds, port)) return; @@ -45,13 +45,33 @@ void ksz_update_port_member(struct ksz_device *dev, int port) continue; if (!dsa_port_bridge_same(dp, other_dp)) continue; + if (other_p->stp_state != BR_STATE_FORWARDING) + continue; - if (other_p->stp_state == BR_STATE_FORWARDING && - p->stp_state == BR_STATE_FORWARDING) { + if (p->stp_state == BR_STATE_FORWARDING) { val |= BIT(port); port_member |= BIT(i); } + /* Retain port [i]'s relationship to other ports than [port] */ + for (j = 0; j < ds->num_ports; j++) { + const struct dsa_port *third_dp; + struct ksz_port *third_p; + + if (j == i) + continue; + if (j == port) + continue; + if (!dsa_is_user_port(ds, j)) + continue; + third_p = &dev->ports[j]; + if (third_p->stp_state != BR_STATE_FORWARDING) + continue; + third_dp = dsa_to_port(ds, j); + if (dsa_port_bridge_same(other_dp, third_dp)) + val |= BIT(j); + } + dev->dev_ops->cfg_port_member(dev, i, val | cpu_port); } -- cgit v1.2.3 From 8940e6b669ca1196ce0a0549c819078096390f76 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Feb 2022 14:13:02 +0200 Subject: net: dsa: avoid call to __dev_set_promiscuity() while rtnl_mutex isn't held If the DSA master doesn't support IFF_UNICAST_FLT, then the following call path is possible: dsa_slave_switchdev_event_work -> dsa_port_host_fdb_add -> dev_uc_add -> __dev_set_rx_mode -> __dev_set_promiscuity Since the blamed commit, dsa_slave_switchdev_event_work() no longer holds rtnl_lock(), which triggers the ASSERT_RTNL() from __dev_set_promiscuity(). Taking rtnl_lock() around dev_uc_add() is impossible, because all the code paths that call dsa_flush_workqueue() do so from contexts where the rtnl_mutex is already held - so this would lead to an instant deadlock. dev_uc_add() in itself doesn't require the rtnl_mutex for protection. There is this comment in __dev_set_rx_mode() which assumes so: /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ but it is from commit 4417da668c00 ("[NET]: dev: secondary unicast address support") dated June 2007, and in the meantime, commit f1f28aa3510d ("netdev: Add addr_list_lock to struct net_device."), dated July 2008, has added &dev->addr_list_lock to protect this instead of the global rtnl_mutex. Nonetheless, __dev_set_promiscuity() does assume rtnl_mutex protection, but it is the uncommon path of what we typically expect dev_uc_add() to do. So since only the uncommon path requires rtnl_lock(), just check ahead of time whether dev_uc_add() would result into a call to __dev_set_promiscuity(), and handle that condition separately. DSA already configures the master interface to be promiscuous if the tagger requires this. We can extend this to also cover the case where the master doesn't handle dev_uc_add() (doesn't support IFF_UNICAST_FLT), and on the premise that we'd end up making it promiscuous during operation anyway, either if a DSA slave has a non-inherited MAC address, or if the bridge notifies local FDB entries for its own MAC address, the address of a station learned on a foreign port, etc. Fixes: 0faf890fc519 ("net: dsa: drop rtnl_lock from dsa_slave_switchdev_event_work") Reported-by: Oleksij Rempel Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/master.c | 7 ++++++- net/dsa/port.c | 20 ++++++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/net/dsa/master.c b/net/dsa/master.c index 2199104ca7df..880f910b23a9 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -260,11 +260,16 @@ static void dsa_netdev_ops_set(struct net_device *dev, dev->dsa_ptr->netdev_ops = ops; } +/* Keep the master always promiscuous if the tagging protocol requires that + * (garbles MAC DA) or if it doesn't support unicast filtering, case in which + * it would revert to promiscuous mode as soon as we call dev_uc_add() on it + * anyway. + */ static void dsa_master_set_promiscuity(struct net_device *dev, int inc) { const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops; - if (!ops->promisc_on_master) + if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master) return; ASSERT_RTNL(); diff --git a/net/dsa/port.c b/net/dsa/port.c index bd78192e0e47..eef4a98f2628 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -781,9 +781,15 @@ int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, struct dsa_port *cpu_dp = dp->cpu_dp; int err; - err = dev_uc_add(cpu_dp->master, addr); - if (err) - return err; + /* Avoid a call to __dev_set_promiscuity() on the master, which + * requires rtnl_lock(), since we can't guarantee that is held here, + * and we can't take it either. + */ + if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_add(cpu_dp->master, addr); + if (err) + return err; + } return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info); } @@ -800,9 +806,11 @@ int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, struct dsa_port *cpu_dp = dp->cpu_dp; int err; - err = dev_uc_del(cpu_dp->master, addr); - if (err) - return err; + if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_del(cpu_dp->master, addr); + if (err) + return err; + } return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info); } -- cgit v1.2.3 From b1a5983f56e371046dcf164f90bfaf704d2b89f6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 17 Feb 2022 23:41:20 +0100 Subject: netfilter: nf_tables_offload: incorrect flow offload action array size immediate verdict expression needs to allocate one slot in the flow offload action array, however, immediate data expression does not need to do so. fwd and dup expression need to allocate one slot, this is missing. Add a new offload_action interface to report if this expression needs to allocate one slot in the flow offload action array. Fixes: be2861dc36d7 ("netfilter: nft_{fwd,dup}_netdev: add offload support") Reported-and-tested-by: Nick Gregory Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/net/netfilter/nf_tables_offload.h | 2 -- net/netfilter/nf_tables_offload.c | 3 ++- net/netfilter/nft_dup_netdev.c | 6 ++++++ net/netfilter/nft_fwd_netdev.c | 6 ++++++ net/netfilter/nft_immediate.c | 12 +++++++++++- 6 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index eaf55da9a205..c4c0861deac1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -905,9 +905,9 @@ struct nft_expr_ops { int (*offload)(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr); + bool (*offload_action)(const struct nft_expr *expr); void (*offload_stats)(struct nft_expr *expr, const struct flow_stats *stats); - u32 offload_flags; const struct nft_expr_type *type; void *data; }; diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index f9d95ff82df8..797147843958 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -67,8 +67,6 @@ struct nft_flow_rule { struct flow_rule *rule; }; -#define NFT_OFFLOAD_F_ACTION (1 << 0) - void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, enum flow_dissector_key_id addr_type); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 9656c1646222..2d36952b1392 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -94,7 +94,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, expr = nft_expr_first(rule); while (nft_expr_more(rule, expr)) { - if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION) + if (expr->ops->offload_action && + expr->ops->offload_action(expr)) num_actions++; expr = nft_expr_next(expr); diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index bbf3fcba3df4..5b5c607fbf83 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -67,6 +67,11 @@ static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx, return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif); } +static bool nft_dup_netdev_offload_action(const struct nft_expr *expr) +{ + return true; +} + static struct nft_expr_type nft_dup_netdev_type; static const struct nft_expr_ops nft_dup_netdev_ops = { .type = &nft_dup_netdev_type, @@ -75,6 +80,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = { .init = nft_dup_netdev_init, .dump = nft_dup_netdev_dump, .offload = nft_dup_netdev_offload, + .offload_action = nft_dup_netdev_offload_action, }; static struct nft_expr_type nft_dup_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index fa9301ca6033..619e394a91de 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -79,6 +79,11 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx, return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif); } +static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr) +{ + return true; +} + struct nft_fwd_neigh { u8 sreg_dev; u8 sreg_addr; @@ -222,6 +227,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .dump = nft_fwd_netdev_dump, .validate = nft_fwd_validate, .offload = nft_fwd_netdev_offload, + .offload_action = nft_fwd_netdev_offload_action, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 90c64d27ae53..d0f67d325bdf 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -213,6 +213,16 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx, return 0; } +static bool nft_immediate_offload_action(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + return true; + + return false; +} + static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -224,7 +234,7 @@ static const struct nft_expr_ops nft_imm_ops = { .dump = nft_immediate_dump, .validate = nft_immediate_validate, .offload = nft_immediate_offload, - .offload_flags = NFT_OFFLOAD_F_ACTION, + .offload_action = nft_immediate_offload_action, }; struct nft_expr_type nft_imm_type __read_mostly = { -- cgit v1.2.3 From e23e40fd6de5c1c94793bc4147e8f34387d58576 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 5 Feb 2022 01:58:04 +0100 Subject: hwmon: (ntc_thermistor) Underscore Samsung thermistor The sysfs does not like that we name the thermistor something that contains a dash: ntc-thermistor thermistor: hwmon: 'ssg1404-001221' is not a valid name attribute, please fix Fix it up by switching to an underscore. Fixes: e13e979b2b3d ("hwmon: (ntc_thermistor) Add Samsung 1404-001221 NTC") Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20220205005804.123245-1-linus.walleij@linaro.org Signed-off-by: Guenter Roeck --- drivers/hwmon/ntc_thermistor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index 414204f5704c..9c9e9f4ccb9e 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -59,7 +59,7 @@ static const struct platform_device_id ntc_thermistor_id[] = { [NTC_NCP15XH103] = { "ncp15xh103", TYPE_NCPXXXH103 }, [NTC_NCP18WB473] = { "ncp18wb473", TYPE_NCPXXWB473 }, [NTC_NCP21WB473] = { "ncp21wb473", TYPE_NCPXXWB473 }, - [NTC_SSG1404001221] = { "ssg1404-001221", TYPE_NCPXXWB473 }, + [NTC_SSG1404001221] = { "ssg1404_001221", TYPE_NCPXXWB473 }, [NTC_LAST] = { }, }; -- cgit v1.2.3 From 84d3c83e6ea7d46cf3de3a54578af73eb24a64f2 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Sun, 20 Feb 2022 04:05:47 -0500 Subject: bnxt_en: Fix active FEC reporting to ethtool ethtool --show-fec does not show anything when the Active FEC setting in the chip is set to None. Fix it to properly return ETHTOOL_FEC_OFF in that case. Fixes: 8b2775890ad8 ("bnxt_en: Report FEC settings to ethtool.") Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 003330e8cd58..e195f4a669d8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1969,6 +1969,9 @@ static int bnxt_get_fecparam(struct net_device *dev, case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE: fec->active_fec |= ETHTOOL_FEC_LLRS; break; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_NONE_ACTIVE: + fec->active_fec |= ETHTOOL_FEC_OFF; + break; } return 0; } -- cgit v1.2.3 From 6758f937669dba14c6aac7ca004edda42ec1b18d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 20 Feb 2022 04:05:48 -0500 Subject: bnxt_en: Fix offline ethtool selftest with RDMA enabled For offline (destructive) self tests, we need to stop the RDMA driver first. Otherwise, the RDMA driver will run into unrecoverable errors when destructive firmware tests are being performed. The irq_re_init parameter used in the half close and half open sequence when preparing the NIC for offline tests should be set to true because the RDMA driver will free all IRQs before the offline tests begin. Fixes: 55fd0cf320c3 ("bnxt_en: Add external loopback test to ethtool selftest.") Reviewed-by: Edwin Peer Reviewed-by: Ben Li Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++----- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 12 +++++++++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4f94136a011a..23bbb1c5812d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10330,12 +10330,12 @@ int bnxt_half_open_nic(struct bnxt *bp) goto half_open_err; } - rc = bnxt_alloc_mem(bp, false); + rc = bnxt_alloc_mem(bp, true); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); goto half_open_err; } - rc = bnxt_init_nic(bp, false); + rc = bnxt_init_nic(bp, true); if (rc) { netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); goto half_open_err; @@ -10344,7 +10344,7 @@ int bnxt_half_open_nic(struct bnxt *bp) half_open_err: bnxt_free_skbs(bp); - bnxt_free_mem(bp, false); + bnxt_free_mem(bp, true); dev_close(bp->dev); return rc; } @@ -10354,9 +10354,9 @@ half_open_err: */ void bnxt_half_close_nic(struct bnxt *bp) { - bnxt_hwrm_resource_free(bp, false, false); + bnxt_hwrm_resource_free(bp, false, true); bnxt_free_skbs(bp); - bnxt_free_mem(bp, false); + bnxt_free_mem(bp, true); } void bnxt_reenable_sriov(struct bnxt *bp) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index e195f4a669d8..a85b18858b32 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -25,6 +25,7 @@ #include "bnxt_hsi.h" #include "bnxt.h" #include "bnxt_hwrm.h" +#include "bnxt_ulp.h" #include "bnxt_xdp.h" #include "bnxt_ptp.h" #include "bnxt_ethtool.h" @@ -3551,9 +3552,12 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, if (!offline) { bnxt_run_fw_tests(bp, test_mask, &test_results); } else { - rc = bnxt_close_nic(bp, false, false); - if (rc) + bnxt_ulp_stop(bp); + rc = bnxt_close_nic(bp, true, false); + if (rc) { + bnxt_ulp_start(bp, rc); return; + } bnxt_run_fw_tests(bp, test_mask, &test_results); buf[BNXT_MACLPBK_TEST_IDX] = 1; @@ -3563,6 +3567,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, if (rc) { bnxt_hwrm_mac_loopback(bp, false); etest->flags |= ETH_TEST_FL_FAILED; + bnxt_ulp_start(bp, rc); return; } if (bnxt_run_loopback(bp)) @@ -3588,7 +3593,8 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, } bnxt_hwrm_phy_loopback(bp, false, false); bnxt_half_close_nic(bp); - rc = bnxt_open_nic(bp, false, true); + rc = bnxt_open_nic(bp, true, true); + bnxt_ulp_start(bp, rc); } if (rc || bnxt_test_irq(bp)) { buf[BNXT_IRQ_TEST_IDX] = 1; -- cgit v1.2.3 From cfcab3b3b61584a02bb523ffa99564eafa761dfe Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 20 Feb 2022 04:05:49 -0500 Subject: bnxt_en: Fix occasional ethtool -t loopback test failures In the current code, we setup the port to PHY or MAC loopback mode and then transmit a test broadcast packet for the loopback test. This scheme fails sometime if the port is shared with management firmware that can also send packets. The driver may receive the management firmware's packet and the test will fail when the contents don't match the test packet. Change the test packet to use it's own MAC address as the destination and setup the port to only receive it's own MAC address. This should filter out other packets sent by management firmware. Fixes: 91725d89b97a ("bnxt_en: Add PHY loopback to ethtool self-test.") Reviewed-by: Pavan Chebbi Reviewed-by: Edwin Peer Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 23bbb1c5812d..785436f6dd24 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8639,6 +8639,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) vnic->uc_filter_count = 1; vnic->rx_mask = 0; + if (test_bit(BNXT_STATE_HALF_OPEN, &bp->state)) + goto skip_rx_mask; + if (bp->dev->flags & IFF_BROADCAST) vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; @@ -8659,6 +8662,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) if (rc) goto err_out; +skip_rx_mask: rc = bnxt_hwrm_set_coal(bp); if (rc) netdev_warn(bp->dev, "HWRM set coalescing failure rc: %x\n", @@ -10335,8 +10339,10 @@ int bnxt_half_open_nic(struct bnxt *bp) netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); goto half_open_err; } + set_bit(BNXT_STATE_HALF_OPEN, &bp->state); rc = bnxt_init_nic(bp, true); if (rc) { + clear_bit(BNXT_STATE_HALF_OPEN, &bp->state); netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); goto half_open_err; } @@ -10357,6 +10363,7 @@ void bnxt_half_close_nic(struct bnxt *bp) bnxt_hwrm_resource_free(bp, false, true); bnxt_free_skbs(bp); bnxt_free_mem(bp, true); + clear_bit(BNXT_STATE_HALF_OPEN, &bp->state); } void bnxt_reenable_sriov(struct bnxt *bp) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 440dfeb4948b..666fc1e7a7d2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1921,6 +1921,7 @@ struct bnxt { #define BNXT_STATE_RECOVER 12 #define BNXT_STATE_FW_NON_FATAL_COND 13 #define BNXT_STATE_FW_ACTIVATE_RESET 14 +#define BNXT_STATE_HALF_OPEN 15 /* For offline ethtool tests */ #define BNXT_NO_FW_ACCESS(bp) \ (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index a85b18858b32..8aaa2335f848 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -3458,7 +3458,7 @@ static int bnxt_run_loopback(struct bnxt *bp) if (!skb) return -ENOMEM; data = skb_put(skb, pkt_size); - eth_broadcast_addr(data); + ether_addr_copy(&data[i], bp->dev->dev_addr); i += ETH_ALEN; ether_addr_copy(&data[i], bp->dev->dev_addr); i += ETH_ALEN; -- cgit v1.2.3 From 8cdb15924252e27af16c4a8fe0fc606ce5fd04dc Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Sun, 20 Feb 2022 04:05:50 -0500 Subject: bnxt_en: Fix incorrect multicast rx mask setting when not requested We should setup multicast only when net_device flags explicitly has IFF_MULTICAST set. Otherwise we will incorrectly turn it on even when not asked. Fix it by only passing the multicast table to the firmware if IFF_MULTICAST is set. Fixes: 7d2837dd7a32 ("bnxt_en: Setup multicast properly after resetting device.") Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 785436f6dd24..fc5b1d816bdb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4747,8 +4747,10 @@ static int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, u16 vnic_id) return rc; req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); - req->num_mc_entries = cpu_to_le32(vnic->mc_list_count); - req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping); + if (vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_MCAST) { + req->num_mc_entries = cpu_to_le32(vnic->mc_list_count); + req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping); + } req->mask = cpu_to_le32(vnic->rx_mask); return hwrm_req_send_silent(bp, req); } @@ -8651,7 +8653,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) if (bp->dev->flags & IFF_ALLMULTI) { vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; vnic->mc_list_count = 0; - } else { + } else if (bp->dev->flags & IFF_MULTICAST) { u32 mask = 0; bnxt_mc_list_updated(bp, &mask); @@ -10779,7 +10781,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) if (dev->flags & IFF_ALLMULTI) { mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; vnic->mc_list_count = 0; - } else { + } else if (dev->flags & IFF_MULTICAST) { mc_update = bnxt_mc_list_updated(bp, &mask); } @@ -10856,9 +10858,10 @@ skip_uc: !bnxt_promisc_ok(bp)) vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); - if (rc && vnic->mc_list_count) { + if (rc && (vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_MCAST)) { netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n", rc); + vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_MCAST; vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; vnic->mc_list_count = 0; rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); -- cgit v1.2.3 From 0e0e3c5358470cbad10bd7ca29f84a44d179d286 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Sun, 20 Feb 2022 04:05:51 -0500 Subject: bnxt_en: Restore the resets_reliable flag in bnxt_open() During ifdown, we call bnxt_inv_fw_health_reg() which will clear both the status_reliable and resets_reliable flags if these registers are mapped. This is correct because a FW reset during ifdown will clear these register mappings. If we detect that FW has gone through reset during the next ifup, we will remap these registers. But during normal ifup with no FW reset, we need to restore the resets_reliable flag otherwise we will not show the reset counter during devlink diagnose. Fixes: 8cc95ceb7087 ("bnxt_en: improve fw diagnose devlink health messages") Reviewed-by: Vikas Gupta Reviewed-by: Pavan Chebbi Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fc5b1d816bdb..b1c98d1408b8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7789,6 +7789,19 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp) return 0; } +static void bnxt_remap_fw_health_regs(struct bnxt *bp) +{ + if (!bp->fw_health) + return; + + if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) { + bp->fw_health->status_reliable = true; + bp->fw_health->resets_reliable = true; + } else { + bnxt_try_map_fw_health_reg(bp); + } +} + static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; @@ -9856,8 +9869,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) resc_reinit = true; if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE) fw_reset = true; - else if (bp->fw_health && !bp->fw_health->status_reliable) - bnxt_try_map_fw_health_reg(bp); + else + bnxt_remap_fw_health_regs(bp); if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) { netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n"); -- cgit v1.2.3 From b891106da52b2c12dbaf73400f6d225b06a38d80 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 20 Feb 2022 04:05:52 -0500 Subject: bnxt_en: Increase firmware message response DMA wait time When polling for the firmware message response, we first poll for the response message header. Once the valid length is detected in the header, we poll for the valid bit at the end of the message which signals DMA completion. Normally, this poll time for DMA completion is extremely short (0 to a few usec). But on some devices under some rare conditions, it can be up to about 20 msec. Increase this delay to 50 msec and use udelay() for the first 10 usec for the common case, and usleep_range() beyond that. Also, change the error message to include the above delay time when printing the timeout value. Fixes: 3c8c20db769c ("bnxt_en: move HWRM API implementation into separate file") Reviewed-by: Vladimir Olovyannikov Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c | 12 +++++++++--- drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c index 566c9487ef55..b01d42928a53 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c @@ -644,17 +644,23 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx) /* Last byte of resp contains valid bit */ valid = ((u8 *)ctx->resp) + len - 1; - for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) { + for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; ) { /* make sure we read from updated DMA memory */ dma_rmb(); if (*valid) break; - usleep_range(1, 5); + if (j < 10) { + udelay(1); + j++; + } else { + usleep_range(20, 30); + j += 20; + } } if (j >= HWRM_VALID_BIT_DELAY_USEC) { hwrm_err(bp, ctx, "Error (timeout: %u) msg {0x%x 0x%x} len:%d v:%d\n", - hwrm_total_timeout(i), req_type, + hwrm_total_timeout(i) + j, req_type, le16_to_cpu(ctx->req->seq_id), len, *valid); goto exit; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h index d52bd2d63aec..c98032e38188 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h @@ -90,7 +90,7 @@ static inline unsigned int hwrm_total_timeout(unsigned int n) } -#define HWRM_VALID_BIT_DELAY_USEC 150 +#define HWRM_VALID_BIT_DELAY_USEC 50000 static inline bool bnxt_cfa_hwrm_message(u16 req_type) { -- cgit v1.2.3 From 1278d17a1fb860e7eab4bc3ff4b026a87cbf5105 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Sun, 20 Feb 2022 04:05:53 -0500 Subject: bnxt_en: Fix devlink fw_activate To install a livepatch, first flash the package to NVM, and then activate the patch through the "HWRM_FW_LIVEPATCH" fw command. To uninstall a patch from NVM, flash the removal package and then activate it through the "HWRM_FW_LIVEPATCH" fw command. The "HWRM_FW_LIVEPATCH" fw command has to consider following scenarios: 1. no patch in NVM and no patch active. Do nothing. 2. patch in NVM, but not active. Activate the patch currently in NVM. 3. patch is not in NVM, but active. Deactivate the patch. 4. patch in NVM and the patch active. Do nothing. Fix the code to handle these scenarios during devlink "fw_activate". To install and activate a live patch: devlink dev flash pci/0000:c1:00.0 file thor_patch.pkg devlink -f dev reload pci/0000:c1:00.0 action fw_activate limit no_reset To remove and deactivate a live patch: devlink dev flash pci/0000:c1:00.0 file thor_patch_rem.pkg devlink -f dev reload pci/0000:c1:00.0 action fw_activate limit no_reset Fixes: 3c4153394e2c ("bnxt_en: implement firmware live patching") Reviewed-by: Vikas Gupta Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 39 ++++++++++++++++++----- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 4da31b1b84f9..f6e21fac0e69 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -367,6 +367,16 @@ bnxt_dl_livepatch_report_err(struct bnxt *bp, struct netlink_ext_ack *extack, } } +/* Live patch status in NVM */ +#define BNXT_LIVEPATCH_NOT_INSTALLED 0 +#define BNXT_LIVEPATCH_INSTALLED FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL +#define BNXT_LIVEPATCH_REMOVED FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE +#define BNXT_LIVEPATCH_MASK (FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL | \ + FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE) +#define BNXT_LIVEPATCH_ACTIVATED BNXT_LIVEPATCH_MASK + +#define BNXT_LIVEPATCH_STATE(flags) ((flags) & BNXT_LIVEPATCH_MASK) + static int bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) { @@ -374,8 +384,9 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) struct hwrm_fw_livepatch_query_input *query_req; struct hwrm_fw_livepatch_output *patch_resp; struct hwrm_fw_livepatch_input *patch_req; + u16 flags, live_patch_state; + bool activated = false; u32 installed = 0; - u16 flags; u8 target; int rc; @@ -394,7 +405,6 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) hwrm_req_drop(bp, query_req); return rc; } - patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_ACTIVATE; patch_req->loadtype = FW_LIVEPATCH_REQ_LOADTYPE_NVM_INSTALL; patch_resp = hwrm_req_hold(bp, patch_req); @@ -407,12 +417,20 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) } flags = le16_to_cpu(query_resp->status_flags); - if (~flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_INSTALL) + live_patch_state = BNXT_LIVEPATCH_STATE(flags); + + if (live_patch_state == BNXT_LIVEPATCH_NOT_INSTALLED) continue; - if ((flags & FW_LIVEPATCH_QUERY_RESP_STATUS_FLAGS_ACTIVE) && - !strncmp(query_resp->active_ver, query_resp->install_ver, - sizeof(query_resp->active_ver))) + + if (live_patch_state == BNXT_LIVEPATCH_ACTIVATED) { + activated = true; continue; + } + + if (live_patch_state == BNXT_LIVEPATCH_INSTALLED) + patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_ACTIVATE; + else if (live_patch_state == BNXT_LIVEPATCH_REMOVED) + patch_req->opcode = FW_LIVEPATCH_REQ_OPCODE_DEACTIVATE; patch_req->fw_target = target; rc = hwrm_req_send(bp, patch_req); @@ -424,8 +442,13 @@ bnxt_dl_livepatch_activate(struct bnxt *bp, struct netlink_ext_ack *extack) } if (!rc && !installed) { - NL_SET_ERR_MSG_MOD(extack, "No live patches found"); - rc = -ENOENT; + if (activated) { + NL_SET_ERR_MSG_MOD(extack, "Live patch already activated"); + rc = -EEXIST; + } else { + NL_SET_ERR_MSG_MOD(extack, "No live patches found"); + rc = -ENOENT; + } } hwrm_req_drop(bp, query_req); hwrm_req_drop(bp, patch_req); -- cgit v1.2.3 From cfb92440ee71adcc2105b0890bb01ac3cddb8507 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Feb 2022 13:07:20 -0800 Subject: Linux 5.17-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 51e142f760f7..289ce2be8032 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Superb Owl # *DOCUMENTATION* -- cgit v1.2.3 From 93dd04ab0b2b32ae6e70284afc764c577156658e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 18 Feb 2022 14:13:58 +0100 Subject: slab: remove __alloc_size attribute from __kmalloc_track_caller Commit c37495d6254c ("slab: add __alloc_size attributes for better bounds checking") added __alloc_size attributes to a bunch of kmalloc function prototypes. Unfortunately the change to __kmalloc_track_caller seems to cause clang to generate broken code and the first time this is called when booting, the box will crash. While the compiler problems are being reworked and attempted to be solved [1], let's just drop the attribute to solve the issue now. Once it is resolved it can be added back. [1] https://github.com/ClangBuiltLinux/linux/issues/1599 Fixes: c37495d6254c ("slab: add __alloc_size attributes for better bounds checking") Cc: stable Cc: Kees Cook Cc: Daniel Micay Cc: Nick Desaulniers Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Andrew Morton Cc: Vlastimil Babka Cc: Nathan Chancellor Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Cc: llvm@lists.linux.dev Signed-off-by: Greg Kroah-Hartman Acked-by: Nick Desaulniers Acked-by: David Rientjes Acked-by: Kees Cook Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/r/20220218131358.3032912-1-gregkh@linuxfoundation.org --- include/linux/slab.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 37bde99b74af..5b6193fd8bd9 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -660,8 +660,7 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag * allocator where we care about the real place the memory allocation * request comes from. */ -extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) - __alloc_size(1); +extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller); #define kmalloc_track_caller(size, flags) \ __kmalloc_track_caller(size, flags, _RET_IP_) -- cgit v1.2.3 From 221944736f66f38e9bdbce52c616d10df7f15c54 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 16 Feb 2022 12:43:30 -0800 Subject: tools/cgroup/slabinfo: update to work with struct slab After the introduction of the dedicated struct slab to describe slab pages by commit d122019bf061 ("mm: Split slab into its own type") and the following removal of the corresponding struct page's fields by commit 07f910f9b729 ("mm: Remove slab from struct page") the memcg_slabinfo tool broke. An attempt to run it produces a trace like this: Traceback (most recent call last): File "/usr/bin/drgn", line 33, in sys.exit(load_entry_point('drgn==0.0.16', 'console_scripts', 'drgn')()) File "/usr/lib64/python3.9/site-packages/drgn/internal/cli.py", line 133, in main runpy.run_path(args.script[0], init_globals=init_globals, run_name="__main__") File "/usr/lib64/python3.9/runpy.py", line 268, in run_path return _run_module_code(code, init_globals, run_name, File "/usr/lib64/python3.9/runpy.py", line 97, in _run_module_code _run_code(code, mod_globals, init_globals, File "/usr/lib64/python3.9/runpy.py", line 87, in _run_code exec(code, run_globals) File "memcg_slabinfo.py", line 226, in main() File "memcg_slabinfo.py", line 199, in main cache = page.slab_cache AttributeError: 'struct page' has no member 'slab_cache' The problem can be fixed by explicitly casting struct page * to struct slab * for slab pages. The tools works as expected with this fix, e.g.: cred_jar 776 776 192 21 1 : tunables 0 0 0 : slabdata 547 547 0 kmalloc-cg-32 6 6 32 128 1 : tunables 0 0 0 : slabdata 9 9 0 files_cache 3 3 832 39 8 : tunables 0 0 0 : slabdata 8 8 0 kmalloc-cg-512 1 1 512 32 4 : tunables 0 0 0 : slabdata 10 10 0 task_struct 10 10 6720 4 8 : tunables 0 0 0 : slabdata 63 63 0 mm_struct 3 3 1664 19 8 : tunables 0 0 0 : slabdata 9 9 0 kmalloc-cg-16 1 1 16 256 1 : tunables 0 0 0 : slabdata 8 8 0 pde_opener 1 1 40 102 1 : tunables 0 0 0 : slabdata 8 8 0 anon_vma_chain 375 375 64 64 1 : tunables 0 0 0 : slabdata 81 81 0 radix_tree_node 3 3 584 28 4 : tunables 0 0 0 : slabdata 419 419 0 dentry 98 98 312 26 2 : tunables 0 0 0 : slabdata 1420 1420 0 btrfs_inode 3 3 2368 13 8 : tunables 0 0 0 : slabdata 730 730 0 signal_cache 3 3 1600 20 8 : tunables 0 0 0 : slabdata 17 17 0 sighand_cache 3 3 2240 14 8 : tunables 0 0 0 : slabdata 20 20 0 filp 90 90 512 32 4 : tunables 0 0 0 : slabdata 95 95 0 anon_vma 214 214 200 20 1 : tunables 0 0 0 : slabdata 162 162 0 kmalloc-cg-1k 1 1 1024 32 8 : tunables 0 0 0 : slabdata 22 22 0 pid 10 10 256 32 2 : tunables 0 0 0 : slabdata 14 14 0 kmalloc-cg-64 2 2 64 64 1 : tunables 0 0 0 : slabdata 8 8 0 kmalloc-cg-96 3 3 96 42 1 : tunables 0 0 0 : slabdata 8 8 0 sock_inode_cache 5 5 1408 23 8 : tunables 0 0 0 : slabdata 29 29 0 UNIX 7 7 1920 17 8 : tunables 0 0 0 : slabdata 21 21 0 inode_cache 36 36 1152 28 8 : tunables 0 0 0 : slabdata 680 680 0 proc_inode_cache 26 26 1224 26 8 : tunables 0 0 0 : slabdata 64 64 0 kmalloc-cg-2k 2 2 2048 16 8 : tunables 0 0 0 : slabdata 9 9 0 v2: change naming and count_partial()/count_free()/for_each_slab() signatures to work with slabs, suggested by Matthew Wilcox Fixes: 07f910f9b729 ("mm: Remove slab from struct page") Reported-by: Vasily Averin Signed-off-by: Roman Gushchin Tested-by: Vasily Averin Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/linux-patches/Yg2cKKnIboNu7j+p@carbon.DHCP.thefacebook.com/ --- tools/cgroup/memcg_slabinfo.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index 1600b17dbb8a..1d3a90d93fe2 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -11,7 +11,7 @@ from drgn.helpers.linux import list_for_each_entry, list_empty from drgn.helpers.linux import for_each_page from drgn.helpers.linux.cpumask import for_each_online_cpu from drgn.helpers.linux.percpu import per_cpu_ptr -from drgn import container_of, FaultError, Object +from drgn import container_of, FaultError, Object, cast DESC = """ @@ -69,15 +69,15 @@ def oo_objects(s): def count_partial(n, fn): - nr_pages = 0 - for page in list_for_each_entry('struct page', n.partial.address_of_(), - 'lru'): - nr_pages += fn(page) - return nr_pages + nr_objs = 0 + for slab in list_for_each_entry('struct slab', n.partial.address_of_(), + 'slab_list'): + nr_objs += fn(slab) + return nr_objs -def count_free(page): - return page.objects - page.inuse +def count_free(slab): + return slab.objects - slab.inuse def slub_get_slabinfo(s, cfg): @@ -145,14 +145,14 @@ def detect_kernel_config(): return cfg -def for_each_slab_page(prog): +def for_each_slab(prog): PGSlab = 1 << prog.constant('PG_slab') PGHead = 1 << prog.constant('PG_head') for page in for_each_page(prog): try: if page.flags.value_() & PGSlab: - yield page + yield cast('struct slab *', page) except FaultError: pass @@ -190,13 +190,13 @@ def main(): 'list'): obj_cgroups.add(ptr.value_()) - # look over all slab pages, belonging to non-root memcgs - # and look for objects belonging to the given memory cgroup - for page in for_each_slab_page(prog): - objcg_vec_raw = page.memcg_data.value_() + # look over all slab folios and look for objects belonging + # to the given memory cgroup + for slab in for_each_slab(prog): + objcg_vec_raw = slab.memcg_data.value_() if objcg_vec_raw == 0: continue - cache = page.slab_cache + cache = slab.slab_cache if not cache: continue addr = cache.value_() -- cgit v1.2.3 From cc20cced0598d9a5ff91ae4ab147b3b5e99ee819 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Fri, 18 Feb 2022 22:35:24 +0800 Subject: gso: do not skip outer ip header in case of ipip and net_failover We encounter a tcp drop issue in our cloud environment. Packet GROed in host forwards to a VM virtio_net nic with net_failover enabled. VM acts as a IPVS LB with ipip encapsulation. The full path like: host gro -> vm virtio_net rx -> net_failover rx -> ipvs fullnat -> ipip encap -> net_failover tx -> virtio_net tx When net_failover transmits a ipip pkt (gso_type = 0x0103, which means SKB_GSO_TCPV4, SKB_GSO_DODGY and SKB_GSO_IPXIP4), there is no gso did because it supports TSO and GSO_IPXIP4. But network_header points to inner ip header. Call Trace: tcp4_gso_segment ------> return NULL inet_gso_segment ------> inner iph, network_header points to ipip_gso_segment inet_gso_segment ------> outer iph skb_mac_gso_segment Afterwards virtio_net transmits the pkt, only inner ip header is modified. And the outer one just keeps unchanged. The pkt will be dropped in remote host. Call Trace: inet_gso_segment ------> inner iph, outer iph is skipped skb_mac_gso_segment __skb_gso_segment validate_xmit_skb validate_xmit_skb_list sch_direct_xmit __qdisc_run __dev_queue_xmit ------> virtio_net dev_hard_start_xmit __dev_queue_xmit ------> net_failover ip_finish_output2 ip_output iptunnel_xmit ip_tunnel_xmit ipip_tunnel_xmit ------> ipip dev_hard_start_xmit __dev_queue_xmit ip_finish_output2 ip_output ip_forward ip_rcv __netif_receive_skb_one_core netif_receive_skb_internal napi_gro_receive receive_buf virtnet_poll net_rx_action The root cause of this issue is specific with the rare combination of SKB_GSO_DODGY and a tunnel device that adds an SKB_GSO_ tunnel option. SKB_GSO_DODGY is set from external virtio_net. We need to reset network header when callbacks.gso_segment() returns NULL. This patch also includes ipv6_gso_segment(), considering SIT, etc. Fixes: cb32f511a70b ("ipip: add GSO/TSO support") Signed-off-by: Tao Liu Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 5 ++++- net/ipv6/ip6_offload.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9c465bac1eb0..72fde2888ad2 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1376,8 +1376,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, } ops = rcu_dereference(inet_offloads[proto]); - if (likely(ops && ops->callbacks.gso_segment)) + if (likely(ops && ops->callbacks.gso_segment)) { segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; + } if (IS_ERR_OR_NULL(segs)) goto out; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b29e9ba5e113..5f577e21459b 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -114,6 +114,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; } if (IS_ERR_OR_NULL(segs)) -- cgit v1.2.3 From 228339662b398a59b3560cd571deb8b25b253c7e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 21 Feb 2022 05:49:30 -0700 Subject: io_uring: don't convert to jiffies for waiting on timeouts If an application calls io_uring_enter(2) with a timespec passed in, convert that timespec to ktime_t rather than jiffies. The latter does not provide the granularity the application may expect, and may in fact provided different granularity on different systems, depending on what the HZ value is configured at. Turn the timespec into an absolute ktime_t, and use that with schedule_hrtimeout() instead. Link: https://github.com/axboe/liburing/issues/531 Cc: stable@vger.kernel.org Reported-by: Bob Chen Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b928928617a4..928446fe1319 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7694,7 +7694,7 @@ static int io_run_task_work_sig(void) /* when returns >0, the caller should retry */ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - signed long *timeout) + ktime_t timeout) { int ret; @@ -7706,8 +7706,9 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, if (test_bit(0, &ctx->check_cq_overflow)) return 1; - *timeout = schedule_timeout(*timeout); - return !*timeout ? -ETIME : 1; + if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) + return -ETIME; + return 1; } /* @@ -7720,7 +7721,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, { struct io_wait_queue iowq; struct io_rings *rings = ctx->rings; - signed long timeout = MAX_SCHEDULE_TIMEOUT; + ktime_t timeout = KTIME_MAX; int ret; do { @@ -7736,7 +7737,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, if (get_timespec64(&ts, uts)) return -EFAULT; - timeout = timespec64_to_jiffies(&ts); + timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); } if (sig) { @@ -7768,7 +7769,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); - ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); + ret = io_cqring_wait_schedule(ctx, &iowq, timeout); finish_wait(&ctx->cq_wait, &iowq.wq); cond_resched(); } while (ret > 0); -- cgit v1.2.3 From b6ad6261d27708567b309fdb3102b12c42a070cc Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 21 Feb 2022 13:45:57 +0200 Subject: net: mdio-ipq4019: add delay after clock enable Experimentation shows that PHY detect might fail when the code attempts MDIO bus read immediately after clock enable. Add delay to stabilize the clock before bus access. PHY detect failure started to show after commit 7590fc6f80ac ("net: mdio: Demote probed message to debug print") that removed coincidental delay between clock enable and bus access. 10ms is meant to match the time it take to send the probed message over UART at 115200 bps. This might be a far overshoot. Fixes: 23a890d493e3 ("net: mdio: Add the reset function for IPQ MDIO driver") Signed-off-by: Baruch Siach Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/mdio/mdio-ipq4019.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c index 5f4cd24a0241..4eba5a91075c 100644 --- a/drivers/net/mdio/mdio-ipq4019.c +++ b/drivers/net/mdio/mdio-ipq4019.c @@ -200,7 +200,11 @@ static int ipq_mdio_reset(struct mii_bus *bus) if (ret) return ret; - return clk_prepare_enable(priv->mdio_clk); + ret = clk_prepare_enable(priv->mdio_clk); + if (ret == 0) + mdelay(10); + + return ret; } static int ipq4019_mdio_probe(struct platform_device *pdev) -- cgit v1.2.3 From ae09639e3b2a0291b37b122c94dd4f773cd4e513 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Wed, 16 Feb 2022 22:53:02 +0000 Subject: platform/x86: int3472: Add terminator to gpiod_lookup_table Without the terminator, if a con_id is passed to gpio_find() that does not exist in the lookup table the function will not stop looping correctly, and eventually cause an oops. Fixes: 19d8d6e36b4b ("platform/x86: int3472: Pass tps68470_regulator_platform_data to the tps68470-regulator MFD-cell") Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20220216225304.53911-5-djrscally@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/int3472/tps68470_board_data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/int3472/tps68470_board_data.c b/drivers/platform/x86/intel/int3472/tps68470_board_data.c index f93d437fd192..525f09a3b5ff 100644 --- a/drivers/platform/x86/intel/int3472/tps68470_board_data.c +++ b/drivers/platform/x86/intel/int3472/tps68470_board_data.c @@ -100,7 +100,8 @@ static struct gpiod_lookup_table surface_go_tps68470_gpios = { .dev_id = "i2c-INT347A:00", .table = { GPIO_LOOKUP("tps68470-gpio", 9, "reset", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("tps68470-gpio", 7, "powerdown", GPIO_ACTIVE_LOW) + GPIO_LOOKUP("tps68470-gpio", 7, "powerdown", GPIO_ACTIVE_LOW), + { } } }; -- cgit v1.2.3 From 6069da443bf65f513bb507bb21e2f87cfb1ad0b6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 18 Feb 2022 12:45:32 +0100 Subject: netfilter: nf_tables: unregister flowtable hooks on netns exit Unregister flowtable hooks before they are releases via nf_tables_flowtable_destroy() otherwise hook core reports UAF. BUG: KASAN: use-after-free in nf_hook_entries_grow+0x5a7/0x700 net/netfilter/core.c:142 net/netfilter/core.c:142 Read of size 4 at addr ffff8880736f7438 by task syz-executor579/3666 CPU: 0 PID: 3666 Comm: syz-executor579 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] __dump_stack lib/dump_stack.c:88 [inline] lib/dump_stack.c:106 dump_stack_lvl+0x1dc/0x2d8 lib/dump_stack.c:106 lib/dump_stack.c:106 print_address_description+0x65/0x380 mm/kasan/report.c:247 mm/kasan/report.c:247 __kasan_report mm/kasan/report.c:433 [inline] __kasan_report mm/kasan/report.c:433 [inline] mm/kasan/report.c:450 kasan_report+0x19a/0x1f0 mm/kasan/report.c:450 mm/kasan/report.c:450 nf_hook_entries_grow+0x5a7/0x700 net/netfilter/core.c:142 net/netfilter/core.c:142 __nf_register_net_hook+0x27e/0x8d0 net/netfilter/core.c:429 net/netfilter/core.c:429 nf_register_net_hook+0xaa/0x180 net/netfilter/core.c:571 net/netfilter/core.c:571 nft_register_flowtable_net_hooks+0x3c5/0x730 net/netfilter/nf_tables_api.c:7232 net/netfilter/nf_tables_api.c:7232 nf_tables_newflowtable+0x2022/0x2cf0 net/netfilter/nf_tables_api.c:7430 net/netfilter/nf_tables_api.c:7430 nfnetlink_rcv_batch net/netfilter/nfnetlink.c:513 [inline] nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] nfnetlink_rcv_batch net/netfilter/nfnetlink.c:513 [inline] net/netfilter/nfnetlink.c:652 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] net/netfilter/nfnetlink.c:652 nfnetlink_rcv+0x10e6/0x2550 net/netfilter/nfnetlink.c:652 net/netfilter/nfnetlink.c:652 __nft_release_hook() calls nft_unregister_flowtable_net_hooks() which only unregisters the hooks, then after RCU grace period, it is guaranteed that no packets add new entries to the flowtable (no flow offload rules and flowtable hooks are reachable from packet path), so it is safe to call nf_flow_table_free() which cleans up the remaining entries from the flowtable (both software and hardware) and it unbinds the flow_block. Fixes: ff4bf2f42a40 ("netfilter: nf_tables: add nft_unregister_flowtable_hook()") Reported-by: syzbot+e918523f77e62790d6d9@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5fa16990da95..3081c4399f10 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9636,10 +9636,13 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain); static void __nft_release_hook(struct net *net, struct nft_table *table) { + struct nft_flowtable *flowtable; struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) nf_tables_unregister_hook(net, table, chain); + list_for_each_entry(flowtable, &table->flowtables, list) + nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list); } static void __nft_release_hooks(struct net *net) -- cgit v1.2.3 From 1a58f84ea5df7f026bf92a0009f931bf547fe965 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 18 Feb 2022 13:17:05 +0100 Subject: netfilter: nft_limit: fix stateful object memory leak We need to provide a destroy callback to release the extra fields. Fixes: 3b9e2ea6c11b ("netfilter: nft_limit: move stateful fields out of expression data") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_limit.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index c4f308460dd1..a726b623963d 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -340,11 +340,20 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb, return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS); } +static void nft_limit_obj_pkts_destroy(const struct nft_ctx *ctx, + struct nft_object *obj) +{ + struct nft_limit_priv_pkts *priv = nft_obj_data(obj); + + nft_limit_destroy(ctx, &priv->limit); +} + static struct nft_object_type nft_limit_obj_type; static const struct nft_object_ops nft_limit_obj_pkts_ops = { .type = &nft_limit_obj_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)), .init = nft_limit_obj_pkts_init, + .destroy = nft_limit_obj_pkts_destroy, .eval = nft_limit_obj_pkts_eval, .dump = nft_limit_obj_pkts_dump, }; @@ -378,11 +387,20 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb, return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); } +static void nft_limit_obj_bytes_destroy(const struct nft_ctx *ctx, + struct nft_object *obj) +{ + struct nft_limit_priv *priv = nft_obj_data(obj); + + nft_limit_destroy(ctx, priv); +} + static struct nft_object_type nft_limit_obj_type; static const struct nft_object_ops nft_limit_obj_bytes_ops = { .type = &nft_limit_obj_type, .size = sizeof(struct nft_limit_priv), .init = nft_limit_obj_bytes_init, + .destroy = nft_limit_obj_bytes_destroy, .eval = nft_limit_obj_bytes_eval, .dump = nft_limit_obj_bytes_dump, }; -- cgit v1.2.3 From 9d2231c5d74e13b2a0546fee6737ee4446017903 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 21 Feb 2022 11:03:13 +0100 Subject: lib/iov_iter: initialize "flags" in new pipe_buffer The functions copy_page_to_iter_pipe() and push_pipe() can both allocate a new pipe_buffer, but the "flags" member initializer is missing. Fixes: 241699cd72a8 ("new iov_iter flavour: pipe-backed") To: Alexander Viro To: linux-fsdevel@vger.kernel.org To: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Max Kellermann Signed-off-by: Al Viro --- lib/iov_iter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index b0e0acdf96c1..6dd5330f7a99 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -414,6 +414,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by return 0; buf->ops = &page_cache_pipe_buf_ops; + buf->flags = 0; get_page(page); buf->page = page; buf->offset = offset; @@ -577,6 +578,7 @@ static size_t push_pipe(struct iov_iter *i, size_t size, break; buf->ops = &default_pipe_buf_ops; + buf->flags = 0; buf->page = page; buf->offset = 0; buf->len = min_t(ssize_t, left, PAGE_SIZE); -- cgit v1.2.3 From dad3bdeef45f81a6e90204bcc85360bb76eccec7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Feb 2022 13:31:49 +0100 Subject: netfilter: nf_tables: fix memory leak during stateful obj update stateful objects can be updated from the control plane. The transaction logic allocates a temporary object for this purpose. The ->init function was called for this object, so plain kfree() leaks resources. We must call ->destroy function of the object. nft_obj_destroy does this, but it also decrements the module refcount, but the update path doesn't increment it. To avoid special-casing the update object release, do module_get for the update case too and release it via nft_obj_destroy(). Fixes: d62d0ba97b58 ("netfilter: nf_tables: Introduce stateful object update operation") Cc: Fernando Fernandez Mancera Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3081c4399f10..9cd1d7a62804 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6551,12 +6551,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, { struct nft_object *newobj; struct nft_trans *trans; - int err; + int err = -ENOMEM; + + if (!try_module_get(type->owner)) + return -ENOENT; trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, sizeof(struct nft_trans_obj)); if (!trans) - return -ENOMEM; + goto err_trans; newobj = nft_obj_init(ctx, type, attr); if (IS_ERR(newobj)) { @@ -6573,6 +6576,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, err_free_trans: kfree(trans); +err_trans: + module_put(type->owner); return err; } @@ -8185,7 +8190,7 @@ static void nft_obj_commit_update(struct nft_trans *trans) if (obj->ops->update) obj->ops->update(obj, newobj); - kfree(newobj); + nft_obj_destroy(&trans->ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) @@ -8976,7 +8981,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - kfree(nft_trans_obj_newobj(trans)); + nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { trans->ctx.table->use--; -- cgit v1.2.3 From bb49c6fa8b845591b317b0d7afea4ae60ec7f3aa Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 11 Feb 2022 10:01:36 +0100 Subject: block: clear iocb->private in blkdev_bio_end_io_async() iocb_bio_iopoll() expects iocb->private to be cleared before releasing the bio. We already do this in blkdev_bio_end_io(), but we forgot in the recently added blkdev_bio_end_io_async(). Fixes: 54a88eb838d3 ("block: add single bio async direct IO helper") Cc: asml.silence@gmail.com Signed-off-by: Stefano Garzarella Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220211090136.44471-1-sgarzare@redhat.com Signed-off-by: Jens Axboe --- block/fops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/fops.c b/block/fops.c index 4f59e0f5bf30..a18e7fbd97b8 100644 --- a/block/fops.c +++ b/block/fops.c @@ -289,6 +289,8 @@ static void blkdev_bio_end_io_async(struct bio *bio) struct kiocb *iocb = dio->iocb; ssize_t ret; + WRITE_ONCE(iocb->private, NULL); + if (likely(!bio->bi_status)) { ret = dio->size; iocb->ki_pos += ret; -- cgit v1.2.3 From 1b5f517cca36292076d9e38fa6e33a257703e62e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 21 Feb 2022 08:32:14 -0800 Subject: hwmon: Handle failure to register sensor with thermal zone correctly If an attempt is made to a sensor with a thermal zone and it fails, the call to devm_thermal_zone_of_sensor_register() may return -ENODEV. This may result in crashes similar to the following. Unable to handle kernel NULL pointer dereference at virtual address 00000000000003cd ... Internal error: Oops: 96000021 [#1] PREEMPT SMP ... pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : mutex_lock+0x18/0x60 lr : thermal_zone_device_update+0x40/0x2e0 sp : ffff800014c4fc60 x29: ffff800014c4fc60 x28: ffff365ee3f6e000 x27: ffffdde218426790 x26: ffff365ee3f6e000 x25: 0000000000000000 x24: ffff365ee3f6e000 x23: ffffdde218426870 x22: ffff365ee3f6e000 x21: 00000000000003cd x20: ffff365ee8bf3308 x19: ffffffffffffffed x18: 0000000000000000 x17: ffffdde21842689c x16: ffffdde1cb7a0b7c x15: 0000000000000040 x14: ffffdde21a4889a0 x13: 0000000000000228 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : 0000000001120000 x7 : 0000000000000001 x6 : 0000000000000000 x5 : 0068000878e20f07 x4 : 0000000000000000 x3 : 00000000000003cd x2 : ffff365ee3f6e000 x1 : 0000000000000000 x0 : 00000000000003cd Call trace: mutex_lock+0x18/0x60 hwmon_notify_event+0xfc/0x110 0xffffdde1cb7a0a90 0xffffdde1cb7a0b7c irq_thread_fn+0x2c/0xa0 irq_thread+0x134/0x240 kthread+0x178/0x190 ret_from_fork+0x10/0x20 Code: d503201f d503201f d2800001 aa0103e4 (c8e47c02) Jon Hunter reports that the exact call sequence is: hwmon_notify_event() --> hwmon_thermal_notify() --> thermal_zone_device_update() --> update_temperature() --> mutex_lock() The hwmon core needs to handle all errors returned from calls to devm_thermal_zone_of_sensor_register(). If the call fails with -ENODEV, report that the sensor was not attached to a thermal zone but continue to register the hwmon device. Reported-by: Jon Hunter Cc: Dmitry Osipenko Fixes: 1597b374af222 ("hwmon: Add notification support") Reviewed-by: Dmitry Osipenko Tested-by: Jon Hunter Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 3501a3ead4ba..3ae961986fc3 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -214,12 +214,14 @@ static int hwmon_thermal_add_sensor(struct device *dev, int index) tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata, &hwmon_thermal_ops); - /* - * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, - * so ignore that error but forward any other error. - */ - if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV)) - return PTR_ERR(tzd); + if (IS_ERR(tzd)) { + if (PTR_ERR(tzd) != -ENODEV) + return PTR_ERR(tzd); + dev_info(dev, "temp%d_input not attached to any thermal zone\n", + index + 1); + devm_kfree(dev, tdata); + return 0; + } err = devm_add_action(dev, hwmon_thermal_remove_sensor, &tdata->node); if (err) -- cgit v1.2.3 From 35f165f08950a876f1b95a61d79c93678fba2fd6 Mon Sep 17 00:00:00 2001 From: Vikash Chandola Date: Tue, 22 Feb 2022 13:12:53 +0000 Subject: hwmon: (pmbus) Clear pmbus fault/warning bits after read Almost all fault/warning bits in pmbus status registers remain set even after fault/warning condition are removed. As per pmbus specification these faults must be cleared by user. Modify hwmon behavior to clear fault/warning bit after fetching data if fault/warning bit was set. This allows to get fresh data in next read. Signed-off-by: Vikash Chandola Link: https://lore.kernel.org/r/20220222131253.2426834-1-vikash.chandola@linux.intel.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 776ee2237be2..ac2fbee1ba9c 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -911,6 +911,11 @@ static int pmbus_get_boolean(struct i2c_client *client, struct pmbus_boolean *b, pmbus_update_sensor_data(client, s2); regval = status & mask; + if (regval) { + ret = pmbus_write_byte_data(client, page, reg, regval); + if (ret) + goto unlock; + } if (s1 && s2) { s64 v1, v2; -- cgit v1.2.3 From 80912cef18f16f8fe59d1fb9548d4364342be360 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Tue, 22 Feb 2022 08:17:51 -0800 Subject: io_uring: disallow modification of rsrc_data during quiesce io_rsrc_ref_quiesce will unlock the uring while it waits for references to the io_rsrc_data to be killed. There are other places to the data that might add references to data via calls to io_rsrc_node_switch. There is a race condition where this reference can be added after the completion has been signalled. At this point the io_rsrc_ref_quiesce call will wake up and relock the uring, assuming the data is unused and can be freed - although it is actually being used. To fix this check in io_rsrc_ref_quiesce if a resource has been revived. Reported-by: syzbot+ca8bf833622a1662745b@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220222161751.995746-1-dylany@fb.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 928446fe1319..4715980e9015 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7926,7 +7926,15 @@ static __cold int io_rsrc_ref_quiesce(struct io_rsrc_data *data, ret = wait_for_completion_interruptible(&data->done); if (!ret) { mutex_lock(&ctx->uring_lock); - break; + if (atomic_read(&data->refs) > 0) { + /* + * it has been revived by another thread while + * we were unlocked + */ + mutex_unlock(&ctx->uring_lock); + } else { + break; + } } atomic_inc(&data->refs); -- cgit v1.2.3 From 6d3971dab239e7db1691690a02ce6becf30689cb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Feb 2022 16:16:39 +0100 Subject: cgroup: clarify cgroup_css_set_fork() With recent fixes for the permission checking when moving a task into a cgroup using a file descriptor to a cgroup's cgroup.procs file and calling write() it seems a good idea to clarify CLONE_INTO_CGROUP permission checking with a comment. Cc: Tejun Heo Cc: Signed-off-by: Christian Brauner Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index b31e1465868a..77702e089d6a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6161,6 +6161,20 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) if (ret) goto err; + /* + * Spawning a task directly into a cgroup works by passing a file + * descriptor to the target cgroup directory. This can even be an O_PATH + * file descriptor. But it can never be a cgroup.procs file descriptor. + * This was done on purpose so spawning into a cgroup could be + * conceptualized as an atomic + * + * fd = openat(dfd_cgroup, "cgroup.procs", ...); + * write(fd, , ...); + * + * sequence, i.e. it's a shorthand for the caller opening and writing + * cgroup.procs of the cgroup indicated by @dfd_cgroup. This allows us + * to always use the caller's credentials. + */ ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, !(kargs->flags & CLONE_THREAD), current->nsproxy->cgroup_ns); -- cgit v1.2.3 From 467a726b754f474936980da793b4ff2ec3e382a7 Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Thu, 17 Feb 2022 17:11:28 +0100 Subject: cgroup-v1: Correct privileges check in release_agent writes The idea is to check: a) the owning user_ns of cgroup_ns, b) capabilities in init_user_ns. The commit 24f600856418 ("cgroup-v1: Require capabilities to set release_agent") got this wrong in the write handler of release_agent since it checked user_ns of the opener (may be different from the owning user_ns of cgroup_ns). Secondly, to avoid possibly confused deputy, the capability of the opener must be checked. Fixes: 24f600856418 ("cgroup-v1: Require capabilities to set release_agent") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/stable/20220216121142.GB30035@blackbody.suse.cz/ Signed-off-by: Michal Koutný Reviewed-by: Masami Ichikawa(CIP) Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 0e877dbcfeea..afc6c0e9c966 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -546,6 +546,7 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup *cgrp; + struct cgroup_file_ctx *ctx; BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); @@ -553,8 +554,9 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, * Release agent gets called with all capabilities, * require capabilities to set release agent. */ - if ((of->file->f_cred->user_ns != &init_user_ns) || - !capable(CAP_SYS_ADMIN)) + ctx = of->priv; + if ((ctx->ns->user_ns != &init_user_ns) || + !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN)) return -EPERM; cgrp = cgroup_kn_lock_live(of->kn, false); -- cgit v1.2.3 From c70cd039f1d779126347a896a58876782dcc5284 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 16 Feb 2022 11:17:53 +0800 Subject: cpuset: Fix kernel-doc Fix the following W=1 kernel warnings: kernel/cgroup/cpuset.c:3718: warning: expecting prototype for cpuset_memory_pressure_bump(). Prototype was for __cpuset_memory_pressure_bump() instead. kernel/cgroup/cpuset.c:3568: warning: expecting prototype for cpuset_node_allowed(). Prototype was for __cpuset_node_allowed() instead. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 97c53f3cc917..5de18448016c 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3524,8 +3524,8 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) return cs; } -/** - * cpuset_node_allowed - Can we allocate on a memory node? +/* + * __cpuset_node_allowed - Can we allocate on a memory node? * @node: is this an allowed node? * @gfp_mask: memory allocation flags * @@ -3696,8 +3696,8 @@ void cpuset_print_current_mems_allowed(void) int cpuset_memory_pressure_enabled __read_mostly; -/** - * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. +/* + * __cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. * * Keep a running average of the rate of synchronous (direct) * page reclaim efforts initiated by tasks in each cpuset. @@ -3712,7 +3712,7 @@ int cpuset_memory_pressure_enabled __read_mostly; * "memory_pressure". Value displayed is an integer * representing the recent rate of entry into the synchronous * (direct) page reclaim by any task attached to the cpuset. - **/ + */ void __cpuset_memory_pressure_bump(void) { -- cgit v1.2.3 From 34f3eda8c8ffd4d0b2145ac11c91cc365cd1ada3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 14 Feb 2022 09:23:49 +0100 Subject: MAINTAINERS: sifive: drop Yash Shah Emails to Yash Shah bounce with "The email account that you tried to reach does not exist.", so drop him from all maintainer entries. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220214082349.162973-1-krzysztof.kozlowski@canonical.com --- Documentation/devicetree/bindings/gpio/sifive,gpio.yaml | 1 - Documentation/devicetree/bindings/pwm/pwm-sifive.yaml | 1 - Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml | 1 - MAINTAINERS | 6 ------ 4 files changed, 9 deletions(-) diff --git a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml index e04349567eeb..427c5873f96a 100644 --- a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: SiFive GPIO controller maintainers: - - Yash Shah - Paul Walmsley properties: diff --git a/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml b/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml index 84e66913d042..db41cd7bf150 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml +++ b/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml @@ -8,7 +8,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: SiFive PWM controller maintainers: - - Yash Shah - Sagar Kadam - Paul Walmsley diff --git a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml index 2b1f91603897..e2d330bd4608 100644 --- a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml +++ b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml @@ -9,7 +9,6 @@ title: SiFive L2 Cache Controller maintainers: - Sagar Kadam - - Yash Shah - Paul Walmsley description: diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c914384..9d89755f911e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7006,12 +7006,6 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/sb_edac.c -EDAC-SIFIVE -M: Yash Shah -L: linux-edac@vger.kernel.org -S: Supported -F: drivers/edac/sifive_edac.c - EDAC-SKYLAKE M: Tony Luck L: linux-edac@vger.kernel.org -- cgit v1.2.3 From 0c0822bcb73f154d96ee648644ec5a8628e3b864 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 21 Feb 2022 11:07:01 +0100 Subject: dt-bindings: update Roger Quadros email Emails to Roger Quadros TI account bounce with: 550 Invalid recipient (#5.1.1) Signed-off-by: Krzysztof Kozlowski Acked-by: Roger Quadros Acked-By: Vinod Koul Acked-by: Lee Jones Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220221100701.48593-1-krzysztof.kozlowski@canonical.com --- Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml | 2 +- Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml | 2 +- Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml | 2 +- Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml b/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml index 272832e9f8f2..fa86691ebf16 100644 --- a/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml +++ b/Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml @@ -20,7 +20,7 @@ description: | maintainers: - Kishon Vijay Abraham I - - Roger Quadros properties: compatible: diff --git a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml index cbbf5e8b1197..f78d3246fbdc 100644 --- a/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml +++ b/Documentation/devicetree/bindings/phy/ti,omap-usb2.yaml @@ -8,7 +8,7 @@ title: OMAP USB2 PHY maintainers: - Kishon Vijay Abraham I - - Roger Quadros + - Roger Quadros properties: compatible: diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml index a634774c537c..eedde385d299 100644 --- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml +++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml @@ -7,7 +7,7 @@ $schema: "http://devicetree.org/meta-schemas/core.yaml#" title: Bindings for the TI wrapper module for the Cadence USBSS-DRD controller maintainers: - - Roger Quadros + - Roger Quadros properties: compatible: diff --git a/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml b/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml index f6e91a5fd8fe..4f7a212fddd3 100644 --- a/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/ti,keystone-dwc3.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: TI Keystone Soc USB Controller maintainers: - - Roger Quadros + - Roger Quadros properties: compatible: -- cgit v1.2.3 From ce2fc710c9d2b25afc710f49bb2065b4439a62bc Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Mon, 21 Feb 2022 15:06:49 +0100 Subject: selinux: fix misuse of mutex_is_locked() mutex_is_locked() tests whether the mutex is locked *by any task*, while here we want to test if it is held *by the current task*. To avoid false/missed WARNINGs, use lockdep_assert_is_held() and lockdep_assert_is_not_held() instead, which do the right thing (though they are a no-op if CONFIG_LOCKDEP=n). Cc: stable@vger.kernel.org Fixes: 2554a48f4437 ("selinux: measure state and policy capabilities") Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore --- security/selinux/ima.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/selinux/ima.c b/security/selinux/ima.c index 727c4e43219d..ff7aea6b3774 100644 --- a/security/selinux/ima.c +++ b/security/selinux/ima.c @@ -77,7 +77,7 @@ void selinux_ima_measure_state_locked(struct selinux_state *state) size_t policy_len; int rc = 0; - WARN_ON(!mutex_is_locked(&state->policy_mutex)); + lockdep_assert_held(&state->policy_mutex); state_str = selinux_ima_collect_state(state); if (!state_str) { @@ -117,7 +117,7 @@ void selinux_ima_measure_state_locked(struct selinux_state *state) */ void selinux_ima_measure_state(struct selinux_state *state) { - WARN_ON(mutex_is_locked(&state->policy_mutex)); + lockdep_assert_not_held(&state->policy_mutex); mutex_lock(&state->policy_mutex); selinux_ima_measure_state_locked(state); -- cgit v1.2.3 From ef527f968ae05c6717c39f49c8709a7e2c19183a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 20 Feb 2022 07:40:52 -0800 Subject: net: __pskb_pull_tail() & pskb_carve_frag_list() drop_monitor friends Whenever one of these functions pull all data from an skb in a frag_list, use consume_skb() instead of kfree_skb() to avoid polluting drop monitoring. Fixes: 6fa01ccd8830 ("skbuff: Add pskb_extract() helper function") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20220220154052.1308469-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6a15ce3eb1d3..b8138c372535 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2276,7 +2276,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Free pulled out fragments. */ while ((list = skb_shinfo(skb)->frag_list) != insp) { skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. */ if (clone) { @@ -6105,7 +6105,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb, /* Free pulled out fragments. */ while ((list = shinfo->frag_list) != insp) { shinfo->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. */ if (clone) { -- cgit v1.2.3 From 342b6419193c6f697fd47d9c72fcff9cafc70687 Mon Sep 17 00:00:00 2001 From: Alvin Šipraga Date: Mon, 21 Feb 2022 21:35:38 +0100 Subject: net: dsa: fix panic when removing unoffloaded port from bridge If a bridged port is not offloaded to the hardware - either because the underlying driver does not implement the port_bridge_{join,leave} ops, or because the operation failed - then its dp->bridge pointer will be NULL when dsa_port_bridge_leave() is called. Avoid dereferncing NULL. This fixes the following splat when removing a port from a bridge: Unable to handle kernel access to user memory outside uaccess routines at virtual address 0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT_RT SMP CPU: 3 PID: 1119 Comm: brctl Tainted: G O 5.17.0-rc4-rt4 #1 Call trace: dsa_port_bridge_leave+0x8c/0x1e4 dsa_slave_changeupper+0x40/0x170 dsa_slave_netdevice_event+0x494/0x4d4 notifier_call_chain+0x80/0xe0 raw_notifier_call_chain+0x1c/0x24 call_netdevice_notifiers_info+0x5c/0xac __netdev_upper_dev_unlink+0xa4/0x200 netdev_upper_dev_unlink+0x38/0x60 del_nbp+0x1b0/0x300 br_del_if+0x38/0x114 add_del_if+0x60/0xa0 br_ioctl_stub+0x128/0x2dc br_ioctl_call+0x68/0xb0 dev_ifsioc+0x390/0x554 dev_ioctl+0x128/0x400 sock_do_ioctl+0xb4/0xf4 sock_ioctl+0x12c/0x4e0 __arm64_sys_ioctl+0xa8/0xf0 invoke_syscall+0x4c/0x110 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x28/0x84 el0_svc+0x1c/0x50 el0t_64_sync_handler+0xa8/0xb0 el0t_64_sync+0x17c/0x180 Code: f9402f00 f0002261 f9401302 913cc021 (a9401404) ---[ end trace 0000000000000000 ]--- Fixes: d3eed0e57d5d ("net: dsa: keep the bridge_dev and bridge_num as part of the same structure") Signed-off-by: Alvin Šipraga Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20220221203539.310690-1-alvin@pqrs.dk Signed-off-by: Jakub Kicinski --- net/dsa/port.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/dsa/port.c b/net/dsa/port.c index eef4a98f2628..1a40c52f5a42 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -395,10 +395,17 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) .tree_index = dp->ds->dst->index, .sw_index = dp->ds->index, .port = dp->index, - .bridge = *dp->bridge, }; int err; + /* If the port could not be offloaded to begin with, then + * there is nothing to do. + */ + if (!dp->bridge) + return; + + info.bridge = *dp->bridge; + /* Here the port is already unbridged. Reflect the current configuration * so that drivers can program their chips accordingly. */ -- cgit v1.2.3 From 277f2bb14361790a70e4b3c649e794b75a91a597 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 21 Feb 2022 15:05:45 -0600 Subject: ibmvnic: schedule failover only if vioctl fails If client is unable to initiate a failover reset via H_VIOCTL hcall, then it should schedule a failover reset as a last resort. Otherwise, there is no need to do a last resort. Fixes: 334c42414729 ("ibmvnic: improve failover sysfs entry") Reported-by: Cris Forno Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Dany Madden Link: https://lore.kernel.org/r/20220221210545.115283-1-drt@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 29617a86b299..dee05a353dbd 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -5917,10 +5917,14 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr, be64_to_cpu(session_token)); rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, H_SESSION_ERR_DETECTED, session_token, 0, 0); - if (rc) + if (rc) { netdev_err(netdev, "H_VIOCTL initiated failover failed, rc %ld\n", rc); + goto last_resort; + } + + return count; last_resort: netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n"); -- cgit v1.2.3 From a58da53ffd70294ebea8ecd0eb45fd0d74add9f9 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 22 Feb 2022 10:47:42 +0100 Subject: vhost/vsock: don't check owner in vhost_vsock_stop() while releasing vhost_vsock_stop() calls vhost_dev_check_owner() to check the device ownership. It expects current->mm to be valid. vhost_vsock_stop() is also called by vhost_vsock_dev_release() when the user has not done close(), so when we are in do_exit(). In this case current->mm is invalid and we're releasing the device, so we should clean it anyway. Let's check the owner only when vhost_vsock_stop() is called by an ioctl. When invoked from release we can not fail so we don't check return code of vhost_vsock_stop(). We need to stop vsock even if it's not the owner. Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko") Cc: stable@vger.kernel.org Reported-by: syzbot+1e3ea63db39f2b4440e0@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+3140b17cb44a7b174008@syzkaller.appspotmail.com Signed-off-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/vhost/vsock.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index d6ca1c7ad513..37f0b4274113 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -629,16 +629,18 @@ err: return ret; } -static int vhost_vsock_stop(struct vhost_vsock *vsock) +static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner) { size_t i; - int ret; + int ret = 0; mutex_lock(&vsock->dev.mutex); - ret = vhost_dev_check_owner(&vsock->dev); - if (ret) - goto err; + if (check_owner) { + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + } for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { struct vhost_virtqueue *vq = &vsock->vqs[i]; @@ -753,7 +755,12 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) * inefficient. Room for improvement here. */ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); - vhost_vsock_stop(vsock); + /* Don't check the owner, because we are in the release path, so we + * need to stop the vsock device in any case. + * vhost_vsock_stop() can not fail in this case, so we don't need to + * check the return code. + */ + vhost_vsock_stop(vsock, false); vhost_vsock_flush(vsock); vhost_dev_stop(&vsock->dev); @@ -868,7 +875,7 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, if (start) return vhost_vsock_start(vsock); else - return vhost_vsock_stop(vsock); + return vhost_vsock_stop(vsock, true); case VHOST_GET_FEATURES: features = VHOST_VSOCK_FEATURES; if (copy_to_user(argp, &features, sizeof(features))) -- cgit v1.2.3 From de7b2efacf4e83954aed3f029d347dfc0b7a4f49 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 22 Feb 2022 16:42:51 +0300 Subject: udp_tunnel: Fix end of loop test in udp_tunnel_nic_unregister() This test is checking if we exited the list via break or not. However if it did not exit via a break then "node" does not point to a valid udp_tunnel_nic_shared_node struct. It will work because of the way the structs are laid out it's the equivalent of "if (info->shared->udp_tunnel_nic_info != dev)" which will always be true, but it's not the right way to test. Fixes: 74cc6d182d03 ("udp_tunnel: add the ability to share port tables") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ipv4/udp_tunnel_nic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index b91003538d87..bc3a043a5d5c 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -846,7 +846,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) list_for_each_entry(node, &info->shared->devices, list) if (node->dev == dev) break; - if (node->dev != dev) + if (list_entry_is_head(node, &info->shared->devices, list)) return; list_del(&node->list); -- cgit v1.2.3 From a1f8fec4dac8bc7b172b2bdbd881e015261a6322 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 22 Feb 2022 16:43:12 +0300 Subject: tipc: Fix end of loop tests for list_for_each_entry() These tests are supposed to check if the loop exited via a break or not. However the tests are wrong because if we did not exit via a break then "p" is not a valid pointer. In that case, it's the equivalent of "if (*(u32 *)sr == *last_key) {". That's going to work most of the time, but there is a potential for those to be equal. Fixes: 1593123a6a49 ("tipc: add name table dump to new netlink api") Fixes: 1a1a143daf84 ("tipc: add publication dump to new netlink api") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/tipc/name_table.c | 2 +- net/tipc/socket.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 01396dd1c899..1d8ba233d047 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -967,7 +967,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, list_for_each_entry(p, &sr->all_publ, all_publ) if (p->key == *last_key) break; - if (p->key != *last_key) + if (list_entry_is_head(p, &sr->all_publ, all_publ)) return -EPIPE; } else { p = list_first_entry(&sr->all_publ, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3e63c83e641c..7545321c3440 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3749,7 +3749,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, if (p->key == *last_publ) break; } - if (p->key != *last_publ) { + if (list_entry_is_head(p, &tsk->publications, binding_sock)) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the * consistence check to fail in the netlink callback -- cgit v1.2.3 From 404ba13a6588d72b3fb9e5c17b73e4725f18c047 Mon Sep 17 00:00:00 2001 From: Alvin Šipraga Date: Tue, 22 Feb 2022 17:14:08 +0100 Subject: MAINTAINERS: add myself as co-maintainer for Realtek DSA switch drivers Adding myself (Alvin Šipraga) as another maintainer for the Realtek DSA switch drivers. I intend to help Linus out with reviewing and testing changes to these drivers, particularly the rtl8365mb driver which I authored and have hardware access to. Cc: Linus Walleij Signed-off-by: Alvin Šipraga Reviewed-by: Linus Walleij Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b57077b935ba..0171f3e949fb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16370,6 +16370,7 @@ F: drivers/watchdog/realtek_otto_wdt.c REALTEK RTL83xx SMI DSA ROUTER CHIPS M: Linus Walleij +M: Alvin Šipraga S: Maintained F: Documentation/devicetree/bindings/net/dsa/realtek-smi.txt F: drivers/net/dsa/realtek-smi* -- cgit v1.2.3 From ecf4a24cf97838fb0b78d4ede0f91d80b058289c Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Wed, 23 Feb 2022 10:34:19 +0800 Subject: net: sched: avoid newline at end of message in NL_SET_ERR_MSG_MOD Fix following coccicheck warning: ./net/sched/act_api.c:277:7-49: WARNING avoid newline at end of message in NL_SET_ERR_MSG_MOD Signed-off-by: Wan Jiabing Signed-off-by: David S. Miller --- net/sched/act_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2811348f3acc..ca03e7284254 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -274,7 +274,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action, err = tc_setup_action(&fl_action->action, actions); if (err) { NL_SET_ERR_MSG_MOD(extack, - "Failed to setup tc actions for offload\n"); + "Failed to setup tc actions for offload"); goto fl_err; } -- cgit v1.2.3 From 4f1e72850d452e5c3302faa82a01f179ff5f9482 Mon Sep 17 00:00:00 2001 From: Heyi Guo Date: Wed, 23 Feb 2022 11:14:34 +0800 Subject: drivers/net/ftgmac100: refactor ftgmac100_reset_task to enable direct function call This is to prepare for ftgmac100_adjust_link() to call reset function directly, instead of task schedule. Signed-off-by: Heyi Guo Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 691605c15265..1f3eb4431475 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1410,10 +1410,8 @@ static int ftgmac100_init_all(struct ftgmac100 *priv, bool ignore_alloc_err) return err; } -static void ftgmac100_reset_task(struct work_struct *work) +static void ftgmac100_reset(struct ftgmac100 *priv) { - struct ftgmac100 *priv = container_of(work, struct ftgmac100, - reset_task); struct net_device *netdev = priv->netdev; int err; @@ -1459,6 +1457,14 @@ static void ftgmac100_reset_task(struct work_struct *work) rtnl_unlock(); } +static void ftgmac100_reset_task(struct work_struct *work) +{ + struct ftgmac100 *priv = container_of(work, struct ftgmac100, + reset_task); + + ftgmac100_reset(priv); +} + static int ftgmac100_open(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); -- cgit v1.2.3 From 3c773dba8182cdfea7b32caafe9290240ab8de5f Mon Sep 17 00:00:00 2001 From: Heyi Guo Date: Wed, 23 Feb 2022 11:14:35 +0800 Subject: drivers/net/ftgmac100: adjust code place for function call dependency This is to prepare for ftgmac100_adjust_link() to call ftgmac100_reset() directly. Only code places are changed. Signed-off-by: Heyi Guo Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 222 +++++++++++++++---------------- 1 file changed, 111 insertions(+), 111 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 1f3eb4431475..c1deb6e5d26c 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -989,117 +989,6 @@ static int ftgmac100_alloc_rx_buffers(struct ftgmac100 *priv) return 0; } -static void ftgmac100_adjust_link(struct net_device *netdev) -{ - struct ftgmac100 *priv = netdev_priv(netdev); - struct phy_device *phydev = netdev->phydev; - bool tx_pause, rx_pause; - int new_speed; - - /* We store "no link" as speed 0 */ - if (!phydev->link) - new_speed = 0; - else - new_speed = phydev->speed; - - /* Grab pause settings from PHY if configured to do so */ - if (priv->aneg_pause) { - rx_pause = tx_pause = phydev->pause; - if (phydev->asym_pause) - tx_pause = !rx_pause; - } else { - rx_pause = priv->rx_pause; - tx_pause = priv->tx_pause; - } - - /* Link hasn't changed, do nothing */ - if (phydev->speed == priv->cur_speed && - phydev->duplex == priv->cur_duplex && - rx_pause == priv->rx_pause && - tx_pause == priv->tx_pause) - return; - - /* Print status if we have a link or we had one and just lost it, - * don't print otherwise. - */ - if (new_speed || priv->cur_speed) - phy_print_status(phydev); - - priv->cur_speed = new_speed; - priv->cur_duplex = phydev->duplex; - priv->rx_pause = rx_pause; - priv->tx_pause = tx_pause; - - /* Link is down, do nothing else */ - if (!new_speed) - return; - - /* Disable all interrupts */ - iowrite32(0, priv->base + FTGMAC100_OFFSET_IER); - - /* Reset the adapter asynchronously */ - schedule_work(&priv->reset_task); -} - -static int ftgmac100_mii_probe(struct net_device *netdev) -{ - struct ftgmac100 *priv = netdev_priv(netdev); - struct platform_device *pdev = to_platform_device(priv->dev); - struct device_node *np = pdev->dev.of_node; - struct phy_device *phydev; - phy_interface_t phy_intf; - int err; - - /* Default to RGMII. It's a gigabit part after all */ - err = of_get_phy_mode(np, &phy_intf); - if (err) - phy_intf = PHY_INTERFACE_MODE_RGMII; - - /* Aspeed only supports these. I don't know about other IP - * block vendors so I'm going to just let them through for - * now. Note that this is only a warning if for some obscure - * reason the DT really means to lie about it or it's a newer - * part we don't know about. - * - * On the Aspeed SoC there are additionally straps and SCU - * control bits that could tell us what the interface is - * (or allow us to configure it while the IP block is held - * in reset). For now I chose to keep this driver away from - * those SoC specific bits and assume the device-tree is - * right and the SCU has been configured properly by pinmux - * or the firmware. - */ - if (priv->is_aspeed && !(phy_interface_mode_is_rgmii(phy_intf))) { - netdev_warn(netdev, - "Unsupported PHY mode %s !\n", - phy_modes(phy_intf)); - } - - phydev = phy_find_first(priv->mii_bus); - if (!phydev) { - netdev_info(netdev, "%s: no PHY found\n", netdev->name); - return -ENODEV; - } - - phydev = phy_connect(netdev, phydev_name(phydev), - &ftgmac100_adjust_link, phy_intf); - - if (IS_ERR(phydev)) { - netdev_err(netdev, "%s: Could not attach to PHY\n", netdev->name); - return PTR_ERR(phydev); - } - - /* Indicate that we support PAUSE frames (see comment in - * Documentation/networking/phy.rst) - */ - phy_support_asym_pause(phydev); - - /* Display what we found */ - phy_attached_info(phydev); - - return 0; -} - static int ftgmac100_mdiobus_read(struct mii_bus *bus, int phy_addr, int regnum) { struct net_device *netdev = bus->priv; @@ -1465,6 +1354,117 @@ static void ftgmac100_reset_task(struct work_struct *work) ftgmac100_reset(priv); } +static void ftgmac100_adjust_link(struct net_device *netdev) +{ + struct ftgmac100 *priv = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; + bool tx_pause, rx_pause; + int new_speed; + + /* We store "no link" as speed 0 */ + if (!phydev->link) + new_speed = 0; + else + new_speed = phydev->speed; + + /* Grab pause settings from PHY if configured to do so */ + if (priv->aneg_pause) { + rx_pause = tx_pause = phydev->pause; + if (phydev->asym_pause) + tx_pause = !rx_pause; + } else { + rx_pause = priv->rx_pause; + tx_pause = priv->tx_pause; + } + + /* Link hasn't changed, do nothing */ + if (phydev->speed == priv->cur_speed && + phydev->duplex == priv->cur_duplex && + rx_pause == priv->rx_pause && + tx_pause == priv->tx_pause) + return; + + /* Print status if we have a link or we had one and just lost it, + * don't print otherwise. + */ + if (new_speed || priv->cur_speed) + phy_print_status(phydev); + + priv->cur_speed = new_speed; + priv->cur_duplex = phydev->duplex; + priv->rx_pause = rx_pause; + priv->tx_pause = tx_pause; + + /* Link is down, do nothing else */ + if (!new_speed) + return; + + /* Disable all interrupts */ + iowrite32(0, priv->base + FTGMAC100_OFFSET_IER); + + /* Reset the adapter asynchronously */ + schedule_work(&priv->reset_task); +} + +static int ftgmac100_mii_probe(struct net_device *netdev) +{ + struct ftgmac100 *priv = netdev_priv(netdev); + struct platform_device *pdev = to_platform_device(priv->dev); + struct device_node *np = pdev->dev.of_node; + struct phy_device *phydev; + phy_interface_t phy_intf; + int err; + + /* Default to RGMII. It's a gigabit part after all */ + err = of_get_phy_mode(np, &phy_intf); + if (err) + phy_intf = PHY_INTERFACE_MODE_RGMII; + + /* Aspeed only supports these. I don't know about other IP + * block vendors so I'm going to just let them through for + * now. Note that this is only a warning if for some obscure + * reason the DT really means to lie about it or it's a newer + * part we don't know about. + * + * On the Aspeed SoC there are additionally straps and SCU + * control bits that could tell us what the interface is + * (or allow us to configure it while the IP block is held + * in reset). For now I chose to keep this driver away from + * those SoC specific bits and assume the device-tree is + * right and the SCU has been configured properly by pinmux + * or the firmware. + */ + if (priv->is_aspeed && !(phy_interface_mode_is_rgmii(phy_intf))) { + netdev_warn(netdev, + "Unsupported PHY mode %s !\n", + phy_modes(phy_intf)); + } + + phydev = phy_find_first(priv->mii_bus); + if (!phydev) { + netdev_info(netdev, "%s: no PHY found\n", netdev->name); + return -ENODEV; + } + + phydev = phy_connect(netdev, phydev_name(phydev), + &ftgmac100_adjust_link, phy_intf); + + if (IS_ERR(phydev)) { + netdev_err(netdev, "%s: Could not attach to PHY\n", netdev->name); + return PTR_ERR(phydev); + } + + /* Indicate that we support PAUSE frames (see comment in + * Documentation/networking/phy.rst) + */ + phy_support_asym_pause(phydev); + + /* Display what we found */ + phy_attached_info(phydev); + + return 0; +} + static int ftgmac100_open(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); -- cgit v1.2.3 From 1baf2e50e48f10f0ea07d53e13381fd0da1546d2 Mon Sep 17 00:00:00 2001 From: Heyi Guo Date: Wed, 23 Feb 2022 11:14:36 +0800 Subject: drivers/net/ftgmac100: fix DHCP potential failure with systemd DHCP failures were observed with systemd 247.6. The issue could be reproduced by rebooting Aspeed 2600 and then running ifconfig ethX down/up. It is caused by below procedures in the driver: 1. ftgmac100_open() enables net interface and call phy_start() 2. When PHY is link up, it calls netif_carrier_on() and then adjust_link callback 3. ftgmac100_adjust_link() will schedule the reset task 4. ftgmac100_reset_task() will then reset the MAC in another schedule After step 2, systemd will be notified to send DHCP discover packet, while the packet might be corrupted by MAC reset operation in step 4. Call ftgmac100_reset() directly instead of scheduling task to fix the issue. Signed-off-by: Heyi Guo Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index c1deb6e5d26c..d5356db7539a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1402,8 +1402,17 @@ static void ftgmac100_adjust_link(struct net_device *netdev) /* Disable all interrupts */ iowrite32(0, priv->base + FTGMAC100_OFFSET_IER); - /* Reset the adapter asynchronously */ - schedule_work(&priv->reset_task); + /* Release phy lock to allow ftgmac100_reset to aquire it, keeping lock + * order consistent to prevent dead lock. + */ + if (netdev->phydev) + mutex_unlock(&netdev->phydev->lock); + + ftgmac100_reset(priv); + + if (netdev->phydev) + mutex_lock(&netdev->phydev->lock); + } static int ftgmac100_mii_probe(struct net_device *netdev) -- cgit v1.2.3 From 363f6368603743072e5f318c668c632bccb097a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:07:15 +0100 Subject: nvme: don't return an error from nvme_configure_metadata When a fabrics controller claims to support an invalidate metadata configuration we already warn and disable metadata support. No need to also return an error during revalidation. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Daniel Wagner Tested-by: Kanchan Joshi --- drivers/nvme/host/core.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 469f23186159..5fc040b279bf 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1723,7 +1723,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return 0; } -static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -1739,7 +1739,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - return 0; + return; + if (ctrl->ops->flags & NVME_F_FABRICS) { /* * The NVMe over Fabrics specification only supports metadata as @@ -1747,7 +1748,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * remap the separate metadata buffer from the block layer. */ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) - return -EINVAL; + return; ns->features |= NVME_NS_EXT_LBAS; @@ -1774,8 +1775,6 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) else ns->features |= NVME_NS_METADATA_SUPPORTED; } - - return 0; } static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, @@ -1916,9 +1915,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[lbaf].ds; nvme_set_queue_limits(ns->ctrl, ns->queue); - ret = nvme_configure_metadata(ns, id); - if (ret) - goto out_unfreeze; + nvme_configure_metadata(ns, id); nvme_set_chunk_sectors(ns, id); nvme_update_disk_info(ns->disk, ns, id); -- cgit v1.2.3 From 602e57c9799c19f27e440639deed3ec45cfe1651 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 14:14:58 +0100 Subject: nvme: also mark passthrough-only namespaces ready in nvme_update_ns_info Commit e7d65803e2bb ("nvme-multipath: revalidate paths during rescan") introduced the NVME_NS_READY flag, which nvme_path_is_disabled() uses to check if a path can be used or not. We also need to set this flag for devices that fail the ZNS feature validation and which are available through passthrough devices only to that they can be used in multipathing setups. Fixes: e7d65803e2bb ("nvme-multipath: revalidate paths during rescan") Reported-by: Kanchan Joshi Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Daniel Wagner Tested-by: Kanchan Joshi --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5fc040b279bf..fd4720d37cc0 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1931,7 +1931,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (blk_queue_is_zoned(ns->queue)) { ret = nvme_revalidate_zones(ns); if (ret && !nvme_first_scan(ns->disk)) - goto out; + return ret; } if (nvme_ns_head_multipath(ns->head)) { @@ -1946,16 +1946,16 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) return 0; out_unfreeze: - blk_mq_unfreeze_queue(ns->disk->queue); -out: /* * If probing fails due an unsupported feature, hide the block device, * but still allow other access. */ if (ret == -ENODEV) { ns->disk->flags |= GENHD_FL_HIDDEN; + set_bit(NVME_NS_READY, &ns->flags); ret = 0; } + blk_mq_unfreeze_queue(ns->disk->queue); return ret; } -- cgit v1.2.3 From c2700d2886a87f83f31e0a301de1d2350b52c79b Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sat, 22 Jan 2022 22:27:44 +0530 Subject: nvme-tcp: send H2CData PDUs based on MAXH2CDATA As per NVMe/TCP specification (revision 1.0a, section 3.6.2.3) Maximum Host to Controller Data length (MAXH2CDATA): Specifies the maximum number of PDU-Data bytes per H2CData PDU in bytes. This value is a multiple of dwords and should be no less than 4,096. Current code sets H2CData PDU data_length to r2t_length, it does not check MAXH2CDATA value. Fix this by setting H2CData PDU data_length to min(req->h2cdata_left, queue->maxh2cdata). Also validate MAXH2CDATA value returned by target in ICResp PDU, if it is not a multiple of dword or if it is less than 4096 return -EINVAL from nvme_tcp_init_connection(). Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 63 +++++++++++++++++++++++++++++++++++++----------- include/linux/nvme-tcp.h | 1 + 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 891a36d02e7c..65e00c64a588 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -44,6 +44,8 @@ struct nvme_tcp_request { u32 data_len; u32 pdu_len; u32 pdu_sent; + u32 h2cdata_left; + u32 h2cdata_offset; u16 ttag; __le16 status; struct list_head entry; @@ -95,6 +97,7 @@ struct nvme_tcp_queue { struct nvme_tcp_request *request; int queue_size; + u32 maxh2cdata; size_t cmnd_capsule_len; struct nvme_tcp_ctrl *ctrl; unsigned long flags; @@ -572,23 +575,26 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, return ret; } -static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, - struct nvme_tcp_r2t_pdu *pdu) +static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req) { struct nvme_tcp_data_pdu *data = req->pdu; struct nvme_tcp_queue *queue = req->queue; struct request *rq = blk_mq_rq_from_pdu(req); + u32 h2cdata_sent = req->pdu_len; u8 hdgst = nvme_tcp_hdgst_len(queue); u8 ddgst = nvme_tcp_ddgst_len(queue); req->state = NVME_TCP_SEND_H2C_PDU; req->offset = 0; - req->pdu_len = le32_to_cpu(pdu->r2t_length); + req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata); req->pdu_sent = 0; + req->h2cdata_left -= req->pdu_len; + req->h2cdata_offset += h2cdata_sent; memset(data, 0, sizeof(*data)); data->hdr.type = nvme_tcp_h2c_data; - data->hdr.flags = NVME_TCP_F_DATA_LAST; + if (!req->h2cdata_left) + data->hdr.flags = NVME_TCP_F_DATA_LAST; if (queue->hdr_digest) data->hdr.flags |= NVME_TCP_F_HDGST; if (queue->data_digest) @@ -597,9 +603,9 @@ static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, data->hdr.pdo = data->hdr.hlen + hdgst; data->hdr.plen = cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); - data->ttag = pdu->ttag; + data->ttag = req->ttag; data->command_id = nvme_cid(rq); - data->data_offset = pdu->r2t_offset; + data->data_offset = cpu_to_le32(req->h2cdata_offset); data->data_length = cpu_to_le32(req->pdu_len); } @@ -609,6 +615,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, struct nvme_tcp_request *req; struct request *rq; u32 r2t_length = le32_to_cpu(pdu->r2t_length); + u32 r2t_offset = le32_to_cpu(pdu->r2t_offset); rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id); if (!rq) { @@ -633,14 +640,19 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, return -EPROTO; } - if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) { + if (unlikely(r2t_offset < req->data_sent)) { dev_err(queue->ctrl->ctrl.device, "req %d unexpected r2t offset %u (expected %zu)\n", - rq->tag, le32_to_cpu(pdu->r2t_offset), req->data_sent); + rq->tag, r2t_offset, req->data_sent); return -EPROTO; } - nvme_tcp_setup_h2c_data_pdu(req, pdu); + req->pdu_len = 0; + req->h2cdata_left = r2t_length; + req->h2cdata_offset = r2t_offset; + req->ttag = pdu->ttag; + + nvme_tcp_setup_h2c_data_pdu(req); nvme_tcp_queue_request(req, false, true); return 0; @@ -928,6 +940,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; int req_data_len = req->data_len; + u32 h2cdata_left = req->h2cdata_left; while (true) { struct page *page = nvme_tcp_req_cur_page(req); @@ -972,7 +985,10 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) req->state = NVME_TCP_SEND_DDGST; req->offset = 0; } else { - nvme_tcp_done_send_req(queue); + if (h2cdata_left) + nvme_tcp_setup_h2c_data_pdu(req); + else + nvme_tcp_done_send_req(queue); } return 1; } @@ -1030,9 +1046,14 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) if (queue->hdr_digest && !req->offset) nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); - ret = kernel_sendpage(queue->sock, virt_to_page(pdu), - offset_in_page(pdu) + req->offset, len, - MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); + if (!req->h2cdata_left) + ret = kernel_sendpage(queue->sock, virt_to_page(pdu), + offset_in_page(pdu) + req->offset, len, + MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); + else + ret = sock_no_sendpage(queue->sock, virt_to_page(pdu), + offset_in_page(pdu) + req->offset, len, + MSG_DONTWAIT | MSG_MORE); if (unlikely(ret <= 0)) return ret; @@ -1052,6 +1073,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; size_t offset = req->offset; + u32 h2cdata_left = req->h2cdata_left; int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { @@ -1069,7 +1091,10 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) return ret; if (offset + ret == NVME_TCP_DIGEST_LENGTH) { - nvme_tcp_done_send_req(queue); + if (h2cdata_left) + nvme_tcp_setup_h2c_data_pdu(req); + else + nvme_tcp_done_send_req(queue); return 1; } @@ -1261,6 +1286,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) struct msghdr msg = {}; struct kvec iov; bool ctrl_hdgst, ctrl_ddgst; + u32 maxh2cdata; int ret; icreq = kzalloc(sizeof(*icreq), GFP_KERNEL); @@ -1344,6 +1370,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) goto free_icresp; } + maxh2cdata = le32_to_cpu(icresp->maxdata); + if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) { + pr_err("queue %d: invalid maxh2cdata returned %u\n", + nvme_tcp_queue_id(queue), maxh2cdata); + goto free_icresp; + } + queue->maxh2cdata = maxh2cdata; + ret = 0; free_icresp: kfree(icresp); @@ -2329,6 +2363,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, req->data_sent = 0; req->pdu_len = 0; req->pdu_sent = 0; + req->h2cdata_left = 0; req->data_len = blk_rq_nr_phys_segments(rq) ? blk_rq_payload_bytes(rq) : 0; req->curr_bio = rq->bio; diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 959e0bd9a913..75470159a194 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -12,6 +12,7 @@ #define NVME_TCP_DISC_PORT 8009 #define NVME_TCP_ADMIN_CCSZ SZ_8K #define NVME_TCP_DIGEST_LENGTH 4 +#define NVME_TCP_MIN_MAXH2CDATA 4096 enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, -- cgit v1.2.3 From dd2288f4a020d693360e3e8d72f8b9d9c25f5ef6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 18 Feb 2022 09:25:20 +0100 Subject: parisc/unaligned: Fix fldd and fstd unaligned handlers on 32-bit kernel Usually the kernel provides fixup routines to emulate the fldd and fstd floating-point instructions if they load or store 8-byte from/to a not natuarally aligned memory location. On a 32-bit kernel I noticed that those unaligned handlers didn't worked and instead the application got a SEGV. While checking the code I found two problems: First, the OPCODE_FLDD_L and OPCODE_FSTD_L cases were ifdef'ed out by the CONFIG_PA20 option, and as such those weren't built on a pure 32-bit kernel. This is now fixed by moving the CONFIG_PA20 #ifdef to prevent the compilation of OPCODE_LDD_L and OPCODE_FSTD_L only, and handling the fldd and fstd instructions. The second problem are two bugs in the 32-bit inline assembly code, where the wrong registers where used. The calculation of the natural alignment used %2 (vall) instead of %3 (ior), and the first word was stored back to address %1 (valh) instead of %3 (ior). Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- arch/parisc/kernel/unaligned.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 237d20dd5622..a238b7fe8908 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -397,7 +397,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) __asm__ __volatile__ ( " mtsp %4, %%sr1\n" " zdep %2, 29, 2, %%r19\n" -" dep %%r0, 31, 2, %2\n" +" dep %%r0, 31, 2, %3\n" " mtsar %%r19\n" " zvdepi -2, 32, %%r19\n" "1: ldw 0(%%sr1,%3),%%r20\n" @@ -409,7 +409,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) " andcm %%r21, %%r19, %%r21\n" " or %1, %%r20, %1\n" " or %2, %%r21, %2\n" -"3: stw %1,0(%%sr1,%1)\n" +"3: stw %1,0(%%sr1,%3)\n" "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" " copy %%r0, %0\n" @@ -596,7 +596,6 @@ void handle_unaligned(struct pt_regs *regs) ret = ERR_NOTHANDLED; /* "undefined", but lets kill them. */ break; } -#ifdef CONFIG_PA20 switch (regs->iir & OPCODE2_MASK) { case OPCODE_FLDD_L: @@ -607,14 +606,15 @@ void handle_unaligned(struct pt_regs *regs) flop=1; ret = emulate_std(regs, R2(regs->iir),1); break; +#ifdef CONFIG_PA20 case OPCODE_LDD_L: ret = emulate_ldd(regs, R2(regs->iir),0); break; case OPCODE_STD_L: ret = emulate_std(regs, R2(regs->iir),0); break; - } #endif + } switch (regs->iir & OPCODE3_MASK) { case OPCODE_FLDW_L: -- cgit v1.2.3 From a97279836867b1cb50a3d4f0b1bf60e0abe6d46c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 18 Feb 2022 23:40:14 +0100 Subject: parisc/unaligned: Fix ldw() and stw() unalignment handlers Fix 3 bugs: a) emulate_stw() doesn't return the error code value, so faulting instructions are not reported and aborted. b) Tell emulate_ldw() to handle fldw_l as floating point instruction c) Tell emulate_ldw() to handle ldw_m as integer instruction Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- arch/parisc/kernel/unaligned.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index a238b7fe8908..286cec4d86d7 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -340,7 +340,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); - return 0; + return ret; } static int emulate_std(struct pt_regs *regs, int frreg, int flop) { @@ -619,10 +619,10 @@ void handle_unaligned(struct pt_regs *regs) { case OPCODE_FLDW_L: flop=1; - ret = emulate_ldw(regs, R2(regs->iir),0); + ret = emulate_ldw(regs, R2(regs->iir), 1); break; case OPCODE_LDW_M: - ret = emulate_ldw(regs, R2(regs->iir),1); + ret = emulate_ldw(regs, R2(regs->iir), 0); break; case OPCODE_FSTW_L: -- cgit v1.2.3 From 3f1271b54edcc692da5a3663f2aa2a64781f9bc3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 22 Feb 2022 11:08:01 -0500 Subject: PCI: Mark all AMD Navi10 and Navi14 GPU ATS as broken There are enough VBIOS escapes without the proper workaround that some users still hit this. Microsoft never productized ATS on Windows so OEM platforms that were Windows-only didn't always validate ATS. The advantages of ATS are not worth it compared to the potential instabilities on harvested boards. Disable ATS on all Navi10 and Navi14 boards. Symptoms include: amdgpu 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0007 address=0xffffc02000 flags=0x0000] AMD-Vi: Event logged [IO_PAGE_FAULT device=07:00.0 domain=0x0007 address=0xffffc02000 flags=0x0000] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring sdma0 timeout, signaled seq=6047, emitted seq=6049 amdgpu 0000:07:00.0: amdgpu: GPU reset begin! amdgpu 0000:07:00.0: amdgpu: GPU reset succeeded, trying to resume amdgpu 0000:07:00.0: [drm:amdgpu_ring_test_helper [amdgpu]] *ERROR* ring sdma0 test failed (-110) [drm:amdgpu_device_ip_resume_phase2 [amdgpu]] *ERROR* resume of IP block failed -110 amdgpu 0000:07:00.0: amdgpu: GPU reset(1) failed Related commits: e8946a53e2a6 ("PCI: Mark AMD Navi14 GPU ATS as broken") a2da5d8cc0b0 ("PCI: Mark AMD Raven iGPU ATS as broken in some platforms") 45beb31d3afb ("PCI: Mark AMD Navi10 GPU rev 0x00 ATS as broken") 5e89cd303e3a ("PCI: Mark AMD Navi14 GPU rev 0xc5 ATS as broken") d28ca864c493 ("PCI: Mark AMD Stoney Radeon R7 GPU ATS as broken") 9b44b0b09dec ("PCI: Mark AMD Stoney GPU ATS as broken") [bhelgaas: add symptoms and related commits] Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1760 Link: https://lore.kernel.org/r/20220222160801.841643-1-alexander.deucher@amd.com Signed-off-by: Alex Deucher Signed-off-by: Bjorn Helgaas Acked-by: Christian König Acked-by: Guchun Chen --- drivers/pci/quirks.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d2dd6a6cda60..65f7f6b0576c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5344,11 +5344,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags); */ static void quirk_amd_harvest_no_ats(struct pci_dev *pdev) { - if ((pdev->device == 0x7312 && pdev->revision != 0x00) || - (pdev->device == 0x7340 && pdev->revision != 0xc5) || - (pdev->device == 0x7341 && pdev->revision != 0x00)) - return; - if (pdev->device == 0x15d8) { if (pdev->revision == 0xcf && pdev->subsystem_vendor == 0xea50 && @@ -5370,10 +5365,19 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_amd_harvest_no_ats); /* AMD Iceland dGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats); /* AMD Navi10 dGPU */ +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7310, quirk_amd_harvest_no_ats); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7318, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7319, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731a, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731b, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731e, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731f, quirk_amd_harvest_no_ats); /* AMD Navi14 dGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7341, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7347, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x734f, quirk_amd_harvest_no_ats); /* AMD Raven platform iGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats); #endif /* CONFIG_PCI_ATS */ -- cgit v1.2.3 From f908a35b22180c4da64cf2647e4f5f0cd3054da7 Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Mon, 10 Jan 2022 10:14:41 +0200 Subject: net/mlx5: Update the list of the PCI supported devices Add the upcoming BlueField-4 and ConnectX-8 device IDs. Fixes: 2e9d3e83ab82 ("net/mlx5: Update the list of the PCI supported devices") Signed-off-by: Meir Lichtinger Reviewed-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2c774f367199..13f913c13a2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1840,10 +1840,12 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ + { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ { PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */ { 0, } }; -- cgit v1.2.3 From e5b2bc30c21139ae10f0e56989389d0bc7b7b1d6 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Fri, 24 Dec 2021 01:07:30 +0200 Subject: net/mlx5: DR, Cache STE shadow memory During rule insertion on each ICM memory chunk we also allocate shadow memory used for management. This includes the hw_ste, dr_ste and miss list per entry. Since the scale of these allocations is large we noticed a performance hiccup that happens once malloc and free are stressed. In extreme usecases when ~1M chunks are freed at once, it might take up to 40 seconds to complete this, up to the point the kernel sees this as self-detected stall on CPU: rcu: INFO: rcu_sched self-detected stall on CPU To resolve this we will increase the reuse of shadow memory. Doing this we see that a time in the aforementioned usecase dropped from ~40 seconds to ~8-10 seconds. Fixes: 29cf8febd185 ("net/mlx5: DR, ICM pool memory allocator") Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_icm_pool.c | 109 ++++++++++++++------- .../ethernet/mellanox/mlx5/core/steering/mlx5dr.h | 5 + 2 files changed, 79 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index 7f6fd9c5e371..f496b7e9401b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -136,37 +136,35 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) kvfree(icm_mr); } -static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk) +static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy) { - chunk->ste_arr = kvzalloc(chunk->num_of_entries * - sizeof(chunk->ste_arr[0]), GFP_KERNEL); - if (!chunk->ste_arr) - return -ENOMEM; - - chunk->hw_ste_arr = kvzalloc(chunk->num_of_entries * - DR_STE_SIZE_REDUCED, GFP_KERNEL); - if (!chunk->hw_ste_arr) - goto out_free_ste_arr; - - chunk->miss_list = kvmalloc(chunk->num_of_entries * - sizeof(chunk->miss_list[0]), GFP_KERNEL); - if (!chunk->miss_list) - goto out_free_hw_ste_arr; + /* We support only one type of STE size, both for ConnectX-5 and later + * devices. Once the support for match STE which has a larger tag is + * added (32B instead of 16B), the STE size for devices later than + * ConnectX-5 needs to account for that. + */ + return DR_STE_SIZE_REDUCED; +} - return 0; +static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset) +{ + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + int index = offset / DR_STE_SIZE; -out_free_hw_ste_arr: - kvfree(chunk->hw_ste_arr); -out_free_ste_arr: - kvfree(chunk->ste_arr); - return -ENOMEM; + chunk->ste_arr = &buddy->ste_arr[index]; + chunk->miss_list = &buddy->miss_list[index]; + chunk->hw_ste_arr = buddy->hw_ste_arr + + index * dr_icm_buddy_get_ste_size(buddy); } static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) { - kvfree(chunk->miss_list); - kvfree(chunk->hw_ste_arr); - kvfree(chunk->ste_arr); + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + + memset(chunk->hw_ste_arr, 0, + chunk->num_of_entries * dr_icm_buddy_get_ste_size(buddy)); + memset(chunk->ste_arr, 0, + chunk->num_of_entries * sizeof(chunk->ste_arr[0])); } static enum mlx5dr_icm_type @@ -189,6 +187,44 @@ static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk, kvfree(chunk); } +static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy) +{ + int num_of_entries = + mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz); + + buddy->ste_arr = kvcalloc(num_of_entries, + sizeof(struct mlx5dr_ste), GFP_KERNEL); + if (!buddy->ste_arr) + return -ENOMEM; + + /* Preallocate full STE size on non-ConnectX-5 devices since + * we need to support both full and reduced with the same cache. + */ + buddy->hw_ste_arr = kvcalloc(num_of_entries, + dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL); + if (!buddy->hw_ste_arr) + goto free_ste_arr; + + buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL); + if (!buddy->miss_list) + goto free_hw_ste_arr; + + return 0; + +free_hw_ste_arr: + kvfree(buddy->hw_ste_arr); +free_ste_arr: + kvfree(buddy->ste_arr); + return -ENOMEM; +} + +static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy) +{ + kvfree(buddy->ste_arr); + kvfree(buddy->hw_ste_arr); + kvfree(buddy->miss_list); +} + static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) { struct mlx5dr_icm_buddy_mem *buddy; @@ -208,11 +244,19 @@ static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) buddy->icm_mr = icm_mr; buddy->pool = pool; + if (pool->icm_type == DR_ICM_TYPE_STE) { + /* Reduce allocations by preallocating and reusing the STE structures */ + if (dr_icm_buddy_init_ste_cache(buddy)) + goto err_cleanup_buddy; + } + /* add it to the -start- of the list in order to search in it first */ list_add(&buddy->list_node, &pool->buddy_mem_list); return 0; +err_cleanup_buddy: + mlx5dr_buddy_cleanup(buddy); err_free_buddy: kvfree(buddy); free_mr: @@ -234,6 +278,9 @@ static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy) mlx5dr_buddy_cleanup(buddy); + if (buddy->pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_buddy_cleanup_ste_cache(buddy); + kvfree(buddy); } @@ -261,26 +308,18 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool, chunk->byte_size = mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type); chunk->seg = seg; + chunk->buddy_mem = buddy_mem_pool; - if (pool->icm_type == DR_ICM_TYPE_STE && dr_icm_chunk_ste_init(chunk)) { - mlx5dr_err(pool->dmn, - "Failed to init ste arrays (order: %d)\n", - chunk_size); - goto out_free_chunk; - } + if (pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_chunk_ste_init(chunk, offset); buddy_mem_pool->used_memory += chunk->byte_size; - chunk->buddy_mem = buddy_mem_pool; INIT_LIST_HEAD(&chunk->chunk_list); /* chunk now is part of the used_list */ list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list); return chunk; - -out_free_chunk: - kvfree(chunk); - return NULL; } static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index c7c93131b762..dfa223415fe2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -160,6 +160,11 @@ struct mlx5dr_icm_buddy_mem { * sync_ste command sets them free. */ struct list_head hot_list; + + /* Memory optimisation */ + struct mlx5dr_ste *ste_arr; + struct list_head *miss_list; + u8 *hw_ste_arr; }; int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, -- cgit v1.2.3 From 0aec12d97b2036af0946e3d582144739860ac07b Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 11 Jan 2022 03:00:03 +0200 Subject: net/mlx5: DR, Fix slab-out-of-bounds in mlx5_cmd_dr_create_fte When adding a rule with 32 destinations, we hit the following out-of-band access issue: BUG: KASAN: slab-out-of-bounds in mlx5_cmd_dr_create_fte+0x18ee/0x1e70 This patch fixes the issue by both increasing the allocated buffers to accommodate for the needed actions and by checking the number of actions to prevent this issue when a rule with too many actions is provided. Fixes: 1ffd498901c1 ("net/mlx5: DR, Increase supported num of actions to 32") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/steering/fs_dr.c | 33 +++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index a476da2424f8..3f311462bedf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -233,7 +233,11 @@ static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst) dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID; } -#define MLX5_FLOW_CONTEXT_ACTION_MAX 32 +/* We want to support a rule with 32 destinations, which means we need to + * account for 32 destinations plus usually a counter plus one more action + * for a multi-destination flow table. + */ +#define MLX5_FLOW_CONTEXT_ACTION_MAX 34 static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *group, @@ -403,9 +407,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, enum mlx5_flow_destination_type type = dst->dest_attr.type; u32 id; - if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || - num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) { - err = -ENOSPC; + if (fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_term_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; goto free_actions; } @@ -478,8 +482,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, MLX5_FLOW_DESTINATION_TYPE_COUNTER) continue; - if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { - err = -ENOSPC; + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; goto free_actions; } @@ -499,14 +504,28 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, params.match_sz = match_sz; params.match_buf = (u64 *)fte->val; if (num_term_actions == 1) { - if (term_actions->reformat) + if (term_actions->reformat) { + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } actions[num_actions++] = term_actions->reformat; + } + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } actions[num_actions++] = term_actions->dest; } else if (num_term_actions > 1) { bool ignore_flow_level = !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL); + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } tmp_action = mlx5dr_action_create_mult_dest_tbl(domain, term_actions, num_term_actions, -- cgit v1.2.3 From ffb0753b954763d94f52c901adfe58ed0d4005e6 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 13 Jan 2022 14:52:48 +0200 Subject: net/mlx5: DR, Don't allow match on IP w/o matching on full ethertype/ip_version Currently SMFS allows adding rule with matching on src/dst IP w/o matching on full ethertype or ip_version, which is not supported by HW. This patch fixes this issue and adds the check as it is done in DMFS. Fixes: 26d688e33f88 ("net/mlx5: DR, Add Steering entry (STE) utilities") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_matcher.c | 20 +++----------- .../ethernet/mellanox/mlx5/core/steering/dr_ste.c | 32 +++++++++++++++++++++- .../mellanox/mlx5/core/steering/dr_types.h | 10 +++++++ 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index e87cf498c77b..38971fe1dfe1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -13,18 +13,6 @@ static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec) return (spec->dmac_47_16 || spec->dmac_15_0); } -static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec) -{ - return (spec->src_ip_127_96 || spec->src_ip_95_64 || - spec->src_ip_63_32 || spec->src_ip_31_0); -} - -static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec) -{ - return (spec->dst_ip_127_96 || spec->dst_ip_95_64 || - spec->dst_ip_63_32 || spec->dst_ip_31_0); -} - static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec) { return (spec->ip_protocol || spec->frag || spec->tcp_flags || @@ -503,11 +491,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, &mask, inner, rx); if (outer_ipv == DR_RULE_IPV6) { - if (dr_mask_is_dst_addr_set(&mask.outer)) + if (DR_MASK_IS_DST_IP_SET(&mask.outer)) mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++], &mask, inner, rx); - if (dr_mask_is_src_addr_set(&mask.outer)) + if (DR_MASK_IS_SRC_IP_SET(&mask.outer)) mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], &mask, inner, rx); @@ -610,11 +598,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, &mask, inner, rx); if (inner_ipv == DR_RULE_IPV6) { - if (dr_mask_is_dst_addr_set(&mask.inner)) + if (DR_MASK_IS_DST_IP_SET(&mask.inner)) mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++], &mask, inner, rx); - if (dr_mask_is_src_addr_set(&mask.inner)) + if (DR_MASK_IS_SRC_IP_SET(&mask.inner)) mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], &mask, inner, rx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 7e61742e58a0..187e29b409b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -602,12 +602,34 @@ int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx, used_hw_action_num); } +static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn, + struct mlx5dr_match_spec *spec) +{ + if (spec->ip_version) { + if (spec->ip_version != 0xf) { + mlx5dr_err(dmn, + "Partial ip_version mask with src/dst IP is not supported\n"); + return -EINVAL; + } + } else if (spec->ethertype != 0xffff && + (DR_MASK_IS_SRC_IP_SET(spec) || DR_MASK_IS_DST_IP_SET(spec))) { + mlx5dr_err(dmn, + "Partial/no ethertype mask with src/dst IP is not supported\n"); + return -EINVAL; + } + + return 0; +} + int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, u8 match_criteria, struct mlx5dr_match_param *mask, struct mlx5dr_match_param *value) { - if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) { + if (value) + return 0; + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { if (mask->misc.source_port && mask->misc.source_port != 0xffff) { mlx5dr_err(dmn, "Partial mask source_port is not supported\n"); @@ -621,6 +643,14 @@ int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, } } + if ((match_criteria & DR_MATCHER_CRITERIA_OUTER) && + dr_ste_build_pre_check_spec(dmn, &mask->outer)) + return -EINVAL; + + if ((match_criteria & DR_MATCHER_CRITERIA_INNER) && + dr_ste_build_pre_check_spec(dmn, &mask->inner)) + return -EINVAL; + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 1b3d484b99be..55fcb751e24a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -798,6 +798,16 @@ struct mlx5dr_match_param { (_misc3)->icmpv4_code || \ (_misc3)->icmpv4_header_data) +#define DR_MASK_IS_SRC_IP_SET(_spec) ((_spec)->src_ip_127_96 || \ + (_spec)->src_ip_95_64 || \ + (_spec)->src_ip_63_32 || \ + (_spec)->src_ip_31_0) + +#define DR_MASK_IS_DST_IP_SET(_spec) ((_spec)->dst_ip_127_96 || \ + (_spec)->dst_ip_95_64 || \ + (_spec)->dst_ip_63_32 || \ + (_spec)->dst_ip_31_0) + struct mlx5dr_esw_caps { u64 drop_icm_address_rx; u64 drop_icm_address_tx; -- cgit v1.2.3 From ecd9c5cd46e013659e2fad433057bad1ba66888e Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 29 Dec 2021 22:22:05 +0200 Subject: net/mlx5: DR, Fix the threshold that defines when pool sync is initiated When deciding whether to start syncing and actually free all the "hot" ICM chunks, we need to consider the type of the ICM chunks that we're dealing with. For instance, the amount of available ICM for MODIFY_ACTION is significantly lower than the usual STE ICM, so the threshold should account for that - otherwise we can deplete MODIFY_ACTION memory just by creating and deleting the same modify header action in a continuous loop. This patch replaces the hard-coded threshold with a dynamic value. Fixes: 1c58651412bb ("net/mlx5: DR, ICM memory pools sync optimization") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index f496b7e9401b..e289cfdbce07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -4,7 +4,6 @@ #include "dr_types.h" #define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 -#define DR_ICM_SYNC_THRESHOLD_POOL (64 * 1024 * 1024) struct mlx5dr_icm_pool { enum mlx5dr_icm_type icm_type; @@ -324,10 +323,14 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool, static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool) { - if (pool->hot_memory_size > DR_ICM_SYNC_THRESHOLD_POOL) - return true; + int allow_hot_size; - return false; + /* sync when hot memory reaches half of the pool size */ + allow_hot_size = + mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) / 2; + + return pool->hot_memory_size > allow_hot_size; } static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) -- cgit v1.2.3 From 7f839965b2d77e1926ad08b23c51d60988f10a99 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Wed, 16 Feb 2022 11:01:04 +0200 Subject: net/mlx5: Update log_max_qp value to be 17 at most Currently, log_max_qp value is dependent on what FW reports as its max capability. In reality, due to a bug, some FWs report a value greater than 17, even though they don't support log_max_qp > 17. This FW issue led the driver to exhaust memory on startup. Thus, log_max_qp value is set to be no more than 17 regardless of what FW reports, as it was before the cited commit. Fixes: f79a609ea6bf ("net/mlx5: Update log_max_qp value to FW max capability") Signed-off-by: Maher Sanalla Reviewed-by: Avihai Horon Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 13f913c13a2d..bba72b220cc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -526,7 +526,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) /* Check log_max_qp from HCA caps to set in current profile */ if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { - prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + prof->log_max_qp = min_t(u8, 17, MLX5_CAP_GEN_MAX(dev, log_max_qp)); } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", prof->log_max_qp, -- cgit v1.2.3 From 07666c75ad17d7389b18ac0235c8cf41e1504ea8 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sat, 29 Jan 2022 01:39:24 +0200 Subject: net/mlx5: Fix wrong limitation of metadata match on ecpf Match metadata support check returns false for ecpf device. However, this support does exist for ecpf and therefore this limitation should be removed to allow feature such as stacked devices and internal port offloaded to be supported. Fixes: 92ab1eb392c6 ("net/mlx5: E-Switch, Enable vport metadata matching if firmware supports it") Signed-off-by: Ariel Levkovich Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9a7b25692505..cfcd72bad9af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2838,10 +2838,6 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) return false; - if (mlx5_core_is_ecpf_esw_manager(esw->dev) || - mlx5_ecpf_vport_exists(esw->dev)) - return false; - return true; } -- cgit v1.2.3 From be7f4b0ab149afd19514929fad824b2117d238c9 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 14 Dec 2021 03:52:53 +0200 Subject: net/mlx5: Fix tc max supported prio for nic mode Only prio 1 is supported if firmware doesn't support ignore flow level for nic mode. The offending commit removed the check wrongly. Add it back. Fixes: 9a99c8f1253a ("net/mlx5e: E-Switch, Offload all chain 0 priorities when modify header and forward action is not supported") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index df58cba37930..1e8ec4f236b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -121,6 +121,9 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) { + if (!mlx5_chains_prios_supported(chains)) + return 1; + if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; -- cgit v1.2.3 From b645e57debca846f51b3209907546ea857ddd3f5 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 24 Jan 2022 21:25:04 +0200 Subject: net/mlx5: Fix possible deadlock on rule deletion Add missing call to up_write_ref_node() which releases the semaphore in case the FTE doesn't have destinations, such in drop rule case. Fixes: 465e7baab6d9 ("net/mlx5: Fix deletion of duplicate rules") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index b628917e38e4..537c82b9aa53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2074,6 +2074,8 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) fte->node.del_hw_func = NULL; up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); + } else { + up_write_ref_node(&fte->node, false); } kfree(handle); } -- cgit v1.2.3 From 0b89429722353d112f8b8b29ca397e95fa994d27 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 2 Feb 2022 16:07:21 +0200 Subject: net/mlx5e: Fix wrong return value on ioctl EEPROM query failure The ioctl EEPROM query wrongly returns success on read failures, fix that by returning the appropriate error code. Fixes: bb64143eee8c ("net/mlx5e: Add ethtool support for dump module EEPROM") Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 57d755db1cf5..6e80585d731f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1792,7 +1792,7 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, if (size_read < 0) { netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", __func__, size_read); - return 0; + return size_read; } i += size_read; -- cgit v1.2.3 From 7eaf1f37b8817c608c4e959d69986ef459d345cd Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 31 Jan 2022 10:26:19 +0200 Subject: net/mlx5e: kTLS, Use CHECKSUM_UNNECESSARY for device-offloaded packets For RX TLS device-offloaded packets, the HW spec guarantees checksum validation for the offloaded packets, but does not define whether the CQE.checksum field matches the original packet (ciphertext) or the decrypted one (plaintext). This latitude allows architetctural improvements between generations of chips, resulting in different decisions regarding the value type of CQE.checksum. Hence, for these packets, the device driver should not make use of this CQE field. Here we block CHECKSUM_COMPLETE usage for RX TLS device-offloaded packets, and use CHECKSUM_UNNECESSARY instead. Value of the packet's tcp_hdr.csum is not modified by the HW, and it always matches the original ciphertext. Fixes: 1182f3659357 ("net/mlx5e: kTLS, Add kTLS RX HW offload support") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ee0a8f5206e3..6530d7bd5045 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1349,7 +1349,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, } /* True when explicitly set via priv flag, or XDP prog is loaded */ - if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) || + get_cqe_tls_offload(cqe)) goto csum_unnecessary; /* CQE csum doesn't cover padding octets in short ethernet -- cgit v1.2.3 From 23216d387c40b090b221ad457c95912fb47eb11e Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 4 Jan 2022 10:38:02 +0200 Subject: net/mlx5e: TC, Reject rules with drop and modify hdr action This kind of action is not supported by firmware and generates a syndrome. kernel: mlx5_core 0000:08:00.0: mlx5_cmd_check:777:(pid 102063): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x8708c3) Fixes: d7e75a325cb2 ("net/mlx5e: Add offloading of E-Switch TC pedit (header re-write) actions") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2022fa4a9598..34700cf1285e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3204,6 +3204,12 @@ actions_match_supported(struct mlx5e_priv *priv, return false; } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return false; + } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions, ct_flow, ct_clear, extack)) -- cgit v1.2.3 From 3d65492a86d4e6675734646929759138a023d914 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 17 Jan 2022 15:00:30 +0200 Subject: net/mlx5e: TC, Reject rules with forward and drop actions Such rules are redundant but allowed and passed to the driver. The driver does not support offloading such rules so return an error. Fixes: 03a9d11e6eeb ("net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads") Signed-off-by: Roi Dayan Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 34700cf1285e..b27532a9301e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3204,6 +3204,12 @@ actions_match_supported(struct mlx5e_priv *priv, return false; } + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return false; + } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); -- cgit v1.2.3 From fb7e76ea3f3b6238dda2f19a4212052d2caf00aa Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 3 Feb 2022 09:42:19 +0200 Subject: net/mlx5e: TC, Skip redundant ct clear actions Offload of ct clear action is just resetting the reg_c register. It's done by allocating modify hdr resources which is limited. Doing it multiple times is redundant and wasting modify hdr resources and if resources depleted the driver will fail offloading the rule. Ignore redundant ct clear actions after the first one. Fixes: 806401c20a0f ("net/mlx5e: CT, Fix multiple allocations and memleak of mod acts") Signed-off-by: Roi Dayan Reviewed-by: Ariel Levkovich Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index 26efa33de56f..10a40487d536 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -16,6 +16,7 @@ struct mlx5e_tc_act_parse_state { unsigned int num_actions; struct mlx5e_tc_flow *flow; struct netlink_ext_ack *extack; + bool ct_clear; bool encap; bool decap; bool mpls_push; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index 06ec30cdb269..58cc33f1363d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -27,8 +27,13 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; int err; + /* It's redundant to do ct clear more than once. */ + if (clear_action && parse_state->ct_clear) + return 0; + err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, &attr->parse_attr->mod_hdr_acts, act, parse_state->extack); @@ -40,6 +45,8 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, if (mlx5e_is_eswitch_flow(parse_state->flow)) attr->esw_attr->split_count = attr->esw_attr->out_count; + parse_state->ct_clear = clear_action; + return 0; } -- cgit v1.2.3 From 7fac0529038021919ef56a9c3218d8012f187cbb Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Tue, 1 Feb 2022 11:24:41 +0200 Subject: net/mlx5e: Add feature check for set fec counters Fec counters support is checked via the PCAM feature_cap_mask, bit 0: PPCNT_counter_group_Phy_statistical_counter_group. Add feature check to avoid faulty behavior. Fixes: 0a1498ebfa55 ("net/mlx5e: Expose FEC counters via ethtool") Signed-off-by: Lama Kayal Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 26e326fe503c..00f1d16db456 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1254,9 +1254,6 @@ static void fec_set_corrected_bits_total(struct mlx5e_priv *priv, u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - return; - MLX5_SET(ppcnt_reg, in, local_port, 1); MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); if (mlx5_core_access_reg(mdev, in, sz, ppcnt_phy_statistical, @@ -1272,6 +1269,9 @@ static void fec_set_corrected_bits_total(struct mlx5e_priv *priv, void mlx5e_stats_fec_get(struct mlx5e_priv *priv, struct ethtool_fec_stats *fec_stats) { + if (!MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) + return; + fec_set_corrected_bits_total(priv, fec_stats); fec_set_block_stats(priv, fec_stats); } -- cgit v1.2.3 From c63741b426e11062631b013c3396f5452bbc0034 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 6 Jan 2022 14:10:18 +0200 Subject: net/mlx5e: Fix MPLSoUDP encap to use MPLS action information Currently the MPLSoUDP encap builds the MPLS header using encap action information (tunnel id, ttl and tos) instead of the MPLS action information (label, ttl, tc and bos) which is wrong. Fix by storing the MPLS action information during the flow action parse and later using it to create the encap MPLS header. Fixes: f828ca6a2fb6 ("net/mlx5e: Add support for hw encapsulation of MPLS over UDP") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c | 11 +++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c | 5 ++--- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 8 ++++++++ 7 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index 10a40487d536..9cc844bd00f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -22,6 +22,7 @@ struct mlx5e_tc_act_parse_state { bool mpls_push; bool ptype_host; const struct ip_tunnel_info *tun_info; + struct mlx5e_mpls_info mpls_info; struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; int if_count; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index c614fc7fdc9c..2e615e0ba972 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -177,6 +177,12 @@ parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state, return -ENOMEM; parse_state->encap = false; + + if (parse_state->mpls_push) { + memcpy(&parse_attr->mpls_info[esw_attr->out_count], + &parse_state->mpls_info, sizeof(parse_state->mpls_info)); + parse_state->mpls_push = false; + } esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; esw_attr->out_count++; /* attr->dests[].rep is resolved when we handle encap */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c index 784fc4f68b1e..89ca88c78840 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c @@ -22,6 +22,16 @@ tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, return true; } +static void +copy_mpls_info(struct mlx5e_mpls_info *mpls_info, + const struct flow_action_entry *act) +{ + mpls_info->label = act->mpls_push.label; + mpls_info->tc = act->mpls_push.tc; + mpls_info->bos = act->mpls_push.bos; + mpls_info->ttl = act->mpls_push.ttl; +} + static int tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -29,6 +39,7 @@ tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5_flow_attr *attr) { parse_state->mpls_push = true; + copy_mpls_info(&parse_state->mpls_info, act); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index f832c26ff2c3..70b40ae384e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -35,6 +35,7 @@ enum { struct mlx5e_tc_flow_parse_attr { const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 9918ed8c059b..d39d0dae22fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -750,6 +750,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; + const struct mlx5e_mpls_info *mpls_info; unsigned long tbl_time_before = 0; struct mlx5e_encap_entry *e; struct mlx5e_encap_key key; @@ -760,6 +761,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; tun_info = parse_attr->tun_info[out_index]; + mpls_info = &parse_attr->mpls_info[out_index]; family = ip_tunnel_info_af(tun_info); key.ip_tun_key = &tun_info->key; key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); @@ -810,6 +812,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, goto out_err_init; } e->tun_info = tun_info; + memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info)); err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); if (err) goto out_err_init; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c index 60952b33b568..f40dbfcb6437 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -30,16 +30,15 @@ static int generate_ip_tun_hdr(char buf[], struct mlx5e_encap_entry *r) { const struct ip_tunnel_key *tun_key = &r->tun_info->key; + const struct mlx5e_mpls_info *mpls_info = &r->mpls_info; struct udphdr *udp = (struct udphdr *)(buf); struct mpls_shim_hdr *mpls; - u32 tun_id; - tun_id = be32_to_cpu(tunnel_id_to_key32(tun_key->tun_id)); mpls = (struct mpls_shim_hdr *)(udp + 1); *ip_proto = IPPROTO_UDP; udp->dest = tun_key->tp_dst; - *mpls = mpls_entry_encode(tun_id, tun_key->ttl, tun_key->tos, true); + *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index b01dacb6f527..b3f7520dfd08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -183,6 +183,13 @@ struct mlx5e_decap_entry { struct rcu_head rcu; }; +struct mlx5e_mpls_info { + u32 label; + u8 tc; + u8 bos; + u8 ttl; +}; + struct mlx5e_encap_entry { /* attached neigh hash entry */ struct mlx5e_neigh_hash_entry *nhe; @@ -196,6 +203,7 @@ struct mlx5e_encap_entry { struct list_head route_list; struct mlx5_pkt_reformat *pkt_reformat; const struct ip_tunnel_info *tun_info; + struct mlx5e_mpls_info mpls_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; -- cgit v1.2.3 From fdc18e4e4bded2a08638cdcd22dc087a64b9ddad Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 6 Jan 2022 14:46:24 +0200 Subject: net/mlx5e: MPLSoUDP decap, fix check for unsupported matches Currently offload of rule on bareudp device require tunnel key in order to match on mpls fields and without it the mpls fields are ignored, this is incorrect due to the fact udp tunnel doesn't have key to match on. Fix by returning error in case flow is matching on tunnel key. Fixes: 72046a91d134 ("net/mlx5e: Allow to match on mpls parameters") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc_tun_mplsoudp.c | 28 +++++++++------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c index f40dbfcb6437..c5b1617d556f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -59,37 +59,31 @@ static int parse_tunnel(struct mlx5e_priv *priv, void *headers_v) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); - struct flow_match_enc_keyid enc_keyid; struct flow_match_mpls match; void *misc2_c; void *misc2_v; - misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters_2); - misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters_2); - - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) - return 0; - - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) - return 0; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - - if (!enc_keyid.mask->keyid) - return 0; - if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) && !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP)) return -EOPNOTSUPP; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return -EOPNOTSUPP; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) + return 0; + flow_rule_match_mpls(rule, &match); /* Only support matching the first LSE */ if (match.mask->used_lses != 1) return -EOPNOTSUPP; + misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2_c, outer_first_mpls_over_udp.mpls_label, match.mask->ls[0].mpls_label); -- cgit v1.2.3 From 5ee02b7a800654ff9549807bcf0b4c9fd5cf25f9 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Mon, 21 Feb 2022 12:26:11 +0200 Subject: net/mlx5e: Add missing increment of count Add mistakenly missing increment of count variable when looping over output buffer in mlx5e_self_test(). This resolves the issue of garbage values output when querying with self test via ethtool. before: $ ethtool -t eth2 The test result is PASS The test extra info: Link Test 0 Speed Test 1768697188 Health Test 758528120 Loopback Test 3288687 after: $ ethtool -t eth2 The test result is PASS The test extra info: Link Test 0 Speed Test 0 Health Test 0 Loopback Test 0 Fixes: 7990b1b5e8bd ("net/mlx5e: loopback test is not supported in switchdev mode") Signed-off-by: Lama Kayal Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 8c9163d2c646..08a75654f5f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -334,6 +334,7 @@ void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, netdev_info(ndev, "\t[%d] %s start..\n", i, st.name); buf[count] = st.st_func(priv); netdev_info(ndev, "\t[%d] %s end: result(%lld)\n", i, st.name, buf[count]); + count++; } mutex_unlock(&priv->state_lock); -- cgit v1.2.3 From ca49df96f9f5efd4f0f1e64f7c4c0c63a3329cb9 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 21 Feb 2022 17:54:34 +0200 Subject: net/mlx5e: Fix VF min/max rate parameters interchange mistake The VF min and max rate were passed incorrectly and resulted in wrongly interchanging them. Fix the order of parameters in mlx5_esw_qos_set_vport_rate(). Fixes: d7df09f5e7b4 ("net/mlx5: E-switch, Enable vport QoS on demand") Signed-off-by: Gal Pressman Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 11bbcd5f5b8b..694c54066955 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -697,7 +697,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo } int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - u32 min_rate, u32 max_rate) + u32 max_rate, u32 min_rate) { int err; -- cgit v1.2.3 From 68af28426b3ca1bf9ba21c7d8bdd0ff639e5134c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 23 Feb 2022 11:52:37 -0600 Subject: platform/x86: amd-pmc: Set QOS during suspend on CZN w/ timer wakeup commit 59348401ebed ("platform/x86: amd-pmc: Add special handling for timer based S0i3 wakeup") adds support for using another platform timer in lieu of the RTC which doesn't work properly on some systems. This path was validated and worked well before submission. During the 5.16-rc1 merge window other patches were merged that caused this to stop working properly. When this feature was used with 5.16-rc1 or later some OEM laptops with the matching firmware requirements from that commit would shutdown instead of program a timer based wakeup. This was bisected to commit 8d89835b0467 ("PM: suspend: Do not pause cpuidle in the suspend-to-idle path"). This wasn't supposed to cause any negative impacts and also tested well on both Intel and ARM platforms. However this changed the semantics of when CPUs are allowed to be in the deepest state. For the AMD systems in question it appears this causes a firmware crash for timer based wakeup. It's hypothesized to be caused by the `amd-pmc` driver sending `OS_HINT` and all the CPUs going into a deep state while the timer is still being programmed. It's likely a firmware bug, but to avoid it don't allow setting CPUs into the deepest state while using CZN timer wakeup path. If later it's discovered that this also occurs from "regular" suspends without a timer as well or on other silicon, this may be later expanded to run in the suspend path for more scenarios. Cc: stable@vger.kernel.org # 5.16+ Suggested-by: Rafael J. Wysocki Link: https://lore.kernel.org/linux-acpi/BL1PR12MB51570F5BD05980A0DCA1F3F4E23A9@BL1PR12MB5157.namprd12.prod.outlook.com/T/#mee35f39c41a04b624700ab2621c795367f19c90e Fixes: 8d89835b0467 ("PM: suspend: Do not pause cpuidle in the suspend-to-idle path") Fixes: 23f62d7ab25b ("PM: sleep: Pause cpuidle later and resume it earlier during system transitions") Fixes: 59348401ebed ("platform/x86: amd-pmc: Add special handling for timer based S0i3 wakeup" Reviewed-by: Rafael J. Wysocki Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20220223175237.6209-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 4c72ba68b315..b1103f85a85a 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -85,6 +86,9 @@ #define PMC_MSG_DELAY_MIN_US 50 #define RESPONSE_REGISTER_LOOP_MAX 20000 +/* QoS request for letting CPUs in idle states, but not the deepest */ +#define AMD_PMC_MAX_IDLE_STATE_LATENCY 3 + #define SOC_SUBSYSTEM_IP_MAX 12 #define DELAY_MIN_US 2000 #define DELAY_MAX_US 3000 @@ -131,6 +135,7 @@ struct amd_pmc_dev { struct device *dev; struct pci_dev *rdev; struct mutex lock; /* generic mutex lock */ + struct pm_qos_request amd_pmc_pm_qos_req; #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *dbgfs_dir; #endif /* CONFIG_DEBUG_FS */ @@ -521,6 +526,14 @@ static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg) rc = rtc_alarm_irq_enable(rtc_device, 0); dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration); + /* + * Prevent CPUs from getting into deep idle states while sending OS_HINT + * which is otherwise generally safe to send when at least one of the CPUs + * is not in deep idle states. + */ + cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req, AMD_PMC_MAX_IDLE_STATE_LATENCY); + wake_up_all_idle_cpus(); + return rc; } @@ -538,24 +551,31 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev) /* Activate CZN specific RTC functionality */ if (pdev->cpu_id == AMD_CPU_ID_CZN) { rc = amd_pmc_verify_czn_rtc(pdev, &arg); - if (rc < 0) - return rc; + if (rc) + goto fail; } /* Dump the IdleMask before we send hint to SMU */ amd_pmc_idlemask_read(pdev, dev, NULL); msg = amd_pmc_get_os_hint(pdev); rc = amd_pmc_send_cmd(pdev, arg, NULL, msg, 0); - if (rc) + if (rc) { dev_err(pdev->dev, "suspend failed\n"); + goto fail; + } if (enable_stb) rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF); - if (rc) { + if (rc) { dev_err(pdev->dev, "error writing to STB\n"); - return rc; + goto fail; } + return 0; +fail: + if (pdev->cpu_id == AMD_CPU_ID_CZN) + cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req, + PM_QOS_DEFAULT_VALUE); return rc; } @@ -579,12 +599,15 @@ static int __maybe_unused amd_pmc_resume(struct device *dev) /* Write data incremented by 1 to distinguish in stb_read */ if (enable_stb) rc = amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF + 1); - if (rc) { + if (rc) dev_err(pdev->dev, "error writing to STB\n"); - return rc; - } - return 0; + /* Restore the QoS request back to defaults if it was set */ + if (pdev->cpu_id == AMD_CPU_ID_CZN) + cpu_latency_qos_update_request(&pdev->amd_pmc_pm_qos_req, + PM_QOS_DEFAULT_VALUE); + + return rc; } static const struct dev_pm_ops amd_pmc_pm_ops = { @@ -722,6 +745,7 @@ static int amd_pmc_probe(struct platform_device *pdev) amd_pmc_get_smu_version(dev); platform_set_drvdata(pdev, dev); amd_pmc_dbgfs_register(dev); + cpu_latency_qos_add_request(&dev->amd_pmc_pm_qos_req, PM_QOS_DEFAULT_VALUE); return 0; err_pci_dev_put: -- cgit v1.2.3 From 21d90aaee8d5c2a097ef41f1430d97661233ecc6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Feb 2022 11:18:48 +0100 Subject: surface: surface3_power: Fix battery readings on batteries without a serial number The battery on the 2nd hand Surface 3 which I recently bought appears to not have a serial number programmed in. This results in any I2C reads from the registers containing the serial number failing with an I2C NACK. This was causing mshw0011_bix() to fail causing the battery readings to not work at all. Ignore EREMOTEIO (I2C NACK) errors when retrieving the serial number and continue with an empty serial number to fix this. Fixes: b1f81b496b0d ("platform/x86: surface3_power: MSHW0011 rev-eng implementation") BugLink: https://github.com/linux-surface/linux-surface/issues/608 Reviewed-by: Benjamin Tissoires Reviewed-by: Maximilian Luz Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20220224101848.7219-1-hdegoede@redhat.com --- drivers/platform/surface/surface3_power.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/platform/surface/surface3_power.c b/drivers/platform/surface/surface3_power.c index abac3eec565e..444ec81ba02d 100644 --- a/drivers/platform/surface/surface3_power.c +++ b/drivers/platform/surface/surface3_power.c @@ -232,14 +232,21 @@ static int mshw0011_bix(struct mshw0011_data *cdata, struct bix *bix) } bix->last_full_charg_capacity = ret; - /* get serial number */ + /* + * Get serial number, on some devices (with unofficial replacement + * battery?) reading any of the serial number range addresses gets + * nacked in this case just leave the serial number empty. + */ ret = i2c_smbus_read_i2c_block_data(client, MSHW0011_BAT0_REG_SERIAL_NO, sizeof(buf), buf); - if (ret != sizeof(buf)) { + if (ret == -EREMOTEIO) { + /* no serial number available */ + } else if (ret != sizeof(buf)) { dev_err(&client->dev, "Error reading serial no: %d\n", ret); return ret; + } else { + snprintf(bix->serial, ARRAY_SIZE(bix->serial), "%3pE%6pE", buf + 7, buf); } - snprintf(bix->serial, ARRAY_SIZE(bix->serial), "%3pE%6pE", buf + 7, buf); /* get cycle count */ ret = i2c_smbus_read_word_data(client, MSHW0011_BAT0_REG_CYCLE_CNT); -- cgit v1.2.3 From 0f4558ae91870692ce7f509c31c9d6ee721d8cdc Mon Sep 17 00:00:00 2001 From: Marek Marczykowski-Górecki Date: Tue, 22 Feb 2022 01:18:16 +0100 Subject: Revert "xen-netback: remove 'hotplug-status' once it has served its purpose" This reverts commit 1f2565780e9b7218cf92c7630130e82dcc0fe9c2. The 'hotplug-status' node should not be removed as long as the vif device remains configured. Otherwise the xen-netback would wait for re-running the network script even if it was already called (in case of the frontent re-connecting). But also, it _should_ be removed when the vif device is destroyed (for example when unbinding the driver) - otherwise hotplug script would not configure the device whenever it re-appear. Moving removal of the 'hotplug-status' node was a workaround for nothing calling network script after xen-netback module is reloaded. But when vif interface is re-created (on xen-netback unbind/bind for example), the script should be called, regardless of who does that - currently this case is not handled by the toolstack, and requires manual script call. Keeping hotplug-status=connected to skip the call is wrong and leads to not configured interface. More discussion at https://lore.kernel.org/xen-devel/afedd7cb-a291-e773-8b0d-4db9b291fa98@ipxe.org/T/#u Signed-off-by: Marek Marczykowski-Górecki Reviewed-by: Paul Durrant Link: https://lore.kernel.org/r/20220222001817.2264967-1-marmarek@invisiblethingslab.com Signed-off-by: Jakub Kicinski --- drivers/net/xen-netback/xenbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index d24b7a7993aa..3fad58d22155 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -256,6 +256,7 @@ static void backend_disconnect(struct backend_info *be) unsigned int queue_index; xen_unregister_watchers(vif); + xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status"); #ifdef CONFIG_DEBUG_FS xenvif_debugfs_delif(vif); #endif /* CONFIG_DEBUG_FS */ @@ -675,7 +676,6 @@ static void hotplug_status_changed(struct xenbus_watch *watch, /* Not interested in this watch anymore. */ unregister_hotplug_status_watch(be); - xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status"); } kfree(str); } -- cgit v1.2.3 From e8240addd0a3919e0fd7436416afe9aa6429c484 Mon Sep 17 00:00:00 2001 From: Marek Marczykowski-Górecki Date: Tue, 22 Feb 2022 01:18:17 +0100 Subject: Revert "xen-netback: Check for hotplug-status existence before watching" This reverts commit 2afeec08ab5c86ae21952151f726bfe184f6b23d. The reasoning in the commit was wrong - the code expected to setup the watch even if 'hotplug-status' didn't exist. In fact, it relied on the watch being fired the first time - to check if maybe 'hotplug-status' is already set to 'connected'. Not registering a watch for non-existing path (which is the case if hotplug script hasn't been executed yet), made the backend not waiting for the hotplug script to execute. This in turns, made the netfront think the interface is fully operational, while in fact it was not (the vif interface on xen-netback side might not be configured yet). This was a workaround for 'hotplug-status' erroneously being removed. But since that is reverted now, the workaround is not necessary either. More discussion at https://lore.kernel.org/xen-devel/afedd7cb-a291-e773-8b0d-4db9b291fa98@ipxe.org/T/#u Signed-off-by: Marek Marczykowski-Górecki Reviewed-by: Paul Durrant Reviewed-by: Michael Brown Link: https://lore.kernel.org/r/20220222001817.2264967-2-marmarek@invisiblethingslab.com Signed-off-by: Jakub Kicinski --- drivers/net/xen-netback/xenbus.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 3fad58d22155..990360d75cb6 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -824,15 +824,11 @@ static void connect(struct backend_info *be) xenvif_carrier_on(be->vif); unregister_hotplug_status_watch(be); - if (xenbus_exists(XBT_NIL, dev->nodename, "hotplug-status")) { - err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, - NULL, hotplug_status_changed, - "%s/%s", dev->nodename, - "hotplug-status"); - if (err) - goto err; + err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL, + hotplug_status_changed, + "%s/%s", dev->nodename, "hotplug-status"); + if (!err) be->have_hotplug_status_watch = 1; - } netif_tx_wake_all_queues(be->vif->dev); -- cgit v1.2.3 From e13ad1443684f7afaff24cf207e85e97885256bd Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 23 Feb 2022 00:57:20 -0800 Subject: bnx2x: fix driver load from initrd Commit b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0") added new firmware support in the driver with maintaining older firmware compatibility. However, older firmware was not added in MODULE_FIRMWARE() which caused missing firmware files in initrd image leading to driver load failure from initrd. This patch adds MODULE_FIRMWARE() for older firmware version to have firmware files included in initrd. Fixes: b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215627 Signed-off-by: Manish Chopra Signed-off-by: Alok Prasad Signed-off-by: Ariel Elior Link: https://lore.kernel.org/r/20220223085720.12021-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 774c1f1a57c3..eedb48d945ed 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -100,6 +100,9 @@ MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FW_FILE_NAME_E1); MODULE_FIRMWARE(FW_FILE_NAME_E1H); MODULE_FIRMWARE(FW_FILE_NAME_E2); +MODULE_FIRMWARE(FW_FILE_NAME_E1_V15); +MODULE_FIRMWARE(FW_FILE_NAME_E1H_V15); +MODULE_FIRMWARE(FW_FILE_NAME_E2_V15); int bnx2x_num_queues; module_param_named(num_queues, bnx2x_num_queues, int, 0444); -- cgit v1.2.3 From 7ff57e98fb78ad94edafbdc7435f2d745e9e6bb5 Mon Sep 17 00:00:00 2001 From: Fabio M. De Francesco Date: Wed, 23 Feb 2022 11:02:52 +0100 Subject: net/smc: Use a mutex for locking "struct smc_pnettable" smc_pnetid_by_table_ib() uses read_lock() and then it calls smc_pnet_apply_ib() which, in turn, calls mutex_lock(&smc_ib_devices.mutex). read_lock() disables preemption. Therefore, the code acquires a mutex while in atomic context and it leads to a SAC bug. Fix this bug by replacing the rwlock with a mutex. Reported-and-tested-by: syzbot+4f322a6d84e991c38775@syzkaller.appspotmail.com Fixes: 64e28b52c7a6 ("net/smc: add pnet table namespace support") Confirmed-by: Tony Lu Signed-off-by: Fabio M. De Francesco Acked-by: Karsten Graul Link: https://lore.kernel.org/r/20220223100252.22562-1-fmdefrancesco@gmail.com Signed-off-by: Jakub Kicinski --- net/smc/smc_pnet.c | 42 +++++++++++++++++++++--------------------- net/smc/smc_pnet.h | 2 +- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 0599246c0376..29f0a559d884 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -113,7 +113,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) pnettable = &sn->pnettable; /* remove table entry */ - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (!pnet_name || @@ -131,7 +131,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) rc = 0; } } - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); /* if this is not the initial namespace, stop here */ if (net != &init_net) @@ -192,7 +192,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { @@ -206,7 +206,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) break; } } - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -224,7 +224,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); @@ -237,7 +237,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) break; } } - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -370,7 +370,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); rc = -EEXIST; new_netdev = true; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_ETH && !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) { @@ -385,9 +385,9 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, GFP_ATOMIC); } list_add_tail(&new_pe->list, &pnettable->pnetlist); - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); } else { - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); kfree(new_pe); goto out_put; } @@ -448,7 +448,7 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, new_pe->ib_port = ib_port; new_ibdev = true; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { @@ -458,9 +458,9 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, } if (new_ibdev) { list_add_tail(&new_pe->list, &pnettable->pnetlist); - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); } else { - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); kfree(new_pe); } return (new_ibdev) ? 0 : -EEXIST; @@ -605,7 +605,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, pnettable = &sn->pnettable; /* dump pnettable entries */ - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) continue; @@ -620,7 +620,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return idx; } @@ -864,7 +864,7 @@ int smc_pnet_net_init(struct net *net) struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev; INIT_LIST_HEAD(&pnettable->pnetlist); - rwlock_init(&pnettable->lock); + mutex_init(&pnettable->lock); INIT_LIST_HEAD(&pnetids_ndev->list); rwlock_init(&pnetids_ndev->lock); @@ -944,7 +944,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) { /* get pnetid of netdev device */ @@ -953,7 +953,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -1156,7 +1156,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) sn = net_generic(&init_net, smc_net_id); pnettable = &sn->pnettable; - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) && @@ -1166,7 +1166,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -1185,7 +1185,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) sn = net_generic(&init_net, smc_net_id); pnettable = &sn->pnettable; - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { @@ -1194,7 +1194,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 14039272f7e4..80a88eea4949 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -29,7 +29,7 @@ struct smc_link_group; * @pnetlist: List of PNETIDs */ struct smc_pnettable { - rwlock_t lock; + struct mutex lock; struct list_head pnetlist; }; -- cgit v1.2.3 From 6c0d8833a605e195ae219b5042577ce52bf71fff Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Wed, 23 Feb 2022 14:19:56 +0100 Subject: ipv6: prevent a possible race condition with lifetimes valid_lft, prefered_lft and tstamp are always accessed under the lock "lock" in other places. Reading these without taking the lock may result in inconsistencies regarding the calculation of the valid and preferred variables since decisions are taken on these fields for those variables. Signed-off-by: Niels Dossche Reviewed-by: David Ahern Signed-off-by: Niels Dossche Link: https://lore.kernel.org/r/20220223131954.6570-1-niels.dossche@ugent.be Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3f23da8c0b10..6c8ab3e6e6fe 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4998,6 +4998,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto error; + spin_lock_bh(&ifa->lock); if (!((ifa->flags&IFA_F_PERMANENT) && (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; @@ -5019,6 +5020,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } + spin_unlock_bh(&ifa->lock); if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || -- cgit v1.2.3 From d9b5ae5c1b241b91480aa30408be12fe91af834a Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 23 Feb 2022 18:34:16 +0200 Subject: openvswitch: Fix setting ipv6 fields causing hw csum failure Ipv6 ttl, label and tos fields are modified without first pulling/pushing the ipv6 header, which would have updated the hw csum (if available). This might cause csum validation when sending the packet to the stack, as can be seen in the trace below. Fix this by updating skb->csum if available. Trace resulted by ipv6 ttl dec and then sending packet to conntrack [actions: set(ipv6(hlimit=63)),ct(zone=99)]: [295241.900063] s_pf0vf2: hw csum failure [295241.923191] Call Trace: [295241.925728] [295241.927836] dump_stack+0x5c/0x80 [295241.931240] __skb_checksum_complete+0xac/0xc0 [295241.935778] nf_conntrack_tcp_packet+0x398/0xba0 [nf_conntrack] [295241.953030] nf_conntrack_in+0x498/0x5e0 [nf_conntrack] [295241.958344] __ovs_ct_lookup+0xac/0x860 [openvswitch] [295241.968532] ovs_ct_execute+0x4a7/0x7c0 [openvswitch] [295241.979167] do_execute_actions+0x54a/0xaa0 [openvswitch] [295242.001482] ovs_execute_actions+0x48/0x100 [openvswitch] [295242.006966] ovs_dp_process_packet+0x96/0x1d0 [openvswitch] [295242.012626] ovs_vport_receive+0x6c/0xc0 [openvswitch] [295242.028763] netdev_frame_hook+0xc0/0x180 [openvswitch] [295242.034074] __netif_receive_skb_core+0x2ca/0xcb0 [295242.047498] netif_receive_skb_internal+0x3e/0xc0 [295242.052291] napi_gro_receive+0xba/0xe0 [295242.056231] mlx5e_handle_rx_cqe_mpwrq_rep+0x12b/0x250 [mlx5_core] [295242.062513] mlx5e_poll_rx_cq+0xa0f/0xa30 [mlx5_core] [295242.067669] mlx5e_napi_poll+0xe1/0x6b0 [mlx5_core] [295242.077958] net_rx_action+0x149/0x3b0 [295242.086762] __do_softirq+0xd7/0x2d6 [295242.090427] irq_exit+0xf7/0x100 [295242.093748] do_IRQ+0x7f/0xd0 [295242.096806] common_interrupt+0xf/0xf [295242.100559] [295242.102750] RIP: 0033:0x7f9022e88cbd [295242.125246] RSP: 002b:00007f9022282b20 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffda [295242.132900] RAX: 0000000000000005 RBX: 0000000000000010 RCX: 0000000000000000 [295242.140120] RDX: 00007f9022282ba8 RSI: 00007f9022282a30 RDI: 00007f9014005c30 [295242.147337] RBP: 00007f9014014d60 R08: 0000000000000020 R09: 00007f90254a8340 [295242.154557] R10: 00007f9022282a28 R11: 0000000000000246 R12: 0000000000000000 [295242.161775] R13: 00007f902308c000 R14: 000000000000002b R15: 00007f9022b71f40 Fixes: 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action") Signed-off-by: Paul Blakey Link: https://lore.kernel.org/r/20220223163416.24096-1-paulb@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/checksum.h | 5 +++++ net/openvswitch/actions.c | 46 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/include/net/checksum.h b/include/net/checksum.h index 02d0c2d01014..79c67f14c448 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -142,6 +142,11 @@ static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } +static inline void csum_replace(__wsum *csum, __wsum old, __wsum new) +{ + *csum = csum_add(csum_sub(*csum, old), new); +} + struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 076774034bb9..780d9e2246f3 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -423,12 +423,43 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) +static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask) { + u8 old_ipv6_tclass = ipv6_get_dsfield(nh); + + ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12), + (__force __wsum)(ipv6_tclass << 12)); + + ipv6_change_dsfield(nh, ~mask, ipv6_tclass); +} + +static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask) +{ + u32 ofl; + + ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2]; + fl = OVS_MASKED(ofl, fl, mask); + /* Bits 21-24 are always unmasked, so this retains their values. */ - OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); - OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); - OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); + nh->flow_lbl[0] = (u8)(fl >> 16); + nh->flow_lbl[1] = (u8)(fl >> 8); + nh->flow_lbl[2] = (u8)fl; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl)); +} + +static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask) +{ + new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8), + (__force __wsum)(new_ttl << 8)); + nh->hop_limit = new_ttl; } static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, @@ -546,18 +577,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, } } if (mask->ipv6_tclass) { - ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); + set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass); flow_key->ip.tos = ipv6_get_dsfield(nh); } if (mask->ipv6_label) { - set_ipv6_fl(nh, ntohl(key->ipv6_label), + set_ipv6_fl(skb, nh, ntohl(key->ipv6_label), ntohl(mask->ipv6_label)); flow_key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); } if (mask->ipv6_hlimit) { - OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit, - mask->ipv6_hlimit); + set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit); flow_key->ip.ttl = nh->hop_limit; } return 0; -- cgit v1.2.3 From fe20371578ef640069e6ae9fa8038f60e7908565 Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Wed, 23 Feb 2022 09:53:47 -0800 Subject: Revert "i40e: Fix reset bw limit when DCB enabled with 1 TC" Revert of a patch that instead of fixing a AQ error when trying to reset BW limit introduced several regressions related to creation and managing TC. Currently there are errors when creating a TC on both PF and VF. Error log: [17428.783095] i40e 0000:3b:00.1: AQ command Config VSI BW allocation per TC failed = 14 [17428.783107] i40e 0000:3b:00.1: Failed configuring TC map 0 for VSI 391 [17428.783254] i40e 0000:3b:00.1: AQ command Config VSI BW allocation per TC failed = 14 [17428.783259] i40e 0000:3b:00.1: Unable to configure TC map 0 for VSI 391 This reverts commit 3d2504663c41104b4359a15f35670cfa82de1bbf. Fixes: 3d2504663c41 (i40e: Fix reset bw limit when DCB enabled with 1 TC) Signed-off-by: Mateusz Palczewski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20220223175347.1690692-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0c4b7dfb3b35..31b03fe78d3b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5372,15 +5372,7 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, /* There is no need to reset BW when mqprio mode is on. */ if (pf->flags & I40E_FLAG_TC_MQPRIO) return 0; - - if (!vsi->mqprio_qopt.qopt.hw) { - if (pf->flags & I40E_FLAG_DCB_ENABLED) - goto skip_reset; - - if (IS_ENABLED(CONFIG_I40E_DCB) && - i40e_dcb_hw_get_num_tc(&pf->hw) == 1) - goto skip_reset; - + if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { ret = i40e_set_bw_limit(vsi, vsi->seid, 0); if (ret) dev_info(&pf->pdev->dev, @@ -5388,8 +5380,6 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, vsi->seid); return ret; } - -skip_reset: memset(&bw_data, 0, sizeof(bw_data)); bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) -- cgit v1.2.3 From cd33bdcbead882c2e58fdb4a54a7bd75b610a452 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 23 Feb 2022 22:41:08 -0500 Subject: ping: remove pr_err from ping_lookup As Jakub noticed, prints should be avoided on the datapath. Also, as packets would never come to the else branch in ping_lookup(), remove pr_err() from ping_lookup(). Fixes: 35a79e64de29 ("ping: fix the dif and sdif check in ping_lookup") Reported-by: Jakub Kicinski Signed-off-by: Xin Long Link: https://lore.kernel.org/r/1ef3f2fcd31bd681a193b1fcf235eee1603819bd.1645674068.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/ping.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3a5994b50571..3ee947557b88 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -187,7 +187,6 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) (int)ident, &ipv6_hdr(skb)->daddr, dif); #endif } else { - pr_err("ping: protocol(%x) is not supported\n", ntohs(skb->protocol)); return NULL; } -- cgit v1.2.3 From 42404d8f1c01861b22ccfa1d70f950242720ae57 Mon Sep 17 00:00:00 2001 From: Mauri Sandberg Date: Wed, 23 Feb 2022 16:23:37 +0200 Subject: net: mv643xx_eth: process retval from of_get_mac_address Obtaining a MAC address may be deferred in cases when the MAC is stored in an NVMEM block, for example, and it may not be ready upon the first retrieval attempt and return EPROBE_DEFER. It is also possible that a port that does not rely on NVMEM has been already created when getting the defer request. Thus, also the resources allocated previously must be freed when doing a roll-back. Fixes: 76723bca2802 ("net: mv643xx_eth: add DT parsing support") Signed-off-by: Mauri Sandberg Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220223142337.41757-1-maukka@ext.kapsi.fi Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mv643xx_eth.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 105247582684..143ca8be5eb5 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2704,6 +2704,16 @@ MODULE_DEVICE_TABLE(of, mv643xx_eth_shared_ids); static struct platform_device *port_platdev[3]; +static void mv643xx_eth_shared_of_remove(void) +{ + int n; + + for (n = 0; n < 3; n++) { + platform_device_del(port_platdev[n]); + port_platdev[n] = NULL; + } +} + static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, struct device_node *pnp) { @@ -2740,7 +2750,9 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, return -EINVAL; } - of_get_mac_address(pnp, ppd.mac_addr); + ret = of_get_mac_address(pnp, ppd.mac_addr); + if (ret) + return ret; mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size); mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr); @@ -2804,21 +2816,13 @@ static int mv643xx_eth_shared_of_probe(struct platform_device *pdev) ret = mv643xx_eth_shared_of_add_port(pdev, pnp); if (ret) { of_node_put(pnp); + mv643xx_eth_shared_of_remove(); return ret; } } return 0; } -static void mv643xx_eth_shared_of_remove(void) -{ - int n; - - for (n = 0; n < 3; n++) { - platform_device_del(port_platdev[n]); - port_platdev[n] = NULL; - } -} #else static inline int mv643xx_eth_shared_of_probe(struct platform_device *pdev) { -- cgit v1.2.3