From 0764e365dacd0b8f75c1736f9236be280649bd18 Mon Sep 17 00:00:00 2001 From: Clément Bœsch Date: Sun, 5 Sep 2021 02:20:27 +0200 Subject: arm64: dts: allwinner: h5: NanoPI Neo 2: Fix ethernet node RX and TX delay are provided by ethernet PHY. Reflect that in ethernet node. Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes") Signed-off-by: Clément Bœsch Reviewed-by: Jernej Skrabec Reviewed-by: Andrew Lunn Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210905002027.171984-1-u@pkh.me --- arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts index 02f8e72f0cad..05486cccee1c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts @@ -75,7 +75,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3 From 3f4b57ad07d9237acf1b8cff3f8bf530cacef87a Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 20 Sep 2021 16:49:39 +0200 Subject: ASoC: pcm512x: Mend accesses to the I2S_1 and I2S_2 registers Commit 25d27c4f68d2 ("ASoC: pcm512x: Add support for more data formats") breaks the TSE-850 device, which is using a pcm5142 in I2S and CBM_CFS mode (maybe not relevant). Without this fix, the result is: pcm512x 0-004c: Failed to set data format: -16 And after that, no sound. This fix is not 100% correct. The datasheet of at least the pcm5142 states that four bits (0xcc) in the I2S_1 register are "RSV" ("Reserved. Do not access.") and no hint is given as to what the initial values are supposed to be. So, specifying defaults for these bits is wrong. But perhaps better than a broken driver? Fixes: 25d27c4f68d2 ("ASoC: pcm512x: Add support for more data formats") Cc: Liam Girdwood Cc: Mark Brown Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Kirill Marinushkin Cc: Peter Ujfalusi Cc: alsa-devel@alsa-project.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Rosin Signed-off-by: Peter Ujfalusi Reviewed-by: Peter Ujfalusi Link: https://lore.kernel.org/r/2d221984-7a2e-7006-0f8a-ffb5f64ee885@axentia.se Signed-off-by: Mark Brown --- sound/soc/codecs/pcm512x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index 4dc844f3c1fc..60dee41816dc 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -116,6 +116,8 @@ static const struct reg_default pcm512x_reg_defaults[] = { { PCM512x_FS_SPEED_MODE, 0x00 }, { PCM512x_IDAC_1, 0x01 }, { PCM512x_IDAC_2, 0x00 }, + { PCM512x_I2S_1, 0x02 }, + { PCM512x_I2S_2, 0x00 }, }; static bool pcm512x_readable(struct device *dev, unsigned int reg) -- cgit v1.2.3 From 74b7ee0e7b61838a0a161a84d105aeff0d042646 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 10 Sep 2021 17:18:30 +0800 Subject: ASoC: fsl_xcvr: Fix channel swap issue with ARC With pause and resume test for ARC, there is occasionally channel swap issue. The reason is that currently driver set the DPATH out of reset first, then start the DMA, the first data got from FIFO may not be the Left channel. Moving DPATH out of reset operation after the dma enablement to fix this issue. Fixes: 28564486866f ("ASoC: fsl_xcvr: Add XCVR ASoC CPU DAI driver") Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1631265510-27384-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_xcvr.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c index 7ba2fd15132d..d0556c79fdb1 100644 --- a/sound/soc/fsl/fsl_xcvr.c +++ b/sound/soc/fsl/fsl_xcvr.c @@ -487,8 +487,9 @@ static int fsl_xcvr_prepare(struct snd_pcm_substream *substream, return ret; } - /* clear DPATH RESET */ + /* set DPATH RESET */ m_ctl |= FSL_XCVR_EXT_CTRL_DPTH_RESET(tx); + v_ctl |= FSL_XCVR_EXT_CTRL_DPTH_RESET(tx); ret = regmap_update_bits(xcvr->regmap, FSL_XCVR_EXT_CTRL, m_ctl, v_ctl); if (ret < 0) { dev_err(dai->dev, "Error while setting EXT_CTRL: %d\n", ret); @@ -590,10 +591,6 @@ static void fsl_xcvr_shutdown(struct snd_pcm_substream *substream, val |= FSL_XCVR_EXT_CTRL_CMDC_RESET(tx); } - /* set DPATH RESET */ - mask |= FSL_XCVR_EXT_CTRL_DPTH_RESET(tx); - val |= FSL_XCVR_EXT_CTRL_DPTH_RESET(tx); - ret = regmap_update_bits(xcvr->regmap, FSL_XCVR_EXT_CTRL, mask, val); if (ret < 0) { dev_err(dai->dev, "Err setting DPATH RESET: %d\n", ret); @@ -643,6 +640,16 @@ static int fsl_xcvr_trigger(struct snd_pcm_substream *substream, int cmd, dev_err(dai->dev, "Failed to enable DMA: %d\n", ret); return ret; } + + /* clear DPATH RESET */ + ret = regmap_update_bits(xcvr->regmap, FSL_XCVR_EXT_CTRL, + FSL_XCVR_EXT_CTRL_DPTH_RESET(tx), + 0); + if (ret < 0) { + dev_err(dai->dev, "Failed to clear DPATH RESET: %d\n", ret); + return ret; + } + break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: -- cgit v1.2.3 From 55dd7e059098ce4bd0a55c251cb78e74604abb57 Mon Sep 17 00:00:00 2001 From: Bastien Roucariès Date: Thu, 16 Sep 2021 08:17:21 +0000 Subject: ARM: dts: sun7i: A20-olinuxino-lime2: Fix ethernet phy-mode Commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") sets the RX/TX delay according to the phy-mode property in the device tree. For the A20-olinuxino-lime2 board this is "rgmii", which is the wrong setting. Following the example of a900cac3750b ("ARM: dts: sun7i: a20: bananapro: Fix ethernet phy-mode") the phy-mode is changed to "rgmii-id" which gets the Ethernet working again on this board. Signed-off-by: Bastien Roucariès Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210916081721.237137-1-rouca@debian.org --- arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts index 8077f1716fbc..ecb91fb899ff 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts @@ -112,7 +112,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3 From ceef3240f9b7e592dd8d10d619c312c7336117fa Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 24 Sep 2021 20:49:56 +0100 Subject: ASoC: pcm179x: Add missing entries SPI to device ID table Currently autoloading for SPI devices does not use the DT ID table, it uses SPI modalises. Supporting OF modalises is going to be difficult if not impractical, an attempt was made but has been reverted, so ensure that module autoloading works for this driver by adding SPI IDs for parts that only have a compatible listed. Fixes: 96c8395e2166 ("spi: Revert modalias changes") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210924194956.46079-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/pcm179x-spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/pcm179x-spi.c b/sound/soc/codecs/pcm179x-spi.c index 0a542924ec5f..ebf63ea90a1c 100644 --- a/sound/soc/codecs/pcm179x-spi.c +++ b/sound/soc/codecs/pcm179x-spi.c @@ -36,6 +36,7 @@ static const struct of_device_id pcm179x_of_match[] = { MODULE_DEVICE_TABLE(of, pcm179x_of_match); static const struct spi_device_id pcm179x_spi_ids[] = { + { "pcm1792a", 0 }, { "pcm179x", 0 }, { }, }; -- cgit v1.2.3 From 0cc3687eadd0971d5d38ff90d14819d88f854960 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 24 Sep 2021 20:48:44 +0100 Subject: ASoC: cs4341: Add SPI device ID table Currently autoloading for SPI devices does not use the DT ID table, it uses SPI modalises. Supporting OF modalises is going to be difficult if not impractical, an attempt was made but has been reverted, so ensure that module autoloading works for this driver by adding SPI IDs for parts that only have a compatible listed. Fixes: 96c8395e2166 ("spi: Revert modalias changes") Signed-off-by: Mark Brown Cc: patches@opensource.cirrus.com Reviewed-by: Charles Keepax Link: https://lore.kernel.org/r/20210924194844.45974-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/cs4341.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/codecs/cs4341.c b/sound/soc/codecs/cs4341.c index 7d3e54d8eef3..29d05e32d341 100644 --- a/sound/soc/codecs/cs4341.c +++ b/sound/soc/codecs/cs4341.c @@ -305,12 +305,19 @@ static int cs4341_spi_probe(struct spi_device *spi) return cs4341_probe(&spi->dev); } +static const struct spi_device_id cs4341_spi_ids[] = { + { "cs4341a" }, + { } +}; +MODULE_DEVICE_TABLE(spi, cs4341_spi_ids); + static struct spi_driver cs4341_spi_driver = { .driver = { .name = "cs4341-spi", .of_match_table = of_match_ptr(cs4341_dt_ids), }, .probe = cs4341_spi_probe, + .id_table = cs4341_spi_ids, }; #endif -- cgit v1.2.3 From 25b5476a294cd5f7c7730f334f6b400d30bb783d Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 10 Sep 2021 08:04:20 +0000 Subject: KVM: s390: Function documentation fixes The latest compile changes pointed us to a few instances where we use the kernel documentation style but don't explain all variables or don't adhere to it 100%. It's easy to fix so let's do that. Signed-off-by: Janosch Frank Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 12 ++++++++++++ arch/s390/kvm/intercept.c | 4 +++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index b9f85b2dc053..6af59c59cc1b 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -894,6 +894,11 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, /** * guest_translate_address - translate guest logical into guest absolute address + * @vcpu: virtual cpu + * @gva: Guest virtual address + * @ar: Access register + * @gpa: Guest physical address + * @mode: Translation access mode * * Parameter semantics are the same as the ones from guest_translate. * The memory contents at the guest address are not changed. @@ -934,6 +939,11 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, /** * check_gva_range - test a range of guest virtual addresses for accessibility + * @vcpu: virtual cpu + * @gva: Guest virtual address + * @ar: Access register + * @length: Length of test range + * @mode: Translation access mode */ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long length, enum gacc_mode mode) @@ -956,6 +966,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, /** * kvm_s390_check_low_addr_prot_real - check for low-address protection + * @vcpu: virtual cpu * @gra: Guest real address * * Checks whether an address is subject to low-address protection and set @@ -979,6 +990,7 @@ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) * @pgt: pointer to the beginning of the page table for the given address if * successful (return value 0), or to the first invalid DAT entry in * case of exceptions (return value > 0) + * @dat_protection: referenced memory is write protected * @fake: pgt references contiguous guest memory block, not a pgtable */ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 72b25b7cc6ae..2bd8f854f1b4 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -269,6 +269,7 @@ static int handle_prog(struct kvm_vcpu *vcpu) /** * handle_external_interrupt - used for external interruption interceptions + * @vcpu: virtual cpu * * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if * the new PSW does not have external interrupts disabled. In the first case, @@ -315,7 +316,8 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) } /** - * Handle MOVE PAGE partial execution interception. + * handle_mvpg_pei - Handle MOVE PAGE partial execution interception. + * @vcpu: virtual cpu * * This interception can only happen for guests with DAT disabled and * addresses that are currently not mapped in the host. Thus we try to -- cgit v1.2.3 From 42871e95a3afea8956d8cc567ea725b33a837775 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 29 Sep 2021 22:15:12 +0200 Subject: ASoC: nau8824: Fix headphone vs headset, button-press detection no longer working Commit 1d25684e2251 ("ASoC: nau8824: Fix open coded prefix handling") replaced the nau8824_dapm_enable_pin() helper with direct calls to snd_soc_dapm_enable_pin(), but the helper was using snd_soc_dapm_force_enable_pin() and not forcing the MICBIAS + SAR supplies on breaks headphone vs headset and button-press detection. Replace the snd_soc_dapm_enable_pin() calls with snd_soc_dapm_force_enable_pin() to fix this. Cc: stable@vger.kernel.org Fixes: 1d25684e2251 ("ASoC: nau8824: Fix open coded prefix handling") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210929201512.460360-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/nau8824.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index db88be48c998..f946ef65a4c1 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -867,8 +867,8 @@ static void nau8824_jdet_work(struct work_struct *work) struct regmap *regmap = nau8824->regmap; int adc_value, event = 0, event_mask = 0; - snd_soc_dapm_enable_pin(dapm, "MICBIAS"); - snd_soc_dapm_enable_pin(dapm, "SAR"); + snd_soc_dapm_force_enable_pin(dapm, "MICBIAS"); + snd_soc_dapm_force_enable_pin(dapm, "SAR"); snd_soc_dapm_sync(dapm); msleep(100); -- cgit v1.2.3 From 636707e593120c9fa35f6a908c0d052f6154910d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2021 13:11:20 +0200 Subject: mac80211: mesh: fix HE operation element length check The length check here was bad, if the length doesn't at least include the length of the fixed part, we cannot call ieee80211_he_oper_size() to determine the total. Fix this, and convert to cfg80211_find_ext_elem() while at it. Cc: stable@vger.kernel.org Fixes: 70debba3ab7d ("mac80211: save HE oper info in BSS config for mesh") Link: https://lore.kernel.org/r/20210930131120.b0f940976c56.I954e1be55e9f87cc303165bff5c906afe1e54648@changeid Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 97095b7c9c64..5dcfd53a4ab6 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -672,7 +672,7 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata, u8 *ie, u8 ie_len) { struct ieee80211_supported_band *sband; - const u8 *cap; + const struct element *cap; const struct ieee80211_he_operation *he_oper = NULL; sband = ieee80211_get_sband(sdata); @@ -687,9 +687,10 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.he_support = true; - cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ie, ie_len); - if (cap && cap[1] >= ieee80211_he_oper_size(&cap[3])) - he_oper = (void *)(cap + 3); + cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ie_len); + if (cap && cap->datalen >= 1 + sizeof(*he_oper) && + cap->datalen >= 1 + ieee80211_he_oper_size(cap->data + 1)) + he_oper = (void *)(cap->data + 1); if (he_oper) sdata->vif.bss_conf.he_oper.params = -- cgit v1.2.3 From a2083eeb119fb9307258baea9b7c243ca9a2e0b6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2021 13:11:21 +0200 Subject: cfg80211: scan: fix RCU in cfg80211_add_nontrans_list() The SSID pointer is pointing to RCU protected data, so we need to have it under rcu_read_lock() for the entire use. Fix this. Cc: stable@vger.kernel.org Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Link: https://lore.kernel.org/r/20210930131120.6ddfc603aa1d.I2137344c4e2426525b1a8e4ce5fca82f8ecbfe7e@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 11c68b159324..adc0d14cfd86 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -418,14 +418,17 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, } ssid_len = ssid[1]; ssid = ssid + 2; - rcu_read_unlock(); /* check if nontrans_bss is in the list */ list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) { - if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) + if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) { + rcu_read_unlock(); return 0; + } } + rcu_read_unlock(); + /* add to the list */ list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list); return 0; -- cgit v1.2.3 From f33eb7f29c16ba78db3221ee02346fd832274cdd Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Tue, 14 Sep 2021 15:11:21 -0700 Subject: reset: brcmstb-rescal: fix incorrect polarity of status bit The readl_poll_timeout() should complete when the status bit is a 1, not 0. Fixes: 4cf176e52397 ("reset: Add Broadcom STB RESCAL reset controller") Signed-off-by: Jim Quinlan Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20210914221122.62315-1-f.fainelli@gmail.com Signed-off-by: Philipp Zabel --- drivers/reset/reset-brcmstb-rescal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/reset/reset-brcmstb-rescal.c b/drivers/reset/reset-brcmstb-rescal.c index b6f074d6a65f..433fa0c40e47 100644 --- a/drivers/reset/reset-brcmstb-rescal.c +++ b/drivers/reset/reset-brcmstb-rescal.c @@ -38,7 +38,7 @@ static int brcm_rescal_reset_set(struct reset_controller_dev *rcdev, } ret = readl_poll_timeout(base + BRCM_RESCAL_STATUS, reg, - !(reg & BRCM_RESCAL_STATUS_BIT), 100, 1000); + (reg & BRCM_RESCAL_STATUS_BIT), 100, 1000); if (ret) { dev_err(data->dev, "time out on SATA/PCIe rescal\n"); return ret; -- cgit v1.2.3 From 4af160707d7197d671a5b0ad9899383b6cb7c067 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 14 Sep 2021 11:15:59 +0200 Subject: reset: pistachio: Re-enable driver selection After the retirement of MACH_PISTACHIO, the Pistachio Reset Driver is no longer auto-enabled when building a kernel for Pistachio systems. Worse, the driver cannot be enabled by the user at all (unless compile-testing), as the config symbol is invisible. Fix this partially by making the symbol visible again when compiling for MIPS, and dropping the useless default. The user still has to enable the driver manually when building a kernel for Pistachio systems, though. Fixes: 104f942b2832ab13 ("MIPS: Retire MACH_PISTACHIO") Signed-off-by: Geert Uytterhoeven Reviewed-by: Rahul Bedarkar Reviewed-by: Jiaxun Yang Link: https://lore.kernel.org/r/2c399e52540536df9c4006e46ef93fbccdde88db.1631610825.git.geert+renesas@glider.be Signed-off-by: Philipp Zabel --- drivers/reset/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index be799a5abf8a..b0056ae5d463 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -147,8 +147,8 @@ config RESET_OXNAS bool config RESET_PISTACHIO - bool "Pistachio Reset Driver" if COMPILE_TEST - default MACH_PISTACHIO + bool "Pistachio Reset Driver" + depends on MIPS || COMPILE_TEST help This enables the reset driver for ImgTec Pistachio SoCs. -- cgit v1.2.3 From c045ceb5a145d2a9a4bf33cbc55185ddf99f60ab Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Wed, 15 Sep 2021 11:55:14 +0300 Subject: reset: tegra-bpmp: Handle errors in BPMP response The return value from tegra_bpmp_transfer indicates the success or failure of the IPC transaction with BPMP. If the transaction succeeded, we also need to check the actual command's result code. Add code to do this. Signed-off-by: Mikko Perttunen Link: https://lore.kernel.org/r/20210915085517.1669675-2-mperttunen@nvidia.com Signed-off-by: Philipp Zabel --- drivers/reset/tegra/reset-bpmp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/reset/tegra/reset-bpmp.c b/drivers/reset/tegra/reset-bpmp.c index 24d3395964cc..4c5bba52b105 100644 --- a/drivers/reset/tegra/reset-bpmp.c +++ b/drivers/reset/tegra/reset-bpmp.c @@ -20,6 +20,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc); struct mrq_reset_request request; struct tegra_bpmp_message msg; + int err; memset(&request, 0, sizeof(request)); request.cmd = command; @@ -30,7 +31,13 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, msg.tx.data = &request; msg.tx.size = sizeof(request); - return tegra_bpmp_transfer(bpmp, &msg); + err = tegra_bpmp_transfer(bpmp, &msg); + if (err) + return err; + if (msg.rx.ret) + return -EINVAL; + + return 0; } static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc, -- cgit v1.2.3 From 3ad60b4b3570937f3278509fe6797a5093ce53f8 Mon Sep 17 00:00:00 2001 From: Paweł Anikiel Date: Mon, 20 Sep 2021 14:41:41 +0200 Subject: reset: socfpga: add empty driver allowing consumers to probe The early reset driver doesn't ever probe, which causes consuming devices to be unable to probe. Add an empty driver to set this device as available, allowing consumers to probe. Signed-off-by: Paweł Anikiel Link: https://lore.kernel.org/r/20210920124141.1166544-4-pan@semihalf.com Signed-off-by: Philipp Zabel --- drivers/reset/reset-socfpga.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c index 2a72f861f798..8c6492e5693c 100644 --- a/drivers/reset/reset-socfpga.c +++ b/drivers/reset/reset-socfpga.c @@ -92,3 +92,29 @@ void __init socfpga_reset_init(void) for_each_matching_node(np, socfpga_early_reset_dt_ids) a10_reset_init(np); } + +/* + * The early driver is problematic, because it doesn't register + * itself as a driver. This causes certain device links to prevent + * consumer devices from probing. The hacky solution is to register + * an empty driver, whose only job is to attach itself to the reset + * manager and call probe. + */ +static const struct of_device_id socfpga_reset_dt_ids[] = { + { .compatible = "altr,rst-mgr", }, + { /* sentinel */ }, +}; + +static int reset_simple_probe(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver reset_socfpga_driver = { + .probe = reset_simple_probe, + .driver = { + .name = "socfpga-reset", + .of_match_table = socfpga_reset_dt_ids, + }, +}; +builtin_platform_driver(reset_socfpga_driver); -- cgit v1.2.3 From 1d58a17ef54599506d44c45ac95be27273a4d2b1 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 5 Oct 2021 10:01:41 +0100 Subject: KVM: arm64: Fix host stage-2 PGD refcount The KVM page-table library refcounts the pages of concatenated stage-2 PGDs individually. However, when running KVM in protected mode, the host's stage-2 PGD is currently managed by EL2 as a single high-order compound page, which can cause the refcount of the tail pages to reach 0 when they shouldn't, hence corrupting the page-table. Fix this by introducing a new hyp_split_page() helper in the EL2 page allocator (matching the kernel's split_page() function), and make use of it from host_s2_zalloc_pages_exact(). Fixes: 1025c8c0c6ac ("KVM: arm64: Wrap the host with a stage 2") Acked-by: Will Deacon Suggested-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005090155.734578-5-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 1 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 13 ++++++++++++- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index fb0f523d1492..0a048dc06a7d 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -24,6 +24,7 @@ struct hyp_pool { /* Allocation */ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); +void hyp_split_page(struct hyp_page *page); void hyp_get_page(struct hyp_pool *pool, void *addr); void hyp_put_page(struct hyp_pool *pool, void *addr); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index bacd493a4eac..34eeb524b686 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -35,7 +35,18 @@ const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { - return hyp_alloc_pages(&host_s2_pool, get_order(size)); + void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size)); + + hyp_split_page(hyp_virt_to_page(addr)); + + /* + * The size of concatenated PGDs is always a power of two of PAGE_SIZE, + * so there should be no need to free any of the tail pages to make the + * allocation exact. + */ + WARN_ON(size != (PAGE_SIZE << get_order(size))); + + return addr; } static void *host_s2_zalloc_page(void *pool) diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 41fc25bdfb34..a6e874e61a40 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -193,6 +193,20 @@ void hyp_get_page(struct hyp_pool *pool, void *addr) hyp_spin_unlock(&pool->lock); } +void hyp_split_page(struct hyp_page *p) +{ + unsigned short order = p->order; + unsigned int i; + + p->order = 0; + for (i = 1; i < (1 << order); i++) { + struct hyp_page *tail = p + i; + + tail->order = 0; + hyp_set_page_refcounted(tail); + } +} + void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) { unsigned short i = order; -- cgit v1.2.3 From 7615c2a514788559c6684234b8fc27f3a843c2c6 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 5 Oct 2021 10:01:42 +0100 Subject: KVM: arm64: Report corrupted refcount at EL2 Some of the refcount manipulation helpers used at EL2 are instrumented to catch a corrupted state, but not all of them are treated equally. Let's make things more consistent by instrumenting hyp_page_ref_dec_and_test() as well. Acked-by: Will Deacon Suggested-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005090155.734578-6-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index a6e874e61a40..0bd7701ad1df 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -152,6 +152,7 @@ static inline void hyp_page_ref_inc(struct hyp_page *p) static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) { + BUG_ON(!p->refcount); p->refcount--; return (p->refcount == 0); } -- cgit v1.2.3 From 6e6a8ef088e1222cb1250942f51ad9c1ab219ab2 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 5 Oct 2021 13:20:31 +0100 Subject: KVM: arm64: Release mmap_lock when using VM_SHARED with MTE VM_SHARED mappings are currently forbidden in a memslot with MTE to prevent two VMs racing to sanitise the same page. However, this check is performed while holding current->mm's mmap_lock, but fails to release it. Fix this by releasing the lock when needed. Fixes: ea7fc1bb1cd1 ("KVM: arm64: Introduce MTE VM feature") Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005122031.809857-1-qperret@google.com --- arch/arm64/kvm/mmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1a94a7ca48f2..69bd1732a299 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1529,8 +1529,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * when updating the PG_mte_tagged page flag, see * sanitise_mte_tags for more details. */ - if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) - return -EINVAL; + if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) { + ret = -EINVAL; + break; + } if (vma->vm_flags & VM_PFNMAP) { /* IO region dirty page logging not allowed */ -- cgit v1.2.3 From e93c7d8e8c4cf80c6afe56e71c83c1cd31b4fce1 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 5 Oct 2021 13:23:02 -0500 Subject: RDMA/irdma: Process extended CQ entries correctly The valid bit for extended CQE's written by HW is retrieved from the incorrect quad-word. This leads to missed completions for any UD traffic particularly after a wrap-around. Get the valid bit for extended CQE's from the correct quad-word in the descriptor. Fixes: 551c46edc769 ("RDMA/irdma: Add user/kernel shared libraries") Link: https://lore.kernel.org/r/20211005182302.374-1-shiraz.saleem@intel.com Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/uk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 5fb92de1f015..9b544a3b1288 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1092,12 +1092,12 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) if (cq->avoid_mem_cflct) { ext_cqe = (__le64 *)((u8 *)cqe + 32); get_64bit_val(ext_cqe, 24, &qword7); - polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); } else { peek_head = (cq->cq_ring.head + 1) % cq->cq_ring.size; ext_cqe = cq->cq_base[peek_head].buf; get_64bit_val(ext_cqe, 24, &qword7); - polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); if (!peek_head) polarity ^= 1; } -- cgit v1.2.3 From 1ab52ac1e9bc9391f592c9fa8340a6e3e9c36286 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 6 Oct 2021 12:31:53 +0300 Subject: RDMA/mlx5: Set user priority for DCT Currently, the driver doesn't set the PCP-based priority for DCT, hence DCT response packets are transmitted without user priority. Fix it by setting user provided priority in the eth_prio field in the DCT context, which in turn sets the value in the transmitted packet. Fixes: 776a3906b692 ("IB/mlx5: Add support for DC target QP") Link: https://lore.kernel.org/r/5fd2d94a13f5742d8803c218927322257d53205c.1633512672.git.leonro@nvidia.com Signed-off-by: Patrisious Haddad Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index b2fca110346c..e5abbcfc1d57 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4458,6 +4458,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, mtu, attr->path_mtu); MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index); MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit); + if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) + MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7); err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in, MLX5_ST_SZ_BYTES(create_dct_in), out, -- cgit v1.2.3 From db0767b8a6e620b99459d2e688c1983c2e5add0d Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Thu, 7 Oct 2021 19:20:19 +0530 Subject: ASoC: wcd938x: Fix jack detection issue This patch is to fix audio 3.5mm jack detection failure on wcd938x codec based target. Fixes: bcee7ed09b8e (ASoC: codecs: wcd938x: add Multi Button Headset Control support) Signed-off-by: Venkata Prasad Potturu Signed-off-by: Srinivasa Rao Mandadapu Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/1633614619-27026-1-git-send-email-srivasam@codeaurora.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index f0daf8defcf1..52de7d14b139 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -4144,10 +4144,10 @@ static int wcd938x_codec_set_jack(struct snd_soc_component *comp, { struct wcd938x_priv *wcd = dev_get_drvdata(comp->dev); - if (!jack) + if (jack) return wcd_mbhc_start(wcd->wcd_mbhc, &wcd->mbhc_cfg, jack); - - wcd_mbhc_stop(wcd->wcd_mbhc); + else + wcd_mbhc_stop(wcd->wcd_mbhc); return 0; } -- cgit v1.2.3 From 2577b868a48ef3601116908738efbe570451e605 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 6 Oct 2021 18:04:25 +0300 Subject: ASoC: Intel: bytcht_es8316: Get platform data via dev_get_platdata() Access to platform data via dev_get_platdata() getter to make code cleaner. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20211006150428.16434-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcht_es8316.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 055248f104b2..b1f9c9cb3355 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -456,12 +456,12 @@ static const struct dmi_system_id byt_cht_es8316_quirk_table[] = { static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; static const char * const mic_name[] = { "in1", "in2" }; + struct snd_soc_acpi_mach *mach = dev_get_platdata(dev); struct property_entry props[MAX_NO_PROPS] = {}; struct byt_cht_es8316_private *priv; const struct dmi_system_id *dmi_id; - struct device *dev = &pdev->dev; - struct snd_soc_acpi_mach *mach; struct fwnode_handle *fwnode; const char *platform_name; struct acpi_device *adev; @@ -476,7 +476,6 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - mach = dev->platform_data; /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_cht_es8316_dais); i++) { if (!strcmp(byt_cht_es8316_dais[i].codecs->name, -- cgit v1.2.3 From 6f32c521061b704c0198be3ba9834f5a64ea5605 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 6 Oct 2021 18:04:26 +0300 Subject: ASoC: Intel: bytcht_es8316: Use temporary variable for struct device Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211006150428.16434-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcht_es8316.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index b1f9c9cb3355..efd71e6e42b3 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -493,7 +493,7 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) put_device(&adev->dev); byt_cht_es8316_dais[dai_index].codecs->name = codec_name; } else { - dev_err(&pdev->dev, "Error cannot find '%s' dev\n", mach->id); + dev_err(dev, "Error cannot find '%s' dev\n", mach->id); return -ENXIO; } @@ -596,7 +596,7 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) byt_cht_es8316_card.long_name = long_name; #endif - sof_parent = snd_soc_acpi_sof_parent(&pdev->dev); + sof_parent = snd_soc_acpi_sof_parent(dev); /* set card and driver name */ if (sof_parent) { -- cgit v1.2.3 From 10f4a96543b744c8cc7ef8b0799af21d911dd37d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 6 Oct 2021 18:04:27 +0300 Subject: ASoC: Intel: bytcht_es8316: Switch to use gpiod_get_optional() First of all, replace indexed API by plain one since we have index 0. Second, switch to optional variant and drop duplicated code. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211006150428.16434-3-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcht_es8316.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index efd71e6e42b3..421a04d96d84 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -566,16 +566,12 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) devm_acpi_dev_add_driver_gpios(codec_dev, byt_cht_es8316_gpios); priv->speaker_en_gpio = - gpiod_get_index(codec_dev, "speaker-enable", 0, - /* see comment in byt_cht_es8316_resume */ - GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_NONEXCLUSIVE); - + gpiod_get_optional(codec_dev, "speaker-enable", + /* see comment in byt_cht_es8316_resume() */ + GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_NONEXCLUSIVE); if (IS_ERR(priv->speaker_en_gpio)) { ret = PTR_ERR(priv->speaker_en_gpio); switch (ret) { - case -ENOENT: - priv->speaker_en_gpio = NULL; - break; default: dev_err(dev, "get speaker GPIO failed: %d\n", ret); fallthrough; -- cgit v1.2.3 From c25d4546ca452b2e8c03bc735e4c65bc6dd751dd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 6 Oct 2021 18:04:28 +0300 Subject: ASoC: Intel: bytcht_es8316: Utilize dev_err_probe() to avoid log saturation dev_err_probe() avoids printing into log when the deferred probe is invoked. This is possible when clock provider is pending to appear. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211006150428.16434-4-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcht_es8316.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 421a04d96d84..4d313d0d0f23 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -532,11 +532,8 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) /* get the clock */ priv->mclk = devm_clk_get(dev, "pmc_plt_clk_3"); - if (IS_ERR(priv->mclk)) { - ret = PTR_ERR(priv->mclk); - dev_err(dev, "clk_get pmc_plt_clk_3 failed: %d\n", ret); - return ret; - } + if (IS_ERR(priv->mclk)) + return dev_err_probe(dev, PTR_ERR(priv->mclk), "clk_get pmc_plt_clk_3 failed\n"); /* get speaker enable GPIO */ codec_dev = acpi_get_first_physical_node(adev); @@ -570,14 +567,9 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) /* see comment in byt_cht_es8316_resume() */ GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_NONEXCLUSIVE); if (IS_ERR(priv->speaker_en_gpio)) { - ret = PTR_ERR(priv->speaker_en_gpio); - switch (ret) { - default: - dev_err(dev, "get speaker GPIO failed: %d\n", ret); - fallthrough; - case -EPROBE_DEFER: - goto err_put_codec; - } + ret = dev_err_probe(dev, PTR_ERR(priv->speaker_en_gpio), + "get speaker GPIO failed\n"); + goto err_put_codec; } snprintf(components_string, sizeof(components_string), -- cgit v1.2.3 From 5af82c81b2c49cfb1cad84d9eb6eab0e3d1c4842 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Oct 2021 16:17:12 +0200 Subject: ASoC: DAPM: Fix missing kctl change notifications The put callback of a kcontrol is supposed to return 1 when the value is changed, and this will be notified to user-space. However, some DAPM kcontrols always return 0 (except for errors), hence the user-space misses the update of a control value. This patch corrects the behavior by properly returning 1 when the value gets updated. Reported-and-tested-by: Hans de Goede Cc: Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20211006141712.2439-1-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 7b67f1e19ae9..59d07648a7e7 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2561,6 +2561,7 @@ static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm, const char *pin, int status) { struct snd_soc_dapm_widget *w = dapm_find_widget(dapm, pin, true); + int ret = 0; dapm_assert_locked(dapm); @@ -2573,13 +2574,14 @@ static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm, dapm_mark_dirty(w, "pin configuration"); dapm_widget_invalidate_input_paths(w); dapm_widget_invalidate_output_paths(w); + ret = 1; } w->connected = status; if (status == 0) w->force = 0; - return 0; + return ret; } /** @@ -3583,14 +3585,15 @@ int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol, { struct snd_soc_card *card = snd_kcontrol_chip(kcontrol); const char *pin = (const char *)kcontrol->private_value; + int ret; if (ucontrol->value.integer.value[0]) - snd_soc_dapm_enable_pin(&card->dapm, pin); + ret = snd_soc_dapm_enable_pin(&card->dapm, pin); else - snd_soc_dapm_disable_pin(&card->dapm, pin); + ret = snd_soc_dapm_disable_pin(&card->dapm, pin); snd_soc_dapm_sync(&card->dapm); - return 0; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dapm_put_pin_switch); @@ -4023,7 +4026,7 @@ static int snd_soc_dapm_dai_link_put(struct snd_kcontrol *kcontrol, rtd->params_select = ucontrol->value.enumerated.item[0]; - return 0; + return 1; } static void -- cgit v1.2.3 From 214174d9f56c7f81f4860a26b6b8b961a6b92654 Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Thu, 7 Oct 2021 19:21:15 +0530 Subject: ASoC: codec: wcd938x: Add irq config support This patch fixes compilation error in wcd98x codec driver. Fixes: 045442228868 ("ASoC: codecs: wcd938x: add audio routing and Kconfig") Signed-off-by: Venkata Prasad Potturu Signed-off-by: Srinivasa Rao Mandadapu Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/1633614675-27122-1-git-send-email-srivasam@codeaurora.org Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 82ee233a269d..216cea04ad70 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1583,6 +1583,7 @@ config SND_SOC_WCD938X_SDW tristate "WCD9380/WCD9385 Codec - SDW" select SND_SOC_WCD938X select SND_SOC_WCD_MBHC + select REGMAP_IRQ depends on SOUNDWIRE select REGMAP_SOUNDWIRE help -- cgit v1.2.3 From 136f282028dae7d9dd68469b197aa2b36b410992 Mon Sep 17 00:00:00 2001 From: Miguel Bernal Marin Date: Wed, 6 Oct 2021 00:13:18 -0500 Subject: ACPI: tools: fix compilation error When ACPI tools are compiled, the following error is showed: $ cd tools/power/acpi $ make DESCEND tools/acpidbg MKDIR include CP include CC tools/acpidbg/acpidbg.o In file included from /home/linux/tools/power/acpi/include/acpi/platform/acenv.h:152, from /home/linux/tools/power/acpi/include/acpi/acpi.h:22, from acpidbg.c:9: /home/linux/tools/power/acpi/include/acpi/platform/acgcc.h:25:10: fatal error: linux/stdarg.h: No such file or directory 29 | #include | ^~~~~~~~~~~~~~~~ compilation terminated. Use the ACPICA logic: just identify when it is used inside the kernel or by an ACPI tool. Fixes: c0891ac15f04 ("isystem: ship and use stdarg.h") Signed-off-by: Miguel Bernal Marin [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/acpi/platform/acgcc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index fb172a03a753..20ecb004f5a4 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -22,9 +22,14 @@ typedef __builtin_va_list va_list; #define va_arg(v, l) __builtin_va_arg(v, l) #define va_copy(d, s) __builtin_va_copy(d, s) #else +#ifdef __KERNEL__ #include -#endif -#endif +#else +/* Used to build acpi tools */ +#include +#endif /* __KERNEL__ */ +#endif /* ACPI_USE_BUILTIN_STDARG */ +#endif /* ! va_arg */ #define ACPI_INLINE __inline__ -- cgit v1.2.3 From 902c0b1887522a099aa4e1e6b4b476c2fe5dd13e Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Mon, 4 Oct 2021 21:14:38 +0900 Subject: netfilter: xt_IDLETIMER: fix panic that occurs when timer_type has garbage value Currently, when the rule related to IDLETIMER is added, idletimer_tg timer structure is initialized by kmalloc on executing idletimer_tg_create function. However, in this process timer->timer_type is not defined to a specific value. Thus, timer->timer_type has garbage value and it occurs kernel panic. So, this commit fixes the panic by initializing timer->timer_type using kzalloc instead of kmalloc. Test commands: # iptables -A OUTPUT -j IDLETIMER --timeout 1 --label test $ cat /sys/class/xt_idletimer/timers/test Killed Splat looks like: BUG: KASAN: user-memory-access in alarm_expires_remaining+0x49/0x70 Read of size 8 at addr 0000002e8c7bc4c8 by task cat/917 CPU: 12 PID: 917 Comm: cat Not tainted 5.14.0+ #3 79940a339f71eb14fc81aee1757a20d5bf13eb0e Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: dump_stack_lvl+0x6e/0x9c kasan_report.cold+0x112/0x117 ? alarm_expires_remaining+0x49/0x70 __asan_load8+0x86/0xb0 alarm_expires_remaining+0x49/0x70 idletimer_tg_show+0xe5/0x19b [xt_IDLETIMER 11219304af9316a21bee5ba9d58f76a6b9bccc6d] dev_attr_show+0x3c/0x60 sysfs_kf_seq_show+0x11d/0x1f0 ? device_remove_bin_file+0x20/0x20 kernfs_seq_show+0xa4/0xb0 seq_read_iter+0x29c/0x750 kernfs_fop_read_iter+0x25a/0x2c0 ? __fsnotify_parent+0x3d1/0x570 ? iov_iter_init+0x70/0x90 new_sync_read+0x2a7/0x3d0 ? __x64_sys_llseek+0x230/0x230 ? rw_verify_area+0x81/0x150 vfs_read+0x17b/0x240 ksys_read+0xd9/0x180 ? vfs_write+0x460/0x460 ? do_syscall_64+0x16/0xc0 ? lockdep_hardirqs_on+0x79/0x120 __x64_sys_read+0x43/0x50 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f0cdc819142 Code: c0 e9 c2 fe ff ff 50 48 8d 3d 3a ca 0a 00 e8 f5 19 02 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 RSP: 002b:00007fff28eee5b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f0cdc819142 RDX: 0000000000020000 RSI: 00007f0cdc032000 RDI: 0000000000000003 RBP: 00007f0cdc032000 R08: 00007f0cdc031010 R09: 0000000000000000 R10: 0000000000000022 R11: 0000000000000246 R12: 00005607e9ee31f0 R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 Fixes: 68983a354a65 ("netfilter: xtables: Add snapshot of hardidletimer target") Signed-off-by: Juhee Kang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_IDLETIMER.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 7b2f359bfce4..2f7cf5ecebf4 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -137,7 +137,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) { int ret; - info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL); + info->timer = kzalloc(sizeof(*info->timer), GFP_KERNEL); if (!info->timer) { ret = -ENOMEM; goto out; -- cgit v1.2.3 From 77076934afdcd46516caf18ed88b2f88025c9ddb Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 5 Oct 2021 22:54:54 +0200 Subject: netfilter: Kconfig: use 'default y' instead of 'm' for bool config option This option, NF_CONNTRACK_SECMARK, is a bool, so it can never be 'm'. Fixes: 33b8e77605620 ("[NETFILTER]: Add CONFIG_NETFILTER_ADVANCED option") Signed-off-by: Vegard Nossum Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 54395266339d..92a747896f80 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -109,7 +109,7 @@ config NF_CONNTRACK_MARK config NF_CONNTRACK_SECMARK bool 'Connection tracking security mark support' depends on NETWORK_SECMARK - default m if NETFILTER_ADVANCED=n + default y if NETFILTER_ADVANCED=n help This option enables security markings to be applied to connections. Typically they are copied to connections from -- cgit v1.2.3 From 68a3765c659f809dcaac20030853a054646eb739 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Oct 2021 16:20:34 +0200 Subject: netfilter: nf_tables: skip netdev events generated on netns removal syzbot reported following (harmless) WARN: WARNING: CPU: 1 PID: 2648 at net/netfilter/core.c:468 nft_netdev_unregister_hooks net/netfilter/nf_tables_api.c:230 [inline] nf_tables_unregister_hook include/net/netfilter/nf_tables.h:1090 [inline] __nft_release_basechain+0x138/0x640 net/netfilter/nf_tables_api.c:9524 nft_netdev_event net/netfilter/nft_chain_filter.c:351 [inline] nf_tables_netdev_event+0x521/0x8a0 net/netfilter/nft_chain_filter.c:382 reproducer: unshare -n bash -c 'ip link add br0 type bridge; nft add table netdev t ; \ nft add chain netdev t ingress \{ type filter hook ingress device "br0" \ priority 0\; policy drop\; \}' Problem is that when netns device exit hooks create the UNREGISTER event, the .pre_exit hook for nf_tables core has already removed the base hook. Notifier attempts to do this again. The need to do base hook unregister unconditionally was needed in the past, because notifier was last stage where reg->dev dereference was safe. Now that nf_tables does the hook removal in .pre_exit, this isn't needed anymore. Reported-and-tested-by: syzbot+154bd5be532a63aa778b@syzkaller.appspotmail.com Fixes: 767d1216bff825 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_chain_filter.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 5b02408a920b..3ced0eb6b7c3 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -342,12 +342,6 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, return; } - /* UNREGISTER events are also happening on netns exit. - * - * Although nf_tables core releases all tables/chains, only this event - * handler provides guarantee that hook->ops.dev is still accessible, - * so we cannot skip exiting net namespaces. - */ __nft_release_basechain(ctx); } @@ -366,6 +360,9 @@ static int nf_tables_netdev_event(struct notifier_block *this, event != NETDEV_CHANGENAME) return NOTIFY_DONE; + if (!check_net(ctx.net)) + return NOTIFY_DONE; + nft_net = nft_pernet(ctx.net); mutex_lock(&nft_net->commit_mutex); list_for_each_entry(table, &nft_net->tables, list) { -- cgit v1.2.3 From 011a9ce80763141e7fa613934e76bef8948e4a4f Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Tue, 5 Oct 2021 10:48:12 -0600 Subject: dma-mapping: fix the kerneldoc for dma_map_sgtable() htmldocs began producing the following warnings: kernel/dma/mapping.c:256: WARNING: Definition list ends without a blank line; unexpected unindent. kernel/dma/mapping.c:257: WARNING: Bullet list ends without a blank line; unexpected unindent. Reformatting the list without hyphens fixes the warnings and produces both a readable text and HTML output. Fixes: fffe3cc8c219 ("dma-mapping: allow map_sg() ops to return negative error code") Reported-by: Stephen Rothwell Signed-off-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig --- kernel/dma/mapping.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 06fec5547e7c..49885023c223 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -249,12 +249,12 @@ EXPORT_SYMBOL(dma_map_sg_attrs); * Returns 0 on success or a negative error code on error. The following * error codes are supported with the given meaning: * - * -EINVAL - An invalid argument, unaligned access or other error - * in usage. Will not succeed if retried. - * -ENOMEM - Insufficient resources (like memory or IOVA space) to - * complete the mapping. Should succeed if retried later. - * -EIO - Legacy error code with an unknown meaning. eg. this is - * returned if a lower level call returned DMA_MAPPING_ERROR. + * -EINVAL An invalid argument, unaligned access or other error + * in usage. Will not succeed if retried. + * -ENOMEM Insufficient resources (like memory or IOVA space) to + * complete the mapping. Should succeed if retried later. + * -EIO Legacy error code with an unknown meaning. eg. this is + * returned if a lower level call returned DMA_MAPPING_ERROR. */ int dma_map_sgtable(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir, unsigned long attrs) -- cgit v1.2.3 From 293d92cbbd2418ca2ba43fed07f1b92e884d1c77 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 6 Oct 2021 22:19:43 +0200 Subject: dma-debug: fix sg checks in debug_dma_map_sg() The following warning occurred sporadically on s390: DMA-API: nvme 0006:00:00.0: device driver maps memory from kernel text or rodata [addr=0000000048cc5e2f] [len=131072] WARNING: CPU: 4 PID: 825 at kernel/dma/debug.c:1083 check_for_illegal_area+0xa8/0x138 It is a false-positive warning, due to broken logic in debug_dma_map_sg(). check_for_illegal_area() checks for overlay of sg elements with kernel text or rodata. It is called with sg_dma_len(s) instead of s->length as parameter. After the call to ->map_sg(), sg_dma_len() will contain the length of possibly combined sg elements in the DMA address space, and not the individual sg element length, which would be s->length. The check will then use the physical start address of an sg element, and add the DMA length for the overlap check, which could result in the false warning, because the DMA length can be larger than the actual single sg element length. In addition, the call to check_for_illegal_area() happens in the iteration over mapped_ents, which will not include all individual sg elements if any of them were combined in ->map_sg(). Fix this by using s->length instead of sg_dma_len(s). Also put the call to check_for_illegal_area() in a separate loop, iterating over all the individual sg elements ("nents" instead of "mapped_ents"). While at it, as suggested by Robin Murphy, also move check_for_stack() inside the new loop, as it is similarly concerned with validating the individual sg elements. Link: https://lore.kernel.org/lkml/20210705185252.4074653-1-gerald.schaefer@linux.ibm.com Fixes: 884d05970bfb ("dma-debug: use sg_dma_len accessor") Signed-off-by: Gerald Schaefer Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 95445bd6eb72..35da4c3a6486 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1289,6 +1289,12 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, if (unlikely(dma_debug_disabled())) return; + for_each_sg(sg, s, nents, i) { + check_for_stack(dev, sg_page(s), s->offset); + if (!PageHighMem(sg_page(s))) + check_for_illegal_area(dev, sg_virt(s), s->length); + } + for_each_sg(sg, s, mapped_ents, i) { entry = dma_entry_alloc(); if (!entry) @@ -1304,12 +1310,6 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->sg_call_ents = nents; entry->sg_mapped_ents = mapped_ents; - check_for_stack(dev, sg_page(s), s->offset); - - if (!PageHighMem(sg_page(s))) { - check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); - } - check_sg_segment(dev, s); add_dma_entry(entry); -- cgit v1.2.3 From c448b7aa3e66042fc0f849d9a0fb90d1af82e948 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 9 Oct 2021 14:58:40 +0800 Subject: ASoC: soc-core: fix null-ptr-deref in snd_soc_del_component_unlocked() 'component' is allocated in snd_soc_register_component(), but component->list is not initalized, this may cause snd_soc_del_component_unlocked() deref null ptr in the error handing case. KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] RIP: 0010:__list_del_entry_valid+0x81/0xf0 Call Trace: snd_soc_del_component_unlocked+0x69/0x1b0 [snd_soc_core] snd_soc_add_component.cold+0x54/0x6c [snd_soc_core] snd_soc_register_component+0x70/0x90 [snd_soc_core] devm_snd_soc_register_component+0x5e/0xd0 [snd_soc_core] tas2552_probe+0x265/0x320 [snd_soc_tas2552] ? tas2552_component_probe+0x1e0/0x1e0 [snd_soc_tas2552] i2c_device_probe+0xa31/0xbe0 Fix by adding INIT_LIST_HEAD() to snd_soc_component_initialize(). Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20211009065840.3196239-1-yangyingliang@huawei.com Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c830e96afba2..80ca260595fd 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2599,6 +2599,7 @@ int snd_soc_component_initialize(struct snd_soc_component *component, INIT_LIST_HEAD(&component->dai_list); INIT_LIST_HEAD(&component->dobj_list); INIT_LIST_HEAD(&component->card_list); + INIT_LIST_HEAD(&component->list); mutex_init(&component->io_mutex); component->name = fmt_single_name(dev, &component->id); -- cgit v1.2.3 From 465f15a6d1a8f51f7e09fba12678b39031f63ca9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Sep 2021 15:12:42 +0200 Subject: selftests: nft_nat: add udp hole punch test case Add a test case that demonstrates port shadowing via UDP. ns2 sends packet to ns1, from source port used by a udp service on the router, ns0. Then, ns1 sends packet to ns0:service, but that ends up getting forwarded to ns2. Also add three test cases that demonstrate mitigations: 1. disable use of $port as source from 'unstrusted' origin 2. make the service untracked. This prevents masquerade entries from having any effects. 3. add forced PAT via 'random' mode to translate the "wrong" sport into an acceptable range. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_nat.sh | 145 +++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index d7e07f4c3d7f..da1c1e4b6c86 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -741,6 +741,149 @@ EOF return $lret } +# test port shadowing. +# create two listening services, one on router (ns0), one +# on client (ns2), which is masqueraded from ns1 point of view. +# ns2 sends udp packet coming from service port to ns1, on a highport. +# Later, if n1 uses same highport to connect to ns0:service, packet +# might be port-forwarded to ns2 instead. + +# second argument tells if we expect the 'fake-entry' to take effect +# (CLIENT) or not (ROUTER). +test_port_shadow() +{ + local test=$1 + local expect=$2 + local daddrc="10.0.1.99" + local daddrs="10.0.1.1" + local result="" + local logmsg="" + + echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & + nc_r=$! + + echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & + nc_c=$! + + # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. + echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null + + # ns1 tries to connect to ns0:1405. With default settings this should connect + # to client, it matches the conntrack entry created above. + + result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405) + + if [ "$result" = "$expect" ] ;then + echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}" + else + echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended" + ret=1 + fi + + kill $nc_r $nc_c 2>/dev/null + + # flush udp entries for next test round, if any + ip netns exec "$ns0" conntrack -F >/dev/null 2>&1 +} + +# This prevents port shadow of router service via packet filter, +# packets claiming to originate from service port from internal +# network are dropped. +test_port_shadow_filter() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin < /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec "$ns0" nft -f /dev/stdin < Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211004024935.15326-1-shawn.guo@linaro.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 8eefa7d5fe85..2d80a04e11d8 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2042,6 +2042,12 @@ void sdhci_set_power_noreg(struct sdhci_host *host, unsigned char mode, break; case MMC_VDD_32_33: case MMC_VDD_33_34: + /* + * 3.4 ~ 3.6V are valid only for those platforms where it's + * known that the voltage range is supported by hardware. + */ + case MMC_VDD_34_35: + case MMC_VDD_35_36: pwr = SDHCI_POWER_330; break; default: -- cgit v1.2.3 From aa18457c4af7a9dad1f2b150b11beae1d8ab57aa Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Mon, 11 Oct 2021 15:49:03 +0100 Subject: ASoC: cs42l42: Ensure 0dB full scale volume is used for headsets Ensure the default 0dB playback path is always used. The code that set FULL_SCALE_VOL based on LOAD_DET_RCSTAT was spurious, and resulted in a -6dB attenuation being accidentally inserted into the playback path. Signed-off-by: Stefan Binding Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20211011144903.28915-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index fb1e4c33e27d..9a463ab54bdd 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -922,7 +922,6 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) struct snd_soc_component *component = dai->component; struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component); unsigned int regval; - u8 fullScaleVol; int ret; if (mute) { @@ -993,20 +992,11 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) cs42l42->stream_use |= 1 << stream; if (stream == SNDRV_PCM_STREAM_PLAYBACK) { - /* Read the headphone load */ - regval = snd_soc_component_read(component, CS42L42_LOAD_DET_RCSTAT); - if (((regval & CS42L42_RLA_STAT_MASK) >> CS42L42_RLA_STAT_SHIFT) == - CS42L42_RLA_STAT_15_OHM) { - fullScaleVol = CS42L42_HP_FULL_SCALE_VOL_MASK; - } else { - fullScaleVol = 0; - } - - /* Un-mute the headphone, set the full scale volume flag */ + /* Un-mute the headphone */ snd_soc_component_update_bits(component, CS42L42_HP_CTL, CS42L42_HP_ANA_AMUTE_MASK | - CS42L42_HP_ANA_BMUTE_MASK | - CS42L42_HP_FULL_SCALE_VOL_MASK, fullScaleVol); + CS42L42_HP_ANA_BMUTE_MASK, + 0); } } -- cgit v1.2.3 From 55e6d8037805b3400096d621091dfbf713f97e83 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 12 Oct 2021 10:37:35 +0800 Subject: regmap: Fix possible double-free in regcache_rbtree_exit() In regcache_rbtree_insert_to_block(), when 'present' realloc failed, the 'blk' which is supposed to assign to 'rbnode->block' will be freed, so 'rbnode->block' points a freed memory, in the error handling path of regcache_rbtree_init(), 'rbnode->block' will be freed again in regcache_rbtree_exit(), KASAN will report double-free as follows: BUG: KASAN: double-free or invalid-free in kfree+0xce/0x390 Call Trace: slab_free_freelist_hook+0x10d/0x240 kfree+0xce/0x390 regcache_rbtree_exit+0x15d/0x1a0 regcache_rbtree_init+0x224/0x2c0 regcache_init+0x88d/0x1310 __regmap_init+0x3151/0x4a80 __devm_regmap_init+0x7d/0x100 madera_spi_probe+0x10f/0x333 [madera_spi] spi_probe+0x183/0x210 really_probe+0x285/0xc30 To fix this, moving up the assignment of rbnode->block to immediately after the reallocation has succeeded so that the data structure stays valid even if the second reallocation fails. Reported-by: Hulk Robot Fixes: 3f4ff561bc88b ("regmap: rbtree: Make cache_present bitmap per node") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20211012023735.1632786-1-yangyingliang@huawei.com Signed-off-by: Mark Brown --- drivers/base/regmap/regcache-rbtree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index cfa29dc89bbf..fabf87058d80 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -281,14 +281,14 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, if (!blk) return -ENOMEM; + rbnode->block = blk; + if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) { present = krealloc(rbnode->cache_present, BITS_TO_LONGS(blklen) * sizeof(*present), GFP_KERNEL); - if (!present) { - kfree(blk); + if (!present) return -ENOMEM; - } memset(present + BITS_TO_LONGS(rbnode->blklen), 0, (BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen)) @@ -305,7 +305,6 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, } /* update the rbnode block, its size and the base register */ - rbnode->block = blk; rbnode->blklen = blklen; rbnode->base_reg = base_reg; rbnode->cache_present = present; -- cgit v1.2.3 From 50b6cb3516365cb69753b006be2b61c966b70588 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 7 Oct 2021 21:35:46 -0700 Subject: scsi: core: Fix shost->cmd_per_lun calculation in scsi_add_host_with_dma() After commit ea2f0f77538c ("scsi: core: Cap scsi_host cmd_per_lun at can_queue"), a 416-CPU VM running on Hyper-V hangs during boot because the hv_storvsc driver sets scsi_driver.can_queue to an integer value that exceeds SHRT_MAX, and hence scsi_add_host_with_dma() sets shost->cmd_per_lun to a negative "short" value. Use min_t(int, ...) to work around the issue. Link: https://lore.kernel.org/r/20211008043546.6006-1-decui@microsoft.com Fixes: ea2f0f77538c ("scsi: core: Cap scsi_host cmd_per_lun at can_queue") Cc: stable@vger.kernel.org Reviewed-by: Haiyang Zhang Reviewed-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Dexuan Cui Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 3f6f14f0cafb..24b72ee4246f 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -220,7 +220,8 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, goto fail; } - shost->cmd_per_lun = min_t(short, shost->cmd_per_lun, + /* Use min_t(int, ...) in case shost->can_queue exceeds SHRT_MAX */ + shost->cmd_per_lun = min_t(int, shost->cmd_per_lun, shost->can_queue); error = scsi_init_sense_cache(shost); -- cgit v1.2.3 From 187a580c9e7895978dcd1e627b9c9e7e3d13ca96 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 10 Oct 2021 11:19:04 -0500 Subject: scsi: iscsi: Fix set_param() handling In commit 9e67600ed6b8 ("scsi: iscsi: Fix race condition between login and sync thread") we meant to add a check where before we call ->set_param() we make sure the iscsi_cls_connection is bound. The problem is that between versions 4 and 5 of the patch the deletion of the unchecked set_param() call was dropped so we ended up with 2 calls. As a result we can still hit a crash where we access the unbound connection on the first call. This patch removes that first call. Fixes: 9e67600ed6b8 ("scsi: iscsi: Fix race condition between login and sync thread") Link: https://lore.kernel.org/r/20211010161904.60471-1-michael.christie@oracle.com Reviewed-by: Lee Duncan Reviewed-by: Li Feng Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 922e4c7bd88e..78343d3f9385 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2930,8 +2930,6 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: - err = transport->set_param(conn, ev->u.set_param.param, - data, ev->u.set_param.len); if ((conn->state == ISCSI_CONN_BOUND) || (conn->state == ISCSI_CONN_UP)) { err = transport->set_param(conn, ev->u.set_param.param, -- cgit v1.2.3 From 6fd13d699d24beaa28310848fe65fd898fbb9043 Mon Sep 17 00:00:00 2001 From: Andrea Parri (Microsoft) Date: Thu, 7 Oct 2021 14:28:28 +0200 Subject: scsi: storvsc: Fix validation for unsolicited incoming packets The validation on the length of incoming packets performed in storvsc_on_channel_callback() does not apply to unsolicited packets with ID of 0 sent by Hyper-V. Adjust the validation for such unsolicited packets. Link: https://lore.kernel.org/r/20211007122828.469289-1-parri.andrea@gmail.com Fixes: 91b1b640b834b2 ("scsi: storvsc: Validate length of incoming packet in storvsc_on_channel_callback()") Reported-by: Dexuan Cui Reviewed-by: Michael Kelley Reviewed-by: Haiyang Zhang Signed-off-by: Andrea Parri (Microsoft) Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index ebbbc1299c62..9eb1b88a29dd 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1285,11 +1285,15 @@ static void storvsc_on_channel_callback(void *context) foreach_vmbus_pkt(desc, channel) { struct vstor_packet *packet = hv_pkt_data(desc); struct storvsc_cmd_request *request = NULL; + u32 pktlen = hv_pkt_datalen(desc); u64 rqst_id = desc->trans_id; + u32 minlen = rqst_id ? sizeof(struct vstor_packet) - + stor_device->vmscsi_size_delta : sizeof(enum vstor_packet_operation); - if (hv_pkt_datalen(desc) < sizeof(struct vstor_packet) - - stor_device->vmscsi_size_delta) { - dev_err(&device->device, "Invalid packet len\n"); + if (pktlen < minlen) { + dev_err(&device->device, + "Invalid pkt: id=%llu, len=%u, minlen=%u\n", + rqst_id, pktlen, minlen); continue; } @@ -1302,13 +1306,23 @@ static void storvsc_on_channel_callback(void *context) if (rqst_id == 0) { /* * storvsc_on_receive() looks at the vstor_packet in the message - * from the ring buffer. If the operation in the vstor_packet is - * COMPLETE_IO, then we call storvsc_on_io_completion(), and - * dereference the guest memory address. Make sure we don't call - * storvsc_on_io_completion() with a guest memory address that is - * zero if Hyper-V were to construct and send such a bogus packet. + * from the ring buffer. + * + * - If the operation in the vstor_packet is COMPLETE_IO, then + * we call storvsc_on_io_completion(), and dereference the + * guest memory address. Make sure we don't call + * storvsc_on_io_completion() with a guest memory address + * that is zero if Hyper-V were to construct and send such + * a bogus packet. + * + * - If the operation in the vstor_packet is FCHBA_DATA, then + * we call cache_wwn(), and access the data payload area of + * the packet (wwn_packet); however, there is no guarantee + * that the packet is big enough to contain such area. + * Future-proof the code by rejecting such a bogus packet. */ - if (packet->operation == VSTOR_OPERATION_COMPLETE_IO) { + if (packet->operation == VSTOR_OPERATION_COMPLETE_IO || + packet->operation == VSTOR_OPERATION_FCHBA_DATA) { dev_err(&device->device, "Invalid packet with ID of 0\n"); continue; } -- cgit v1.2.3 From f2b85040acec9a928b4eb1b57a989324e8e38d3f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 8 Oct 2021 13:01:18 +0800 Subject: scsi: core: Put LLD module refcnt after SCSI device is released SCSI host release is triggered when SCSI device is freed. We have to make sure that the low-level device driver module won't be unloaded before SCSI host instance is released because shost->hostt is required in the release handler. Make sure to put LLD module refcnt after SCSI device is released. Fixes a kernel panic of 'BUG: unable to handle page fault for address' reported by Changhui and Yi. Link: https://lore.kernel.org/r/20211008050118.1440686-1-ming.lei@redhat.com Cc: Greg Kroah-Hartman Reported-by: Changhui Zhong Reported-by: Yi Zhang Tested-by: Yi Zhang Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi.c | 4 +++- drivers/scsi/scsi_sysfs.c | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index b241f9e3885c..291ecc33b1fe 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -553,8 +553,10 @@ EXPORT_SYMBOL(scsi_device_get); */ void scsi_device_put(struct scsi_device *sdev) { - module_put(sdev->host->hostt->module); + struct module *mod = sdev->host->hostt->module; + put_device(&sdev->sdev_gendev); + module_put(mod); } EXPORT_SYMBOL(scsi_device_put); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 86793259e541..a35841b34bfd 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -449,9 +449,12 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL; struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL; unsigned long flags; + struct module *mod; sdev = container_of(work, struct scsi_device, ew.work); + mod = sdev->host->hostt->module; + scsi_dh_release_device(sdev); parent = sdev->sdev_gendev.parent; @@ -502,11 +505,17 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) if (parent) put_device(parent); + module_put(mod); } static void scsi_device_dev_release(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); + + /* Set module pointer as NULL in case of module unloading */ + if (!try_module_get(sdp->host->hostt->module)) + sdp->host->hostt->module = NULL; + execute_in_process_context(scsi_device_dev_release_usercontext, &sdp->ew); } -- cgit v1.2.3 From 6b9b546dc00797c74bef491668ce5431ff54e1e2 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 13 Oct 2021 13:17:04 +0800 Subject: ASoC: wm8960: Fix clock configuration on slave mode There is a noise issue for 8kHz sample rate on slave mode. Compared with master mode, the difference is the DACDIV setting, after correcting the DACDIV, the noise is gone. There is no noise issue for 48kHz sample rate, because the default value of DACDIV is correct for 48kHz. So wm8960_configure_clocking() should be functional for ADC and DAC function even if it is slave mode. In order to be compatible for old use case, just add condition for checking that sysclk is zero with slave mode. Fixes: 0e50b51aa22f ("ASoC: wm8960: Let wm8960 driver configure its bit clock and frame clock") Signed-off-by: Shengjiu Wang Acked-by: Charles Keepax Link: https://lore.kernel.org/r/1634102224-3922-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8960.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 9e621a254392..499604f1e178 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -742,9 +742,16 @@ static int wm8960_configure_clocking(struct snd_soc_component *component) int i, j, k; int ret; - if (!(iface1 & (1<<6))) { - dev_dbg(component->dev, - "Codec is slave mode, no need to configure clock\n"); + /* + * For Slave mode clocking should still be configured, + * so this if statement should be removed, but some platform + * may not work if the sysclk is not configured, to avoid such + * compatible issue, just add '!wm8960->sysclk' condition in + * this if statement. + */ + if (!(iface1 & (1 << 6)) && !wm8960->sysclk) { + dev_warn(component->dev, + "slave mode, but proceeding with no clock configuration\n"); return 0; } -- cgit v1.2.3 From d39bf40e55e666b5905fdbd46a0dced030ce87be Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 12 Oct 2021 13:55:19 -0400 Subject: IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields Overflowing either addrlimit or bytes_togo can allow userspace to trigger a buffer overflow of kernel memory. Check for overflows in all the places doing math on user controlled buffers. Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Link: https://lore.kernel.org/r/20211012175519.7298.77738.stgit@awfm-01.cornelisnetworks.com Reported-by: Ilja Van Sprundel Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_user_sdma.c | 33 +++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index a67599b5a550..ac11943a5ddb 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -602,7 +602,7 @@ done: /* * How many pages in this iovec element? */ -static int qib_user_sdma_num_pages(const struct iovec *iov) +static size_t qib_user_sdma_num_pages(const struct iovec *iov) { const unsigned long addr = (unsigned long) iov->iov_base; const unsigned long len = iov->iov_len; @@ -658,7 +658,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, - unsigned long addr, int tlen, int npages) + unsigned long addr, int tlen, size_t npages) { struct page *pages[8]; int i, j; @@ -722,7 +722,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, unsigned long idx; for (idx = 0; idx < niov; idx++) { - const int npages = qib_user_sdma_num_pages(iov + idx); + const size_t npages = qib_user_sdma_num_pages(iov + idx); const unsigned long addr = (unsigned long) iov[idx].iov_base; ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr, @@ -824,8 +824,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, unsigned pktnw; unsigned pktnwc; int nfrags = 0; - int npages = 0; - int bytes_togo = 0; + size_t npages = 0; + size_t bytes_togo = 0; int tiddma = 0; int cfur; @@ -885,7 +885,11 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, npages += qib_user_sdma_num_pages(&iov[idx]); - bytes_togo += slen; + if (check_add_overflow(bytes_togo, slen, &bytes_togo) || + bytes_togo > type_max(typeof(pkt->bytes_togo))) { + ret = -EINVAL; + goto free_pbc; + } pktnwc += slen >> 2; idx++; nfrags++; @@ -904,8 +908,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, } if (frag_size) { - int tidsmsize, n; - size_t pktsize; + size_t tidsmsize, n, pktsize, sz, addrlimit; n = npages*((2*PAGE_SIZE/frag_size)+1); pktsize = struct_size(pkt, addr, n); @@ -923,14 +926,24 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, else tidsmsize = 0; - pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL); + if (check_add_overflow(pktsize, tidsmsize, &sz)) { + ret = -EINVAL; + goto free_pbc; + } + pkt = kmalloc(sz, GFP_KERNEL); if (!pkt) { ret = -ENOMEM; goto free_pbc; } pkt->largepkt = 1; pkt->frag_size = frag_size; - pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); + if (check_add_overflow(n, ARRAY_SIZE(pkt->addr), + &addrlimit) || + addrlimit > type_max(typeof(pkt->addrlimit))) { + ret = -EINVAL; + goto free_pbc; + } + pkt->addrlimit = addrlimit; if (tiddma) { char *tidsm = (char *)pkt + pktsize; -- cgit v1.2.3 From 13bac861952a78664907a0f927d3e874e9a59034 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 13 Oct 2021 10:18:52 -0400 Subject: IB/hfi1: Fix abba locking issue with sc_disable() sc_disable() after having disabled the send context wakes up any waiters by calling hfi1_qp_wakeup() while holding the waitlock for the sc. This is contrary to the model for all other calls to hfi1_qp_wakeup() where the waitlock is dropped and a local is used to drive calls to hfi1_qp_wakeup(). Fix by moving the sc->piowait into a local list and driving the wakeup calls from the list. Fixes: 099a884ba4c0 ("IB/hfi1: Handle wakeup of orphaned QPs for pio") Link: https://lore.kernel.org/r/20211013141852.128104.2682.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Mike Marciniszyn Reported-by: TOTE Robot Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pio.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 489b436f19bb..3d42bd2b36bd 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -878,6 +878,7 @@ void sc_disable(struct send_context *sc) { u64 reg; struct pio_buf *pbuf; + LIST_HEAD(wake_list); if (!sc) return; @@ -912,19 +913,21 @@ void sc_disable(struct send_context *sc) spin_unlock(&sc->release_lock); write_seqlock(&sc->waitlock); - while (!list_empty(&sc->piowait)) { + if (!list_empty(&sc->piowait)) + list_move(&sc->piowait, &wake_list); + write_sequnlock(&sc->waitlock); + while (!list_empty(&wake_list)) { struct iowait *wait; struct rvt_qp *qp; struct hfi1_qp_priv *priv; - wait = list_first_entry(&sc->piowait, struct iowait, list); + wait = list_first_entry(&wake_list, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); } - write_sequnlock(&sc->waitlock); spin_unlock_irq(&sc->alloc_lock); } -- cgit v1.2.3 From 663991f32857b3b63c94c97de9dbb0ec8600144f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Oct 2021 11:06:45 +0300 Subject: RDMA/rdmavt: Fix error code in rvt_create_qp() Return negative -ENOMEM instead of positive ENOMEM. Returning a postive value will cause an Oops because it becomes an ERR_PTR() in the create_qp() function. Fixes: 514aee660df4 ("RDMA: Globally allocate and release QP memory") Link: https://lore.kernel.org/r/20211013080645.GD6010@kili Signed-off-by: Dan Carpenter Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 49bdd78ac664..3305f2744bfa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1223,7 +1223,7 @@ int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, spin_lock(&rdi->n_qps_lock); if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { spin_unlock(&rdi->n_qps_lock); - ret = ENOMEM; + ret = -ENOMEM; goto bail_ip; } -- cgit v1.2.3 From 0398adaec3419bdfa93baae70a3b696b4abdd7ad Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 8 Oct 2021 22:59:36 +0200 Subject: Revert "dt-bindings: pinctrl: bcm4708-pinmux: rework binding to use syscon" This reverts commit 2ae80900f239484069569380e1fc4340fd6e0089. My rework was unneeded & wrong. It replaced a clear & correct "reg" property usage with a custom "offset" one. Back then I didn't understand how to properly handle CRU block binding. I heard / read about syscon and tried to use it in a totally invalid way. That change also missed Rob's review (obviously). Northstar's pin controller is a simple consistent hardware block that can be cleanly mapped using a 0x24 long reg space. Since the rework commit there wasn't any follow up modifying in-kernel DTS files to use the new binding. Broadcom also isn't known to use that bugged binding. There is close to zero chance this revert may actually cause problems / regressions. This commit is a simple revert. Example binding may (should) be updated / cleaned up but that can be handled separately. Signed-off-by: Rafał Miłecki Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211008205938.29925-1-zajec5@gmail.com Signed-off-by: Linus Walleij --- .../devicetree/bindings/mfd/brcm,cru.yaml | 11 ++++++----- .../bindings/pinctrl/brcm,ns-pinmux.yaml | 23 ++++++++++++---------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml index fc1317ab3226..28ac60acf4ac 100644 --- a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml +++ b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml @@ -32,13 +32,13 @@ properties: "#size-cells": const: 1 - pinctrl: - $ref: ../pinctrl/brcm,ns-pinmux.yaml - patternProperties: '^clock-controller@[a-f0-9]+$': $ref: ../clock/brcm,iproc-clocks.yaml + '^pin-controller@[a-f0-9]+$': + $ref: ../pinctrl/brcm,ns-pinmux.yaml + '^thermal@[a-f0-9]+$': $ref: ../thermal/brcm,ns-thermal.yaml @@ -73,9 +73,10 @@ examples: "iprocfast", "sata1", "sata2"; }; - pinctrl { + pin-controller@1c0 { compatible = "brcm,bcm4708-pinmux"; - offset = <0x1c0>; + reg = <0x1c0 0x24>; + reg-names = "cru_gpio_control"; }; thermal@2c0 { diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml index 470aff599c27..78600a8fe403 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml +++ b/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml @@ -17,9 +17,6 @@ description: A list of pins varies across chipsets so few bindings are available. - Node of the pinmux must be nested in the CRU (Central Resource Unit) "syscon" - node. - properties: compatible: enum: @@ -27,10 +24,11 @@ properties: - brcm,bcm4709-pinmux - brcm,bcm53012-pinmux - offset: - description: offset of pin registers in the CRU block + reg: maxItems: 1 - $ref: /schemas/types.yaml#/definitions/uint32-array + + reg-names: + const: cru_gpio_control patternProperties: '-pins$': @@ -72,19 +70,24 @@ allOf: uart1_grp ] required: - - offset + - reg + - reg-names additionalProperties: false examples: - | cru@1800c100 { - compatible = "syscon", "simple-mfd"; + compatible = "simple-bus"; reg = <0x1800c100 0x1a4>; + ranges; + #address-cells = <1>; + #size-cells = <1>; - pinctrl { + pin-controller@1c0 { compatible = "brcm,bcm4708-pinmux"; - offset = <0xc0>; + reg = <0x1c0 0x24>; + reg-names = "cru_gpio_control"; spi-pins { function = "spi"; -- cgit v1.2.3 From 1d0a779892e8aae484e11bf5f28fb41d49e8b0f6 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 8 Oct 2021 22:59:37 +0200 Subject: dt-bindings: pinctrl: brcm,ns-pinmux: drop unneeded CRU from example There is no need to include CRU in example of this binding. It wasn't complete / correct anyway. The proper binding can be find in the mfd/brcm,cru.yaml . Signed-off-by: Rafał Miłecki Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211008205938.29925-2-zajec5@gmail.com Signed-off-by: Linus Walleij --- .../bindings/pinctrl/brcm,ns-pinmux.yaml | 24 ++++++++-------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml index 78600a8fe403..fc39e3e9f71c 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml +++ b/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml @@ -77,21 +77,13 @@ additionalProperties: false examples: - | - cru@1800c100 { - compatible = "simple-bus"; - reg = <0x1800c100 0x1a4>; - ranges; - #address-cells = <1>; - #size-cells = <1>; - - pin-controller@1c0 { - compatible = "brcm,bcm4708-pinmux"; - reg = <0x1c0 0x24>; - reg-names = "cru_gpio_control"; - - spi-pins { - function = "spi"; - groups = "spi_grp"; - }; + pin-controller@1800c1c0 { + compatible = "brcm,bcm4708-pinmux"; + reg = <0x1800c1c0 0x24>; + reg-names = "cru_gpio_control"; + + spi-pins { + function = "spi"; + groups = "spi_grp"; }; }; -- cgit v1.2.3 From 6dba4bdfd7a30e77b848a45404b224588bf989e5 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 8 Oct 2021 22:59:38 +0200 Subject: Revert "pinctrl: bcm: ns: support updated DT binding as syscon subnode" This reverts commit a49d784d5a8272d0f63c448fe8dc69e589db006e. The updated binding was wrong / invalid and has been reverted. There isn't any upstream kernel DTS using it and Broadcom isn't known to use it neither. There is close to zero chance this will cause regression for anyone. Actually in-kernel bcm5301x.dtsi still uses the old good binding and so it's broken since the driver update. This revert fixes it. Signed-off-by: Rafał Miłecki Link: https://lore.kernel.org/r/20211008205938.29925-3-zajec5@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-ns.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-ns.c b/drivers/pinctrl/bcm/pinctrl-ns.c index e79690bd8b85..d7f8175d2c1c 100644 --- a/drivers/pinctrl/bcm/pinctrl-ns.c +++ b/drivers/pinctrl/bcm/pinctrl-ns.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -13,7 +12,6 @@ #include #include #include -#include #include #define FLAG_BCM4708 BIT(1) @@ -24,8 +22,7 @@ struct ns_pinctrl { struct device *dev; unsigned int chipset_flag; struct pinctrl_dev *pctldev; - struct regmap *regmap; - u32 offset; + void __iomem *base; struct pinctrl_desc pctldesc; struct ns_pinctrl_group *groups; @@ -232,9 +229,9 @@ static int ns_pinctrl_set_mux(struct pinctrl_dev *pctrl_dev, unset |= BIT(pin_number); } - regmap_read(ns_pinctrl->regmap, ns_pinctrl->offset, &tmp); + tmp = readl(ns_pinctrl->base); tmp &= ~unset; - regmap_write(ns_pinctrl->regmap, ns_pinctrl->offset, tmp); + writel(tmp, ns_pinctrl->base); return 0; } @@ -266,13 +263,13 @@ static const struct of_device_id ns_pinctrl_of_match_table[] = { static int ns_pinctrl_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; const struct of_device_id *of_id; struct ns_pinctrl *ns_pinctrl; struct pinctrl_desc *pctldesc; struct pinctrl_pin_desc *pin; struct ns_pinctrl_group *group; struct ns_pinctrl_function *function; + struct resource *res; int i; ns_pinctrl = devm_kzalloc(dev, sizeof(*ns_pinctrl), GFP_KERNEL); @@ -290,18 +287,12 @@ static int ns_pinctrl_probe(struct platform_device *pdev) return -EINVAL; ns_pinctrl->chipset_flag = (uintptr_t)of_id->data; - ns_pinctrl->regmap = syscon_node_to_regmap(of_get_parent(np)); - if (IS_ERR(ns_pinctrl->regmap)) { - int err = PTR_ERR(ns_pinctrl->regmap); - - dev_err(dev, "Failed to map pinctrl regs: %d\n", err); - - return err; - } - - if (of_property_read_u32(np, "offset", &ns_pinctrl->offset)) { - dev_err(dev, "Failed to get register offset\n"); - return -ENOENT; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "cru_gpio_control"); + ns_pinctrl->base = devm_ioremap_resource(dev, res); + if (IS_ERR(ns_pinctrl->base)) { + dev_err(dev, "Failed to map pinctrl regs\n"); + return PTR_ERR(ns_pinctrl->base); } memcpy(pctldesc, &ns_pinctrl_desc, sizeof(*pctldesc)); -- cgit v1.2.3 From c370bb474016ab9edfdabd7c08a88dd13a71ddbd Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Fri, 8 Oct 2021 14:25:17 +0200 Subject: pinctrl: stm32: use valid pin identifier in stm32_pinctrl_resume() When resuming from low power, the driver attempts to restore the configuration of some pins. This is done by a call to: stm32_pinctrl_restore_gpio_regs(struct stm32_pinctrl *pctl, u32 pin) where 'pin' must be a valid pin value (i.e. matching some 'groups->pin'). Fix the current implementation which uses some wrong 'pin' value. Fixes: e2f3cf18c3e2 ("pinctrl: stm32: add suspend/resume management") Signed-off-by: Fabien Dessenne Link: https://lore.kernel.org/r/20211008122517.617633-1-fabien.dessenne@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 68b3886f9f0f..dfd8888a222a 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1644,8 +1644,8 @@ int __maybe_unused stm32_pinctrl_resume(struct device *dev) struct stm32_pinctrl_group *g = pctl->groups; int i; - for (i = g->pin; i < g->pin + pctl->ngroups; i++) - stm32_pinctrl_restore_gpio_regs(pctl, i); + for (i = 0; i < pctl->ngroups; i++, g++) + stm32_pinctrl_restore_gpio_regs(pctl, g->pin); return 0; } -- cgit v1.2.3 From f7db8fd03a4bc5baf70ccf8978fe17cb54368b97 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 8 Oct 2021 07:31:03 +0900 Subject: ksmbd: add validation in smb2_ioctl Add validation for request/response buffer size check in smb2_ioctl and fsctl_copychunk() take copychunk_ioctl_req pointer and the other arguments instead of smb2_ioctl_req structure and remove an unused smb2_ioctl_req argument of fsctl_validate_negotiate_info. Cc: Tom Talpey Cc: Ronnie Sahlberg Cc: Ralph Böhme Cc: Steve French Cc: Sergey Senozhatsky Acked-by: Hyunchul Lee Signed-off-by: Colin Ian King Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 113 +++++++++++++++++++++++++++++++++++++++-------------- fs/ksmbd/vfs.c | 2 +- fs/ksmbd/vfs.h | 2 +- 3 files changed, 86 insertions(+), 31 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 005aa93a49d6..1c7b837c8106 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -7023,24 +7023,26 @@ out2: return err; } -static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req, +static int fsctl_copychunk(struct ksmbd_work *work, + struct copychunk_ioctl_req *ci_req, + unsigned int cnt_code, + unsigned int input_count, + unsigned long long volatile_id, + unsigned long long persistent_id, struct smb2_ioctl_rsp *rsp) { - struct copychunk_ioctl_req *ci_req; struct copychunk_ioctl_rsp *ci_rsp; struct ksmbd_file *src_fp = NULL, *dst_fp = NULL; struct srv_copychunk *chunks; unsigned int i, chunk_count, chunk_count_written = 0; unsigned int chunk_size_written = 0; loff_t total_size_written = 0; - int ret, cnt_code; + int ret = 0; - cnt_code = le32_to_cpu(req->CntCode); - ci_req = (struct copychunk_ioctl_req *)&req->Buffer[0]; ci_rsp = (struct copychunk_ioctl_rsp *)&rsp->Buffer[0]; - rsp->VolatileFileId = req->VolatileFileId; - rsp->PersistentFileId = req->PersistentFileId; + rsp->VolatileFileId = cpu_to_le64(volatile_id); + rsp->PersistentFileId = cpu_to_le64(persistent_id); ci_rsp->ChunksWritten = cpu_to_le32(ksmbd_server_side_copy_max_chunk_count()); ci_rsp->ChunkBytesWritten = @@ -7050,12 +7052,13 @@ static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req, chunks = (struct srv_copychunk *)&ci_req->Chunks[0]; chunk_count = le32_to_cpu(ci_req->ChunkCount); + if (chunk_count == 0) + goto out; total_size_written = 0; /* verify the SRV_COPYCHUNK_COPY packet */ if (chunk_count > ksmbd_server_side_copy_max_chunk_count() || - le32_to_cpu(req->InputCount) < - offsetof(struct copychunk_ioctl_req, Chunks) + + input_count < offsetof(struct copychunk_ioctl_req, Chunks) + chunk_count * sizeof(struct srv_copychunk)) { rsp->hdr.Status = STATUS_INVALID_PARAMETER; return -EINVAL; @@ -7076,9 +7079,7 @@ static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req, src_fp = ksmbd_lookup_foreign_fd(work, le64_to_cpu(ci_req->ResumeKey[0])); - dst_fp = ksmbd_lookup_fd_slow(work, - le64_to_cpu(req->VolatileFileId), - le64_to_cpu(req->PersistentFileId)); + dst_fp = ksmbd_lookup_fd_slow(work, volatile_id, persistent_id); ret = -EINVAL; if (!src_fp || src_fp->persistent_id != le64_to_cpu(ci_req->ResumeKey[1])) { @@ -7153,8 +7154,8 @@ static __be32 idev_ipv4_address(struct in_device *idev) } static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, - struct smb2_ioctl_req *req, - struct smb2_ioctl_rsp *rsp) + struct smb2_ioctl_rsp *rsp, + unsigned int out_buf_len) { struct network_interface_info_ioctl_rsp *nii_rsp = NULL; int nbytes = 0; @@ -7166,6 +7167,12 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, rtnl_lock(); for_each_netdev(&init_net, netdev) { + if (out_buf_len < + nbytes + sizeof(struct network_interface_info_ioctl_rsp)) { + rtnl_unlock(); + return -ENOSPC; + } + if (netdev->type == ARPHRD_LOOPBACK) continue; @@ -7245,11 +7252,6 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, if (nii_rsp) nii_rsp->Next = 0; - if (!nbytes) { - rsp->hdr.Status = STATUS_BUFFER_TOO_SMALL; - return -EINVAL; - } - rsp->PersistentFileId = cpu_to_le64(SMB2_NO_FID); rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID); return nbytes; @@ -7257,11 +7259,16 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, static int fsctl_validate_negotiate_info(struct ksmbd_conn *conn, struct validate_negotiate_info_req *neg_req, - struct validate_negotiate_info_rsp *neg_rsp) + struct validate_negotiate_info_rsp *neg_rsp, + unsigned int in_buf_len) { int ret = 0; int dialect; + if (in_buf_len < sizeof(struct validate_negotiate_info_req) + + le16_to_cpu(neg_req->DialectCount) * sizeof(__le16)) + return -EINVAL; + dialect = ksmbd_lookup_dialect_by_id(neg_req->Dialects, neg_req->DialectCount); if (dialect == BAD_PROT_ID || dialect != conn->dialect) { @@ -7295,7 +7302,7 @@ err_out: static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id, struct file_allocated_range_buffer *qar_req, struct file_allocated_range_buffer *qar_rsp, - int in_count, int *out_count) + unsigned int in_count, unsigned int *out_count) { struct ksmbd_file *fp; loff_t start, length; @@ -7322,7 +7329,8 @@ static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id, } static int fsctl_pipe_transceive(struct ksmbd_work *work, u64 id, - int out_buf_len, struct smb2_ioctl_req *req, + unsigned int out_buf_len, + struct smb2_ioctl_req *req, struct smb2_ioctl_rsp *rsp) { struct ksmbd_rpc_command *rpc_resp; @@ -7436,8 +7444,7 @@ int smb2_ioctl(struct ksmbd_work *work) { struct smb2_ioctl_req *req; struct smb2_ioctl_rsp *rsp, *rsp_org; - int cnt_code, nbytes = 0; - int out_buf_len; + unsigned int cnt_code, nbytes = 0, out_buf_len, in_buf_len; u64 id = KSMBD_NO_FID; struct ksmbd_conn *conn = work->conn; int ret = 0; @@ -7466,7 +7473,11 @@ int smb2_ioctl(struct ksmbd_work *work) cnt_code = le32_to_cpu(req->CntCode); out_buf_len = le32_to_cpu(req->MaxOutputResponse); - out_buf_len = min(KSMBD_IPC_MAX_PAYLOAD, out_buf_len); + out_buf_len = + min_t(u32, work->response_sz - work->next_smb2_rsp_hdr_off - + (offsetof(struct smb2_ioctl_rsp, Buffer) - 4), + out_buf_len); + in_buf_len = le32_to_cpu(req->InputCount); switch (cnt_code) { case FSCTL_DFS_GET_REFERRALS: @@ -7494,6 +7505,7 @@ int smb2_ioctl(struct ksmbd_work *work) break; } case FSCTL_PIPE_TRANSCEIVE: + out_buf_len = min_t(u32, KSMBD_IPC_MAX_PAYLOAD, out_buf_len); nbytes = fsctl_pipe_transceive(work, id, out_buf_len, req, rsp); break; case FSCTL_VALIDATE_NEGOTIATE_INFO: @@ -7502,9 +7514,16 @@ int smb2_ioctl(struct ksmbd_work *work) goto out; } + if (in_buf_len < sizeof(struct validate_negotiate_info_req)) + return -EINVAL; + + if (out_buf_len < sizeof(struct validate_negotiate_info_rsp)) + return -EINVAL; + ret = fsctl_validate_negotiate_info(conn, (struct validate_negotiate_info_req *)&req->Buffer[0], - (struct validate_negotiate_info_rsp *)&rsp->Buffer[0]); + (struct validate_negotiate_info_rsp *)&rsp->Buffer[0], + in_buf_len); if (ret < 0) goto out; @@ -7513,9 +7532,10 @@ int smb2_ioctl(struct ksmbd_work *work) rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID); break; case FSCTL_QUERY_NETWORK_INTERFACE_INFO: - nbytes = fsctl_query_iface_info_ioctl(conn, req, rsp); - if (nbytes < 0) + ret = fsctl_query_iface_info_ioctl(conn, rsp, out_buf_len); + if (ret < 0) goto out; + nbytes = ret; break; case FSCTL_REQUEST_RESUME_KEY: if (out_buf_len < sizeof(struct resume_key_ioctl_rsp)) { @@ -7540,15 +7560,33 @@ int smb2_ioctl(struct ksmbd_work *work) goto out; } + if (in_buf_len < sizeof(struct copychunk_ioctl_req)) { + ret = -EINVAL; + goto out; + } + if (out_buf_len < sizeof(struct copychunk_ioctl_rsp)) { ret = -EINVAL; goto out; } nbytes = sizeof(struct copychunk_ioctl_rsp); - fsctl_copychunk(work, req, rsp); + rsp->VolatileFileId = req->VolatileFileId; + rsp->PersistentFileId = req->PersistentFileId; + fsctl_copychunk(work, + (struct copychunk_ioctl_req *)&req->Buffer[0], + le32_to_cpu(req->CntCode), + le32_to_cpu(req->InputCount), + le64_to_cpu(req->VolatileFileId), + le64_to_cpu(req->PersistentFileId), + rsp); break; case FSCTL_SET_SPARSE: + if (in_buf_len < sizeof(struct file_sparse)) { + ret = -EINVAL; + goto out; + } + ret = fsctl_set_sparse(work, id, (struct file_sparse *)&req->Buffer[0]); if (ret < 0) @@ -7567,6 +7605,11 @@ int smb2_ioctl(struct ksmbd_work *work) goto out; } + if (in_buf_len < sizeof(struct file_zero_data_information)) { + ret = -EINVAL; + goto out; + } + zero_data = (struct file_zero_data_information *)&req->Buffer[0]; @@ -7586,6 +7629,11 @@ int smb2_ioctl(struct ksmbd_work *work) break; } case FSCTL_QUERY_ALLOCATED_RANGES: + if (in_buf_len < sizeof(struct file_allocated_range_buffer)) { + ret = -EINVAL; + goto out; + } + ret = fsctl_query_allocated_ranges(work, id, (struct file_allocated_range_buffer *)&req->Buffer[0], (struct file_allocated_range_buffer *)&rsp->Buffer[0], @@ -7626,6 +7674,11 @@ int smb2_ioctl(struct ksmbd_work *work) struct duplicate_extents_to_file *dup_ext; loff_t src_off, dst_off, length, cloned; + if (in_buf_len < sizeof(struct duplicate_extents_to_file)) { + ret = -EINVAL; + goto out; + } + dup_ext = (struct duplicate_extents_to_file *)&req->Buffer[0]; fp_in = ksmbd_lookup_fd_slow(work, dup_ext->VolatileFileHandle, @@ -7696,6 +7749,8 @@ out: rsp->hdr.Status = STATUS_OBJECT_NAME_NOT_FOUND; else if (ret == -EOPNOTSUPP) rsp->hdr.Status = STATUS_NOT_SUPPORTED; + else if (ret == -ENOSPC) + rsp->hdr.Status = STATUS_BUFFER_TOO_SMALL; else if (ret < 0 || rsp->hdr.Status == 0) rsp->hdr.Status = STATUS_INVALID_PARAMETER; smb2_set_err_rsp(work); diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index b41954294d38..835b384b0895 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1023,7 +1023,7 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, struct file_allocated_range_buffer *ranges, - int in_count, int *out_count) + unsigned int in_count, unsigned int *out_count) { struct file *f = fp->filp; struct inode *inode = file_inode(fp->filp); diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h index 7b1dcaa3fbdc..b0d5b8feb4a3 100644 --- a/fs/ksmbd/vfs.h +++ b/fs/ksmbd/vfs.h @@ -166,7 +166,7 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, struct file_allocated_range_buffer; int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, struct file_allocated_range_buffer *ranges, - int in_count, int *out_count); + unsigned int in_count, unsigned int *out_count); int ksmbd_vfs_unlink(struct user_namespace *user_ns, struct dentry *dir, struct dentry *dentry); void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat); -- cgit v1.2.3 From bf8acc9e10e21c28452dfa067a7d31e6067104b1 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Thu, 7 Oct 2021 16:26:58 +0900 Subject: ksmbd: improve credits management * Requests except READ, WRITE, IOCTL, INFO, QUERY DIRECOTRY, CANCEL must consume one credit. * If client's granted credits are insufficient, refuse to handle requests. * Windows server 2016 or later grant up to 8192 credits to clients at once. Acked-by: Namjae Jeon Signed-off-by: Hyunchul Lee Signed-off-by: Steve French --- fs/ksmbd/connection.c | 2 ++ fs/ksmbd/smb2misc.c | 38 +++++++++++++++++------- fs/ksmbd/smb2pdu.c | 81 ++++++++++++++++++--------------------------------- 3 files changed, 59 insertions(+), 62 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 48b18b4ec117..b57a0d8a392f 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -61,6 +61,8 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) conn->local_nls = load_nls_default(); atomic_set(&conn->req_running, 0); atomic_set(&conn->r_count, 0); + conn->total_credits = 1; + init_waitqueue_head(&conn->req_running_q); INIT_LIST_HEAD(&conn->conns_list); INIT_LIST_HEAD(&conn->sessions); diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c index 9edd9c161b27..e7e441c8f050 100644 --- a/fs/ksmbd/smb2misc.c +++ b/fs/ksmbd/smb2misc.c @@ -284,11 +284,13 @@ static inline int smb2_ioctl_resp_len(struct smb2_ioctl_req *h) le32_to_cpu(h->MaxOutputResponse); } -static int smb2_validate_credit_charge(struct smb2_hdr *hdr) +static int smb2_validate_credit_charge(struct ksmbd_conn *conn, + struct smb2_hdr *hdr) { - int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len; - int credit_charge = le16_to_cpu(hdr->CreditCharge); + unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len; + unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge); void *__hdr = hdr; + int ret; switch (hdr->Command) { case SMB2_QUERY_INFO: @@ -310,21 +312,37 @@ static int smb2_validate_credit_charge(struct smb2_hdr *hdr) req_len = smb2_ioctl_req_len(__hdr); expect_resp_len = smb2_ioctl_resp_len(__hdr); break; - default: + case SMB2_CANCEL: return 0; + default: + req_len = 1; + break; } - credit_charge = max(1, credit_charge); - max_len = max(req_len, expect_resp_len); + credit_charge = max_t(unsigned short, credit_charge, 1); + max_len = max_t(unsigned int, req_len, expect_resp_len); calc_credit_num = DIV_ROUND_UP(max_len, SMB2_MAX_BUFFER_SIZE); if (credit_charge < calc_credit_num) { - pr_err("Insufficient credit charge, given: %d, needed: %d\n", - credit_charge, calc_credit_num); + ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n", + credit_charge, calc_credit_num); + return 1; + } else if (credit_charge > conn->max_credits) { + ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge); return 1; } - return 0; + spin_lock(&conn->credits_lock); + if (credit_charge <= conn->total_credits) { + conn->total_credits -= credit_charge; + ret = 0; + } else { + ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n", + credit_charge, conn->total_credits); + ret = 1; + } + spin_unlock(&conn->credits_lock); + return ret; } int ksmbd_smb2_check_message(struct ksmbd_work *work) @@ -383,7 +401,7 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) } if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) && - smb2_validate_credit_charge(hdr)) { + smb2_validate_credit_charge(work->conn, hdr)) { work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER); return 1; } diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 1c7b837c8106..79b296abe04e 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -292,22 +292,6 @@ int init_smb2_neg_rsp(struct ksmbd_work *work) return 0; } -static int smb2_consume_credit_charge(struct ksmbd_work *work, - unsigned short credit_charge) -{ - struct ksmbd_conn *conn = work->conn; - unsigned int rsp_credits = 1; - - if (!conn->total_credits) - return 0; - - if (credit_charge > 0) - rsp_credits = credit_charge; - - conn->total_credits -= rsp_credits; - return rsp_credits; -} - /** * smb2_set_rsp_credits() - set number of credits in response buffer * @work: smb work containing smb response buffer @@ -317,49 +301,43 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work); struct smb2_hdr *hdr = ksmbd_resp_buf_next(work); struct ksmbd_conn *conn = work->conn; - unsigned short credits_requested = le16_to_cpu(req_hdr->CreditRequest); - unsigned short credit_charge = 1, credits_granted = 0; - unsigned short aux_max, aux_credits, min_credits; - int rsp_credit_charge; + unsigned short credits_requested; + unsigned short credit_charge, credits_granted = 0; + unsigned short aux_max, aux_credits; - if (hdr->Command == SMB2_CANCEL) - goto out; + if (work->send_no_response) + return 0; - /* get default minimum credits by shifting maximum credits by 4 */ - min_credits = conn->max_credits >> 4; + hdr->CreditCharge = req_hdr->CreditCharge; - if (conn->total_credits >= conn->max_credits) { + if (conn->total_credits > conn->max_credits) { + hdr->CreditRequest = 0; pr_err("Total credits overflow: %d\n", conn->total_credits); - conn->total_credits = min_credits; - } - - rsp_credit_charge = - smb2_consume_credit_charge(work, le16_to_cpu(req_hdr->CreditCharge)); - if (rsp_credit_charge < 0) return -EINVAL; + } - hdr->CreditCharge = cpu_to_le16(rsp_credit_charge); + credit_charge = max_t(unsigned short, + le16_to_cpu(req_hdr->CreditCharge), 1); + credits_requested = max_t(unsigned short, + le16_to_cpu(req_hdr->CreditRequest), 1); - if (credits_requested > 0) { - aux_credits = credits_requested - 1; - aux_max = 32; - if (hdr->Command == SMB2_NEGOTIATE) - aux_max = 0; - aux_credits = (aux_credits < aux_max) ? aux_credits : aux_max; - credits_granted = aux_credits + credit_charge; + /* according to smb2.credits smbtorture, Windows server + * 2016 or later grant up to 8192 credits at once. + * + * TODO: Need to adjuct CreditRequest value according to + * current cpu load + */ + aux_credits = credits_requested - 1; + if (hdr->Command == SMB2_NEGOTIATE) + aux_max = 0; + else + aux_max = conn->max_credits - credit_charge; + aux_credits = min_t(unsigned short, aux_credits, aux_max); + credits_granted = credit_charge + aux_credits; - /* if credits granted per client is getting bigger than default - * minimum credits then we should wrap it up within the limits. - */ - if ((conn->total_credits + credits_granted) > min_credits) - credits_granted = min_credits - conn->total_credits; - /* - * TODO: Need to adjuct CreditRequest value according to - * current cpu load - */ - } else if (conn->total_credits == 0) { - credits_granted = 1; - } + if (conn->max_credits - conn->total_credits < credits_granted) + credits_granted = conn->max_credits - + conn->total_credits; conn->total_credits += credits_granted; work->credits_granted += credits_granted; @@ -368,7 +346,6 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) /* Update CreditRequest in last request */ hdr->CreditRequest = cpu_to_le16(work->credits_granted); } -out: ksmbd_debug(SMB, "credits: requested[%d] granted[%d] total_granted[%d]\n", credits_requested, credits_granted, -- cgit v1.2.3 From 9a63b999ae5435d82a5c353c6b1467100f857742 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 11 Oct 2021 12:48:31 +0900 Subject: ksmbd: fix potencial 32bit overflow from data area check in smb2_write DataOffset and Length validation can be potencial 32bit overflow. This patch fix it. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 79b296abe04e..7b4689f2df49 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -6197,8 +6197,7 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work) (offsetof(struct smb2_write_req, Buffer) - 4)) { data_buf = (char *)&req->Buffer[0]; } else { - if ((le16_to_cpu(req->DataOffset) > get_rfc1002_len(req)) || - (le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req))) { + if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) { pr_err("invalid write data offset %u, smb_len %u\n", le16_to_cpu(req->DataOffset), get_rfc1002_len(req)); @@ -6356,8 +6355,7 @@ int smb2_write(struct ksmbd_work *work) (offsetof(struct smb2_write_req, Buffer) - 4)) { data_buf = (char *)&req->Buffer[0]; } else { - if ((le16_to_cpu(req->DataOffset) > get_rfc1002_len(req)) || - (le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req))) { + if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) { pr_err("invalid write data offset %u, smb_len %u\n", le16_to_cpu(req->DataOffset), get_rfc1002_len(req)); -- cgit v1.2.3 From dbad63001eac3abeeb2b66ddf71504e8ab128c5c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 11 Oct 2021 19:15:25 +0900 Subject: ksmbd: validate compound response buffer Add the check to validate compound response buffer. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 7b4689f2df49..89c187aa8db2 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -449,6 +449,12 @@ bool is_chained_smb2_message(struct ksmbd_work *work) return false; } + if ((u64)get_rfc1002_len(work->response_buf) + MAX_CIFS_SMALL_BUFFER_SIZE > + work->response_sz) { + pr_err("next response offset exceeds response buffer size\n"); + return false; + } + ksmbd_debug(SMB, "got SMB2 chained command\n"); init_chained_smb2_rsp(work); return true; -- cgit v1.2.3 From 6ab4e2eb5e956a61e4d53cea3ab8c866ba79a830 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 13 Oct 2021 23:17:18 +0300 Subject: mmc: sdhci-pci: Read card detect from ACPI for Intel Merrifield Intel Merrifield platform had been converted to use ACPI enumeration. However, the driver missed an update to retrieve card detect GPIO. Fix it here. Unfortunately we can't rely on CD GPIO state because there are two different PCB designs in the wild that are using the opposite card detection sense and there is no way to distinguish those platforms, that's why ignore CD GPIO completely and use it only as an event. Fixes: 4590d98f5a4f ("sfi: Remove framework for deprecated firmware") BugLink: https://github.com/edison-fw/meta-intel-edison/issues/135 Signed-off-by: Andy Shevchenko Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211013201723.52212-2-andriy.shevchenko@linux.intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index be19785227fe..d0f2edfe296c 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -616,16 +616,12 @@ static int intel_select_drive_strength(struct mmc_card *card, return intel_host->drv_strength; } -static int bxt_get_cd(struct mmc_host *mmc) +static int sdhci_get_cd_nogpio(struct mmc_host *mmc) { - int gpio_cd = mmc_gpio_get_cd(mmc); struct sdhci_host *host = mmc_priv(mmc); unsigned long flags; int ret = 0; - if (!gpio_cd) - return 0; - spin_lock_irqsave(&host->lock, flags); if (host->flags & SDHCI_DEVICE_DEAD) @@ -638,6 +634,21 @@ out: return ret; } +static int bxt_get_cd(struct mmc_host *mmc) +{ + int gpio_cd = mmc_gpio_get_cd(mmc); + + if (!gpio_cd) + return 0; + + return sdhci_get_cd_nogpio(mmc); +} + +static int mrfld_get_cd(struct mmc_host *mmc) +{ + return sdhci_get_cd_nogpio(mmc); +} + #define SDHCI_INTEL_PWR_TIMEOUT_CNT 20 #define SDHCI_INTEL_PWR_TIMEOUT_UDELAY 100 @@ -1341,6 +1352,14 @@ static int intel_mrfld_mmc_probe_slot(struct sdhci_pci_slot *slot) MMC_CAP_1_8V_DDR; break; case INTEL_MRFLD_SD: + slot->cd_idx = 0; + slot->cd_override_level = true; + /* + * There are two PCB designs of SD card slot with the opposite + * card detection sense. Quirk this out by ignoring GPIO state + * completely in the custom ->get_cd() callback. + */ + slot->host->mmc_host_ops.get_cd = mrfld_get_cd; slot->host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; break; case INTEL_MRFLD_SDIO: -- cgit v1.2.3 From 22390ce786c59328ccd13c329959dee1e8757487 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 14 Oct 2021 13:17:50 +0200 Subject: ALSA: usb-audio: add Schiit Hel device to quirk table The Shciit Hel device responds to the ctl message for the mic capture switch with a timeout of -EPIPE: usb 7-2.2: cannot get ctl value: req = 0x81, wValue = 0x100, wIndex = 0x1100, type = 1 usb 7-2.2: cannot get ctl value: req = 0x81, wValue = 0x100, wIndex = 0x1100, type = 1 usb 7-2.2: cannot get ctl value: req = 0x81, wValue = 0x100, wIndex = 0x1100, type = 1 usb 7-2.2: cannot get ctl value: req = 0x81, wValue = 0x100, wIndex = 0x1100, type = 1 This seems safe to ignore as the device works properly with the control message quirk, so add it to the quirk table so all is good. Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: alsa-devel@alsa-project.org Cc: linux-usb@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/YWgR3nOI1osvr5Yo@kroah.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 889c855addfc..b8da4a08c458 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1884,6 +1884,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ + QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */ -- cgit v1.2.3 From aef454b40288158b850aab13e3d2a8c406779401 Mon Sep 17 00:00:00 2001 From: Steven Clarkson Date: Thu, 14 Oct 2021 06:35:54 -0700 Subject: ALSA: hda/realtek: Add quirk for Clevo PC50HS Apply existing PCI quirk to the Clevo PC50HS and related models to fix audio output on the built in speakers. Signed-off-by: Steven Clarkson Cc: Link: https://lore.kernel.org/r/20211014133554.1326741-1-sc@lambdal.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 22d27b12c4e7..1bbe22769ca9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2535,6 +2535,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), -- cgit v1.2.3 From ff7e93219442f5ac5b2cfd33e4fe4b7d5942f957 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 28 Jun 2021 10:53:45 -0700 Subject: ice: Fix failure to re-add LAN/RDMA Tx queues Currently if the VSI is rebuilt/removed and the RDMA PF driver is active the RDMA Tx queue scheduler node configuration will not be cleaned up. This will cause the rebuild/re-add of the VSI to fail due to the software structures not being correctly cleaned up for the VSI index. Fix this by always calling ice_rm_vsi_rdma_cfg() for all VSI. If there are no RDMA scheduler nodes created, then there is no harm in calling ice_rm_vsi_rdma_cfg(). This change applies to all VSI types, so if RDMA support is added for other VSI types they will also get this change. Fixes: 348048e724a0 ("ice: Implement iidc operations") Signed-off-by: Brett Creeley Tested-by: Jerzy Wiktor Jurkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 9 +++++++++ drivers/net/ethernet/intel/ice/ice_sched.c | 13 +++++++++++++ drivers/net/ethernet/intel/ice/ice_sched.h | 1 + 3 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index dde9802c6c72..b718e196af2a 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2841,6 +2841,7 @@ void ice_napi_del(struct ice_vsi *vsi) */ int ice_vsi_release(struct ice_vsi *vsi) { + enum ice_status err; struct ice_pf *pf; if (!vsi->back) @@ -2912,6 +2913,10 @@ int ice_vsi_release(struct ice_vsi *vsi) ice_fltr_remove_all(vsi); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); + err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); + if (err) + dev_err(ice_pf_to_dev(vsi->back), "Failed to remove RDMA scheduler config for VSI %u, err %d\n", + vsi->vsi_num, err); ice_vsi_delete(vsi); ice_vsi_free_q_vectors(vsi); @@ -3092,6 +3097,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); + ret = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); + if (ret) + dev_err(ice_pf_to_dev(vsi->back), "Failed to remove RDMA scheduler config for VSI %u, err %d\n", + vsi->vsi_num, ret); ice_vsi_free_q_vectors(vsi); /* SR-IOV determines needed MSIX resources all at once instead of per diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 9f07b6641705..2d9b10277186 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -2070,6 +2070,19 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle) return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN); } +/** + * ice_rm_vsi_rdma_cfg - remove VSI and its RDMA children nodes + * @pi: port information structure + * @vsi_handle: software VSI handle + * + * This function clears the VSI and its RDMA children nodes from scheduler tree + * for all TCs. + */ +enum ice_status ice_rm_vsi_rdma_cfg(struct ice_port_info *pi, u16 vsi_handle) +{ + return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_RDMA); +} + /** * ice_get_agg_info - get the aggregator ID * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 9beef8f0ec76..fdf7a5882f07 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -89,6 +89,7 @@ enum ice_status ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs, u8 owner, bool enable); enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle); +enum ice_status ice_rm_vsi_rdma_cfg(struct ice_port_info *pi, u16 vsi_handle); /* Tx scheduler rate limiter functions */ enum ice_status -- cgit v1.2.3 From 73e30a62b19b9fbb4e6a3465c59da186630d5f2e Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 4 Oct 2021 05:15:25 -0700 Subject: ice: Avoid crash from unnecessary IDA free In the remove path, there is an attempt to free the aux_idx IDA whether it was allocated or not. This can potentially cause a crash when unloading the driver on systems that do not initialize support for RDMA. But, this free cannot be gated by the status bit for RDMA, since it is allocated if the driver detects support for RDMA at probe time, but the driver can enter into a state where RDMA is not supported after the IDA has been allocated at probe time and this would lead to a memory leak. Initialize aux_idx to an invalid value and check for a valid value when unloading to determine if an IDA free is necessary. Fixes: d25a0fc41c1f9 ("ice: Initialize RDMA support") Reported-by: Jun Miao Signed-off-by: Dave Ertman Tested-by: Jesse Brandeburg Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0d6c143f6653..94037881bfd8 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4224,6 +4224,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) if (!pf) return -ENOMEM; + /* initialize Auxiliary index to invalid value */ + pf->aux_idx = -1; + /* set up for high or low DMA */ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (err) @@ -4615,7 +4618,8 @@ static void ice_remove(struct pci_dev *pdev) ice_aq_cancel_waiting_tasks(pf); ice_unplug_aux_dev(pf); - ida_free(&ice_aux_ida, pf->aux_idx); + if (pf->aux_idx >= 0) + ida_free(&ice_aux_ida, pf->aux_idx); set_bit(ICE_DOWN, pf->state); mutex_destroy(&(&pf->hw)->fdir_fltr_lock); -- cgit v1.2.3 From e4c2efa1393c6f1fbfabf91d1d83fcb4ae691ccb Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 14 Sep 2021 19:25:05 -0400 Subject: ice: fix getting UDP tunnel entry Correct parameters order in call to ice_tunnel_idx_to_entry function. Entry in sparse port table is correct when the idx is 0. For idx 1 one correct entry should be skipped, for idx 2 two of them should be skipped etc. Change if condition to be true when idx is 0, which means that previous valid entry of this tunnel type were skipped. Fixes: b20e6c17c468 ("ice: convert to new udp_tunnel infrastructure") Signed-off-by: Michal Swiatkowski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 06ac9badee77..1ac96dc66d0d 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1668,7 +1668,7 @@ static u16 ice_tunnel_idx_to_entry(struct ice_hw *hw, enum ice_tunnel_type type, for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].type == type && - idx--) + idx-- == 0) return i; WARN_ON_ONCE(1); @@ -1828,7 +1828,7 @@ int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, u16 index; tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE; - index = ice_tunnel_idx_to_entry(&pf->hw, idx, tnl_type); + index = ice_tunnel_idx_to_entry(&pf->hw, tnl_type, idx); status = ice_create_tunnel(&pf->hw, index, tnl_type, ntohs(ti->port)); if (status) { -- cgit v1.2.3 From b726ddf984a56a385c9df406a66c221f3a77c951 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 27 Sep 2021 11:21:50 -0700 Subject: ice: Print the api_patch as part of the fw.mgmt.api Currently when a user uses "devlink dev info", the fw.mgmt.api will be the major.minor numbers as shown below: devlink dev info pci/0000:3b:00.0 pci/0000:3b:00.0: driver ice serial_number 00-01-00-ff-ff-00-00-00 versions: fixed: board.id K91258-000 running: fw.mgmt 6.1.2 fw.mgmt.api 1.7 <--- No patch number included fw.mgmt.build 0xd75e7d06 fw.mgmt.srev 5 fw.undi 1.2992.0 fw.undi.srev 5 fw.psid.api 3.10 fw.bundle_id 0x800085cc fw.app.name ICE OS Default Package fw.app 1.3.27.0 fw.app.bundle_id 0xc0000001 fw.netlist 3.10.2000-3.1e.0 fw.netlist.build 0x2a76e110 stored: fw.mgmt.srev 5 fw.undi 1.2992.0 fw.undi.srev 5 fw.psid.api 3.10 fw.bundle_id 0x800085cc fw.netlist 3.10.2000-3.1e.0 fw.netlist.build 0x2a76e110 There are many features in the driver that depend on the major, minor, and patch version of the FW. Without the patch number in the output for fw.mgmt.api debugging issues related to the FW API version is difficult. Also, using major.minor.patch aligns with the existing firmware version which uses a 3 digit value. Fix this by making the fw.mgmt.api print the major.minor.patch versions. Shown below is the result: devlink dev info pci/0000:3b:00.0 pci/0000:3b:00.0: driver ice serial_number 00-01-00-ff-ff-00-00-00 versions: fixed: board.id K91258-000 running: fw.mgmt 6.1.2 fw.mgmt.api 1.7.9 <--- patch number included fw.mgmt.build 0xd75e7d06 fw.mgmt.srev 5 fw.undi 1.2992.0 fw.undi.srev 5 fw.psid.api 3.10 fw.bundle_id 0x800085cc fw.app.name ICE OS Default Package fw.app 1.3.27.0 fw.app.bundle_id 0xc0000001 fw.netlist 3.10.2000-3.1e.0 fw.netlist.build 0x2a76e110 stored: fw.mgmt.srev 5 fw.undi 1.2992.0 fw.undi.srev 5 fw.psid.api 3.10 fw.bundle_id 0x800085cc fw.netlist 3.10.2000-3.1e.0 fw.netlist.build 0x2a76e110 Fixes: ff2e5c700e08 ("ice: add basic handler for devlink .info_get") Signed-off-by: Brett Creeley Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- Documentation/networking/devlink/ice.rst | 9 +++++---- drivers/net/ethernet/intel/ice/ice_devlink.c | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst index a432dc419fa4..5d97cee9457b 100644 --- a/Documentation/networking/devlink/ice.rst +++ b/Documentation/networking/devlink/ice.rst @@ -30,10 +30,11 @@ The ``ice`` driver reports the following versions PHY, link, etc. * - ``fw.mgmt.api`` - running - - 1.5 - - 2-digit version number of the API exported over the AdminQ by the - management firmware. Used by the driver to identify what commands - are supported. + - 1.5.1 + - 3-digit version number (major.minor.patch) of the API exported over + the AdminQ by the management firmware. Used by the driver to + identify what commands are supported. Historical versions of the + kernel only displayed a 2-digit version number (major.minor). * - ``fw.mgmt.build`` - running - 0x305d955f diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 14afce82ef63..da7288bdc9a3 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -63,7 +63,8 @@ static int ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; - snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u", hw->api_maj_ver, hw->api_min_ver); + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->api_maj_ver, + hw->api_min_ver, hw->api_patch); return 0; } -- cgit v1.2.3 From a482c5e00a9b5a194085bcd372ac36141028becb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 12 Oct 2021 08:18:13 -0400 Subject: netfilter: ip6t_rt: fix rt0_hdr parsing in rt_mt6 In rt_mt6(), when it's a nonlinear skb, the 1st skb_header_pointer() only copies sizeof(struct ipv6_rt_hdr) to _route that rh points to. The access by ((const struct rt0_hdr *)rh)->reserved will overflow the buffer. So this access should be moved below the 2nd call to skb_header_pointer(). Besides, after the 2nd skb_header_pointer(), its return value should also be checked, othersize, *rp may cause null-pointer-ref. v1->v2: - clean up some old debugging log. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Dan Carpenter Signed-off-by: Xin Long Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_rt.c | 48 ++++++-------------------------------------- 1 file changed, 6 insertions(+), 42 deletions(-) diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 733c83d38b30..4ad8b2032f1f 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -25,12 +25,7 @@ MODULE_AUTHOR("Andras Kis-Szabo "); static inline bool segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { - bool r; - pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n", - invert ? '!' : ' ', min, id, max); - r = (id >= min && id <= max) ^ invert; - pr_debug(" result %s\n", r ? "PASS" : "FAILED"); - return r; + return (id >= min && id <= max) ^ invert; } static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) @@ -65,30 +60,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) return false; } - pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); - pr_debug("TYPE %04X ", rh->type); - pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); - - pr_debug("IPv6 RT segsleft %02X ", - segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], - rh->segments_left, - !!(rtinfo->invflags & IP6T_RT_INV_SGS))); - pr_debug("type %02X %02X %02X ", - rtinfo->rt_type, rh->type, - (!(rtinfo->flags & IP6T_RT_TYP) || - ((rtinfo->rt_type == rh->type) ^ - !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); - pr_debug("len %02X %04X %02X ", - rtinfo->hdrlen, hdrlen, - !(rtinfo->flags & IP6T_RT_LEN) || - ((rtinfo->hdrlen == hdrlen) ^ - !!(rtinfo->invflags & IP6T_RT_INV_LEN))); - pr_debug("res %02X %02X %02X ", - rtinfo->flags & IP6T_RT_RES, - ((const struct rt0_hdr *)rh)->reserved, - !((rtinfo->flags & IP6T_RT_RES) && - (((const struct rt0_hdr *)rh)->reserved))); - ret = (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], rh->segments_left, !!(rtinfo->invflags & IP6T_RT_INV_SGS))) && @@ -107,22 +78,22 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) reserved), sizeof(_reserved), &_reserved); + if (!rp) { + par->hotdrop = true; + return false; + } ret = (*rp == 0); } - pr_debug("#%d ", rtinfo->addrnr); if (!(rtinfo->flags & IP6T_RT_FST)) { return ret; } else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) { - pr_debug("Not strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - pr_debug("There isn't enough space\n"); return false; } else { unsigned int i = 0; - pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < (unsigned int)((hdrlen - 8) / 16); temp++) { @@ -138,26 +109,20 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) return false; } - if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) { - pr_debug("i=%d temp=%d;\n", i, temp); + if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) i++; - } if (i == rtinfo->addrnr) break; } - pr_debug("i=%d #%d\n", i, rtinfo->addrnr); if (i == rtinfo->addrnr) return ret; else return false; } } else { - pr_debug("Strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - pr_debug("There isn't enough space\n"); return false; } else { - pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < rtinfo->addrnr; temp++) { ap = skb_header_pointer(skb, ptr @@ -173,7 +138,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp])) break; } - pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr); if (temp == rtinfo->addrnr && temp == (unsigned int)((hdrlen - 8) / 16)) return ret; -- cgit v1.2.3 From 174c376278949c44aad89c514a6b5db6cee8db59 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 12 Oct 2021 16:54:37 +0200 Subject: netfilter: ipvs: make global sysctl readonly in non-init netns Because the data pointer of net/ipv4/vs/debug_level is not updated per netns, it must be marked as read-only in non-init netns. Fixes: c6d2d445d8de ("IPVS: netns, final patch enabling network name space.") Signed-off-by: Antoine Tenart Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c25097092a06..29ec3ef63edc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -4090,6 +4090,11 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; +#ifdef CONFIG_IP_VS_DEBUG + /* Global sysctls must be ro in non-init netns */ + if (!net_eq(net, &init_net)) + tbl[idx++].mode = 0444; +#endif ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { -- cgit v1.2.3 From 3e6ed7703dae6838c104d73d3e76e9b79f5c0528 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 12 Oct 2021 18:37:09 +0200 Subject: selftests: netfilter: remove stray bash debug line This should not be there. Fixes: 2de03b45236f ("selftests: netfilter: add flowtable test script") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_flowtable.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 427d94816f2d..d4ffebb989f8 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -199,7 +199,6 @@ fi # test basic connectivity if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then echo "ERROR: ns1 cannot reach ns2" 1>&2 - bash exit 1 fi -- cgit v1.2.3 From 3c414eb65c294719a91a746260085363413f91c1 Mon Sep 17 00:00:00 2001 From: Brendan Grieve Date: Fri, 15 Oct 2021 10:53:35 +0800 Subject: ALSA: usb-audio: Provide quirk for Sennheiser GSP670 Headset As per discussion at: https://github.com/szszoke/sennheiser-gsp670-pulseaudio-profile/issues/13 The GSP670 has 2 playback and 1 recording device that by default are detected in an incompatible order for alsa. This may have been done to make it compatible for the console by the manufacturer and only affects the latest firmware which uses its own ID. This quirk will resolve this by reordering the channels. Signed-off-by: Brendan Grieve Cc: Link: https://lore.kernel.org/r/20211015025335.196592-1-brendan@grieve.com.au Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index de18fff69280..2af8c68fac27 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -4012,6 +4012,38 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, +{ + /* + * Sennheiser GSP670 + * Change order of interfaces loaded + */ + USB_DEVICE(0x1395, 0x0300), + .bInterfaceClass = USB_CLASS_PER_INTERFACE, + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + // Communication + { + .ifnum = 3, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + // Recording + { + .ifnum = 4, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + // Main + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = -1 + } + } + } +}, #undef USB_DEVICE_VENDOR_SPEC #undef USB_AUDIO_DEVICE -- cgit v1.2.3 From d94befbb5ae379f6dfd4fa6d460eacc09fa7b9c3 Mon Sep 17 00:00:00 2001 From: Davide Baldo Date: Fri, 15 Oct 2021 09:21:22 +0200 Subject: ALSA: hda/realtek: Fixes HP Spectre x360 15-eb1xxx speakers In laptop 'HP Spectre x360 Convertible 15-eb1xxx/8811' both front and rear speakers are silent, this patch fixes that by overriding the pin layout and by initializing the amplifier which needs a GPIO pin to be set to 1 then 0, similar to the existing HP Spectre x360 14 model. In order to have volume control, both front and rear speakers were forced to use the DAC1. This patch also correctly map the mute LED but since there is no microphone on/off switch exposed by the alsa subsystem it never turns on by itself. There are still known audio issues in this laptop: headset microphone doesn't work, the button to mute/unmute microphone is not yet mapped, the LED of the mute/unmute speakers doesn't seems to be exposed via GPIO and never turns on. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213953 Signed-off-by: Davide Baldo Link: https://lore.kernel.org/r/20211015072121.5287-1-davide@baldo.me Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 46 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1bbe22769ca9..965b096f416f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6406,6 +6406,44 @@ static void alc_fixup_no_int_mic(struct hda_codec *codec, } } +/* GPIO1 = amplifier on/off + * GPIO3 = mic mute LED + */ +static void alc285_fixup_hp_spectre_x360_eb1(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const hda_nid_t conn[] = { 0x02 }; + + struct alc_spec *spec = codec->spec; + static const struct hda_pintbl pincfgs[] = { + { 0x14, 0x90170110 }, /* front/high speakers */ + { 0x17, 0x90170130 }, /* back/bass speakers */ + { } + }; + + //enable micmute led + alc_fixup_hp_gpio_led(codec, action, 0x00, 0x04); + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->micmute_led_polarity = 1; + /* needed for amp of back speakers */ + spec->gpio_mask |= 0x01; + spec->gpio_dir |= 0x01; + snd_hda_apply_pincfgs(codec, pincfgs); + /* share DAC to have unified volume control */ + snd_hda_override_conn_list(codec, 0x14, ARRAY_SIZE(conn), conn); + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + break; + case HDA_FIXUP_ACT_INIT: + /* need to toggle GPIO to enable the amp of back speakers */ + alc_update_gpio_data(codec, 0x01, true); + msleep(100); + alc_update_gpio_data(codec, 0x01, false); + break; + } +} + static void alc285_fixup_hp_spectre_x360(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -6558,6 +6596,7 @@ enum { ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, ALC280_FIXUP_HP_9480M, ALC245_FIXUP_HP_X360_AMP, + ALC285_FIXUP_HP_SPECTRE_X360_EB1, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13, @@ -8251,6 +8290,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_spectre_x360, }, + [ALC285_FIXUP_HP_SPECTRE_X360_EB1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_spectre_x360_eb1 + }, [ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_ideapad_s740_coef, @@ -8585,6 +8628,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), SND_PCI_QUIRK(0x103c, 0x8805, "HP ProBook 650 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8811, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x8812, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8847, "HP EliteBook x360 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x884b, "HP EliteBook 840 Aero G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), @@ -9006,6 +9051,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"}, {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, + {.id = ALC285_FIXUP_HP_SPECTRE_X360_EB1, .name = "alc285-hp-spectre-x360-eb1"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"}, -- cgit v1.2.3 From 019057bd73d1751fdfec41e43148baf3303d98f9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 11:07:59 -0400 Subject: KVM: SEV-ES: fix length of string I/O The size of the data in the scratch buffer is not divided by the size of each port I/O operation, so vcpu->arch.pio.count ends up being larger than it should be by a factor of size. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Acked-by: Tom Lendacky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c36b5fe4c27c..e672493b5d8d 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2583,7 +2583,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) return -EINVAL; return kvm_sev_es_string_io(&svm->vcpu, size, port, - svm->ghcb_sa, svm->ghcb_sa_len, in); + svm->ghcb_sa, svm->ghcb_sa_len / size, in); } void sev_es_init_vmcb(struct vcpu_svm *svm) -- cgit v1.2.3 From c2402d43d183b11445aed921e7bebcd47ef222f1 Mon Sep 17 00:00:00 2001 From: Kele Huang Date: Thu, 14 Oct 2021 11:19:52 +0800 Subject: ptp: fix error print of ptp_kvm on X86_64 platform Commit a86ed2cfa13c5 ("ptp: Don't print an error if ptp_kvm is not supported") fixes the error message print on ARM platform by only concerning about the case that the error returned from kvm_arch_ptp_init() is not -EOPNOTSUPP. Although the ARM platform returns -EOPNOTSUPP if ptp_kvm is not supported while X86_64 platform returns -KVM_EOPNOTSUPP, both error codes share the same value 95. Actually kvm_arch_ptp_init() on X86_64 platform can return three kinds of errors (-KVM_ENOSYS, -KVM_EOPNOTSUPP and -KVM_EFAULT). The problem is that -KVM_EOPNOTSUPP is masked out and -KVM_EFAULT is ignored among them. This patch fixes this by returning them to ptp_kvm_init() respectively. Signed-off-by: Kele Huang Signed-off-by: David S. Miller --- drivers/ptp/ptp_kvm_x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c index d0096cd7096a..4991054a2135 100644 --- a/drivers/ptp/ptp_kvm_x86.c +++ b/drivers/ptp/ptp_kvm_x86.c @@ -31,10 +31,10 @@ int kvm_arch_ptp_init(void) ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa, KVM_CLOCK_PAIRING_WALLCLOCK); - if (ret == -KVM_ENOSYS || ret == -KVM_EOPNOTSUPP) + if (ret == -KVM_ENOSYS) return -ENODEV; - return 0; + return ret; } int kvm_arch_ptp_get_clock(struct timespec64 *ts) -- cgit v1.2.3 From 075718fdaf0efe20223571236c1bf14ca35a7aa1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 14 Oct 2021 00:50:55 -0400 Subject: sctp: fix transport encap_port update in sctp_vtag_verify transport encap_port update should be updated when sctp_vtag_verify() succeeds, namely, returns 1, not returns 0. Correct it in this patch. While at it, also fix the indentation. Fixes: a1dd2cf2f1ae ("sctp: allow changing transport encap_port by peer packets") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2eb6d7c2c931..f37c7a558d6d 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -384,11 +384,11 @@ sctp_vtag_verify(const struct sctp_chunk *chunk, * Verification Tag value does not match the receiver's own * tag value, the receiver shall silently discard the packet... */ - if (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag) - return 1; + if (ntohl(chunk->sctp_hdr->vtag) != asoc->c.my_vtag) + return 0; chunk->transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port; - return 0; + return 1; } /* Check VTAG of the packet matches the sender's own tag and the T bit is -- cgit v1.2.3 From 46393d61a328d7c4e3264252dae891921126c674 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 15 Oct 2021 15:07:54 +0200 Subject: lan78xx: select CRC32 Fix the following build/link error by adding a dependency on the CRC32 routines: ld: drivers/net/usb/lan78xx.o: in function `lan78xx_set_multicast': lan78xx.c:(.text+0x48cf): undefined reference to `crc32_le' The actual use of crc32_le() comes indirectly through ether_crc(). Fixes: 55d7de9de6c30 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Vegard Nossum Signed-off-by: David S. Miller --- drivers/net/usb/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index f87f17503373..b554054a7560 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -117,6 +117,7 @@ config USB_LAN78XX select PHYLIB select MICROCHIP_PHY select FIXED_PHY + select CRC32 help This option adds support for Microchip LAN78XX based USB 2 & USB 3 10/100/1000 Ethernet adapters. -- cgit v1.2.3 From 86f1e3a8489f6a0232c1f3bc2bdb379f5ccdecec Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 15 Oct 2021 10:26:04 +0300 Subject: tcp: md5: Fix overlap between vrf and non-vrf keys With net.ipv4.tcp_l3mdev_accept=1 it is possible for a listen socket to accept connection from the same client address in different VRFs. It is also possible to set different MD5 keys for these clients which differ only in the tcpm_l3index field. This appears to work when distinguishing between different VRFs but not between non-VRF and VRF connections. In particular: * tcp_md5_do_lookup_exact will match a non-vrf key against a vrf key. This means that adding a key with l3index != 0 after a key with l3index == 0 will cause the earlier key to be deleted. Both keys can be present if the non-vrf key is added later. * _tcp_md5_do_lookup can match a non-vrf key before a vrf key. This casues failures if the passwords differ. Fix this by making tcp_md5_do_lookup_exact perform an actual exact comparison on l3index and by making __tcp_md5_do_lookup perfer vrf-bound keys above other considerations like prefixlen. Fixes: dea53bb80e07 ("tcp: Add l3index to tcp_md5sig_key and md5 functions") Signed-off-by: Leonard Crestez Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2e62e0d6373a..f64b6c8380e8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1037,6 +1037,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) DEFINE_STATIC_KEY_FALSE(tcp_md5_needed); EXPORT_SYMBOL(tcp_md5_needed); +static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new) +{ + if (!old) + return true; + + /* l3index always overrides non-l3index */ + if (old->l3index && new->l3index == 0) + return false; + if (old->l3index == 0 && new->l3index) + return true; + + return old->prefixlen < new->prefixlen; +} + /* Find the Key structure for an address. */ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, @@ -1074,8 +1088,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, match = false; } - if (match && (!best_match || - key->prefixlen > best_match->prefixlen)) + if (match && better_md5_match(best_match, key)) best_match = key; } return best_match; @@ -1105,7 +1118,7 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, lockdep_sock_is_held(sk)) { if (key->family != family) continue; - if (key->l3index && key->l3index != l3index) + if (key->l3index != l3index) continue; if (!memcmp(&key->addr, addr, size) && key->prefixlen == prefixlen) -- cgit v1.2.3 From a76c2315bec7afeb9bafe776fe532106fa0a8b05 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 15 Oct 2021 10:26:05 +0300 Subject: tcp: md5: Allow MD5SIG_FLAG_IFINDEX with ifindex=0 Multiple VRFs are generally meant to be "separate" but right now md5 keys for the default VRF also affect connections inside VRFs if the IP addresses happen to overlap. So far the combination of TCP_MD5SIG_FLAG_IFINDEX with tcpm_ifindex == 0 was an error, accept this to mean "key only applies to default VRF". This is what applications using VRFs for traffic separation want. Signed-off-by: Leonard Crestez Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/tcp.h | 5 +++-- net/ipv4/tcp_ipv4.c | 26 ++++++++++++++++---------- net/ipv6/tcp_ipv6.c | 15 +++++++++------ 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 3166dc15d7d6..60c384569e9c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1576,6 +1576,7 @@ struct tcp_md5sig_key { u8 keylen; u8 family; /* AF_INET or AF_INET6 */ u8 prefixlen; + u8 flags; union tcp_md5_addr addr; int l3index; /* set if key added with L3 scope */ u8 key[TCP_MD5SIG_MAXKEYLEN]; @@ -1621,10 +1622,10 @@ struct tcp_md5sig_pool { int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, u8 prefixlen, int l3index, + int family, u8 prefixlen, int l3index, u8 flags, const u8 *newkey, u8 newkeylen, gfp_t gfp); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, - int family, u8 prefixlen, int l3index); + int family, u8 prefixlen, int l3index, u8 flags); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f64b6c8380e8..5b8ce65dfc06 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1073,7 +1073,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, lockdep_sock_is_held(sk)) { if (key->family != family) continue; - if (key->l3index && key->l3index != l3index) + if (key->flags & TCP_MD5SIG_FLAG_IFINDEX && key->l3index != l3index) continue; if (family == AF_INET) { mask = inet_make_mask(key->prefixlen); @@ -1098,7 +1098,7 @@ EXPORT_SYMBOL(__tcp_md5_do_lookup); static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, - int l3index) + int l3index, u8 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; @@ -1118,6 +1118,8 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, lockdep_sock_is_held(sk)) { if (key->family != family) continue; + if ((key->flags & TCP_MD5SIG_FLAG_IFINDEX) != (flags & TCP_MD5SIG_FLAG_IFINDEX)) + continue; if (key->l3index != l3index) continue; if (!memcmp(&key->addr, addr, size) && @@ -1142,7 +1144,7 @@ EXPORT_SYMBOL(tcp_v4_md5_lookup); /* This can be called on a newly created socket, from other files */ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, u8 prefixlen, int l3index, + int family, u8 prefixlen, int l3index, u8 flags, const u8 *newkey, u8 newkeylen, gfp_t gfp) { /* Add Key to the list */ @@ -1150,7 +1152,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_info *md5sig; - key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index); + key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags); if (key) { /* Pre-existing entry - just update that one. * Note that the key might be used concurrently. @@ -1195,6 +1197,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key->family = family; key->prefixlen = prefixlen; key->l3index = l3index; + key->flags = flags; memcpy(&key->addr, addr, (family == AF_INET6) ? sizeof(struct in6_addr) : sizeof(struct in_addr)); @@ -1204,11 +1207,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, EXPORT_SYMBOL(tcp_md5_do_add); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, - u8 prefixlen, int l3index) + u8 prefixlen, int l3index, u8 flags) { struct tcp_md5sig_key *key; - key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index); + key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags); if (!key) return -ENOENT; hlist_del_rcu(&key->node); @@ -1242,6 +1245,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, const union tcp_md5_addr *addr; u8 prefixlen = 32; int l3index = 0; + u8 flags; if (optlen < sizeof(cmd)) return -EINVAL; @@ -1252,6 +1256,8 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, if (sin->sin_family != AF_INET) return -EINVAL; + flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; + if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { prefixlen = cmd.tcpm_prefixlen; @@ -1259,7 +1265,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, return -EINVAL; } - if (optname == TCP_MD5SIG_EXT && + if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { struct net_device *dev; @@ -1280,12 +1286,12 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, addr = (union tcp_md5_addr *)&sin->sin_addr.s_addr; if (!cmd.tcpm_keylen) - return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index); + return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index, flags); if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; - return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, + return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } @@ -1609,7 +1615,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, * memory, then we end up not copying the key * across. Shucks. */ - tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index, + tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index, key->flags, key->key, key->keylen, GFP_ATOMIC); sk_nocaps_add(newsk, NETIF_F_GSO_MASK); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0ce52d46e4f8..b03dd02c9f13 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -599,6 +599,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; int l3index = 0; u8 prefixlen; + u8 flags; if (optlen < sizeof(cmd)) return -EINVAL; @@ -609,6 +610,8 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (sin6->sin6_family != AF_INET6) return -EINVAL; + flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; + if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { prefixlen = cmd.tcpm_prefixlen; @@ -619,7 +622,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; } - if (optname == TCP_MD5SIG_EXT && + if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { struct net_device *dev; @@ -640,9 +643,9 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], AF_INET, prefixlen, - l3index); + l3index, flags); return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, prefixlen, l3index); + AF_INET6, prefixlen, l3index, flags); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) @@ -650,12 +653,12 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], - AF_INET, prefixlen, l3index, + AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, prefixlen, l3index, + AF_INET6, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } @@ -1404,7 +1407,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * * across. Shucks. */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, - AF_INET6, 128, l3index, key->key, key->keylen, + AF_INET6, 128, l3index, key->flags, key->key, key->keylen, sk_gfp_mask(sk, GFP_ATOMIC)); } #endif -- cgit v1.2.3 From 78a9cf6143e2d4fb4dd48dde529b24cd5b6bf0e0 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 15 Oct 2021 10:26:06 +0300 Subject: selftests: nettest: Add --{force,no}-bind-key-ifindex These options allow explicit control over the TCP_MD5SIG_FLAG_IFINDEX flag instead of always setting it based on binding to an interface. Do this by converting to getopt_long because nettest has too many single-character flags already and getopt_long is widely used in selftests. Signed-off-by: Leonard Crestez Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/nettest.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index bd6288302094..b599003eb5ba 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -101,6 +102,8 @@ struct sock_args { struct sockaddr_in6 v6; } md5_prefix; unsigned int prefix_len; + /* 0: default, -1: force off, +1: force on */ + int bind_key_ifindex; /* expected addresses and device index for connection */ const char *expected_dev; @@ -271,11 +274,14 @@ static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args } memcpy(&md5sig.tcpm_addr, addr, alen); - if (args->ifindex) { + if ((args->ifindex && args->bind_key_ifindex >= 0) || args->bind_key_ifindex >= 1) { opt = TCP_MD5SIG_EXT; md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX; md5sig.tcpm_ifindex = args->ifindex; + log_msg("TCP_MD5SIG_FLAG_IFINDEX set tcpm_ifindex=%d\n", md5sig.tcpm_ifindex); + } else { + log_msg("TCP_MD5SIG_FLAG_IFINDEX off\n", md5sig.tcpm_ifindex); } rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig)); @@ -1822,6 +1828,14 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) } #define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" +#define OPT_FORCE_BIND_KEY_IFINDEX 1001 +#define OPT_NO_BIND_KEY_IFINDEX 1002 + +static struct option long_opts[] = { + {"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX}, + {"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX}, + {0, 0, 0, 0} +}; static void print_usage(char *prog) { @@ -1858,6 +1872,10 @@ static void print_usage(char *prog) " -M password use MD5 sum protection\n" " -X password MD5 password for client mode\n" " -m prefix/len prefix and length to use for MD5 key\n" + " --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n" + " --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n" + " (default: only if -I is passed)\n" + "\n" " -g grp multicast group (e.g., 239.1.1.1)\n" " -i interactive mode (default is echo and terminate)\n" "\n" @@ -1893,7 +1911,7 @@ int main(int argc, char *argv[]) * process input args */ - while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) { + while ((rc = getopt_long(argc, argv, GETOPT_STR, long_opts, NULL)) != -1) { switch (rc) { case 'B': both_mode = 1; @@ -1966,6 +1984,12 @@ int main(int argc, char *argv[]) case 'M': args.password = optarg; break; + case OPT_FORCE_BIND_KEY_IFINDEX: + args.bind_key_ifindex = 1; + break; + case OPT_NO_BIND_KEY_IFINDEX: + args.bind_key_ifindex = -1; + break; case 'X': args.client_pw = optarg; break; -- cgit v1.2.3 From 64e4017778bea04a50ed564615339801e02d2d32 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 15 Oct 2021 10:26:07 +0300 Subject: selftests: net/fcnal: Test --{force,no}-bind-key-ifindex Test that applications binding listening sockets to VRFs without specifying TCP_MD5SIG_FLAG_IFINDEX will work as expected. This would be broken if __tcp_md5_do_lookup always made a strict comparison on l3index. See this email: https://lore.kernel.org/netdev/209548b5-27d2-2059-f2e9-2148f5a0291b@gmail.com/ Applications using tcp_l3mdev_accept=1 and a single global socket (not bound to any interface) also should have a way to specify keys that are only for the default VRF, this is done by --force-bind-key-ifindex without otherwise binding to a device. Signed-off-by: Leonard Crestez Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 60 +++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 13350cd5c8ac..8e67a252b672 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -289,6 +289,12 @@ set_sysctl() run_cmd sysctl -q -w $* } +# get sysctl values in NS-A +get_sysctl() +{ + ${NSA_CMD} sysctl -n $* +} + ################################################################################ # Setup for tests @@ -1003,6 +1009,60 @@ ipv4_tcp_md5() run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET} log_test $? 1 "MD5: VRF: Device must be a VRF - prefix" + test_ipv4_md5_vrf__vrf_server__no_bind_ifindex + test_ipv4_md5_vrf__global_server__bind_ifindex0 +} + +test_ipv4_md5_vrf__vrf_server__no_bind_ifindex() +{ + log_start + show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX" + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection" + + log_start + show_hint "Binding both the socket and the key is not required but it works" + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection" +} + +test_ipv4_md5_vrf__global_server__bind_ifindex0() +{ + # This particular test needs tcp_l3mdev_accept=1 for Global server to accept VRF connections + local old_tcp_l3mdev_accept + old_tcp_l3mdev_accept=$(get_sysctl net.ipv4.tcp_l3mdev_accept) + set_sysctl net.ipv4.tcp_l3mdev_accept=1 + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection" + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection" + log_start + + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection" + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection" + + # restore value + set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept" } ipv4_tcp_novrf() -- cgit v1.2.3 From 4bc59477c3298b191c72b5d99feb54a1dc8c254d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 15 Oct 2021 17:14:02 +0900 Subject: ksmbd: limit read/write/trans buffer size not to exceed 8MB ksmbd limit read/write/trans buffer size not to exceed maximum 8MB. And set the minimum value of max response buffer size to 64KB. Windows client doesn't send session setup request if ksmbd set max trans/read/write size lower than 64KB in smb2 negotiate. It means windows allow at least 64 KB or more about this value. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2ops.c | 3 +++ fs/ksmbd/smb2pdu.c | 2 +- fs/ksmbd/smb2pdu.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c index b06456eb587b..fb6a65d23139 100644 --- a/fs/ksmbd/smb2ops.c +++ b/fs/ksmbd/smb2ops.c @@ -284,6 +284,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn) void init_smb2_max_read_size(unsigned int sz) { + sz = clamp_val(sz, SMB3_MIN_IOSIZE, SMB3_MAX_IOSIZE); smb21_server_values.max_read_size = sz; smb30_server_values.max_read_size = sz; smb302_server_values.max_read_size = sz; @@ -292,6 +293,7 @@ void init_smb2_max_read_size(unsigned int sz) void init_smb2_max_write_size(unsigned int sz) { + sz = clamp_val(sz, SMB3_MIN_IOSIZE, SMB3_MAX_IOSIZE); smb21_server_values.max_write_size = sz; smb30_server_values.max_write_size = sz; smb302_server_values.max_write_size = sz; @@ -300,6 +302,7 @@ void init_smb2_max_write_size(unsigned int sz) void init_smb2_max_trans_size(unsigned int sz) { + sz = clamp_val(sz, SMB3_MIN_IOSIZE, SMB3_MAX_IOSIZE); smb21_server_values.max_trans_size = sz; smb30_server_values.max_trans_size = sz; smb302_server_values.max_trans_size = sz; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 89c187aa8db2..7999d8bc6892 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -524,7 +524,7 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) { struct smb2_hdr *hdr = work->request_buf; size_t small_sz = MAX_CIFS_SMALL_BUFFER_SIZE; - size_t large_sz = work->conn->vals->max_trans_size + MAX_SMB2_HDR_SIZE; + size_t large_sz = small_sz + work->conn->vals->max_trans_size; size_t sz = small_sz; int cmd = le16_to_cpu(hdr->Command); diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h index a6dec5ec6a54..ff5a2f01d34a 100644 --- a/fs/ksmbd/smb2pdu.h +++ b/fs/ksmbd/smb2pdu.h @@ -113,6 +113,8 @@ #define SMB21_DEFAULT_IOSIZE (1024 * 1024) #define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024) #define SMB3_DEFAULT_TRANS_SIZE (1024 * 1024) +#define SMB3_MIN_IOSIZE (64 * 1024) +#define SMB3_MAX_IOSIZE (8 * 1024 * 1024) /* * SMB2 Header Definition -- cgit v1.2.3 From 2ea086e35c3d726a3bacd0a971c1f02a50e98206 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Fri, 15 Oct 2021 06:02:50 +0900 Subject: ksmbd: add buffer validation for smb direct Add buffer validation for smb direct. Acked-by: Namjae Jeon Signed-off-by: Hyunchul Lee Signed-off-by: Steve French --- fs/ksmbd/transport_rdma.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index 3a7fa23ba850..a2fd5a4d4cd5 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -549,6 +549,10 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (recvmsg->type) { case SMB_DIRECT_MSG_NEGOTIATE_REQ: + if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { + put_empty_recvmsg(t, recvmsg); + return; + } t->negotiation_requested = true; t->full_packet_received = true; wake_up_interruptible(&t->wait_status); @@ -556,10 +560,23 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) case SMB_DIRECT_MSG_DATA_TRANSFER: { struct smb_direct_data_transfer *data_transfer = (struct smb_direct_data_transfer *)recvmsg->packet; - int data_length = le32_to_cpu(data_transfer->data_length); + unsigned int data_length; int avail_recvmsg_count, receive_credits; + if (wc->byte_len < + offsetof(struct smb_direct_data_transfer, padding)) { + put_empty_recvmsg(t, recvmsg); + return; + } + + data_length = le32_to_cpu(data_transfer->data_length); if (data_length) { + if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + + (u64)data_length) { + put_empty_recvmsg(t, recvmsg); + return; + } + if (t->full_packet_received) recvmsg->first_segment = true; @@ -568,7 +585,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) else t->full_packet_received = true; - enqueue_reassembly(t, recvmsg, data_length); + enqueue_reassembly(t, recvmsg, (int)data_length); wake_up_interruptible(&t->wait_reassembly_queue); spin_lock(&t->receive_credit_lock); -- cgit v1.2.3 From 7a33488705008b5bb5f8d95d05326dcc64fc55f4 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Fri, 15 Oct 2021 12:52:58 +0900 Subject: ksmbd: validate credit charge after validating SMB2 PDU body size smb2_validate_credit_charge() accesses fields in the SMB2 PDU body, but until smb2_calc_size() is called the PDU has not yet been verified to be large enough to access the PDU dynamic part length field. Acked-by: Namjae Jeon Signed-off-by: Ralph Boehme Signed-off-by: Steve French --- fs/ksmbd/smb2misc.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c index e7e441c8f050..030ca57c3784 100644 --- a/fs/ksmbd/smb2misc.c +++ b/fs/ksmbd/smb2misc.c @@ -400,26 +400,20 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) } } - if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) && - smb2_validate_credit_charge(work->conn, hdr)) { - work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER); - return 1; - } - if (smb2_calc_size(hdr, &clc_len)) return 1; if (len != clc_len) { /* client can return one byte more due to implied bcc[0] */ if (clc_len == len + 1) - return 0; + goto validate_credit; /* * Some windows servers (win2016) will pad also the final * PDU in a compound to 8 bytes. */ if (ALIGN(clc_len, 8) == len) - return 0; + goto validate_credit; /* * windows client also pad up to 8 bytes when compounding. @@ -432,7 +426,7 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) "cli req padded more than expected. Length %d not %d for cmd:%d mid:%llu\n", len, clc_len, command, le64_to_cpu(hdr->MessageId)); - return 0; + goto validate_credit; } ksmbd_debug(SMB, @@ -443,6 +437,13 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) return 1; } +validate_credit: + if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) && + smb2_validate_credit_charge(work->conn, hdr)) { + work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER); + return 1; + } + return 0; } -- cgit v1.2.3 From 0857d6f8c759d95f89d0436f86cdfd189ef99f20 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Thu, 14 Oct 2021 09:08:45 -0400 Subject: ipv6: When forwarding count rx stats on the orig netdev Commit bdb7cc643fc9 ("ipv6: Count interface receive statistics on the ingress netdev") does not work when ip6_forward() executes on the skbs with vrf-enslaved netdev. Use IP6CB(skb)->iif to get to the right one. Add a selftest script to verify. Fixes: bdb7cc643fc9 ("ipv6: Count interface receive statistics on the ingress netdev") Signed-off-by: Stephen Suryaputra Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20211014130845.410602-1-ssuryaextr@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 3 +- tools/testing/selftests/net/forwarding/Makefile | 1 + .../net/forwarding/forwarding.config.sample | 2 + .../net/forwarding/ip6_forward_instats_vrf.sh | 172 +++++++++++++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 8 + 5 files changed, 185 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 12f985f43bcc..2f044a49afa8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -464,13 +464,14 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) int ip6_forward(struct sk_buff *skb) { - struct inet6_dev *idev = __in6_dev_get_safely(skb->dev); struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); + struct inet6_dev *idev; u32 mtu; + idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); if (net->ipv6.devconf_all->forwarding == 0) goto error; diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index d97bd6889446..72ee644d47bf 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -9,6 +9,7 @@ TEST_PROGS = bridge_igmp.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ gre_multipath.sh \ + ip6_forward_instats_vrf.sh \ ip6gre_inner_v4_multipath.sh \ ip6gre_inner_v6_multipath.sh \ ipip_flat_gre_key.sh \ diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index b802c14d2950..e5e2fbeca22e 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -39,3 +39,5 @@ NETIF_CREATE=yes # Timeout (in seconds) before ping exits regardless of how many packets have # been sent or received PING_TIMEOUT=5 +# IPv6 traceroute utility name. +TROUTE6=traceroute6 diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh new file mode 100755 index 000000000000..9f5b3e2e5e95 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test ipv6 stats on the incoming if when forwarding with VRF + +ALL_TESTS=" + ipv6_ping + ipv6_in_too_big_err + ipv6_in_hdr_err + ipv6_in_addr_err + ipv6_in_discard +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 2001:1:1::2/64 + ip -6 route add vrf v$h1 2001:1:2::/64 via 2001:1:1::1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 2001:1:2::/64 via 2001:1:1::1 + simple_if_fini $h1 2001:1:1::2/64 +} + +router_create() +{ + vrf_create router + __simple_if_init $rtr1 router 2001:1:1::1/64 + __simple_if_init $rtr2 router 2001:1:2::1/64 + mtu_set $rtr2 1280 +} + +router_destroy() +{ + mtu_restore $rtr2 + __simple_if_fini $rtr2 2001:1:2::1/64 + __simple_if_fini $rtr1 2001:1:1::1/64 + vrf_destroy router +} + +h2_create() +{ + simple_if_init $h2 2001:1:2::2/64 + ip -6 route add vrf v$h2 2001:1:1::/64 via 2001:1:2::1 + mtu_set $h2 1280 +} + +h2_destroy() +{ + mtu_restore $h2 + ip -6 route del vrf v$h2 2001:1:1::/64 via 2001:1:2::1 + simple_if_fini $h2 2001:1:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rtr1=${NETIFS[p2]} + + rtr2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + h1_create + router_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + router_destroy + h1_destroy + vrf_cleanup +} + +ipv6_in_too_big_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) + local vrf_name=$(master_name_get $h1) + + # Send too big packets + ip vrf exec $vrf_name \ + $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + + local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InTooBigErrors" +} + +ipv6_in_hdr_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InHdrErrors) + local vrf_name=$(master_name_get $h1) + + # Send packets with hop limit 1, easiest with traceroute6 as some ping6 + # doesn't allow hop limit to be specified + ip vrf exec $vrf_name \ + $TROUTE6 2001:1:2::2 &> /dev/null + + local t1=$(ipv6_stats_get $rtr1 Ip6InHdrErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InHdrErrors" +} + +ipv6_in_addr_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) + local vrf_name=$(master_name_get $h1) + + # Disable forwarding temporary while sending the packet + sysctl -qw net.ipv6.conf.all.forwarding=0 + ip vrf exec $vrf_name \ + $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + sysctl -qw net.ipv6.conf.all.forwarding=1 + + local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InAddrErrors" +} + +ipv6_in_discard() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InDiscards) + local vrf_name=$(master_name_get $h1) + + # Add a policy to discard + ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block + ip vrf exec $vrf_name \ + $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + ip xfrm policy del dst 2001:1:2::2/128 dir fwd + + local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InDiscards" +} +ipv6_ping() +{ + RET=0 + + ping6_test $h1 2001:1:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e7fc5c35b569..92087d423bcf 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -751,6 +751,14 @@ qdisc_parent_stats_get() | jq '.[] | select(.parent == "'"$parent"'") | '"$selector" } +ipv6_stats_get() +{ + local dev=$1; shift + local stat=$1; shift + + cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2 +} + humanize() { local speed=$1; shift -- cgit v1.2.3 From ba95a6225b02c24d99f39fd930b6a388e5fd7b48 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 14 Oct 2021 17:20:45 +0200 Subject: vsock_diag_test: remove free_sock_stat() call in test_no_sockets In `test_no_sockets` we don't expect any sockets, indeed check_no_sockets() prints an error and exits if `sockets` list is not empty, so free_sock_stat() call is unnecessary since it would only be called when the `sockets` list is empty. This was discovered by a strange warning printed by gcc v11.2.1: In file included from ../../include/linux/list.h:7, from vsock_diag_test.c:18: vsock_diag_test.c: In function ‘test_no_sockets’: ../../include/linux/kernel.h:35:45: error: array subscript ‘struct vsock_stat[0]’ is partly outside array bound s of ‘struct list_head[1]’ [-Werror=array-bounds] 35 | const typeof(((type *)0)->member) * __mptr = (ptr); \ | ^~~~~~ ../../include/linux/list.h:352:9: note: in expansion of macro ‘container_of’ 352 | container_of(ptr, type, member) | ^~~~~~~~~~~~ ../../include/linux/list.h:393:9: note: in expansion of macro ‘list_entry’ 393 | list_entry((pos)->member.next, typeof(*(pos)), member) | ^~~~~~~~~~ ../../include/linux/list.h:522:21: note: in expansion of macro ‘list_next_entry’ 522 | n = list_next_entry(pos, member); \ | ^~~~~~~~~~~~~~~ vsock_diag_test.c:325:9: note: in expansion of macro ‘list_for_each_entry_safe’ 325 | list_for_each_entry_safe(st, next, sockets, list) { | ^~~~~~~~~~~~~~~~~~~~~~~~ In file included from vsock_diag_test.c:18: vsock_diag_test.c:333:19: note: while referencing ‘sockets’ 333 | LIST_HEAD(sockets); | ^~~~~~~ ../../include/linux/list.h:23:26: note: in definition of macro ‘LIST_HEAD’ 23 | struct list_head name = LIST_HEAD_INIT(name) It seems related to some compiler optimization and assumption about the empty `sockets` list, since this warning is printed only with -02 or -O3. Also removing `exit(1)` from check_no_sockets() makes the warning disappear since in that case free_sock_stat() can be reached also when the list is not empty. Reported-by: Marc-André Lureau Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20211014152045.173872-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_diag_test.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c index cec6f5a738e1..fa927ad16f8a 100644 --- a/tools/testing/vsock/vsock_diag_test.c +++ b/tools/testing/vsock/vsock_diag_test.c @@ -332,8 +332,6 @@ static void test_no_sockets(const struct test_opts *opts) read_vsock_stat(&sockets); check_no_sockets(&sockets); - - free_sock_stat(&sockets); } static void test_listen_socket_server(const struct test_opts *opts) -- cgit v1.2.3 From 82a4f329b133ad0de66bee12c0be5c67bb8aa188 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:36 +0200 Subject: arm64: dts: imx8mm-kontron: Make sure SOC and DRAM supply voltages are correct It looks like the voltages for the SOC and DRAM supply weren't properly validated before. The datasheet and uboot-imx code tells us that VDD_SOC should be 800 mV in suspend and 850 mV in run mode. VDD_DRAM should be 950 mV for DDR clock frequencies of up to 1.5 GHz. Let's fix these values to make sure the voltages are within the required range. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi index 9db9b90bf2bc..7cf60c17c29b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi @@ -91,10 +91,12 @@ reg_vdd_soc: BUCK1 { regulator-name = "buck1"; regulator-min-microvolt = <800000>; - regulator-max-microvolt = <900000>; + regulator-max-microvolt = <850000>; regulator-boot-on; regulator-always-on; regulator-ramp-delay = <3125>; + nxp,dvs-run-voltage = <850000>; + nxp,dvs-standby-voltage = <800000>; }; reg_vdd_arm: BUCK2 { @@ -111,7 +113,7 @@ reg_vdd_dram: BUCK3 { regulator-name = "buck3"; regulator-min-microvolt = <850000>; - regulator-max-microvolt = <900000>; + regulator-max-microvolt = <950000>; regulator-boot-on; regulator-always-on; }; -- cgit v1.2.3 From 256a24eba7f897c817fb0103dac73467d3789202 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:37 +0200 Subject: arm64: dts: imx8mm-kontron: Set lower limit of VDD_SNVS to 800 mV According to the datasheet the typical value for VDD_SNVS should be 800 mV, so let's make sure that this is within the range of the regulator. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi index 7cf60c17c29b..42bbbb3f532b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi @@ -152,7 +152,7 @@ reg_vdd_snvs: LDO2 { regulator-name = "ldo2"; - regulator-min-microvolt = <850000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; -- cgit v1.2.3 From 6562d6e350284307e33ea10c7f46a6661ff22770 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:38 +0200 Subject: arm64: dts: imx8mm-kontron: Fix polarity of reg_rst_eth2 The regulator reg_rst_eth2 should keep the reset signal of the USB ethernet adapter deasserted anytime. Fix the polarity and mark it as always-on. Anyway, using the regulator is only a workaround for the missing support of specifying a reset GPIO for USB devices in a generic way. As we don't have a solution for this at the moment, at least fix the current workaround. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index d17abb515835..3912ac2446d7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -70,7 +70,9 @@ regulator-name = "rst-usb-eth2"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb_eth2>; - gpio = <&gpio3 2 GPIO_ACTIVE_LOW>; + gpio = <&gpio3 2 GPIO_ACTIVE_HIGH>; + enable-active-high; + regulator-always-on; }; reg_vdd_5v: regulator-5v { -- cgit v1.2.3 From ca6f9d85d5944046a241b325700c1ca395651c28 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:39 +0200 Subject: arm64: dts: imx8mm-kontron: Fix CAN SPI clock frequency The MCP2515 can be used with an SPI clock of up to 10 MHz. Set the limit accordingly to prevent any performance issues caused by the really low clock speed of 100 kHz. This removes the arbitrarily low limit on the SPI frequency, that was caused by a typo in the original dts. Without this change, receiving CAN messages on the board beyond a certain bitrate will cause overrun errors (see 'ip -det -stat link show can0'). With this fix, receiving messages on the bus works without any overrun errors for bitrates up to 1 MBit. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index 3912ac2446d7..a52cdfb0920e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -97,7 +97,7 @@ clocks = <&osc_can>; interrupt-parent = <&gpio4>; interrupts = <28 IRQ_TYPE_EDGE_FALLING>; - spi-max-frequency = <100000>; + spi-max-frequency = <10000000>; vdd-supply = <®_vdd_3v3>; xceiver-supply = <®_vdd_5v>; }; -- cgit v1.2.3 From 0b28c41e3c951ea3d4f012cfa9da5ebd6512cf6e Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:40 +0200 Subject: arm64: dts: imx8mm-kontron: Fix connection type for VSC8531 RGMII PHY Previously we falsely relied on the PHY driver to unconditionally enable the internal RX delay. Since the following fix for the PHY driver this is not the case anymore: commit 7b005a1742be ("net: phy: mscc: configure both RX and TX internal delays for RGMII") In order to enable the delay we need to set the connection type to "rgmii-rxid". Without the RX delay the ethernet is not functional at all. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index a52cdfb0920e..e99e7644ff39 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -113,7 +113,7 @@ &fec1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-rxid"; phy-handle = <ðphy>; status = "okay"; -- cgit v1.2.3 From fac3cb82a54a4b7c49c932f96ef196cf5774344c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 15 Oct 2021 12:05:46 +0300 Subject: net: bridge: mcast: use multicast_membership_interval for IGMPv3 When I added IGMPv3 support I decided to follow the RFC for computing the GMI dynamically: " 8.4. Group Membership Interval The Group Membership Interval is the amount of time that must pass before a multicast router decides there are no more members of a group or a particular source on a network. This value MUST be ((the Robustness Variable) times (the Query Interval)) plus (one Query Response Interval)." But that actually is inconsistent with how the bridge used to compute it for IGMPv2, where it was user-configurable that has a correct default value but it is up to user-space to maintain it. This would make it consistent with the other timer values which are also maintained correct by the user instead of being dynamically computed. It also changes back to the previous user-expected GMI behaviour for IGMPv3 queries which were supported before IGMPv3 was added. Note that to properly compute it dynamically we would need to add support for "Robustness Variable" which is currently missing. Reported-by: Hangbin Liu Fixes: 0436862e417e ("net: bridge: mcast: support for IGMPv3/MLDv2 ALLOW_NEW_SOURCES report") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_private.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e8136db44462..37ca76406f1e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1125,9 +1125,7 @@ static inline unsigned long br_multicast_lmqt(const struct net_bridge_mcast *brm static inline unsigned long br_multicast_gmi(const struct net_bridge_mcast *brmctx) { - /* use the RFC default of 2 for QRV */ - return 2 * brmctx->multicast_query_interval + - brmctx->multicast_query_response_interval; + return brmctx->multicast_membership_interval; } static inline bool -- cgit v1.2.3 From 4e5a04be88fe335ad5331f4f8c17f4ebd357e065 Mon Sep 17 00:00:00 2001 From: Sachi King Date: Sat, 9 Oct 2021 14:32:40 +1100 Subject: pinctrl: amd: disable and mask interrupts on probe Some systems such as the Microsoft Surface Laptop 4 leave interrupts enabled and configured for use in sleep states on boot, which cause unexpected behaviour such as spurious wakes and failed resumes in s2idle states. As interrupts should not be enabled until they are claimed and explicitly enabled, disabling any interrupts mistakenly left enabled by firmware should be safe. Signed-off-by: Sachi King Link: https://lore.kernel.org/r/20211009033240.21543-1-nakato@nakato.io Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 8d0f88e9ca88..bae9d429b813 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -840,6 +840,34 @@ static const struct pinconf_ops amd_pinconf_ops = { .pin_config_group_set = amd_pinconf_group_set, }; +static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) +{ + struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + unsigned long flags; + u32 pin_reg, mask; + int i; + + mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | + BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) | + BIT(WAKE_CNTRL_OFF_S4); + + for (i = 0; i < desc->npins; i++) { + int pin = desc->pins[i].number; + const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); + + if (!pd) + continue; + + raw_spin_lock_irqsave(&gpio_dev->lock, flags); + + pin_reg = readl(gpio_dev->base + i * 4); + pin_reg &= ~mask; + writel(pin_reg, gpio_dev->base + i * 4); + + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); + } +} + #ifdef CONFIG_PM_SLEEP static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin) { @@ -976,6 +1004,9 @@ static int amd_gpio_probe(struct platform_device *pdev) return PTR_ERR(gpio_dev->pctrl); } + /* Disable and mask interrupts */ + amd_gpio_irq_init(gpio_dev); + girq = &gpio_dev->gc.irq; girq->chip = &amd_gpio_irqchip; /* This will let us handle the parent IRQ in the driver */ -- cgit v1.2.3 From 97e6ea6d78064e7f1e9e19c45dc690aabbb71297 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Thu, 14 Oct 2021 11:24:25 +0530 Subject: scsi: mpi3mr: Fix duplicate device entries when scanning through sysfs When scanning devices through the 'scan' attribute in sysfs, the user will observe duplicate device entries in lsscsi command output. Set the shost's max_channel to zero to avoid this. Link: https://lore.kernel.org/r/20211014055425.30719-1-sreekanth.reddy@broadcom.com Fixes: 824a156633df ("scsi: mpi3mr: Base driver code") Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 2197988333fe..3cae8803383b 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -3736,7 +3736,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) shost->max_lun = -1; shost->unique_id = mrioc->id; - shost->max_channel = 1; + shost->max_channel = 0; shost->max_id = 0xFFFFFFFF; if (prot_mask >= 0) -- cgit v1.2.3 From 85374b6392293d103c2b3406ceb9a1253f81d328 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Fri, 15 Oct 2021 15:46:54 +0800 Subject: scsi: sd: Fix crashes in sd_resume_runtime() After commit ed4246d37f3b ("scsi: sd: REQUEST SENSE for BLIST_IGN_MEDIA_CHANGE devices in runtime_resume()"), the following crash was observed. static int sd_resume_runtime(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); struct scsi_device *sdp = sdkp->device; // sdkp == NULL and crash if (sdp->ignore_media_change) { ... } It is possible for sdkp to be NULL in sd_resume_runtime(). To fix this crash, follow sd_resume() to test if sdkp is NULL before dereferencing it. Crash: [ 4.695171][ T151] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 [ 4.696591][ T151] Mem abort info: [ 4.697919][ T151] ESR = 0x96000005 [ 4.699692][ T151] EC = 0x25: DABT (current EL), IL = 32 bits [ 4.701990][ T151] SET = 0, FnV = 0 [ 4.702513][ T151] EA = 0, S1PTW = 0 [ 4.704431][ T151] FSC = 0x05: level 1 translation fault [ 4.705254][ T151] Data abort info: [ 4.705806][ T151] ISV = 0, ISS = 0x00000005 [ 4.706484][ T151] CM = 0, WnR = 0 [ 4.707048][ T151] [0000000000000008] user address but active_mm is swapper [ 4.710577][ T151] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 4.832361][ T151] Kernel Offset: 0x12acc80000 from 0xffffffc010000000 [ 4.833254][ T151] PHYS_OFFSET: 0x40000000 [ 4.833814][ T151] pstate: 80400005 (Nzcv daif +PAN -UAO) [ 4.834546][ T151] pc : sd_resume_runtime+0x20/0x14c [ 4.835227][ T151] lr : scsi_runtime_resume+0x84/0xe4 [ 4.835916][ T151] sp : ffffffc0110db8d0 [ 4.836450][ T151] x29: ffffffc0110db8d0 x28: 0000000000000001 [ 4.837258][ T151] x27: ffffff80c0bd1ac0 x26: ffffff80c0bd1ad0 [ 4.838063][ T151] x25: ffffff80cea7e448 x24: ffffffd2bf961000 [ 4.838867][ T151] x23: ffffffd2be69f838 x22: ffffffd2bd9dfb4c [ 4.839670][ T151] x21: 0000000000000000 x20: ffffff80cea7e000 [ 4.840474][ T151] x19: ffffff80cea7e260 x18: ffffffc0110dd078 [ 4.841277][ T151] x17: 00000000658783d9 x16: 0000000051469dac [ 4.842081][ T151] x15: 00000000b87f6327 x14: 0000000068fd680d [ 4.842885][ T151] x13: ffffff80c0bd2470 x12: ffffffd2bfa7f5f0 [ 4.843688][ T151] x11: 0000000000000078 x10: 0000000000000001 [ 4.844492][ T151] x9 : 00000000000000b1 x8 : ffffffd2be69f88c [ 4.845295][ T151] x7 : ffffffd2bd9e0e5c x6 : 0000000000000000 [ 4.846099][ T151] x5 : 0000000000000080 x4 : 0000000000000001 [ 4.846902][ T151] x3 : 68fd680dfe4ebe5e x2 : 0000000000000003 [ 4.847706][ T151] x1 : ffffffd2bf7f9380 x0 : ffffff80cea7e260 [ 4.856708][ T151] die+0x16c/0x59c [ 4.857191][ T151] __do_kernel_fault+0x1e8/0x210 [ 4.857833][ T151] do_page_fault+0xa4/0x654 [ 4.858418][ T151] do_translation_fault+0x6c/0x1b0 [ 4.859083][ T151] do_mem_abort+0x68/0x10c [ 4.859655][ T151] el1_abort+0x40/0x64 [ 4.860182][ T151] el1h_64_sync_handler+0x54/0x88 [ 4.860834][ T151] el1h_64_sync+0x7c/0x80 [ 4.861395][ T151] sd_resume_runtime+0x20/0x14c [ 4.862025][ T151] scsi_runtime_resume+0x84/0xe4 [ 4.862667][ T151] __rpm_callback+0x1f4/0x8cc [ 4.863275][ T151] rpm_resume+0x7e8/0xaa4 [ 4.863836][ T151] __pm_runtime_resume+0xa0/0x110 [ 4.864489][ T151] sd_probe+0x30/0x428 [ 4.865016][ T151] really_probe+0x14c/0x500 [ 4.865602][ T151] __driver_probe_device+0xb4/0x18c [ 4.866278][ T151] driver_probe_device+0x60/0x2c4 [ 4.866931][ T151] __device_attach_driver+0x228/0x2bc [ 4.867630][ T151] __device_attach_async_helper+0x154/0x21c [ 4.868398][ T151] async_run_entry_fn+0x5c/0x1c4 [ 4.869038][ T151] process_one_work+0x3ac/0x590 [ 4.869670][ T151] worker_thread+0x320/0x758 [ 4.870265][ T151] kthread+0x2e8/0x35c [ 4.870792][ T151] ret_from_fork+0x10/0x20 Link: https://lore.kernel.org/r/20211015074654.19615-1-miles.chen@mediatek.com Fixes: ed4246d37f3b ("scsi: sd: REQUEST SENSE for BLIST_IGN_MEDIA_CHANGE devices in runtime_resume()") Cc: Stanley Chu Reviewed-by: Martin Kepplinger Reviewed-by: Stanley Chu Signed-off-by: Miles Chen Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 523bf2fdc253..fce63335084e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3683,7 +3683,12 @@ static int sd_resume(struct device *dev) static int sd_resume_runtime(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - struct scsi_device *sdp = sdkp->device; + struct scsi_device *sdp; + + if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ + return 0; + + sdp = sdkp->device; if (sdp->ignore_media_change) { /* clear the device's sense data */ -- cgit v1.2.3 From b504a884f6b5a77dac7d580ffa08e482f70d1a30 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 6 Sep 2021 17:42:19 +0800 Subject: can: j1939: j1939_tp_rxtimer(): fix errant alert in j1939_tp_rxtimer When the session state is J1939_SESSION_DONE, j1939_tp_rxtimer() will give an alert "rx timeout, send abort", but do nothing actually. Move the alert into session active judgment condition, it is more reasonable. One of the scenarios is that j1939_tp_rxtimer() execute followed by j1939_xtp_rx_abort_one(). After j1939_xtp_rx_abort_one(), the session state is J1939_SESSION_DONE, then j1939_tp_rxtimer() give an alert. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/all/20210906094219.95924-1-william.xuanziyang@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Ziyang Xuan Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index bb5c4b8979be..bfb0718d4587 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1237,12 +1237,11 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) session->err = -ETIME; j1939_session_deactivate(session); } else { - netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", - __func__, session); - j1939_session_list_lock(session->priv); if (session->state >= J1939_SESSION_ACTIVE && session->state < J1939_SESSION_ACTIVE_MAX) { + netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", + __func__, session); j1939_session_get(session); hrtimer_start(&session->rxtimer, ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS), -- cgit v1.2.3 From d9d52a3ebd284882f5562c88e55991add5d01586 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sun, 26 Sep 2021 18:47:57 +0800 Subject: can: j1939: j1939_netdev_start(): fix UAF for rx_kref of j1939_priv It will trigger UAF for rx_kref of j1939_priv as following. cpu0 cpu1 j1939_sk_bind(socket0, ndev0, ...) j1939_netdev_start j1939_sk_bind(socket1, ndev0, ...) j1939_netdev_start j1939_priv_set j1939_priv_get_by_ndev_locked j1939_jsk_add ..... j1939_netdev_stop kref_put_lock(&priv->rx_kref, ...) kref_get(&priv->rx_kref, ...) REFCOUNT_WARN("addition on 0;...") ==================================================== refcount_t: addition on 0; use-after-free. WARNING: CPU: 1 PID: 20874 at lib/refcount.c:25 refcount_warn_saturate+0x169/0x1e0 RIP: 0010:refcount_warn_saturate+0x169/0x1e0 Call Trace: j1939_netdev_start+0x68b/0x920 j1939_sk_bind+0x426/0xeb0 ? security_socket_bind+0x83/0xb0 The rx_kref's kref_get() and kref_put() should use j1939_netdev_lock to protect. Fixes: 9d71dd0c70099 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/all/20210926104757.2021540-1-william.xuanziyang@huawei.com Cc: stable@vger.kernel.org Reported-by: syzbot+85d9878b19c94f9019ad@syzkaller.appspotmail.com Signed-off-by: Ziyang Xuan Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 08c8606cfd9c..9bc55ecb37f9 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -249,11 +249,14 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) struct j1939_priv *priv, *priv_new; int ret; - priv = j1939_priv_get_by_ndev(ndev); + spin_lock(&j1939_netdev_lock); + priv = j1939_priv_get_by_ndev_locked(ndev); if (priv) { kref_get(&priv->rx_kref); + spin_unlock(&j1939_netdev_lock); return priv; } + spin_unlock(&j1939_netdev_lock); priv = j1939_priv_create(ndev); if (!priv) @@ -269,10 +272,10 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) /* Someone was faster than us, use their priv and roll * back our's. */ + kref_get(&priv_new->rx_kref); spin_unlock(&j1939_netdev_lock); dev_put(ndev); kfree(priv); - kref_get(&priv_new->rx_kref); return priv_new; } j1939_priv_set(ndev, priv); -- cgit v1.2.3 From 379743985ab6cfe2cbd32067cf4ed497baca6d06 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 30 Sep 2021 11:33:20 +0800 Subject: can: j1939: j1939_xtp_rx_dat_one(): cancel session if receive TP.DT with error length According to SAE-J1939-21, the data length of TP.DT must be 8 bytes, so cancel session when receive unexpected TP.DT message. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/all/1632972800-45091-1-git-send-email-zhangchangzhong@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Zhang Changzhong Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index bfb0718d4587..1ea6fee3d986 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1788,6 +1788,7 @@ static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb, static void j1939_xtp_rx_dat_one(struct j1939_session *session, struct sk_buff *skb) { + enum j1939_xtp_abort abort = J1939_XTP_ABORT_FAULT; struct j1939_priv *priv = session->priv; struct j1939_sk_buff_cb *skcb, *se_skcb; struct sk_buff *se_skb = NULL; @@ -1802,9 +1803,11 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, skcb = j1939_skb_to_cb(skb); dat = skb->data; - if (skb->len <= 1) + if (skb->len != 8) { /* makes no sense */ + abort = J1939_XTP_ABORT_UNEXPECTED_DATA; goto out_session_cancel; + } switch (session->last_cmd) { case 0xff: @@ -1903,7 +1906,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, out_session_cancel: kfree_skb(se_skb); j1939_session_timers_cancel(session); - j1939_session_cancel(session, J1939_XTP_ABORT_FAULT); + j1939_session_cancel(session, abort); j1939_session_put(session); } -- cgit v1.2.3 From a4fbe70c5cb746441d56b28cf88161d9e0e25378 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 14 Oct 2021 17:26:40 +0800 Subject: can: j1939: j1939_xtp_rx_rts_session_new(): abort TP less than 9 bytes The receiver should abort TP if 'total message size' in TP.CM_RTS and TP.CM_BAM is less than 9 or greater than 1785 [1], but currently the j1939 stack only checks the upper bound and the receiver will accept the following broadcast message: vcan1 18ECFF00 [8] 20 08 00 02 FF 00 23 01 vcan1 18EBFF00 [8] 01 00 00 00 00 00 00 00 vcan1 18EBFF00 [8] 02 00 FF FF FF FF FF FF This patch adds check for the lower bound and abort illegal TP. [1] SAE-J1939-82 A.3.4 Row 2 and A.3.6 Row 6. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/all/1634203601-3460-1-git-send-email-zhangchangzhong@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Zhang Changzhong Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/j1939-priv.h | 1 + net/can/j1939/transport.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index f6df20808f5e..16af1a7f80f6 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -330,6 +330,7 @@ int j1939_session_activate(struct j1939_session *session); void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec); void j1939_session_timers_cancel(struct j1939_session *session); +#define J1939_MIN_TP_PACKET_SIZE 9 #define J1939_MAX_TP_PACKET_SIZE (7 * 0xff) #define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 1ea6fee3d986..6c0a0ebdd024 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1608,6 +1608,8 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, abort = J1939_XTP_ABORT_FAULT; else if (len > priv->tp_max_packet_size) abort = J1939_XTP_ABORT_RESOURCE; + else if (len < J1939_MIN_TP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; } if (abort != J1939_XTP_NO_ABORT) { -- cgit v1.2.3 From 9acf636215a6ce9362fe618e7da4913b8bfe84c8 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sat, 9 Oct 2021 15:40:18 +0800 Subject: can: isotp: isotp_sendmsg(): add result check for wait_event_interruptible() Using wait_event_interruptible() to wait for complete transmission, but do not check the result of wait_event_interruptible() which can be interrupted. It will result in TX buffer has multiple accessors and the later process interferes with the previous process. Following is one of the problems reported by syzbot. ============================================================= WARNING: CPU: 0 PID: 0 at net/can/isotp.c:840 isotp_tx_timer_handler+0x2e0/0x4c0 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.13.0-rc7+ #68 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014 RIP: 0010:isotp_tx_timer_handler+0x2e0/0x4c0 Call Trace: ? isotp_setsockopt+0x390/0x390 __hrtimer_run_queues+0xb8/0x610 hrtimer_run_softirq+0x91/0xd0 ? rcu_read_lock_sched_held+0x4d/0x80 __do_softirq+0xe8/0x553 irq_exit_rcu+0xf8/0x100 sysvec_apic_timer_interrupt+0x9e/0xc0 asm_sysvec_apic_timer_interrupt+0x12/0x20 Add result check for wait_event_interruptible() in isotp_sendmsg() to avoid multiple accessers for tx buffer. Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Link: https://lore.kernel.org/all/10ca695732c9dd267c76a3c30f37aefe1ff7e32f.1633764159.git.william.xuanziyang@huawei.com Cc: stable@vger.kernel.org Reported-by: syzbot+78bab6958a614b0c80b9@syzkaller.appspotmail.com Signed-off-by: Ziyang Xuan Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index caaa532ece94..2ac29c2b2ca6 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -865,7 +865,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) return -EAGAIN; /* wait for complete transmission of current pdu */ - wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) + return err; } if (!size || size > MAX_MSG_LENGTH) -- cgit v1.2.3 From 43a08c3bdac4cb42eff8fe5e2278bffe0c5c3daa Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sat, 9 Oct 2021 15:40:30 +0800 Subject: can: isotp: isotp_sendmsg(): fix TX buffer concurrent access in isotp_sendmsg() When isotp_sendmsg() concurrent, tx.state of all TX processes can be ISOTP_IDLE. The conditions so->tx.state != ISOTP_IDLE and wq_has_sleeper(&so->wait) can not protect TX buffer from being accessed by multiple TX processes. We can use cmpxchg() to try to modify tx.state to ISOTP_SENDING firstly. If the modification of the previous process succeed, the later process must wait tx.state to ISOTP_IDLE firstly. Thus, we can ensure TX buffer is accessed by only one process at the same time. And we should also restore the original tx.state at the subsequent error processes. Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Link: https://lore.kernel.org/all/c2517874fbdf4188585cf9ddf67a8fa74d5dbde5.1633764159.git.william.xuanziyang@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Ziyang Xuan Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index 2ac29c2b2ca6..d1f54273c0bb 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -121,7 +121,7 @@ enum { struct tpcon { int idx; int len; - u8 state; + u32 state; u8 bs; u8 sn; u8 ll_dl; @@ -848,6 +848,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct isotp_sock *so = isotp_sk(sk); + u32 old_state = so->tx.state; struct sk_buff *skb; struct net_device *dev; struct canfd_frame *cf; @@ -860,47 +861,55 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) return -EADDRNOTAVAIL; /* we do not support multiple buffers - for now */ - if (so->tx.state != ISOTP_IDLE || wq_has_sleeper(&so->wait)) { - if (msg->msg_flags & MSG_DONTWAIT) - return -EAGAIN; + if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE || + wq_has_sleeper(&so->wait)) { + if (msg->msg_flags & MSG_DONTWAIT) { + err = -EAGAIN; + goto err_out; + } /* wait for complete transmission of current pdu */ err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); if (err) - return err; + goto err_out; } - if (!size || size > MAX_MSG_LENGTH) - return -EINVAL; + if (!size || size > MAX_MSG_LENGTH) { + err = -EINVAL; + goto err_out; + } /* take care of a potential SF_DL ESC offset for TX_DL > 8 */ off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0; /* does the given data fit into a single frame for SF_BROADCAST? */ if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) && - (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) - return -EINVAL; + (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) { + err = -EINVAL; + goto err_out; + } err = memcpy_from_msg(so->tx.buf, msg, size); if (err < 0) - return err; + goto err_out; dev = dev_get_by_index(sock_net(sk), so->ifindex); - if (!dev) - return -ENXIO; + if (!dev) { + err = -ENXIO; + goto err_out; + } skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv), msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) { dev_put(dev); - return err; + goto err_out; } can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; - so->tx.state = ISOTP_SENDING; so->tx.len = size; so->tx.idx = 0; @@ -956,7 +965,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (err) { pr_notice_once("can-isotp: %s: can_send_ret %pe\n", __func__, ERR_PTR(err)); - return err; + goto err_out; } if (wait_tx_done) { @@ -965,6 +974,13 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } return size; + +err_out: + so->tx.state = old_state; + if (so->tx.state == ISOTP_IDLE) + wake_up_interruptible(&so->wait); + + return err; } static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, -- cgit v1.2.3 From 5370b0f49078203acf3c064b634a09707167a864 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Oct 2021 13:20:22 -1000 Subject: blk-cgroup: blk_cgroup_bio_start() should use irq-safe operations on blkg->iostat_cpu c3df5fb57fe8 ("cgroup: rstat: fix A-A deadlock on 32bit around u64_stats_sync") made u64_stats updates irq-safe to avoid A-A deadlocks. Unfortunately, the conversion missed one in blk_cgroup_bio_start(). Fix it. Fixes: 2d146aa3aa84 ("mm: memcontrol: switch to rstat") Cc: stable@vger.kernel.org # v5.13+ Reported-by: syzbot+9738c8815b375ce482a1@syzkaller.appspotmail.com Signed-off-by: Tejun Heo Link: https://lore.kernel.org/r/YWi7NrQdVlxD6J9W@slm.duckdns.org Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 38b9f7684952..9a1c5839dd46 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1897,10 +1897,11 @@ void blk_cgroup_bio_start(struct bio *bio) { int rwd = blk_cgroup_io_type(bio), cpu; struct blkg_iostat_set *bis; + unsigned long flags; cpu = get_cpu(); bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu); - u64_stats_update_begin(&bis->sync); + flags = u64_stats_update_begin_irqsave(&bis->sync); /* * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split @@ -1912,7 +1913,7 @@ void blk_cgroup_bio_start(struct bio *bio) } bis->cur.ios[rwd]++; - u64_stats_update_end(&bis->sync); + u64_stats_update_end_irqrestore(&bis->sync, flags); if (cgroup_subsys_on_dfl(io_cgrp_subsys)) cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu); put_cpu(); -- cgit v1.2.3 From f7c05c3987dcfde9a4e8c2d533db013fabebca0d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 24 Sep 2021 16:55:56 +0900 Subject: can: rcar_can: fix suspend/resume If the driver was not opened, rcar_can_suspend() should not call clk_disable() because the clock was not enabled. Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver") Link: https://lore.kernel.org/all/20210924075556.223685-1-yoshihiro.shimoda.uh@renesas.com Cc: stable@vger.kernel.org Signed-off-by: Yoshihiro Shimoda Tested-by: Ayumi Nakamichi Reviewed-by: Ulrich Hecht Tested-by: Biju Das Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar/rcar_can.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index 00e4533c8bdd..8999ec9455ec 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -846,10 +846,12 @@ static int __maybe_unused rcar_can_suspend(struct device *dev) struct rcar_can_priv *priv = netdev_priv(ndev); u16 ctlr; - if (netif_running(ndev)) { - netif_stop_queue(ndev); - netif_device_detach(ndev); - } + if (!netif_running(ndev)) + return 0; + + netif_stop_queue(ndev); + netif_device_detach(ndev); + ctlr = readw(&priv->regs->ctlr); ctlr |= RCAR_CAN_CTLR_CANM_HALT; writew(ctlr, &priv->regs->ctlr); @@ -868,6 +870,9 @@ static int __maybe_unused rcar_can_resume(struct device *dev) u16 ctlr; int err; + if (!netif_running(ndev)) + return 0; + err = clk_enable(priv->clk); if (err) { netdev_err(ndev, "clk_enable() failed, error %d\n", err); @@ -881,10 +886,9 @@ static int __maybe_unused rcar_can_resume(struct device *dev) writew(ctlr, &priv->regs->ctlr); priv->can.state = CAN_STATE_ERROR_ACTIVE; - if (netif_running(ndev)) { - netif_device_attach(ndev); - netif_start_queue(ndev); - } + netif_device_attach(ndev); + netif_start_queue(ndev); + return 0; } -- cgit v1.2.3 From 99d173fbe8944861a00ebd1c73817a1260d21e60 Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Mon, 20 Sep 2021 18:03:43 +0530 Subject: can: m_can: fix iomap_read_fifo() and iomap_write_fifo() The read and writes from the fifo are from a buffer, with various fields and data at predefined offsets. So, they should not be done to the same address(or port) in case of val_count greater than 1. Therefore, fix this by using iowrite32()/ioread32() instead of ioread32_rep()/iowrite32_rep(). Also, the write into FIFO must be performed with an offset from the message ram base address. Therefore, fix the base address to mram_base. Fixes: e39381770ec9 ("can: m_can: Disable IRQs on FIFO bus errors") Link: https://lore.kernel.org/all/20210920123344.2320-1-a-govindraju@ti.com Signed-off-by: Aswath Govindraju Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_platform.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c index 308d4f2fff00..eee47bad0592 100644 --- a/drivers/net/can/m_can/m_can_platform.c +++ b/drivers/net/can/m_can/m_can_platform.c @@ -32,8 +32,13 @@ static u32 iomap_read_reg(struct m_can_classdev *cdev, int reg) static int iomap_read_fifo(struct m_can_classdev *cdev, int offset, void *val, size_t val_count) { struct m_can_plat_priv *priv = cdev_to_priv(cdev); + void __iomem *src = priv->mram_base + offset; - ioread32_rep(priv->mram_base + offset, val, val_count); + while (val_count--) { + *(unsigned int *)val = ioread32(src); + val += 4; + src += 4; + } return 0; } @@ -51,8 +56,13 @@ static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, const void *val, size_t val_count) { struct m_can_plat_priv *priv = cdev_to_priv(cdev); + void __iomem *dst = priv->mram_base + offset; - iowrite32_rep(priv->base + offset, val, val_count); + while (val_count--) { + iowrite32(*(unsigned int *)val, dst); + val += 4; + dst += 4; + } return 0; } -- cgit v1.2.3 From 949fe9b35570361bc6ee2652f89a0561b26eec98 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Thu, 14 Oct 2021 06:28:33 +0000 Subject: can: peak_pci: peak_pci_remove(): fix UAF When remove the module peek_pci, referencing 'chan' again after releasing 'dev' will cause UAF. Fix this by releasing 'dev' later. The following log reveals it: [ 35.961814 ] BUG: KASAN: use-after-free in peak_pci_remove+0x16f/0x270 [peak_pci] [ 35.963414 ] Read of size 8 at addr ffff888136998ee8 by task modprobe/5537 [ 35.965513 ] Call Trace: [ 35.965718 ] dump_stack_lvl+0xa8/0xd1 [ 35.966028 ] print_address_description+0x87/0x3b0 [ 35.966420 ] kasan_report+0x172/0x1c0 [ 35.966725 ] ? peak_pci_remove+0x16f/0x270 [peak_pci] [ 35.967137 ] ? trace_irq_enable_rcuidle+0x10/0x170 [ 35.967529 ] ? peak_pci_remove+0x16f/0x270 [peak_pci] [ 35.967945 ] __asan_report_load8_noabort+0x14/0x20 [ 35.968346 ] peak_pci_remove+0x16f/0x270 [peak_pci] [ 35.968752 ] pci_device_remove+0xa9/0x250 Fixes: e6d9c80b7ca1 ("can: peak_pci: add support of some new PEAK-System PCI cards") Link: https://lore.kernel.org/all/1634192913-15639-1-git-send-email-zheyuma97@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Zheyu Ma Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/peak_pci.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index 6db90dc4bc9d..84f34020aafb 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -752,16 +752,15 @@ static void peak_pci_remove(struct pci_dev *pdev) struct net_device *prev_dev = chan->prev_dev; dev_info(&pdev->dev, "removing device %s\n", dev->name); + /* do that only for first channel */ + if (!prev_dev && chan->pciec_card) + peak_pciec_remove(chan->pciec_card); unregister_sja1000dev(dev); free_sja1000dev(dev); dev = prev_dev; - if (!dev) { - /* do that only for first channel */ - if (chan->pciec_card) - peak_pciec_remove(chan->pciec_card); + if (!dev) break; - } priv = netdev_priv(dev); chan = priv->priv; } -- cgit v1.2.3 From 3d031abc7e7249573148871180c28ecedb5e27df Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Wed, 29 Sep 2021 16:21:10 +0200 Subject: can: peak_usb: pcan_usb_fd_decode_status(): fix back to ERROR_ACTIVE state notification This corrects the lack of notification of a return to ERROR_ACTIVE state for USB - CANFD devices from PEAK-System. Fixes: 0a25e1f4f185 ("can: peak_usb: add support for PEAK new CANFD USB adapters") Link: https://lore.kernel.org/all/20210929142111.55757-1-s.grosjean@peak-system.com Cc: stable@vger.kernel.org Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index b11eabad575b..e206959b3d06 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -551,11 +551,10 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if, } else if (sm->channel_p_w_b & PUCAN_BUS_WARNING) { new_state = CAN_STATE_ERROR_WARNING; } else { - /* no error bit (so, no error skb, back to active state) */ - dev->can.state = CAN_STATE_ERROR_ACTIVE; + /* back to (or still in) ERROR_ACTIVE state */ + new_state = CAN_STATE_ERROR_ACTIVE; pdev->bec.txerr = 0; pdev->bec.rxerr = 0; - return 0; } /* state hasn't changed */ -- cgit v1.2.3 From 553715feaa9e0453bc59f6ba20e1c69346888bd5 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Wed, 29 Sep 2021 16:21:11 +0200 Subject: can: peak_usb: pcan_usb_fd_decode_status(): remove unnecessary test on the nullity of a pointer Since alloc_can_err_skb() puts NULL in cf in the case when skb cannot be allocated and can_change_state() handles the case when cf is NULL, the test on the nullity of skb is now unnecessary. Link: https://lore.kernel.org/all/20210929142111.55757-2-s.grosjean@peak-system.com Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index e206959b3d06..09029a3bad1a 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -567,8 +567,7 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if, /* allocate an skb to store the error frame */ skb = alloc_can_err_skb(netdev, &cf); - if (skb) - can_change_state(netdev, cf, tx_state, rx_state); + can_change_state(netdev, cf, tx_state, rx_state); /* things must be done even in case of OOM */ if (new_state == CAN_STATE_BUS_OFF) -- cgit v1.2.3 From d9aaaf223297f6146d9d7f36caca927c92ab855a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 10 Oct 2021 11:24:39 -0700 Subject: netfilter: ebtables: allocate chainstack on CPU local nodes Keep the per-CPU memory allocated for chainstacks local. Signed-off-by: Davidlohr Bueso Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 83d1798dfbb4..ba045f35114d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -926,7 +926,9 @@ static int translate_table(struct net *net, const char *name, return -ENOMEM; for_each_possible_cpu(i) { newinfo->chainstack[i] = - vmalloc(array_size(udc_cnt, sizeof(*(newinfo->chainstack[0])))); + vmalloc_node(array_size(udc_cnt, + sizeof(*(newinfo->chainstack[0]))), + cpu_to_node(i)); if (!newinfo->chainstack[i]) { while (i) vfree(newinfo->chainstack[--i]); -- cgit v1.2.3 From c2bbf9d1e9ac7d4fdd503b190bc1ba8a6302bc49 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 12 Oct 2021 01:54:03 -0400 Subject: dma-debug: teach add_dma_entry() about DMA_ATTR_SKIP_CPU_SYNC Mapping something twice should be possible as long as, DMA_ATTR_SKIP_CPU_SYNC is passed to the strictly speaking second relevant mapping operation (that attempts to map the same thing). So, don't issue a warning if the specified condition is met in add_dma_entry(). Signed-off-by: Hamza Mahfooz Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 24 ++++++++++++++---------- kernel/dma/debug.h | 24 ++++++++++++++++-------- kernel/dma/mapping.c | 12 ++++++------ 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 35da4c3a6486..7a14ca29c377 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -552,7 +552,7 @@ static void active_cacheline_remove(struct dma_debug_entry *entry) * Wrapper function for adding an entry to the hash. * This function takes care of locking itself. */ -static void add_dma_entry(struct dma_debug_entry *entry) +static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) { struct hash_bucket *bucket; unsigned long flags; @@ -566,7 +566,7 @@ static void add_dma_entry(struct dma_debug_entry *entry) if (rc == -ENOMEM) { pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; - } else if (rc == -EEXIST) { + } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { err_printk(entry->dev, entry, "cacheline tracking EEXIST, overlapping mappings aren't supported\n"); } @@ -1191,7 +1191,8 @@ void debug_dma_map_single(struct device *dev, const void *addr, EXPORT_SYMBOL(debug_dma_map_single); void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, - size_t size, int direction, dma_addr_t dma_addr) + size_t size, int direction, dma_addr_t dma_addr, + unsigned long attrs) { struct dma_debug_entry *entry; @@ -1222,7 +1223,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, check_for_illegal_area(dev, addr, size); } - add_dma_entry(entry); + add_dma_entry(entry, attrs); } void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -1280,7 +1281,8 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, } void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction) + int nents, int mapped_ents, int direction, + unsigned long attrs) { struct dma_debug_entry *entry; struct scatterlist *s; @@ -1312,7 +1314,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, check_sg_segment(dev, s); - add_dma_entry(entry); + add_dma_entry(entry, attrs); } } @@ -1368,7 +1370,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, } void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt) + dma_addr_t dma_addr, void *virt, + unsigned long attrs) { struct dma_debug_entry *entry; @@ -1398,7 +1401,7 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, else entry->pfn = page_to_pfn(virt_to_page(virt)); - add_dma_entry(entry); + add_dma_entry(entry, attrs); } void debug_dma_free_coherent(struct device *dev, size_t size, @@ -1429,7 +1432,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size, } void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, - int direction, dma_addr_t dma_addr) + int direction, dma_addr_t dma_addr, + unsigned long attrs) { struct dma_debug_entry *entry; @@ -1449,7 +1453,7 @@ void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, entry->direction = direction; entry->map_err_type = MAP_ERR_NOT_CHECKED; - add_dma_entry(entry); + add_dma_entry(entry, attrs); } void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, diff --git a/kernel/dma/debug.h b/kernel/dma/debug.h index 83643b3010b2..f525197d3cae 100644 --- a/kernel/dma/debug.h +++ b/kernel/dma/debug.h @@ -11,26 +11,30 @@ #ifdef CONFIG_DMA_API_DEBUG extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, - int direction, dma_addr_t dma_addr); + int direction, dma_addr_t dma_addr, + unsigned long attrs); extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction); extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction); + int nents, int mapped_ents, int direction, + unsigned long attrs); extern void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, int dir); extern void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt); + dma_addr_t dma_addr, void *virt, + unsigned long attrs); extern void debug_dma_free_coherent(struct device *dev, size_t size, void *virt, dma_addr_t addr); extern void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, int direction, - dma_addr_t dma_addr); + dma_addr_t dma_addr, + unsigned long attrs); extern void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, size_t size, int direction); @@ -53,7 +57,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev, #else /* CONFIG_DMA_API_DEBUG */ static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, - int direction, dma_addr_t dma_addr) + int direction, dma_addr_t dma_addr, + unsigned long attrs) { } @@ -63,7 +68,8 @@ static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, } static inline void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction) + int nents, int mapped_ents, int direction, + unsigned long attrs) { } @@ -74,7 +80,8 @@ static inline void debug_dma_unmap_sg(struct device *dev, } static inline void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt) + dma_addr_t dma_addr, void *virt, + unsigned long attrs) { } @@ -85,7 +92,8 @@ static inline void debug_dma_free_coherent(struct device *dev, size_t size, static inline void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, int direction, - dma_addr_t dma_addr) + dma_addr_t dma_addr, + unsigned long attrs) { } diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 49885023c223..8349a9f2c345 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -156,7 +156,7 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); else addr = ops->map_page(dev, page, offset, size, dir, attrs); - debug_dma_map_page(dev, page, offset, size, dir, addr); + debug_dma_map_page(dev, page, offset, size, dir, addr, attrs); return addr; } @@ -195,7 +195,7 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, ents = ops->map_sg(dev, sg, nents, dir, attrs); if (ents > 0) - debug_dma_map_sg(dev, sg, nents, ents, dir); + debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && ents != -EIO)) return -EIO; @@ -305,7 +305,7 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, else if (ops->map_resource) addr = ops->map_resource(dev, phys_addr, size, dir, attrs); - debug_dma_map_resource(dev, phys_addr, size, dir, addr); + debug_dma_map_resource(dev, phys_addr, size, dir, addr, attrs); return addr; } EXPORT_SYMBOL(dma_map_resource); @@ -510,7 +510,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, else return NULL; - debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs); return cpu_addr; } EXPORT_SYMBOL(dma_alloc_attrs); @@ -566,7 +566,7 @@ struct page *dma_alloc_pages(struct device *dev, size_t size, struct page *page = __dma_alloc_pages(dev, size, dma_handle, dir, gfp); if (page) - debug_dma_map_page(dev, page, 0, size, dir, *dma_handle); + debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0); return page; } EXPORT_SYMBOL_GPL(dma_alloc_pages); @@ -644,7 +644,7 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, if (sgt) { sgt->nents = 1; - debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir); + debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); } return sgt; } -- cgit v1.2.3 From 0a9bb11a5e298e72b675255a4bb2893513000584 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 14 Oct 2021 19:18:04 -0700 Subject: hamradio: baycom_epp: fix build for UML On i386, the baycom_epp driver wants to inspect X86 CPU features (TSC) and then act on that data, but that info is not available when running on UML, so prevent that test and do the default action. Prevents this build error on UML + i386: ../drivers/net/hamradio/baycom_epp.c: In function ‘epp_bh’: ../drivers/net/hamradio/baycom_epp.c:630:6: error: implicit declaration of function ‘boot_cpu_has’; did you mean ‘get_cpu_mask’? [-Werror=implicit-function-declaration] if (boot_cpu_has(X86_FEATURE_TSC)) \ ^ ../drivers/net/hamradio/baycom_epp.c:658:2: note: in expansion of macro ‘GETTICK’ GETTICK(time1); Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver") Signed-off-by: Randy Dunlap Cc: linux-um@lists.infradead.org Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Thomas Sailer Cc: linux-hams@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/hamradio/baycom_epp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 775dcf4ebde5..6b6f28d5b8d5 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -623,16 +623,16 @@ static int receive(struct net_device *dev, int cnt) /* --------------------------------------------------------------------- */ -#ifdef __i386__ +#if defined(__i386__) && !defined(CONFIG_UML) #include #define GETTICK(x) \ ({ \ if (boot_cpu_has(X86_FEATURE_TSC)) \ x = (unsigned int)rdtsc(); \ }) -#else /* __i386__ */ +#else /* __i386__ && !CONFIG_UML */ #define GETTICK(x) -#endif /* __i386__ */ +#endif /* __i386__ && !CONFIG_UML */ static void epp_bh(struct work_struct *work) { -- cgit v1.2.3 From 66d262804a2276721eac86cf18fcd61046149193 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Sat, 16 Oct 2021 00:10:20 +0200 Subject: net: dsa: lantiq_gswip: fix register definition I compared the register definitions with the D-Link DWR-966 GPL sources and found that the PUAFD field definition was incorrect. This definition is unused and causes no issues. Fixes: 14fceff4771e ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") Signed-off-by: Aleksander Jan Bajkowski Acked-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/dsa/lantiq_gswip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 3ff4b7e177f3..dbd4486a173f 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -230,7 +230,7 @@ #define GSWIP_SDMA_PCTRLp(p) (0xBC0 + ((p) * 0x6)) #define GSWIP_SDMA_PCTRL_EN BIT(0) /* SDMA Port Enable */ #define GSWIP_SDMA_PCTRL_FCEN BIT(1) /* Flow Control Enable */ -#define GSWIP_SDMA_PCTRL_PAUFWD BIT(1) /* Pause Frame Forwarding */ +#define GSWIP_SDMA_PCTRL_PAUFWD BIT(3) /* Pause Frame Forwarding */ #define GSWIP_TABLE_ACTIVE_VLAN 0x01 #define GSWIP_TABLE_VLAN_MAPPING 0x02 -- cgit v1.2.3 From 342afce10d6f61c443c95e244f812d4766f73f53 Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Sat, 16 Oct 2021 14:24:14 +0800 Subject: net: dsa: mt7530: correct ds->num_ports Setting ds->num_ports to DSA_MAX_PORTS made DSA core allocate unnecessary dsa_port's and call mt7530_port_disable for non-existent ports. Set it to MT7530_NUM_PORTS to fix that, and dsa_is_user_port check in port_enable/disable is no longer required. Cc: stable@vger.kernel.org Signed-off-by: DENG Qingfang Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 094737e5084a..9890672a206d 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1035,9 +1035,6 @@ mt7530_port_enable(struct dsa_switch *ds, int port, { struct mt7530_priv *priv = ds->priv; - if (!dsa_is_user_port(ds, port)) - return 0; - mutex_lock(&priv->reg_mutex); /* Allow the user port gets connected to the cpu port and also @@ -1060,9 +1057,6 @@ mt7530_port_disable(struct dsa_switch *ds, int port) { struct mt7530_priv *priv = ds->priv; - if (!dsa_is_user_port(ds, port)) - return; - mutex_lock(&priv->reg_mutex); /* Clear up all port matrix which could be restored in the next @@ -3211,7 +3205,7 @@ mt7530_probe(struct mdio_device *mdiodev) return -ENOMEM; priv->ds->dev = &mdiodev->dev; - priv->ds->num_ports = DSA_MAX_PORTS; + priv->ds->num_ports = MT7530_NUM_PORTS; /* Use medatek,mcm property to distinguish hardware type that would * casues a little bit differences on power-on sequence. -- cgit v1.2.3 From 2dc4e9e88cfcc38454d52b01ed3422238c134003 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 17 Oct 2021 14:58:51 +0300 Subject: net/sched: act_ct: Fix byte count on fragmented packets First fragmented packets (frag offset = 0) byte len is zeroed when stolen by ip_defrag(). And since act_ct update the stats only afterwards (at end of execute), bytes aren't correctly accounted for such packets. To fix this, move stats update to start of action execute. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: Paul Blakey Signed-off-by: David S. Miller --- net/sched/act_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index ad9df0cb4b98..90866ae45573 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -960,6 +960,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, tmpl = p->tmpl; tcf_lastuse_update(&c->tcf_tm); + tcf_action_update_bstats(&c->common, skb); if (clear) { qdisc_skb_cb(skb)->post_ct = false; @@ -1049,7 +1050,6 @@ out_push: qdisc_skb_cb(skb)->post_ct = true; out_clear: - tcf_action_update_bstats(&c->common, skb); if (defrag) qdisc_skb_cb(skb)->pkt_len = skb->len; return retval; -- cgit v1.2.3 From d9fd7e9fccfac466fb528a783f2fc76f2982604c Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Sun, 17 Oct 2021 21:31:30 -0400 Subject: net: sparx5: Add of_node_put() before goto Fix following coccicheck warning: ./drivers/net/ethernet/microchip/sparx5/s4parx5_main.c:723:1-33: WARNING: Function for_each_available_child_of_node should have of_node_put() before goto Early exits from for_each_available_child_of_node should decrement the node reference counter. Signed-off-by: Wan Jiabing Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index cbece6e9bff2..5030dfca3879 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -758,6 +758,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) err = dev_err_probe(sparx5->dev, PTR_ERR(serdes), "port %u: missing serdes\n", portno); + of_node_put(portnp); goto cleanup_config; } config->portno = portno; -- cgit v1.2.3 From d1a7b9e4696584ce05c12567762c18a866837a85 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Sun, 17 Oct 2021 21:32:32 -0400 Subject: net: mscc: ocelot: Add of_node_put() before goto Fix following coccicheck warning: ./drivers/net/ethernet/mscc/ocelot_vsc7514.c:946:1-33: WARNING: Function for_each_available_child_of_node should have of_node_put() before goto. Early exits from for_each_available_child_of_node should decrement the node reference counter. Signed-off-by: Wan Jiabing Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 291ae6817c26..d51f799e4e86 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -969,6 +969,7 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, target = ocelot_regmap_init(ocelot, res); if (IS_ERR(target)) { err = PTR_ERR(target); + of_node_put(portnp); goto out_teardown; } -- cgit v1.2.3 From b2cddb44bddc1a9c5949a978bb454bba863264db Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 18 Oct 2021 02:16:22 +0000 Subject: cavium: Return negative value when pci_alloc_irq_vectors() fails During the process of driver probing, the probe function should return < 0 for failure, otherwise, the kernel will treat value > 0 as success. Signed-off-by: Zheyu Ma Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 691e1475d55e..0fbecd093fa1 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1193,7 +1193,7 @@ static int nic_register_interrupts(struct nicpf *nic) dev_err(&nic->pdev->dev, "Request for #%d msix vectors failed, returned %d\n", nic->num_vec, ret); - return 1; + return ret; } /* Register mailbox interrupt handler */ -- cgit v1.2.3 From b416beb25d9317499c823aea1522eefd8d72ea36 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 18 Oct 2021 11:29:34 +0800 Subject: mctp: unify sockaddr_mctp types Use the more precise __kernel_sa_family_t for smctp_family, to match struct sockaddr. Also, use an unsigned int for the network member; negative networks don't make much sense. We're already using unsigned for mctp_dev and mctp_skb_cb, but need to change mctp_sock to suit. Fixes: 60fc63981693 ("mctp: Add sockaddr_mctp to uapi") Signed-off-by: Jeremy Kerr Acked-by: Eugene Syromiatnikov Signed-off-by: David S. Miller --- Documentation/networking/mctp.rst | 10 +++++----- include/net/mctp.h | 2 +- include/uapi/linux/mctp.h | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Documentation/networking/mctp.rst b/Documentation/networking/mctp.rst index 6100cdc220f6..fa7730dbf7b9 100644 --- a/Documentation/networking/mctp.rst +++ b/Documentation/networking/mctp.rst @@ -59,11 +59,11 @@ specified with a ``sockaddr`` type, with a single-byte endpoint address: }; struct sockaddr_mctp { - unsigned short int smctp_family; - int smctp_network; - struct mctp_addr smctp_addr; - __u8 smctp_type; - __u8 smctp_tag; + __kernel_sa_family_t smctp_family; + unsigned int smctp_network; + struct mctp_addr smctp_addr; + __u8 smctp_type; + __u8 smctp_tag; }; #define MCTP_NET_ANY 0x0 diff --git a/include/net/mctp.h b/include/net/mctp.h index a824d47c3c6d..ffd2c23bd76d 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -54,7 +54,7 @@ struct mctp_sock { struct sock sk; /* bind() params */ - int bind_net; + unsigned int bind_net; mctp_eid_t bind_addr; __u8 bind_type; diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 52b54d13f385..f384962d8ff2 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -10,6 +10,7 @@ #define __UAPI_MCTP_H #include +#include typedef __u8 mctp_eid_t; @@ -18,8 +19,8 @@ struct mctp_addr { }; struct sockaddr_mctp { - unsigned short int smctp_family; - int smctp_network; + __kernel_sa_family_t smctp_family; + unsigned int smctp_network; struct mctp_addr smctp_addr; __u8 smctp_type; __u8 smctp_tag; -- cgit v1.2.3 From 5a20dd46b8b8459382685969cf0f196ae9fb9766 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 18 Oct 2021 11:29:35 +0800 Subject: mctp: Be explicit about struct sockaddr_mctp padding We currently have some implicit padding in struct sockaddr_mctp. This patch makes this padding explicit, and ensures we have consistent layout on platforms with <32bit alignmnent. Fixes: 60fc63981693 ("mctp: Add sockaddr_mctp to uapi") Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/uapi/linux/mctp.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index f384962d8ff2..6acd4ccafbf7 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -20,10 +20,12 @@ struct mctp_addr { struct sockaddr_mctp { __kernel_sa_family_t smctp_family; + __u16 __smctp_pad0; unsigned int smctp_network; struct mctp_addr smctp_addr; __u8 smctp_type; __u8 smctp_tag; + __u8 __smctp_pad1; }; #define MCTP_NET_ANY 0x0 -- cgit v1.2.3 From d49fe5e8151711ed5276f049bc7f73e02dc141f8 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 18 Oct 2021 14:42:01 +0800 Subject: selftests/tls: add SM4 algorithm dependency for tls selftests Kernel TLS test has added SM4 GCM/CCM algorithm support, but SM4 algorithm is not compiled by default, this patch add SM4 config dependency. Reported-by: Hangbin Liu Reported-by: kernel test robot Signed-off-by: Tianjia Zhang Signed-off-by: David S. Miller --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 21b646d10b88..86ab429fe7f3 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -43,3 +43,4 @@ CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_MIRRED=m CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y +CONFIG_CRYPTO_SM4=y -- cgit v1.2.3 From 4cce60f15c04d69eff6ffc539ab09137dbe15070 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 4 Oct 2021 00:56:06 -0700 Subject: NIOS2: irqflags: rename a redefined register name Both arch/nios2/ and drivers/mmc/host/tmio_mmc.c define a macro with the name "CTL_STATUS". Change the one in arch/nios2/ to be "CTL_FSTATUS" (flags status) to eliminate the build warning. In file included from ../drivers/mmc/host/tmio_mmc.c:22: drivers/mmc/host/tmio_mmc.h:31: warning: "CTL_STATUS" redefined 31 | #define CTL_STATUS 0x1c arch/nios2/include/asm/registers.h:14: note: this is the location of the previous definition 14 | #define CTL_STATUS 0 Fixes: b31ebd8055ea ("nios2: Nios2 registers") Signed-off-by: Randy Dunlap Cc: Dinh Nguyen Signed-off-by: Dinh Nguyen --- arch/nios2/include/asm/irqflags.h | 4 ++-- arch/nios2/include/asm/registers.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/nios2/include/asm/irqflags.h b/arch/nios2/include/asm/irqflags.h index b3ec3e510706..25acf27862f9 100644 --- a/arch/nios2/include/asm/irqflags.h +++ b/arch/nios2/include/asm/irqflags.h @@ -9,7 +9,7 @@ static inline unsigned long arch_local_save_flags(void) { - return RDCTL(CTL_STATUS); + return RDCTL(CTL_FSTATUS); } /* @@ -18,7 +18,7 @@ static inline unsigned long arch_local_save_flags(void) */ static inline void arch_local_irq_restore(unsigned long flags) { - WRCTL(CTL_STATUS, flags); + WRCTL(CTL_FSTATUS, flags); } static inline void arch_local_irq_disable(void) diff --git a/arch/nios2/include/asm/registers.h b/arch/nios2/include/asm/registers.h index 183c720e454d..95b67dd16f81 100644 --- a/arch/nios2/include/asm/registers.h +++ b/arch/nios2/include/asm/registers.h @@ -11,7 +11,7 @@ #endif /* control register numbers */ -#define CTL_STATUS 0 +#define CTL_FSTATUS 0 #define CTL_ESTATUS 1 #define CTL_BSTATUS 2 #define CTL_IENABLE 3 -- cgit v1.2.3 From 9fbfabfda25d8774c5a08634fdd2da000a924890 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Mon, 18 Oct 2021 18:34:22 +0800 Subject: block: fix incorrect references to disk objects When adding partitions to the disk, the reference count of the disk object is increased. then alloc partition device and called device_add(), if the device_add() return error, the reference count of the disk object will be reduced twice, at put_device(pdev) and put_disk(disk). this leads to the end of the object's life cycle prematurely, and trigger following calltrace. __init_work+0x2d/0x50 kernel/workqueue.c:519 synchronize_rcu_expedited+0x3af/0x650 kernel/rcu/tree_exp.h:847 bdi_remove_from_list mm/backing-dev.c:938 [inline] bdi_unregister+0x17f/0x5c0 mm/backing-dev.c:946 release_bdi+0xa1/0xc0 mm/backing-dev.c:968 kref_put include/linux/kref.h:65 [inline] bdi_put+0x72/0xa0 mm/backing-dev.c:976 bdev_free_inode+0x11e/0x220 block/bdev.c:408 i_callback+0x3f/0x70 fs/inode.c:226 rcu_do_batch kernel/rcu/tree.c:2508 [inline] rcu_core+0x76d/0x16c0 kernel/rcu/tree.c:2743 __do_softirq+0x1d7/0x93b kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu kernel/softirq.c:636 [inline] irq_exit_rcu+0xf2/0x130 kernel/softirq.c:648 sysvec_apic_timer_interrupt+0x93/0xc0 making disk is NULL when calling put_disk(). Reported-by: Hao Sun Signed-off-by: Zqiang Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211018103422.2043-1-qiang.zhang1211@gmail.com Signed-off-by: Jens Axboe --- block/partitions/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/partitions/core.c b/block/partitions/core.c index 58c4c362c94f..7bea19dd9458 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -423,6 +423,7 @@ out_del: device_del(pdev); out_put: put_device(pdev); + return ERR_PTR(err); out_put_disk: put_disk(disk); return ERR_PTR(err); -- cgit v1.2.3 From baa1e5ca172ce7bf9554070139482dd7ea919528 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Fri, 15 Oct 2021 13:32:22 -0400 Subject: KVM: SEV-ES: Set guest_state_protected after VMSA update The refactoring in commit bb18a6777465 ("KVM: SEV: Acquire vcpu mutex when updating VMSA") left behind the assignment to svm->vcpu.arch.guest_state_protected; add it back. Signed-off-by: Peter Gonda [Delta between v2 and v3 of Peter's patch, which had already been committed; the commit message is my own. - Paolo] Fixes: bb18a6777465 ("KVM: SEV: Acquire vcpu mutex when updating VMSA") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index e672493b5d8d..0d21d59936e5 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -618,7 +618,12 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, vmsa.handle = to_kvm_svm(kvm)->sev_info.handle; vmsa.address = __sme_pa(svm->vmsa); vmsa.len = PAGE_SIZE; - return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); + ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); + if (ret) + return ret; + + vcpu->arch.guest_state_protected = true; + return 0; } static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) -- cgit v1.2.3 From fa13843d1565d4c5b3aeb9be3343b313416bef46 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 15 Oct 2021 13:05:00 -0400 Subject: KVM: X86: fix lazy allocation of rmaps If allocation of rmaps fails, but some of the pointers have already been written, those pointers can be cleaned up when the memslot is freed, or even reused later for another attempt at allocating the rmaps. Therefore there is no need to WARN, as done for example in memslot_rmap_alloc, but the allocation *must* be skipped lest KVM will overwrite the previous pointer and will indeed leak memory. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index aabd3a2ec1bc..0c8b5129effd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11392,7 +11392,8 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot, int level = i + 1; int lpages = __kvm_mmu_slot_lpages(slot, npages, level); - WARN_ON(slot->arch.rmap[i]); + if (slot->arch.rmap[i]) + continue; slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT); if (!slot->arch.rmap[i]) { -- cgit v1.2.3 From f7d8a19f9a056a05c5c509fa65af472a322abfee Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 12 Oct 2021 17:35:53 -0700 Subject: Revert "KVM: x86: Open code necessary bits of kvm_lapic_set_base() at vCPU RESET" Revert a change to open code bits of kvm_lapic_set_base() when emulating APIC RESET to fix an apic_hw_disabled underflow bug due to arch.apic_base and apic_hw_disabled being unsyncrhonized when the APIC is created. If kvm_arch_vcpu_create() fails after creating the APIC, kvm_free_lapic() will see the initialized-to-zero vcpu->arch.apic_base and decrement apic_hw_disabled without KVM ever having incremented apic_hw_disabled. Using kvm_lapic_set_base() in kvm_lapic_reset() is also desirable for a potential future where KVM supports RESET outside of vCPU creation, in which case all the side effects of kvm_lapic_set_base() are needed, e.g. to handle the transition from x2APIC => xAPIC. Alternatively, KVM could temporarily increment apic_hw_disabled (and call kvm_lapic_set_base() at RESET), but that's a waste of cycles and would impact the performance of other vCPUs and VMs. The other subtle side effect is that updating the xAPIC ID needs to be done at RESET regardless of whether the APIC was previously enabled, i.e. kvm_lapic_reset() needs an explicit call to kvm_apic_set_xapic_id() regardless of whether or not kvm_lapic_set_base() also performs the update. That makes stuffing the enable bit at vCPU creation slightly more palatable, as doing so affects only the apic_hw_disabled key. Opportunistically tweak the comment to explicitly call out the connection between vcpu->arch.apic_base and apic_hw_disabled, and add a comment to call out the need to always do kvm_apic_set_xapic_id() at RESET. Underflow scenario: kvm_vm_ioctl() { kvm_vm_ioctl_create_vcpu() { kvm_arch_vcpu_create() { if (something_went_wrong) goto fail_free_lapic; /* vcpu->arch.apic_base is initialized when something_went_wrong is false. */ kvm_vcpu_reset() { kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; } } return 0; fail_free_lapic: kvm_free_lapic() { /* vcpu->arch.apic_base is not yet initialized when something_went_wrong is true. */ if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) static_branch_slow_dec_deferred(&apic_hw_disabled); // <= underflow bug. } return r; } } } This (mostly) reverts commit 421221234ada41b4a9f0beeb08e30b07388bd4bd. Fixes: 421221234ada ("KVM: x86: Open code necessary bits of kvm_lapic_set_base() at vCPU RESET") Reported-by: syzbot+9fc046ab2b0cf295a063@syzkaller.appspotmail.com Debugged-by: Tetsuo Handa Signed-off-by: Sean Christopherson Message-Id: <20211013003554.47705-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 76fb00921203..7af25304bda9 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2321,13 +2321,14 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_lapic *apic = vcpu->arch.apic; + u64 msr_val; int i; if (!init_event) { - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; + msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_reset_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; + msr_val |= MSR_IA32_APICBASE_BSP; + kvm_lapic_set_base(vcpu, msr_val); } if (!apic) @@ -2336,11 +2337,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); - if (!init_event) { - apic->base_address = APIC_DEFAULT_PHYS_BASE; - + /* The xAPIC ID is set at RESET even if the APIC was already enabled. */ + if (!init_event) kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); - } kvm_apic_set_version(apic->vcpu); for (i = 0; i < KVM_APIC_LVT_NUM; i++) @@ -2481,6 +2480,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) lapic_timer_advance_dynamic = false; } + /* + * Stuff the APIC ENABLE bit in lieu of temporarily incrementing + * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset(). + */ + vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */ kvm_iodevice_init(&apic->dev, &apic_mmio_ops); -- cgit v1.2.3 From 9139a7a64581c80d157027ae20e86f2f24d4292c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 12 Oct 2021 17:35:54 -0700 Subject: KVM: x86: WARN if APIC HW/SW disable static keys are non-zero on unload WARN if the static keys used to track if any vCPU has disabled its APIC are left elevated at module exit. Unlike the underflow case, nothing in the static key infrastructure will complain if a key is left elevated, and because an elevated key only affects performance, nothing in KVM will fail if either key is improperly incremented. Signed-off-by: Sean Christopherson Message-Id: <20211013003554.47705-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7af25304bda9..d6ac32f3f650 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2946,5 +2946,7 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu) void kvm_lapic_exit(void) { static_key_deferred_flush(&apic_hw_disabled); + WARN_ON(static_branch_unlikely(&apic_hw_disabled.key)); static_key_deferred_flush(&apic_sw_disabled); + WARN_ON(static_branch_unlikely(&apic_sw_disabled.key)); } -- cgit v1.2.3 From 01c7d2672a84dbdfa8050d073c3ea466437578fd Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 6 Oct 2021 12:17:24 +0000 Subject: KVM: kvm_stat: do not show halt_wait_ns Similar to commit 111d0bda8eeb ("tools/kvm_stat: Exempt time-based counters"), we should not show timer values in kvm_stat. Remove the new halt_wait_ns. Fixes: 87bcc5fa092f ("KVM: stats: Add halt_wait_ns stats for all architectures") Cc: Jing Zhang Cc: Stefan Raspl Signed-off-by: Christian Borntraeger Reviewed-by: Stefan Raspl Message-Id: <20211006121724.4154-1-borntraeger@de.ibm.com> Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index b0bf56c5f120..5a5bd74f55bd 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -742,7 +742,7 @@ class DebugfsProvider(Provider): The fields are all available KVM debugfs files """ - exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns'] + exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns', 'halt_wait_ns'] fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2] if field not in exempt_list] -- cgit v1.2.3 From d61863c66f9b443192997613cd6aeca3f65cc313 Mon Sep 17 00:00:00 2001 From: Hao Xiang Date: Fri, 15 Oct 2021 19:59:21 +0800 Subject: KVM: VMX: Remove redundant handling of bus lock vmexit Hardware may or may not set exit_reason.bus_lock_detected on BUS_LOCK VM-Exits. Dealing with KVM_RUN_X86_BUS_LOCK in handle_bus_lock_vmexit could be redundant when exit_reason.basic is EXIT_REASON_BUS_LOCK. We can remove redundant handling of bus lock vmexit. Unconditionally Set exit_reason.bus_lock_detected in handle_bus_lock_vmexit(), and deal with KVM_RUN_X86_BUS_LOCK only in vmx_handle_exit(). Signed-off-by: Hao Xiang Message-Id: <1634299161-30101-1-git-send-email-hao.xiang@linux.alibaba.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 116b08904ac3..7fb2a3a1ca46 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5562,9 +5562,13 @@ static int handle_encls(struct kvm_vcpu *vcpu) static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu) { - vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK; - vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK; - return 0; + /* + * Hardware may or may not set the BUS_LOCK_DETECTED flag on BUS_LOCK + * VM-Exits. Unconditionally set the flag here and leave the handling to + * vmx_handle_exit(). + */ + to_vmx(vcpu)->exit_reason.bus_lock_detected = true; + return 1; } /* @@ -6051,9 +6055,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) int ret = __vmx_handle_exit(vcpu, exit_fastpath); /* - * Even when current exit reason is handled by KVM internally, we - * still need to exit to user space when bus lock detected to inform - * that there is a bus lock in guest. + * Exit to user space when bus lock detected to inform that there is + * a bus lock in guest. */ if (to_vmx(vcpu)->exit_reason.bus_lock_detected) { if (ret > 0) -- cgit v1.2.3 From 9f1ee7b169afbd10c3ad254220d1b37beb5798aa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Oct 2021 06:49:18 -0400 Subject: KVM: SEV-ES: reduce ghcb_sa_len to 32 bits The size of the GHCB scratch area is limited to 16 KiB (GHCB_SCRATCH_AREA_LIMIT), so there is no need for it to be a u64. This fixes a build error on 32-bit systems: i686-linux-gnu-ld: arch/x86/kvm/svm/sev.o: in function `sev_es_string_io: sev.c:(.text+0x110f): undefined reference to `__udivdi3' Cc: stable@vger.kernel.org Fixes: 019057bd73d1 ("KVM: SEV-ES: fix length of string I/O") Reported-by: Naresh Kamboju Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 128a54b1fbf1..5d30db599e10 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -191,7 +191,7 @@ struct vcpu_svm { /* SEV-ES scratch area support */ void *ghcb_sa; - u64 ghcb_sa_len; + u32 ghcb_sa_len; bool ghcb_sa_sync; bool ghcb_sa_free; -- cgit v1.2.3 From 675c496d0f92b481ebe4abf4fb06eadad7789de6 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 16 Oct 2021 12:50:21 +0200 Subject: clk: composite: Also consider .determine_rate for rate + mux composites Commit 69a00fb3d69706 ("clk: divider: Implement and wire up .determine_rate by default") switches clk_divider_ops to implement .determine_rate by default. This breaks composite clocks with multiple parents because clk-composite.c does not use the special handling for mux + divider combinations anymore (that was restricted to rate clocks which only implement .round_rate, but not .determine_rate). Alex reports: This breaks lot of clocks for Rockchip which intensively uses composites, i.e. those clocks will always stay at the initial parent, which in some cases is the XTAL clock and I strongly guess it is the same for other platforms, which use composite clocks having more than one parent (e.g. mediatek, ti ...) Example (RK3399) clk_sdio is set (initialized) with XTAL (24 MHz) as parent in u-boot. It will always stay at this parent, even if the mmc driver sets a rate of 200 MHz (fails, as the nature of things), which should switch it to any of its possible parent PLLs defined in mux_pll_src_cpll_gpll_npll_ppll_upll_24m_p (see clk-rk3399.c) - which never happens. Restore the original behavior by changing the priority of the conditions inside clk-composite.c. Now the special rate + mux case (with rate_ops having a .round_rate - which is still the case for the default clk_divider_ops) is preferred over rate_ops which have .determine_rate defined (and not further considering the mux). Fixes: 69a00fb3d69706 ("clk: divider: Implement and wire up .determine_rate by default") Reported-by: Alex Bee Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20211016105022.303413-2-martin.blumenstingl@googlemail.com Tested-by: Alex Bee Tested-by: Chen-Yu Tsai Signed-off-by: Stephen Boyd --- drivers/clk/clk-composite.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c index 0506046a5f4b..510a9965633b 100644 --- a/drivers/clk/clk-composite.c +++ b/drivers/clk/clk-composite.c @@ -58,11 +58,8 @@ static int clk_composite_determine_rate(struct clk_hw *hw, long rate; int i; - if (rate_hw && rate_ops && rate_ops->determine_rate) { - __clk_hw_set_clk(rate_hw, hw); - return rate_ops->determine_rate(rate_hw, req); - } else if (rate_hw && rate_ops && rate_ops->round_rate && - mux_hw && mux_ops && mux_ops->set_parent) { + if (rate_hw && rate_ops && rate_ops->round_rate && + mux_hw && mux_ops && mux_ops->set_parent) { req->best_parent_hw = NULL; if (clk_hw_get_flags(hw) & CLK_SET_RATE_NO_REPARENT) { @@ -107,6 +104,9 @@ static int clk_composite_determine_rate(struct clk_hw *hw, req->rate = best_rate; return 0; + } else if (rate_hw && rate_ops && rate_ops->determine_rate) { + __clk_hw_set_clk(rate_hw, hw); + return rate_ops->determine_rate(rate_hw, req); } else if (mux_hw && mux_ops && mux_ops->determine_rate) { __clk_hw_set_clk(mux_hw, hw); return mux_ops->determine_rate(mux_hw, req); -- cgit v1.2.3 From 15bc01effefe97757ef02ca09e9d1b927ab22725 Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Sat, 16 Oct 2021 15:59:49 -0500 Subject: ucounts: Fix signal ucount refcounting In commit fda31c50292a ("signal: avoid double atomic counter increments for user accounting") Linus made a clever optimization to how rlimits and the struct user_struct. Unfortunately that optimization does not work in the obvious way when moved to nested rlimits. The problem is that the last decrement of the per user namespace per user sigpending counter might also be the last decrement of the sigpending counter in the parent user namespace as well. Which means that simply freeing the leaf ucount in __free_sigqueue is not enough. Maintain the optimization and handle the tricky cases by introducing inc_rlimit_get_ucounts and dec_rlimit_put_ucounts. By moving the entire optimization into functions that perform all of the work it becomes possible to ensure that every level is handled properly. The new function inc_rlimit_get_ucounts returns 0 on failure to increment the ucount. This is different than inc_rlimit_ucounts which increments the ucounts and returns LONG_MAX if the ucount counter has exceeded it's maximum or it wrapped (to indicate the counter needs to decremented). I wish we had a single user to account all pending signals to across all of the threads of a process so this complexity was not necessary Cc: stable@vger.kernel.org Fixes: d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts") v1: https://lkml.kernel.org/r/87mtnavszx.fsf_-_@disp2133 Link: https://lkml.kernel.org/r/87fssytizw.fsf_-_@disp2133 Reviewed-by: Alexey Gladkov Tested-by: Rune Kleveland Tested-by: Yu Zhao Tested-by: Jordan Glover Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 2 ++ kernel/signal.c | 25 ++++++--------------- kernel/ucount.c | 49 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 19 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index eb70cabe6e7f..33a4240e6a6f 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -127,6 +127,8 @@ static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type t long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); +long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type); +void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type); bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max); static inline void set_rlimit_ucount_max(struct user_namespace *ns, diff --git a/kernel/signal.c b/kernel/signal.c index a3229add4455..13d2505a14a0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -425,22 +425,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, */ rcu_read_lock(); ucounts = task_ucounts(t); - sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1); - switch (sigpending) { - case 1: - if (likely(get_ucounts(ucounts))) - break; - fallthrough; - case LONG_MAX: - /* - * we need to decrease the ucount in the userns tree on any - * failure to avoid counts leaking. - */ - dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1); - rcu_read_unlock(); - return NULL; - } + sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); rcu_read_unlock(); + if (!sigpending) + return NULL; if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); @@ -449,8 +437,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, } if (unlikely(q == NULL)) { - if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) - put_ucounts(ucounts); + dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); } else { INIT_LIST_HEAD(&q->list); q->flags = sigqueue_flags; @@ -463,8 +450,8 @@ static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) { - put_ucounts(q->ucounts); + if (q->ucounts) { + dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING); q->ucounts = NULL; } kmem_cache_free(sigqueue_cachep, q); diff --git a/kernel/ucount.c b/kernel/ucount.c index bb51849e6375..eb03f3c68375 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -284,6 +284,55 @@ bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) return (new == 0); } +static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts, + struct ucounts *last, enum ucount_type type) +{ + struct ucounts *iter, *next; + for (iter = ucounts; iter != last; iter = next) { + long dec = atomic_long_add_return(-1, &iter->ucount[type]); + WARN_ON_ONCE(dec < 0); + next = iter->ns->ucounts; + if (dec == 0) + put_ucounts(iter); + } +} + +void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type) +{ + do_dec_rlimit_put_ucounts(ucounts, NULL, type); +} + +long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type) +{ + /* Caller must hold a reference to ucounts */ + struct ucounts *iter; + long dec, ret = 0; + + for (iter = ucounts; iter; iter = iter->ns->ucounts) { + long max = READ_ONCE(iter->ns->ucount_max[type]); + long new = atomic_long_add_return(1, &iter->ucount[type]); + if (new < 0 || new > max) + goto unwind; + if (iter == ucounts) + ret = new; + /* + * Grab an extra ucount reference for the caller when + * the rlimit count was previously 0. + */ + if (new != 1) + continue; + if (!get_ucounts(iter)) + goto dec_unwind; + } + return ret; +dec_unwind: + dec = atomic_long_add_return(-1, &iter->ucount[type]); + WARN_ON_ONCE(dec < 0); +unwind: + do_dec_rlimit_put_ucounts(ucounts, iter, type); + return 0; +} + bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max) { struct ucounts *iter; -- cgit v1.2.3 From 5ca6779d2f18b195cb3b66a14a271911fc609094 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 18 Oct 2021 08:36:25 -0700 Subject: drm/msm/devfreq: Restrict idle clamping to a618 for now Until we better understand the stability issues caused by frequent frequency changes, lets limit them to a618. Signed-off-by: Rob Clark Tested-by: John Stultz Tested-by: Caleb Connolly Link: https://lore.kernel.org/r/20211018153627.2787882-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 7 +++++++ drivers/gpu/drm/msm/msm_gpu.h | 4 ++++ drivers/gpu/drm/msm/msm_gpu_devfreq.c | 3 ++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 33da25b81615..267a880811d6 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1838,6 +1838,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev))) adreno_gpu->base.hw_apriv = true; + /* + * For now only clamp to idle freq for devices where this is known not + * to cause power supply issues: + */ + if (info && (info->revn == 618)) + gpu->clamp_to_idle = true; + a6xx_llc_slices_init(pdev, a6xx_gpu); ret = a6xx_set_supported_hw(&pdev->dev, config->rev); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 030f82f149c2..ee25d556c8a1 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -203,6 +203,10 @@ struct msm_gpu { uint32_t suspend_count; struct msm_gpu_state *crashstate; + + /* Enable clamping to idle freq when inactive: */ + bool clamp_to_idle; + /* True if the hardware supports expanded apriv (a650 and newer) */ bool hw_apriv; diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 84e98c07c900..20006d060b5b 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -200,7 +200,8 @@ void msm_devfreq_idle(struct msm_gpu *gpu) idle_freq = get_freq(gpu); - msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0); + if (gpu->clamp_to_idle) + msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0); df->idle_time = ktime_get(); df->idle_freq = idle_freq; -- cgit v1.2.3 From 8a64ef042eab8a6cec04a6c79d44d1af79b628ca Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 18 Oct 2021 12:31:01 -0700 Subject: nfp: bpf: silence bitwise vs. logical OR warning A new warning in clang points out two places in this driver where boolean expressions are being used with a bitwise OR instead of a logical one: drivers/net/ethernet/netronome/nfp/nfp_asm.c:199:20: error: use of bitwise '|' with boolean operands [-Werror,-Wbitwise-instead-of-logical] reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || drivers/net/ethernet/netronome/nfp/nfp_asm.c:199:20: note: cast one or both operands to int to silence this warning drivers/net/ethernet/netronome/nfp/nfp_asm.c:280:20: error: use of bitwise '|' with boolean operands [-Werror,-Wbitwise-instead-of-logical] reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || drivers/net/ethernet/netronome/nfp/nfp_asm.c:280:20: note: cast one or both operands to int to silence this warning 2 errors generated. The motivation for the warning is that logical operations short circuit while bitwise operations do not. In this case, it does not seem like short circuiting is harmful so implement the suggested fix of changing to a logical operation to fix the warning. Link: https://github.com/ClangBuiltLinux/linux/issues/1479 Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20211018193101.2340261-1-nathan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_asm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index 2643ea5948f4..154399c5453f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -196,7 +196,7 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, } reg->dst_lmextn = swreg_lmextn(dst); - reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg); + reg->src_lmextn = swreg_lmextn(lreg) || swreg_lmextn(rreg); return 0; } @@ -277,7 +277,7 @@ int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, } reg->dst_lmextn = swreg_lmextn(dst); - reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg); + reg->src_lmextn = swreg_lmextn(lreg) || swreg_lmextn(rreg); return 0; } -- cgit v1.2.3 From ed65df63a39a3f6ed04f7258de8b6789e5021c18 Mon Sep 17 00:00:00 2001 From: Steven Rostedt (VMware) Date: Mon, 18 Oct 2021 15:44:12 -0400 Subject: tracing: Have all levels of checks prevent recursion While writing an email explaining the "bit = 0" logic for a discussion on making ftrace_test_recursion_trylock() disable preemption, I discovered a path that makes the "not do the logic if bit is zero" unsafe. The recursion logic is done in hot paths like the function tracer. Thus, any code executed causes noticeable overhead. Thus, tricks are done to try to limit the amount of code executed. This included the recursion testing logic. Having recursion testing is important, as there are many paths that can end up in an infinite recursion cycle when tracing every function in the kernel. Thus protection is needed to prevent that from happening. Because it is OK to recurse due to different running context levels (e.g. an interrupt preempts a trace, and then a trace occurs in the interrupt handler), a set of bits are used to know which context one is in (normal, softirq, irq and NMI). If a recursion occurs in the same level, it is prevented*. Then there are infrastructure levels of recursion as well. When more than one callback is attached to the same function to trace, it calls a loop function to iterate over all the callbacks. Both the callbacks and the loop function have recursion protection. The callbacks use the "ftrace_test_recursion_trylock()" which has a "function" set of context bits to test, and the loop function calls the internal trace_test_and_set_recursion() directly, with an "internal" set of bits. If an architecture does not implement all the features supported by ftrace then the callbacks are never called directly, and the loop function is called instead, which will implement the features of ftrace. Since both the loop function and the callbacks do recursion protection, it was seemed unnecessary to do it in both locations. Thus, a trick was made to have the internal set of recursion bits at a more significant bit location than the function bits. Then, if any of the higher bits were set, the logic of the function bits could be skipped, as any new recursion would first have to go through the loop function. This is true for architectures that do not support all the ftrace features, because all functions being traced must first go through the loop function before going to the callbacks. But this is not true for architectures that support all the ftrace features. That's because the loop function could be called due to two callbacks attached to the same function, but then a recursion function inside the callback could be called that does not share any other callback, and it will be called directly. i.e. traced_function_1: [ more than one callback tracing it ] call loop_func loop_func: trace_recursion set internal bit call callback callback: trace_recursion [ skipped because internal bit is set, return 0 ] call traced_function_2 traced_function_2: [ only traced by above callback ] call callback callback: trace_recursion [ skipped because internal bit is set, return 0 ] call traced_function_2 [ wash, rinse, repeat, BOOM! out of shampoo! ] Thus, the "bit == 0 skip" trick is not safe, unless the loop function is call for all functions. Since we want to encourage architectures to implement all ftrace features, having them slow down due to this extra logic may encourage the maintainers to update to the latest ftrace features. And because this logic is only safe for them, remove it completely. [*] There is on layer of recursion that is allowed, and that is to allow for the transition between interrupt context (normal -> softirq -> irq -> NMI), because a trace may occur before the context update is visible to the trace recursion logic. Link: https://lore.kernel.org/all/609b565a-ed6e-a1da-f025-166691b5d994@linux.alibaba.com/ Link: https://lkml.kernel.org/r/20211018154412.09fcad3c@gandalf.local.home Cc: Linus Torvalds Cc: Petr Mladek Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Joe Lawrence Cc: Colin Ian King Cc: Masami Hiramatsu Cc: "Peter Zijlstra (Intel)" Cc: Nicholas Piggin Cc: Jisheng Zhang Cc: =?utf-8?b?546L6LSH?= Cc: Guo Ren Cc: stable@vger.kernel.org Fixes: edc15cafcbfa3 ("tracing: Avoid unnecessary multiple recursion checks") Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 49 ++++++++--------------------------------- kernel/trace/ftrace.c | 4 ++-- 2 files changed, 11 insertions(+), 42 deletions(-) diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index a9f9c5714e65..fe95f0922526 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -16,23 +16,8 @@ * When function tracing occurs, the following steps are made: * If arch does not support a ftrace feature: * call internal function (uses INTERNAL bits) which calls... - * If callback is registered to the "global" list, the list - * function is called and recursion checks the GLOBAL bits. - * then this function calls... * The function callback, which can use the FTRACE bits to * check for recursion. - * - * Now if the arch does not support a feature, and it calls - * the global list function which calls the ftrace callback - * all three of these steps will do a recursion protection. - * There's no reason to do one if the previous caller already - * did. The recursion that we are protecting against will - * go through the same steps again. - * - * To prevent the multiple recursion checks, if a recursion - * bit is set that is higher than the MAX bit of the current - * check, then we know that the check was made by the previous - * caller, and we can skip the current check. */ enum { /* Function recursion bits */ @@ -40,12 +25,14 @@ enum { TRACE_FTRACE_NMI_BIT, TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, + TRACE_FTRACE_TRANSITION_BIT, - /* INTERNAL_BITs must be greater than FTRACE_BITs */ + /* Internal use recursion bits */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, + TRACE_INTERNAL_TRANSITION_BIT, TRACE_BRANCH_BIT, /* @@ -86,12 +73,6 @@ enum { */ TRACE_GRAPH_NOTRACE_BIT, - /* - * When transitioning between context, the preempt_count() may - * not be correct. Allow for a single recursion to cover this case. - */ - TRACE_TRANSITION_BIT, - /* Used to prevent recursion recording from recursing. */ TRACE_RECORD_RECURSION_BIT, }; @@ -113,12 +94,10 @@ enum { #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT -#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) #define TRACE_LIST_START TRACE_INTERNAL_BIT -#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) -#define TRACE_CONTEXT_MASK TRACE_LIST_MAX +#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) /* * Used for setting context @@ -132,6 +111,7 @@ enum { TRACE_CTX_IRQ, TRACE_CTX_SOFTIRQ, TRACE_CTX_NORMAL, + TRACE_CTX_TRANSITION, }; static __always_inline int trace_get_context_bit(void) @@ -160,45 +140,34 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); #endif static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, - int start, int max) + int start) { unsigned int val = READ_ONCE(current->trace_recursion); int bit; - /* A previous recursion check was made */ - if ((val & TRACE_CONTEXT_MASK) > max) - return 0; - bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* * It could be that preempt_count has not been updated during * a switch between contexts. Allow for a single recursion. */ - bit = TRACE_TRANSITION_BIT; + bit = TRACE_CTX_TRANSITION + start; if (val & (1 << bit)) { do_ftrace_record_recursion(ip, pip); return -1; } - } else { - /* Normal check passed, clear the transition to allow it again */ - val &= ~(1 << TRACE_TRANSITION_BIT); } val |= 1 << bit; current->trace_recursion = val; barrier(); - return bit + 1; + return bit; } static __always_inline void trace_clear_recursion(int bit) { - if (!bit) - return; - barrier(); - bit--; trace_recursion_clear(bit); } @@ -214,7 +183,7 @@ static __always_inline void trace_clear_recursion(int bit) static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, unsigned long parent_ip) { - return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX); + return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START); } /** diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7efbc8aaf7f6..635fbdc9d589 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6977,7 +6977,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; - bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START); if (bit < 0) return; @@ -7052,7 +7052,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, { int bit; - bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START); if (bit < 0) return; -- cgit v1.2.3 From 6e3ee990c90494561921c756481d0e2125d8b895 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sat, 16 Oct 2021 15:23:50 +0800 Subject: audit: fix possible null-pointer dereference in audit_filter_rules Fix possible null-pointer dereference in audit_filter_rules. audit_filter_rules() error: we previously assumed 'ctx' could be null Cc: stable@vger.kernel.org Fixes: bf361231c295 ("audit: add saddr_fam filter field") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Gaosheng Cui Signed-off-by: Paul Moore --- kernel/auditsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 8dd73a64f921..b1cb1dbf7417 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -657,7 +657,7 @@ static int audit_filter_rules(struct task_struct *tsk, result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val); break; case AUDIT_SADDR_FAM: - if (ctx->sockaddr) + if (ctx && ctx->sockaddr) result = audit_comparator(ctx->sockaddr->ss_family, f->op, f->val); break; -- cgit v1.2.3 From 06634d5b6e923ed0d4772aba8def5a618f44c7fe Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 18 Oct 2021 01:56:21 +0000 Subject: scsi: qla2xxx: Return -ENOMEM if kzalloc() fails The driver probing function should return < 0 for failure, otherwise kernel will treat value > 0 as success. Link: https://lore.kernel.org/r/1634522181-31166-1-git-send-email-zheyuma97@gmail.com Reviewed-by: Himanshu Madhani Signed-off-by: Zheyu Ma Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index d2e40aaba734..836fedcea241 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4157,7 +4157,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, ql_dbg_pci(ql_dbg_init, ha->pdev, 0xe0ee, "%s: failed alloc dsd\n", __func__); - return 1; + return -ENOMEM; } ha->dif_bundle_kallocs++; -- cgit v1.2.3 From 7fb223d0ad801f633c78cbe42b1d1b55f5d163ad Mon Sep 17 00:00:00 2001 From: Joy Gu Date: Tue, 12 Oct 2021 12:18:33 -0700 Subject: scsi: qla2xxx: Fix a memory leak in an error path of qla2x00_process_els() Commit 8c0eb596baa5 ("[SCSI] qla2xxx: Fix a memory leak in an error path of qla2x00_process_els()"), intended to change: bsg_job->request->msgcode == FC_BSG_HST_ELS_NOLOGIN to: bsg_job->request->msgcode != FC_BSG_RPT_ELS but changed it to: bsg_job->request->msgcode == FC_BSG_RPT_ELS instead. Change the == to a != to avoid leaking the fcport structure or freeing unallocated memory. Link: https://lore.kernel.org/r/20211012191834.90306-2-jgu@purestorage.com Fixes: 8c0eb596baa5 ("[SCSI] qla2xxx: Fix a memory leak in an error path of qla2x00_process_els()") Reviewed-by: Bart Van Assche Signed-off-by: Joy Gu Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_bsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 4b5d28d89d69..655cf5de604b 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -431,7 +431,7 @@ done_unmap_sg: goto done_free_fcport; done_free_fcport: - if (bsg_request->msgcode == FC_BSG_RPT_ELS) + if (bsg_request->msgcode != FC_BSG_RPT_ELS) qla2x00_free_fcport(fcport); done: return rval; -- cgit v1.2.3 From 4a8f71014b4d56c4fb287607e844c0a9f68f46d9 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Mon, 18 Oct 2021 15:26:50 +0300 Subject: scsi: qla2xxx: Fix unmap of already freed sgl The sgl is freed in the target stack in target_release_cmd_kref() before calling qlt_free_cmd() but there is an unmap of sgl in qlt_free_cmd() that causes a panic if sgl is not yet DMA unmapped: NIP dma_direct_unmap_sg+0xdc/0x180 LR dma_direct_unmap_sg+0xc8/0x180 Call Trace: ql_dbg_prefix+0x68/0xc0 [qla2xxx] (unreliable) dma_unmap_sg_attrs+0x54/0xf0 qlt_unmap_sg.part.19+0x54/0x1c0 [qla2xxx] qlt_free_cmd+0x124/0x1d0 [qla2xxx] tcm_qla2xxx_release_cmd+0x4c/0xa0 [tcm_qla2xxx] target_put_sess_cmd+0x198/0x370 [target_core_mod] transport_generic_free_cmd+0x6c/0x1b0 [target_core_mod] tcm_qla2xxx_complete_free+0x6c/0x90 [tcm_qla2xxx] The sgl may be left unmapped in error cases of response sending. For instance, qlt_rdy_to_xfer() maps sgl and exits when session is being deleted keeping the sgl mapped. This patch removes use-after-free of the sgl and ensures that the sgl is unmapped for any command that was not sent to firmware. Link: https://lore.kernel.org/r/20211018122650.11846-1-d.bogdanov@yadro.com Reviewed-by: Himanshu Madhani Signed-off-by: Dmitry Bogdanov Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index b3478ed9b12e..7d8242c120fc 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3319,8 +3319,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, "RESET-RSP online/active/old-count/new-count = %d/%d/%d/%d.\n", vha->flags.online, qla2x00_reset_active(vha), cmd->reset_count, qpair->chip_reset); - spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - return 0; + goto out_unmap_unlock; } /* Does F/W have an IOCBs for this request */ @@ -3445,10 +3444,6 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) prm.sg = NULL; prm.req_cnt = 1; - /* Calculate number of entries and segments required */ - if (qlt_pci_map_calc_cnt(&prm) != 0) - return -EAGAIN; - if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) || (cmd->sess && cmd->sess->deleted)) { /* @@ -3466,6 +3461,10 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) return 0; } + /* Calculate number of entries and segments required */ + if (qlt_pci_map_calc_cnt(&prm) != 0) + return -EAGAIN; + spin_lock_irqsave(qpair->qp_lock_ptr, flags); /* Does F/W have an IOCBs for this request */ res = qlt_check_reserve_free_req(qpair, prm.req_cnt); @@ -3870,9 +3869,6 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) BUG_ON(cmd->cmd_in_wq); - if (cmd->sg_mapped) - qlt_unmap_sg(cmd->vha, cmd); - if (!cmd->q_full) qlt_decr_num_pend_cmds(cmd->vha); -- cgit v1.2.3 From 4e5483b8440d01f6851a1388801088a6e0da0b56 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 18 Oct 2021 18:10:04 +0300 Subject: scsi: ufs: ufs-pci: Force a full restore after suspend-to-disk Implement the ->restore() PM operation and set the link to off, which will force a full reset and restore. This ensures that Host Performance Booster is reset after suspend-to-disk. The Host Performance Booster feature caches logical-to-physical mapping information in the host memory. After suspend-to-disk, such information is not valid, so a full reset and restore is needed. A full reset and restore is done if the SPM level is 5 or 6, but not for other SPM levels, so this change fixes those cases. A full reset and restore also restores base address registers, so that code is removed. Link: https://lore.kernel.org/r/20211018151004.284200-2-adrian.hunter@intel.com Reviewed-by: Avri Altman Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index 149c1aa09103..51424557810d 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -370,20 +370,6 @@ static void ufs_intel_common_exit(struct ufs_hba *hba) static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op) { - /* - * To support S4 (suspend-to-disk) with spm_lvl other than 5, the base - * address registers must be restored because the restore kernel can - * have used different addresses. - */ - ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr), - REG_UTP_TRANSFER_REQ_LIST_BASE_L); - ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr), - REG_UTP_TRANSFER_REQ_LIST_BASE_H); - ufshcd_writel(hba, lower_32_bits(hba->utmrdl_dma_addr), - REG_UTP_TASK_REQ_LIST_BASE_L); - ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr), - REG_UTP_TASK_REQ_LIST_BASE_H); - if (ufshcd_is_link_hibern8(hba)) { int ret = ufshcd_uic_hibern8_exit(hba); @@ -463,6 +449,18 @@ static struct ufs_hba_variant_ops ufs_intel_lkf_hba_vops = { .device_reset = ufs_intel_device_reset, }; +#ifdef CONFIG_PM_SLEEP +static int ufshcd_pci_restore(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + /* Force a full reset and restore */ + ufshcd_set_link_off(hba); + + return ufshcd_system_resume(dev); +} +#endif + /** * ufshcd_pci_shutdown - main function to put the controller in reset state * @pdev: pointer to PCI device handle @@ -546,9 +544,14 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } static const struct dev_pm_ops ufshcd_pci_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume) SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL) #ifdef CONFIG_PM_SLEEP + .suspend = ufshcd_system_suspend, + .resume = ufshcd_system_resume, + .freeze = ufshcd_system_suspend, + .thaw = ufshcd_system_resume, + .poweroff = ufshcd_system_suspend, + .restore = ufshcd_pci_restore, .prepare = ufshcd_suspend_prepare, .complete = ufshcd_resume_complete, #endif -- cgit v1.2.3 From 29664923725a384dc7e0f74af7c66e5ab7bb2a26 Mon Sep 17 00:00:00 2001 From: Marco Giunta Date: Mon, 18 Oct 2021 18:25:52 +0200 Subject: ALSA: usb-audio: Fix microphone sound on Jieli webcam. When a Jieli Technology USB Webcam is connected, the video part works well, but the mic sound is speeded up. On dmesg there are messages about different rates from the runtime rates, warnings about volume resolution and lastly, the log is filled, every 5 seconds, with retire_capture_urb error messages. The mic works only when ep packet size is set to wMaxPacketSize (normal sound and no more retire_capture_urb error messages). Skipping reading sample rate, fixes the messages about different rates and forcing a volume resolution, fixes warnings about volume range. I have arbitrarily choosed the value (16): I read in a comment that there should be no more than 255 levels, so 4096 (max volume) / 16 = 0-255. Signed-off-by: Marco Giunta Link: https://lore.kernel.org/r/20211018162552.12082-1-giun7a@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 7 +++++++ sound/usb/quirks.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index a2ce535df14b..8e030b1c061a 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1198,6 +1198,13 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, cval->res = 1; } break; + case USB_ID(0x1224, 0x2a25): /* Jieli Technology USB PHY 2.0 */ + if (!strcmp(kctl->id.name, "Mic Capture Volume")) { + usb_audio_info(chip, + "set resolution quirk: cval->res = 16\n"); + cval->res = 16; + } + break; } } diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index b8da4a08c458..8929d9abe8aa 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1719,6 +1719,11 @@ void snd_usb_audioformat_attributes_quirk(struct snd_usb_audio *chip, */ fp->attributes &= ~UAC_EP_CS_ATTR_FILL_MAX; break; + case USB_ID(0x1224, 0x2a25): /* Jieli Technology USB PHY 2.0 */ + /* mic works only when ep packet size is set to wMaxPacketSize */ + fp->attributes |= UAC_EP_CS_ATTR_FILL_MAX; + break; + } } @@ -1890,6 +1895,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */ QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ + QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ -- cgit v1.2.3 From 8913970c19915bbe773d97d42989cd85b7fdc098 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 18 Oct 2021 15:15:22 -0700 Subject: mm/userfaultfd: selftests: fix memory corruption with thp enabled In RHEL's gating selftests we've encountered memory corruption in the uffd event test even with upstream kernel: # ./userfaultfd anon 128 4 nr_pages: 32768, nr_pages_per_cpu: 32768 bounces: 3, mode: rnd racing read, userfaults: 6240 missing (6240) 14729 wp (14729) bounces: 2, mode: racing read, userfaults: 1444 missing (1444) 28877 wp (28877) bounces: 1, mode: rnd read, userfaults: 6055 missing (6055) 14699 wp (14699) bounces: 0, mode: read, userfaults: 82 missing (82) 25196 wp (25196) testing uffd-wp with pagemap (pgsize=4096): done testing uffd-wp with pagemap (pgsize=2097152): done testing events (fork, remap, remove): ERROR: nr 32427 memory corruption 0 1 (errno=0, line=963) ERROR: faulting process failed (errno=0, line=1117) It can be easily reproduced when global thp enabled, which is the default for RHEL. It's also known as a side effect of commit 0db282ba2c12 ("selftest: use mmap instead of posix_memalign to allocate memory", 2021-07-23), which is imho right itself on using mmap() to make sure the addresses will be untagged even on arm. The problem is, for each test we allocate buffers using two allocate_area() calls. We assumed these two buffers won't affect each other, however they could, because mmap() could have found that the two buffers are near each other and having the same VMA flags, so they got merged into one VMA. It won't be a big problem if thp is not enabled, but when thp is agressively enabled it means when initializing the src buffer it could accidentally setup part of the dest buffer too when there's a shared THP that overlaps the two regions. Then some of the dest buffer won't be able to be trapped by userfaultfd missing mode, then it'll cause memory corruption as described. To fix it, do release_pages() after initializing the src buffer. Since the previous two release_pages() calls are after uffd_test_ctx_clear() which will unmap all the buffers anyway (which is stronger than release pages; as unmap() also tear town pgtables), drop them as they shouldn't really be anything useful. We can mark the Fixes tag upon 0db282ba2c12 as it's reported to only happen there, however the real "Fixes" IMHO should be 8ba6e8640844, as before that commit we'll always do explicit release_pages() before registration of uffd, and 8ba6e8640844 changed that logic by adding extra unmap/map and we didn't release the pages at the right place. Meanwhile I don't have a solid glue anyway on whether posix_memalign() could always avoid triggering this bug, hence it's safer to attach this fix to commit 8ba6e8640844. Link: https://lkml.kernel.org/r/20210923232512.210092-1-peterx@redhat.com Fixes: 8ba6e8640844 ("userfaultfd/selftests: reinitialize test context in each test") Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1994931 Signed-off-by: Peter Xu Reported-by: Li Wang Tested-by: Li Wang Reviewed-by: Axel Rasmussen Cc: Andrea Arcangeli Cc: Nadav Amit Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 10ab56c2484a..60aa1a4fc69b 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -414,9 +414,6 @@ static void uffd_test_ctx_init_ext(uint64_t *features) uffd_test_ops->allocate_area((void **)&area_src); uffd_test_ops->allocate_area((void **)&area_dst); - uffd_test_ops->release_pages(area_src); - uffd_test_ops->release_pages(area_dst); - userfaultfd_open(features); count_verify = malloc(nr_pages * sizeof(unsigned long long)); @@ -437,6 +434,26 @@ static void uffd_test_ctx_init_ext(uint64_t *features) *(area_count(area_src, nr) + 1) = 1; } + /* + * After initialization of area_src, we must explicitly release pages + * for area_dst to make sure it's fully empty. Otherwise we could have + * some area_dst pages be errornously initialized with zero pages, + * hence we could hit memory corruption later in the test. + * + * One example is when THP is globally enabled, above allocate_area() + * calls could have the two areas merged into a single VMA (as they + * will have the same VMA flags so they're mergeable). When we + * initialize the area_src above, it's possible that some part of + * area_dst could have been faulted in via one huge THP that will be + * shared between area_src and area_dst. It could cause some of the + * area_dst won't be trapped by missing userfaults. + * + * This release_pages() will guarantee even if that happened, we'll + * proactively split the thp and drop any accidentally initialized + * pages within area_dst. + */ + uffd_test_ops->release_pages(area_dst); + pipefd = malloc(sizeof(int) * nr_cpus * 2); if (!pipefd) err("pipefd"); -- cgit v1.2.3 From cb185d5f1ebf900f4ae3bf84cee212e6dd035aca Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 18 Oct 2021 15:15:25 -0700 Subject: userfaultfd: fix a race between writeprotect and exit_mmap() A race is possible when a process exits, its VMAs are removed by exit_mmap() and at the same time userfaultfd_writeprotect() is called. The race was detected by KASAN on a development kernel, but it appears to be possible on vanilla kernels as well. Use mmget_not_zero() to prevent the race as done in other userfaultfd operations. Link: https://lkml.kernel.org/r/20210921200247.25749-1-namit@vmware.com Fixes: 63b2d4174c4ad ("userfaultfd: wp: add the writeprotect API to userfaultfd ioctl") Signed-off-by: Nadav Amit Tested-by: Li Wang Reviewed-by: Peter Xu Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 003f0d31743e..22bf14ab2d16 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1827,9 +1827,15 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, if (mode_wp && mode_dontwake) return -EINVAL; - ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start, - uffdio_wp.range.len, mode_wp, - &ctx->mmap_changing); + if (mmget_not_zero(ctx->mm)) { + ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start, + uffdio_wp.range.len, mode_wp, + &ctx->mmap_changing); + mmput(ctx->mm); + } else { + return -ESRCH; + } + if (ret) return ret; -- cgit v1.2.3 From 295be91f7ef0027fca2f2e4788e99731aa931834 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 18 Oct 2021 15:15:29 -0700 Subject: mm/migrate: optimize hotplug-time demotion order updates Patch series "mm/migrate: 5.15 fixes for automatic demotion", v2. This contains two fixes for the "automatic demotion" code which was merged into 5.15: * Fix memory hotplug performance regression by watching suppressing any real action on irrelevant hotplug events. * Ensure CPU hotplug handler is registered when memory hotplug is disabled. This patch (of 2): == tl;dr == Automatic demotion opted for a simple, lazy approach to handling hotplug events. This noticeably slows down memory hotplug[1]. Optimize away updates to the demotion order when memory hotplug events should have no effect. This has no effect on CPU hotplug. There is no known problem on the CPU side and any work there will be in a separate series. == Background == Automatic demotion is a memory migration strategy to ensure that new allocations have room in faster memory tiers on tiered memory systems. The kernel maintains an array (node_demotion[]) to drive these migrations. The node_demotion[] path is calculated by starting at nodes with CPUs and then "walking" to nodes with memory. Only hotplug events which online or offline a node with memory (N_ONLINE) or CPUs (N_CPU) will actually affect the migration order. == Problem == However, the current code is lazy. It completely regenerates the migration order on *any* CPU or memory hotplug event. The logic was that these events are extremely rare and that the overhead from indiscriminate order regeneration is minimal. Part of the update logic involves a synchronize_rcu(), which is a pretty big hammer. Its overhead was large enough to be detected by some 0day tests that watch memory hotplug performance[1]. == Solution == Add a new helper (node_demotion_topo_changed()) which can differentiate between superfluous and impactful hotplug events. Skip the expensive update operation for superfluous events. == Aside: Locking == It took me a few moments to declare the locking to be safe enough for node_demotion_topo_changed() to work. It all hinges on the memory hotplug lock: During memory hotplug events, 'mem_hotplug_lock' is held for write. This ensures that two memory hotplug events can not be called simultaneously. CPU hotplug has a similar lock (cpuhp_state_mutex) which also provides mutual exclusion between CPU hotplug events. In addition, the demotion code acquire and hold the mem_hotplug_lock for read during its CPU hotplug handlers. This provides mutual exclusion between the demotion memory hotplug callbacks and the CPU hotplug callbacks. This effectively allows treating the migration target generation code to act as if it is single-threaded. 1. https://lore.kernel.org/all/20210905135932.GE15026@xsang-OptiPlex-9020/ Link: https://lkml.kernel.org/r/20210924161251.093CCD06@davehans-spike.ostc.intel.com Link: https://lkml.kernel.org/r/20210924161253.D7673E31@davehans-spike.ostc.intel.com Fixes: 884a6e5d1f93 ("mm/migrate: update node demotion order on hotplug events") Signed-off-by: Dave Hansen Reported-by: kernel test robot Reviewed-by: David Hildenbrand Cc: "Huang, Ying" Cc: Michal Hocko Cc: Wei Xu Cc: Oscar Salvador Cc: David Rientjes Cc: Dan Williams Cc: Greg Thelen Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index a6a7743ee98f..a6311e46f605 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -3239,8 +3239,18 @@ static int migration_offline_cpu(unsigned int cpu) * set_migration_target_nodes(). */ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, - unsigned long action, void *arg) + unsigned long action, void *_arg) { + struct memory_notify *arg = _arg; + + /* + * Only update the node migration order when a node is + * changing status, like online->offline. This avoids + * the overhead of synchronize_rcu() in most cases. + */ + if (arg->status_change_nid < 0) + return notifier_from_errno(0); + switch (action) { case MEM_GOING_OFFLINE: /* -- cgit v1.2.3 From 76af6a054da4055305ddb28c5eb151b9ee4f74f9 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 18 Oct 2021 15:15:32 -0700 Subject: mm/migrate: add CPU hotplug to demotion #ifdef Once upon a time, the node demotion updates were driven solely by memory hotplug events. But now, there are handlers for both CPU and memory hotplug. However, the #ifdef around the code checks only memory hotplug. A system that has HOTPLUG_CPU=y but MEMORY_HOTPLUG=n would miss CPU hotplug events. Update the #ifdef around the common code. Add memory and CPU-specific #ifdefs for their handlers. These memory/CPU #ifdefs avoid unused function warnings when their Kconfig option is off. [arnd@arndb.de: rework hotplug_memory_notifier() stub] Link: https://lkml.kernel.org/r/20211013144029.2154629-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20210924161255.E5FE8F7E@davehans-spike.ostc.intel.com Fixes: 884a6e5d1f93 ("mm/migrate: update node demotion order on hotplug events") Signed-off-by: Dave Hansen Signed-off-by: Arnd Bergmann Cc: "Huang, Ying" Cc: Michal Hocko Cc: Wei Xu Cc: Oscar Salvador Cc: David Rientjes Cc: Dan Williams Cc: David Hildenbrand Cc: Greg Thelen Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 5 ++++- mm/migrate.c | 42 +++++++++++++++++++++--------------------- mm/page_ext.c | 4 +--- mm/slab.c | 4 ++-- 4 files changed, 28 insertions(+), 27 deletions(-) diff --git a/include/linux/memory.h b/include/linux/memory.h index 7efc0a7c14c9..182c606adb06 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -160,7 +160,10 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, #define register_hotmemory_notifier(nb) register_memory_notifier(nb) #define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) #else -#define hotplug_memory_notifier(fn, pri) ({ 0; }) +static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) +{ + return 0; +} /* These aren't inline functions due to a GCC bug. */ #define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) #define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) diff --git a/mm/migrate.c b/mm/migrate.c index a6311e46f605..700112c0123d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -3066,7 +3066,7 @@ void migrate_vma_finalize(struct migrate_vma *migrate) EXPORT_SYMBOL(migrate_vma_finalize); #endif /* CONFIG_DEVICE_PRIVATE */ -#if defined(CONFIG_MEMORY_HOTPLUG) +#if defined(CONFIG_HOTPLUG_CPU) /* Disable reclaim-based migration. */ static void __disable_all_migrate_targets(void) { @@ -3208,25 +3208,6 @@ static void set_migration_target_nodes(void) put_online_mems(); } -/* - * React to hotplug events that might affect the migration targets - * like events that online or offline NUMA nodes. - * - * The ordering is also currently dependent on which nodes have - * CPUs. That means we need CPU on/offline notification too. - */ -static int migration_online_cpu(unsigned int cpu) -{ - set_migration_target_nodes(); - return 0; -} - -static int migration_offline_cpu(unsigned int cpu) -{ - set_migration_target_nodes(); - return 0; -} - /* * This leaves migrate-on-reclaim transiently disabled between * the MEM_GOING_OFFLINE and MEM_OFFLINE events. This runs @@ -3284,6 +3265,25 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, return notifier_from_errno(0); } +/* + * React to hotplug events that might affect the migration targets + * like events that online or offline NUMA nodes. + * + * The ordering is also currently dependent on which nodes have + * CPUs. That means we need CPU on/offline notification too. + */ +static int migration_online_cpu(unsigned int cpu) +{ + set_migration_target_nodes(); + return 0; +} + +static int migration_offline_cpu(unsigned int cpu) +{ + set_migration_target_nodes(); + return 0; +} + static int __init migrate_on_reclaim_init(void) { int ret; @@ -3303,4 +3303,4 @@ static int __init migrate_on_reclaim_init(void) return 0; } late_initcall(migrate_on_reclaim_init); -#endif /* CONFIG_MEMORY_HOTPLUG */ +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/mm/page_ext.c b/mm/page_ext.c index dfb91653d359..2a52fd9ed464 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -269,7 +269,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid) total_usage += table_size; return 0; } -#ifdef CONFIG_MEMORY_HOTPLUG + static void free_page_ext(void *addr) { if (is_vmalloc_addr(addr)) { @@ -374,8 +374,6 @@ static int __meminit page_ext_callback(struct notifier_block *self, return notifier_from_errno(ret); } -#endif - void __init page_ext_init(void) { unsigned long pfn; diff --git a/mm/slab.c b/mm/slab.c index d0f725637663..874b3f8fe80d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1095,7 +1095,7 @@ static int slab_offline_cpu(unsigned int cpu) return 0; } -#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) +#if defined(CONFIG_NUMA) /* * Drains freelist for a node on each slab cache, used for memory hot-remove. * Returns -EBUSY if all objects cannot be drained so that the node is not @@ -1157,7 +1157,7 @@ static int __meminit slab_memory_callback(struct notifier_block *self, out: return notifier_from_errno(ret); } -#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */ +#endif /* CONFIG_NUMA */ /* * swap the static kmem_cache_node with kmalloced memory -- cgit v1.2.3 From a6a0251c6fce496744121b4e08c899f45270dbcc Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 18 Oct 2021 15:15:35 -0700 Subject: mm/migrate: fix CPUHP state to update node demotion order The node demotion order needs to be updated during CPU hotplug. Because whether a NUMA node has CPU may influence the demotion order. The update function should be called during CPU online/offline after the node_states[N_CPU] has been updated. That is done in CPUHP_AP_ONLINE_DYN during CPU online and in CPUHP_MM_VMSTAT_DEAD during CPU offline. But in commit 884a6e5d1f93 ("mm/migrate: update node demotion order on hotplug events"), the function to update node demotion order is called in CPUHP_AP_ONLINE_DYN during CPU online/offline. This doesn't satisfy the order requirement. For example, there are 4 CPUs (P0, P1, P2, P3) in 2 sockets (P0, P1 in S0 and P2, P3 in S1), the demotion order is - S0 -> NUMA_NO_NODE - S1 -> NUMA_NO_NODE After P2 and P3 is offlined, because S1 has no CPU now, the demotion order should have been changed to - S0 -> S1 - S1 -> NO_NODE but it isn't changed, because the order updating callback for CPU hotplug doesn't see the new nodemask. After that, if P1 is offlined, the demotion order is changed to the expected order as above. So in this patch, we added CPUHP_AP_MM_DEMOTION_ONLINE and CPUHP_MM_DEMOTION_DEAD to be called after CPUHP_AP_ONLINE_DYN and CPUHP_MM_VMSTAT_DEAD during CPU online and offline, and register the update function on them. Link: https://lkml.kernel.org/r/20210929060351.7293-1-ying.huang@intel.com Fixes: 884a6e5d1f93 ("mm/migrate: update node demotion order on hotplug events") Signed-off-by: "Huang, Ying" Cc: Thomas Gleixner Cc: Dave Hansen Cc: Yang Shi Cc: Zi Yan Cc: Michal Hocko Cc: Wei Xu Cc: Oscar Salvador Cc: David Rientjes Cc: Dan Williams Cc: David Hildenbrand Cc: Greg Thelen Cc: Keith Busch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuhotplug.h | 4 ++++ mm/migrate.c | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 832d8a74fa59..991911048857 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -72,6 +72,8 @@ enum cpuhp_state { CPUHP_SLUB_DEAD, CPUHP_DEBUG_OBJ_DEAD, CPUHP_MM_WRITEBACK_DEAD, + /* Must be after CPUHP_MM_VMSTAT_DEAD */ + CPUHP_MM_DEMOTION_DEAD, CPUHP_MM_VMSTAT_DEAD, CPUHP_SOFTIRQ_DEAD, CPUHP_NET_MVNETA_DEAD, @@ -240,6 +242,8 @@ enum cpuhp_state { CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, + /* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */ + CPUHP_AP_MM_DEMOTION_ONLINE, CPUHP_AP_X86_HPET_ONLINE, CPUHP_AP_X86_KVM_CLK_ONLINE, CPUHP_AP_DTPM_CPU_ONLINE, diff --git a/mm/migrate.c b/mm/migrate.c index 700112c0123d..1852d787e6ab 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -3288,9 +3288,8 @@ static int __init migrate_on_reclaim_init(void) { int ret; - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "migrate on reclaim", - migration_online_cpu, - migration_offline_cpu); + ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline", + NULL, migration_offline_cpu); /* * In the unlikely case that this fails, the automatic * migration targets may become suboptimal for nodes @@ -3298,6 +3297,9 @@ static int __init migrate_on_reclaim_init(void) * rare case, do not bother trying to do anything special. */ WARN_ON(ret < 0); + ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online", + migration_online_cpu, NULL); + WARN_ON(ret < 0); hotplug_memory_notifier(migrate_on_reclaim_callback, 100); return 0; -- cgit v1.2.3 From 5314454ea3ff6fc746eaf71b9a7ceebed52888fa Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 18 Oct 2021 15:15:39 -0700 Subject: ocfs2: fix data corruption after conversion from inline format Commit 6dbf7bb55598 ("fs: Don't invalidate page buffers in block_write_full_page()") uncovered a latent bug in ocfs2 conversion from inline inode format to a normal inode format. The code in ocfs2_convert_inline_data_to_extents() attempts to zero out the whole cluster allocated for file data by grabbing, zeroing, and dirtying all pages covering this cluster. However these pages are beyond i_size, thus writeback code generally ignores these dirty pages and no blocks were ever actually zeroed on the disk. This oversight was fixed by commit 693c241a5f6a ("ocfs2: No need to zero pages past i_size.") for standard ocfs2 write path, inline conversion path was apparently forgotten; the commit log also has a reasoning why the zeroing actually is not needed. After commit 6dbf7bb55598, things became worse as writeback code stopped invalidating buffers on pages beyond i_size and thus these pages end up with clean PageDirty bit but with buffers attached to these pages being still dirty. So when a file is converted from inline format, then writeback triggers, and then the file is grown so that these pages become valid, the invalid dirtiness state is preserved, mark_buffer_dirty() does nothing on these pages (buffers are already dirty) but page is never written back because it is clean. So data written to these pages is lost once pages are reclaimed. Simple reproducer for the problem is: xfs_io -f -c "pwrite 0 2000" -c "pwrite 2000 2000" -c "fsync" \ -c "pwrite 4000 2000" ocfs2_file After unmounting and mounting the fs again, you can observe that end of 'ocfs2_file' has lost its contents. Fix the problem by not doing the pointless zeroing during conversion from inline format similarly as in the standard write path. [akpm@linux-foundation.org: fix whitespace, per Joseph] Link: https://lkml.kernel.org/r/20210930095405.21433-1-jack@suse.cz Fixes: 6dbf7bb55598 ("fs: Don't invalidate page buffers in block_write_full_page()") Signed-off-by: Jan Kara Reviewed-by: Joseph Qi Tested-by: Joseph Qi Acked-by: Gang He Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: "Markov, Andrey" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 46 ++++++++++++---------------------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index f1cc8258d34a..5d9ae17bd443 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7045,7 +7045,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct buffer_head *di_bh) { - int ret, i, has_data, num_pages = 0; + int ret, has_data, num_pages = 0; int need_free = 0; u32 bit_off, num; handle_t *handle; @@ -7054,26 +7054,17 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_alloc_context *data_ac = NULL; - struct page **pages = NULL; - loff_t end = osb->s_clustersize; + struct page *page = NULL; struct ocfs2_extent_tree et; int did_quota = 0; has_data = i_size_read(inode) ? 1 : 0; if (has_data) { - pages = kcalloc(ocfs2_pages_per_cluster(osb->sb), - sizeof(struct page *), GFP_NOFS); - if (pages == NULL) { - ret = -ENOMEM; - mlog_errno(ret); - return ret; - } - ret = ocfs2_reserve_clusters(osb, 1, &data_ac); if (ret) { mlog_errno(ret); - goto free_pages; + goto out; } } @@ -7093,7 +7084,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } if (has_data) { - unsigned int page_end; + unsigned int page_end = min_t(unsigned, PAGE_SIZE, + osb->s_clustersize); u64 phys; ret = dquot_alloc_space_nodirty(inode, @@ -7117,15 +7109,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, */ block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); - /* - * Non sparse file systems zero on extend, so no need - * to do that now. - */ - if (!ocfs2_sparse_alloc(osb) && - PAGE_SIZE < osb->s_clustersize) - end = PAGE_SIZE; - - ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); + ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page, + &num_pages); if (ret) { mlog_errno(ret); need_free = 1; @@ -7136,20 +7121,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, * This should populate the 1st page for us and mark * it up to date. */ - ret = ocfs2_read_inline_data(inode, pages[0], di_bh); + ret = ocfs2_read_inline_data(inode, page, di_bh); if (ret) { mlog_errno(ret); need_free = 1; goto out_unlock; } - page_end = PAGE_SIZE; - if (PAGE_SIZE > osb->s_clustersize) - page_end = osb->s_clustersize; - - for (i = 0; i < num_pages; i++) - ocfs2_map_and_dirty_page(inode, handle, 0, page_end, - pages[i], i > 0, &phys); + ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0, + &phys); } spin_lock(&oi->ip_lock); @@ -7180,8 +7160,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } out_unlock: - if (pages) - ocfs2_unlock_and_free_pages(pages, num_pages); + if (page) + ocfs2_unlock_and_free_pages(&page, num_pages); out_commit: if (ret < 0 && did_quota) @@ -7205,8 +7185,6 @@ out_commit: out: if (data_ac) ocfs2_free_alloc_context(data_ac); -free_pages: - kfree(pages); return ret; } -- cgit v1.2.3 From b15fa9224e6e1239414525d8d556d824701849fc Mon Sep 17 00:00:00 2001 From: Valentin Vidic Date: Mon, 18 Oct 2021 15:15:42 -0700 Subject: ocfs2: mount fails with buffer overflow in strlen Starting with kernel 5.11 built with CONFIG_FORTIFY_SOURCE mouting an ocfs2 filesystem with either o2cb or pcmk cluster stack fails with the trace below. Problem seems to be that strings for cluster stack and cluster name are not guaranteed to be null terminated in the disk representation, while strlcpy assumes that the source string is always null terminated. This causes a read outside of the source string triggering the buffer overflow detection. detected buffer overflow in strlen ------------[ cut here ]------------ kernel BUG at lib/string.c:1149! invalid opcode: 0000 [#1] SMP PTI CPU: 1 PID: 910 Comm: mount.ocfs2 Not tainted 5.14.0-1-amd64 #1 Debian 5.14.6-2 RIP: 0010:fortify_panic+0xf/0x11 ... Call Trace: ocfs2_initialize_super.isra.0.cold+0xc/0x18 [ocfs2] ocfs2_fill_super+0x359/0x19b0 [ocfs2] mount_bdev+0x185/0x1b0 legacy_get_tree+0x27/0x40 vfs_get_tree+0x25/0xb0 path_mount+0x454/0xa20 __x64_sys_mount+0x103/0x140 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae Link: https://lkml.kernel.org/r/20210929180654.32460-1-vvidic@valentin-vidic.from.hr Signed-off-by: Valentin Vidic Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index c86bd4e60e20..5c914ce9b3ac 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2167,11 +2167,17 @@ static int ocfs2_initialize_super(struct super_block *sb, } if (ocfs2_clusterinfo_valid(osb)) { + /* + * ci_stack and ci_cluster in ocfs2_cluster_info may not be null + * terminated, so make sure no overflow happens here by using + * memcpy. Destination strings will always be null terminated + * because osb is allocated using kzalloc. + */ osb->osb_stackflags = OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; - strlcpy(osb->osb_cluster_stack, + memcpy(osb->osb_cluster_stack, OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, - OCFS2_STACK_LABEL_LEN + 1); + OCFS2_STACK_LABEL_LEN); if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { mlog(ML_ERROR, "couldn't mount because of an invalid " @@ -2180,9 +2186,9 @@ static int ocfs2_initialize_super(struct super_block *sb, status = -EINVAL; goto bail; } - strlcpy(osb->osb_cluster_name, + memcpy(osb->osb_cluster_name, OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster, - OCFS2_CLUSTER_NAME_LEN + 1); + OCFS2_CLUSTER_NAME_LEN); } else { /* The empty string is identical with classic tools that * don't know about s_cluster_info. */ -- cgit v1.2.3 From 5173ed72bcfcddda21ff274ee31c6472fa150f29 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 18 Oct 2021 15:15:45 -0700 Subject: memblock: check memory total_size mem=[X][G|M] is broken on ARM64 platform, there are cases that even type.cnt is 1, but total_size is not 0 because regions are merged into 1. So only check 'cnt' is not enough, total_size should be used, othersize bootargs 'mem=[X][G|B]' not work anymore. Link: https://lkml.kernel.org/r/20210930024437.32598-1-peng.fan@oss.nxp.com Fixes: e888fa7bb882 ("memblock: Check memory add/cap ordering") Signed-off-by: Peng Fan Reviewed-by: Mike Rapoport Cc: Geert Uytterhoeven Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index 5c3503c98b2f..b91df5cf54d3 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1692,7 +1692,7 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) if (!size) return; - if (memblock.memory.cnt <= 1) { + if (!memblock_memory->total_size) { pr_warn("%s: No memory registered yet\n", __func__); return; } -- cgit v1.2.3 From 6d2aec9e123bb9c49cb5c7fc654f25f81e688e8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Oct 2021 15:15:49 -0700 Subject: mm/mempolicy: do not allow illegal MPOL_F_NUMA_BALANCING | MPOL_LOCAL in mbind() syzbot reported access to unitialized memory in mbind() [1] Issue came with commit bda420b98505 ("numa balancing: migrate on fault among multiple bound nodes") This commit added a new bit in MPOL_MODE_FLAGS, but only checked valid combination (MPOL_F_NUMA_BALANCING can only be used with MPOL_BIND) in do_set_mempolicy() This patch moves the check in sanitize_mpol_flags() so that it is also used by mbind() [1] BUG: KMSAN: uninit-value in __mpol_equal+0x567/0x590 mm/mempolicy.c:2260 __mpol_equal+0x567/0x590 mm/mempolicy.c:2260 mpol_equal include/linux/mempolicy.h:105 [inline] vma_merge+0x4a1/0x1e60 mm/mmap.c:1190 mbind_range+0xcc8/0x1e80 mm/mempolicy.c:811 do_mbind+0xf42/0x15f0 mm/mempolicy.c:1333 kernel_mbind mm/mempolicy.c:1483 [inline] __do_sys_mbind mm/mempolicy.c:1490 [inline] __se_sys_mbind+0x437/0xb80 mm/mempolicy.c:1486 __x64_sys_mbind+0x19d/0x200 mm/mempolicy.c:1486 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was created at: slab_alloc_node mm/slub.c:3221 [inline] slab_alloc mm/slub.c:3230 [inline] kmem_cache_alloc+0x751/0xff0 mm/slub.c:3235 mpol_new mm/mempolicy.c:293 [inline] do_mbind+0x912/0x15f0 mm/mempolicy.c:1289 kernel_mbind mm/mempolicy.c:1483 [inline] __do_sys_mbind mm/mempolicy.c:1490 [inline] __se_sys_mbind+0x437/0xb80 mm/mempolicy.c:1486 __x64_sys_mbind+0x19d/0x200 mm/mempolicy.c:1486 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae ===================================================== Kernel panic - not syncing: panic_on_kmsan set ... CPU: 0 PID: 15049 Comm: syz-executor.0 Tainted: G B 5.15.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1ff/0x28e lib/dump_stack.c:106 dump_stack+0x25/0x28 lib/dump_stack.c:113 panic+0x44f/0xdeb kernel/panic.c:232 kmsan_report+0x2ee/0x300 mm/kmsan/report.c:186 __msan_warning+0xd7/0x150 mm/kmsan/instrumentation.c:208 __mpol_equal+0x567/0x590 mm/mempolicy.c:2260 mpol_equal include/linux/mempolicy.h:105 [inline] vma_merge+0x4a1/0x1e60 mm/mmap.c:1190 mbind_range+0xcc8/0x1e80 mm/mempolicy.c:811 do_mbind+0xf42/0x15f0 mm/mempolicy.c:1333 kernel_mbind mm/mempolicy.c:1483 [inline] __do_sys_mbind mm/mempolicy.c:1490 [inline] __se_sys_mbind+0x437/0xb80 mm/mempolicy.c:1486 __x64_sys_mbind+0x19d/0x200 mm/mempolicy.c:1486 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Link: https://lkml.kernel.org/r/20211001215630.810592-1-eric.dumazet@gmail.com Fixes: bda420b98505 ("numa balancing: migrate on fault among multiple bound nodes") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Mel Gorman Cc: "Huang, Ying" Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 1592b081c58e..d12e0608fced 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -856,16 +856,6 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, goto out; } - if (flags & MPOL_F_NUMA_BALANCING) { - if (new && new->mode == MPOL_BIND) { - new->flags |= (MPOL_F_MOF | MPOL_F_MORON); - } else { - ret = -EINVAL; - mpol_put(new); - goto out; - } - } - ret = mpol_set_nodemask(new, nodes, scratch); if (ret) { mpol_put(new); @@ -1458,7 +1448,11 @@ static inline int sanitize_mpol_flags(int *mode, unsigned short *flags) return -EINVAL; if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES)) return -EINVAL; - + if (*flags & MPOL_F_NUMA_BALANCING) { + if (*mode != MPOL_BIND) + return -EINVAL; + *flags |= (MPOL_F_MOF | MPOL_F_MORON); + } return 0; } -- cgit v1.2.3 From 2127d22509aec3a83dffb2a3c736df7ba747a7ce Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 18 Oct 2021 15:15:52 -0700 Subject: mm, slub: fix two bugs in slab_debug_trace_open() Patch series "Fixups for slub". This series contains various bug fixes for slub. We fix memoryleak, use-afer-free, NULL pointer dereferencing and so on in slub. More details can be found in the respective changelogs. This patch (of 5): It's possible that __seq_open_private() will return NULL. So we should check it before using lest dereferencing NULL pointer. And in error paths, we forgot to release private buffer via seq_release_private(). Memory will leak in these paths. Link: https://lkml.kernel.org/r/20210916123920.48704-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20210916123920.48704-2-linmiaohe@huawei.com Fixes: 64dd68497be7 ("mm: slub: move sysfs slab alloc/free interfaces to debugfs") Signed-off-by: Miaohe Lin Reviewed-by: Vlastimil Babka Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Greg Kroah-Hartman Cc: Faiyaz Mohammed Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Kees Cook Cc: Bharata B Rao Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 3d2025f7163b..ed160b6c54f8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6108,9 +6108,14 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep) struct kmem_cache *s = file_inode(filep)->i_private; unsigned long *obj_map; + if (!t) + return -ENOMEM; + obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL); - if (!obj_map) + if (!obj_map) { + seq_release_private(inode, filep); return -ENOMEM; + } if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0) alloc = TRACK_ALLOC; @@ -6119,6 +6124,7 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep) if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) { bitmap_free(obj_map); + seq_release_private(inode, filep); return -ENOMEM; } -- cgit v1.2.3 From 899447f669da76cc3605665e1a95ee877bc464cc Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 18 Oct 2021 15:15:55 -0700 Subject: mm, slub: fix mismatch between reconstructed freelist depth and cnt If object's reuse is delayed, it will be excluded from the reconstructed freelist. But we forgot to adjust the cnt accordingly. So there will be a mismatch between reconstructed freelist depth and cnt. This will lead to free_debug_processing() complaining about freelist count or a incorrect slub inuse count. Link: https://lkml.kernel.org/r/20210916123920.48704-3-linmiaohe@huawei.com Fixes: c3895391df38 ("kasan, slub: fix handling of kasan_slab_free hook") Signed-off-by: Miaohe Lin Reviewed-by: Vlastimil Babka Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Bharata B Rao Cc: Christoph Lameter Cc: David Rientjes Cc: Faiyaz Mohammed Cc: Greg Kroah-Hartman Cc: Joonsoo Kim Cc: Kees Cook Cc: Pekka Enberg Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index ed160b6c54f8..a56a6423d4e8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1701,7 +1701,8 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, } static inline bool slab_free_freelist_hook(struct kmem_cache *s, - void **head, void **tail) + void **head, void **tail, + int *cnt) { void *object; @@ -1728,6 +1729,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, *head = object; if (!*tail) *tail = object; + } else { + /* + * Adjust the reconstructed freelist depth + * accordingly if object's reuse is delayed. + */ + --(*cnt); } } while (object != old_tail); @@ -3480,7 +3487,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page, * With KASAN enabled slab_free_freelist_hook modifies the freelist * to remove objects, whose reuse must be delayed. */ - if (slab_free_freelist_hook(s, &head, &tail)) + if (slab_free_freelist_hook(s, &head, &tail, &cnt)) do_slab_free(s, page, head, tail, cnt, addr); } -- cgit v1.2.3 From 9037c57681d25e4dcc442d940d6dbe24dd31f461 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 18 Oct 2021 15:15:59 -0700 Subject: mm, slub: fix potential memoryleak in kmem_cache_open() In error path, the random_seq of slub cache might be leaked. Fix this by using __kmem_cache_release() to release all the relevant resources. Link: https://lkml.kernel.org/r/20210916123920.48704-4-linmiaohe@huawei.com Fixes: 210e7a43fa90 ("mm: SLUB freelist randomization") Signed-off-by: Miaohe Lin Reviewed-by: Vlastimil Babka Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Bharata B Rao Cc: Christoph Lameter Cc: David Rientjes Cc: Faiyaz Mohammed Cc: Greg Kroah-Hartman Cc: Joonsoo Kim Cc: Kees Cook Cc: Pekka Enberg Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index a56a6423d4e8..bf1793fb4ce5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4210,8 +4210,8 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) if (alloc_kmem_cache_cpus(s)) return 0; - free_kmem_cache_nodes(s); error: + __kmem_cache_release(s); return -EINVAL; } -- cgit v1.2.3 From 67823a544414def2a36c212abadb55b23bcda00c Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 18 Oct 2021 15:16:02 -0700 Subject: mm, slub: fix potential use-after-free in slab_debugfs_fops When sysfs_slab_add failed, we shouldn't call debugfs_slab_add() for s because s will be freed soon. And slab_debugfs_fops will use s later leading to a use-after-free. Link: https://lkml.kernel.org/r/20210916123920.48704-5-linmiaohe@huawei.com Fixes: 64dd68497be7 ("mm: slub: move sysfs slab alloc/free interfaces to debugfs") Signed-off-by: Miaohe Lin Reviewed-by: Vlastimil Babka Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Bharata B Rao Cc: Christoph Lameter Cc: David Rientjes Cc: Faiyaz Mohammed Cc: Greg Kroah-Hartman Cc: Joonsoo Kim Cc: Kees Cook Cc: Pekka Enberg Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index bf1793fb4ce5..f3df0f04a472 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4887,13 +4887,15 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags) return 0; err = sysfs_slab_add(s); - if (err) + if (err) { __kmem_cache_release(s); + return err; + } if (s->flags & SLAB_STORE_USER) debugfs_slab_add(s); - return err; + return 0; } void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) -- cgit v1.2.3 From 3ddd60268c24bcac9d744404cc277e9dc52fe6b6 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 18 Oct 2021 15:16:06 -0700 Subject: mm, slub: fix incorrect memcg slab count for bulk free kmem_cache_free_bulk() will call memcg_slab_free_hook() for all objects when doing bulk free. So we shouldn't call memcg_slab_free_hook() again for bulk free to avoid incorrect memcg slab count. Link: https://lkml.kernel.org/r/20210916123920.48704-6-linmiaohe@huawei.com Fixes: d1b2cf6cb84a ("mm: memcg/slab: uncharge during kmem_cache_free_bulk()") Signed-off-by: Miaohe Lin Reviewed-by: Vlastimil Babka Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Bharata B Rao Cc: Christoph Lameter Cc: David Rientjes Cc: Faiyaz Mohammed Cc: Greg Kroah-Hartman Cc: Joonsoo Kim Cc: Kees Cook Cc: Pekka Enberg Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index f3df0f04a472..d8f77346376d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3420,7 +3420,9 @@ static __always_inline void do_slab_free(struct kmem_cache *s, struct kmem_cache_cpu *c; unsigned long tid; - memcg_slab_free_hook(s, &head, 1); + /* memcg_slab_free_hook() is already called for bulk free. */ + if (!tail) + memcg_slab_free_hook(s, &head, 1); redo: /* * Determine the currently cpus per cpu slab. -- cgit v1.2.3 From b0e901280d9860a0a35055f220e8e457f300f40a Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 18 Oct 2021 15:16:09 -0700 Subject: elfcore: correct reference to CONFIG_UML Commit 6e7b64b9dd6d ("elfcore: fix building with clang") introduces special handling for two architectures, ia64 and User Mode Linux. However, the wrong name, i.e., CONFIG_UM, for the intended Kconfig symbol for User-Mode Linux was used. Although the directory for User Mode Linux is ./arch/um; the Kconfig symbol for this architecture is called CONFIG_UML. Luckily, ./scripts/checkkconfigsymbols.py warns on non-existing configs: UM Referencing files: include/linux/elfcore.h Similar symbols: UML, NUMA Correct the name of the config to the intended one. [akpm@linux-foundation.org: fix um/x86_64, per Catalin] Link: https://lkml.kernel.org/r/20211006181119.2851441-1-catalin.marinas@arm.com Link: https://lkml.kernel.org/r/YV6pejGzLy5ppEpt@arm.com Link: https://lkml.kernel.org/r/20211006082209.417-1-lukas.bulwahn@gmail.com Fixes: 6e7b64b9dd6d ("elfcore: fix building with clang") Signed-off-by: Lukas Bulwahn Cc: Arnd Bergmann Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Catalin Marinas Cc: Barret Rhoden Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elfcore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 2aaa15779d50..957ebec35aad 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } -#if defined(CONFIG_UM) || defined(CONFIG_IA64) +#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its -- cgit v1.2.3 From 032146cda85566abcd1c4884d9d23e4e30a07e9a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox (Oracle) Date: Mon, 18 Oct 2021 15:16:12 -0700 Subject: vfs: check fd has read access in kernel_read_file_from_fd() If we open a file without read access and then pass the fd to a syscall whose implementation calls kernel_read_file_from_fd(), we get a warning from __kernel_read(): if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) This currently affects both finit_module() and kexec_file_load(), but it could affect other syscalls in the future. Link: https://lkml.kernel.org/r/20211007220110.600005-1-willy@infradead.org Fixes: b844f0ecbc56 ("vfs: define kernel_copy_file_from_fd()") Signed-off-by: Matthew Wilcox (Oracle) Reported-by: Hao Sun Reviewed-by: Kees Cook Acked-by: Christian Brauner Cc: Al Viro Cc: Mimi Zohar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/kernel_read_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c index 87aac4c72c37..1b07550485b9 100644 --- a/fs/kernel_read_file.c +++ b/fs/kernel_read_file.c @@ -178,7 +178,7 @@ int kernel_read_file_from_fd(int fd, loff_t offset, void **buf, struct fd f = fdget(fd); int ret = -EBADF; - if (!f.file) + if (!f.file || !(f.file->f_mode & FMODE_READ)) goto out; ret = kernel_read_file(f.file, offset, buf, buf_size, file_size, id); -- cgit v1.2.3 From 79f9bc5843142b649575f887dccdf1c07ad75c20 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 18 Oct 2021 15:16:16 -0700 Subject: mm/secretmem: fix NULL page->mapping dereference in page_is_secretmem() Check for a NULL page->mapping before dereferencing the mapping in page_is_secretmem(), as the page's mapping can be nullified while gup() is running, e.g. by reclaim or truncation. BUG: kernel NULL pointer dereference, address: 0000000000000068 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 6 PID: 4173897 Comm: CPU 3/KVM Tainted: G W RIP: 0010:internal_get_user_pages_fast+0x621/0x9d0 Code: <48> 81 7a 68 80 08 04 bc 0f 85 21 ff ff 8 89 c7 be RSP: 0018:ffffaa90087679b0 EFLAGS: 00010046 RAX: ffffe3f37905b900 RBX: 00007f2dd561e000 RCX: ffffe3f37905b934 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffe3f37905b900 ... CR2: 0000000000000068 CR3: 00000004c5898003 CR4: 00000000001726e0 Call Trace: get_user_pages_fast_only+0x13/0x20 hva_to_pfn+0xa9/0x3e0 try_async_pf+0xa1/0x270 direct_page_fault+0x113/0xad0 kvm_mmu_page_fault+0x69/0x680 vmx_handle_exit+0xe1/0x5d0 kvm_arch_vcpu_ioctl_run+0xd81/0x1c70 kvm_vcpu_ioctl+0x267/0x670 __x64_sys_ioctl+0x83/0xa0 do_syscall_64+0x56/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Link: https://lkml.kernel.org/r/20211007231502.3552715-1-seanjc@google.com Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") Signed-off-by: Sean Christopherson Reported-by: Darrick J. Wong Reported-by: Stephen Tested-by: Darrick J. Wong Reviewed-by: David Hildenbrand Reviewed-by: Mike Rapoport Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/secretmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 21c3771e6a56..988528b5da43 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -23,7 +23,7 @@ static inline bool page_is_secretmem(struct page *page) mapping = (struct address_space *) ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS); - if (mapping != page->mapping) + if (!mapping || mapping != page->mapping) return false; return mapping->a_ops == &secretmem_aops; -- cgit v1.2.3 From 1ca7554d05ac038c98271f8968ed821266ecaa9c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 18 Oct 2021 15:16:19 -0700 Subject: mm/thp: decrease nr_thps in file's mapping on THP split Decrease nr_thps counter in file's mapping to ensure that the page cache won't be dropped excessively on file write access if page has been already split. I've tried a test scenario running a big binary, kernel remaps it with THPs, then force a THP split with /sys/kernel/debug/split_huge_pages. During any further open of that binary with O_RDWR or O_WRITEONLY kernel drops page cache for it, because of non-zero thps counter. Link: https://lkml.kernel.org/r/20211012120237.2600-1-m.szyprowski@samsung.com Signed-off-by: Marek Szyprowski Fixes: 09d91cda0e82 ("mm,thp: avoid writes to file with THP in pagecache") Fixes: 06d3eff62d9d ("mm/thp: fix node page state in split_huge_page_to_list()") Acked-by: Matthew Wilcox (Oracle) Reviewed-by: Yang Shi Cc: Cc: Song Liu Cc: Rik van Riel Cc: "Kirill A . Shutemov" Cc: Johannes Weiner Cc: Hillf Danton Cc: Hugh Dickins Cc: William Kucharski Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5e9ef0fc261e..92192cb086c7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2700,12 +2700,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) if (mapping) { int nr = thp_nr_pages(head); - if (PageSwapBacked(head)) + if (PageSwapBacked(head)) { __mod_lruvec_page_state(head, NR_SHMEM_THPS, -nr); - else + } else { __mod_lruvec_page_state(head, NR_FILE_THPS, -nr); + filemap_nr_thps_dec(mapping); + } } __split_huge_page(page, list, end); -- cgit v1.2.3 From 362d5dfc483c7786bcfff27ff48e7b6f493f7c5a Mon Sep 17 00:00:00 2001 From: Andrej Shadura Date: Mon, 18 Oct 2021 15:16:22 -0700 Subject: mailmap: add Andrej Shadura Add a mapping for my old work email for BelDisplayTech to the personal email, and make sure the Collabora email has the correct spelling of the first name. Link: https://lkml.kernel.org/r/20210917091016.30232-1-andrew.shadura@collabora.co.uk Signed-off-by: Andrej Shadura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 6e849110cb4e..90e614d2bf7e 100644 --- a/.mailmap +++ b/.mailmap @@ -33,6 +33,8 @@ Al Viro Andi Kleen Andi Shyti Andreas Herrmann +Andrej Shadura +Andrej Shadura Andrew Morton Andrew Murray Andrew Murray -- cgit v1.2.3 From d674a8f123b4096d85955c7eaabec688f29724c9 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 7 May 2021 11:18:39 +0200 Subject: can: isotp: isotp_sendmsg(): fix return error on FC timeout on TX path When the a large chunk of data send and the receiver does not send a Flow Control frame back in time, the sendmsg() does not return a error code, but the number of bytes sent corresponding to the size of the packet. If a timeout occurs the isotp_tx_timer_handler() is fired, sets sk->sk_err and calls the sk->sk_error_report() function. It was wrongly expected that the error would be propagated to user space in every case. For isotp_sendmsg() blocking on wait_event_interruptible() this is not the case. This patch fixes the problem by checking if sk->sk_err is set and returning the error to user space. Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Link: https://github.com/hartkopp/can-isotp/issues/42 Link: https://github.com/hartkopp/can-isotp/pull/43 Link: https://lore.kernel.org/all/20210507091839.1366379-1-mkl@pengutronix.de Cc: stable@vger.kernel.org Reported-by: Sottas Guillaume (LMB) Tested-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/can/isotp.c b/net/can/isotp.c index d1f54273c0bb..df6968b28bf4 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -971,6 +971,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (wait_tx_done) { /* wait for complete transmission of current pdu */ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + + if (sk->sk_err) + return -sk->sk_err; } return size; -- cgit v1.2.3 From 98d0a6fb7303a6f4a120b8b8ed05b86ff5db53e8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Sep 2021 08:33:13 -0400 Subject: ceph: skip existing superblocks that are blocklisted or shut down when mounting Currently when mounting, we may end up finding an existing superblock that corresponds to a blocklisted MDS client. This means that the new mount ends up being unusable. If we've found an existing superblock with a client that is already blocklisted, and the client is not configured to recover on its own, fail the match. Ditto if the superblock has been forcibly unmounted. While we're in here, also rename "other" to the more conventional "fsc". Cc: stable@vger.kernel.org URL: https://bugzilla.redhat.com/show_bug.cgi?id=1901499 Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9b1b7f4cfdd4..fd8742bae847 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1002,16 +1002,16 @@ static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) struct ceph_fs_client *new = fc->s_fs_info; struct ceph_mount_options *fsopt = new->mount_options; struct ceph_options *opt = new->client->options; - struct ceph_fs_client *other = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); dout("ceph_compare_super %p\n", sb); - if (compare_mount_options(fsopt, opt, other)) { + if (compare_mount_options(fsopt, opt, fsc)) { dout("monitor(s)/mount options don't match\n"); return 0; } if ((opt->flags & CEPH_OPT_FSID) && - ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { + ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { dout("fsid doesn't match\n"); return 0; } @@ -1019,6 +1019,17 @@ static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) dout("flags differ\n"); return 0; } + + if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { + dout("client is blocklisted (and CLEANRECOVER is not set)\n"); + return 0; + } + + if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { + dout("client has been forcibly unmounted\n"); + return 0; + } + return 1; } -- cgit v1.2.3 From 1bd85aa65d0e7b5e4d09240f492f37c569fdd431 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Oct 2021 14:19:49 -0400 Subject: ceph: fix handling of "meta" errors Currently, we check the wb_err too early for directories, before all of the unsafe child requests have been waited on. In order to fix that we need to check the mapping->wb_err later nearer to the end of ceph_fsync. We also have an overly-complex method for tracking errors after blocklisting. The errors recorded in cleanup_session_requests go to a completely separate field in the inode, but we end up reporting them the same way we would for any other error (in fsync). There's no real benefit to tracking these errors in two different places, since the only reporting mechanism for them is in fsync, and we'd need to advance them both every time. Given that, we can just remove i_meta_err, and convert the places that used it to instead just use mapping->wb_err instead. That also fixes the original problem by ensuring that we do a check_and_advance of the wb_err at the end of the fsync op. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/52864 Reported-by: Patrick Donnelly Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 12 +++--------- fs/ceph/file.c | 1 - fs/ceph/inode.c | 2 -- fs/ceph/mds_client.c | 17 +++++------------ fs/ceph/super.h | 3 --- 5 files changed, 8 insertions(+), 27 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 3e42d0466521..8f537f1d9d1d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2330,7 +2330,6 @@ retry: int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct ceph_file_info *fi = file->private_data; struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); u64 flush_tid; @@ -2365,14 +2364,9 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (err < 0) ret = err; - if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err))) { - spin_lock(&file->f_lock); - err = errseq_check_and_advance(&ci->i_meta_err, - &fi->meta_err); - spin_unlock(&file->f_lock); - if (err < 0) - ret = err; - } + err = file_check_and_advance_wb_err(file); + if (err < 0) + ret = err; out: dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); return ret; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d16fd2d5fd42..e61018d9764e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -233,7 +233,6 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, spin_lock_init(&fi->rw_contexts_lock); INIT_LIST_HEAD(&fi->rw_contexts); - fi->meta_err = errseq_sample(&ci->i_meta_err); fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); return 0; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2df1e1284451..1c7574105478 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -541,8 +541,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ceph_fscache_inode_init(ci); - ci->i_meta_err = 0; - return &ci->vfs_inode; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 7cad180d6deb..d64413adc0fd 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1493,7 +1493,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, { struct ceph_mds_request *req; struct rb_node *p; - struct ceph_inode_info *ci; dout("cleanup_session_requests mds%d\n", session->s_mds); mutex_lock(&mdsc->mutex); @@ -1502,16 +1501,10 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, struct ceph_mds_request, r_unsafe_item); pr_warn_ratelimited(" dropping unsafe request %llu\n", req->r_tid); - if (req->r_target_inode) { - /* dropping unsafe change of inode's attributes */ - ci = ceph_inode(req->r_target_inode); - errseq_set(&ci->i_meta_err, -EIO); - } - if (req->r_unsafe_dir) { - /* dropping unsafe directory operation */ - ci = ceph_inode(req->r_unsafe_dir); - errseq_set(&ci->i_meta_err, -EIO); - } + if (req->r_target_inode) + mapping_set_error(req->r_target_inode->i_mapping, -EIO); + if (req->r_unsafe_dir) + mapping_set_error(req->r_unsafe_dir->i_mapping, -EIO); __unregister_request(mdsc, req); } /* zero r_attempts, so kick_requests() will re-send requests */ @@ -1678,7 +1671,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, spin_unlock(&mdsc->cap_dirty_lock); if (dirty_dropped) { - errseq_set(&ci->i_meta_err, -EIO); + mapping_set_error(inode->i_mapping, -EIO); if (ci->i_wrbuffer_ref_head == 0 && ci->i_wr_ref == 0 && diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a40eb14c282a..14f951cd5b61 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -429,8 +429,6 @@ struct ceph_inode_info { #ifdef CONFIG_CEPH_FSCACHE struct fscache_cookie *fscache; #endif - errseq_t i_meta_err; - struct inode vfs_inode; /* at end */ }; @@ -774,7 +772,6 @@ struct ceph_file_info { spinlock_t rw_contexts_lock; struct list_head rw_contexts; - errseq_t meta_err; u32 filp_gen; atomic_t num_locks; }; -- cgit v1.2.3 From 59be177a909ac320e5f4b2a461ac09e20f35b2d8 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 12 Oct 2021 18:00:46 -0700 Subject: drm/i915: Remove memory frequency calculation This memory frequency calculated is only used to check if it is zero, what is not useful as it will never actually be zero. Also the calculation is wrong, we should be checking other bit to select the appropriate frequency multiplier while this code is stuck with a fixed multiplier. So here dropping it as whole. v2: - Also remove memory frequency calculation for gen9 LP platforms Cc: Yakui Zhao Cc: Matt Roper Fixes: 5d0c938ec9cc ("drm/i915/gen11+: Only load DRAM information from pcode") Signed-off-by: José Roberto de Souza Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20211013010046.91858-1-jose.souza@intel.com (cherry picked from commit 83f52364b15265aec47d07e02b0fbf4093ab8554) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 8 -------- drivers/gpu/drm/i915/intel_dram.c | 30 ++---------------------------- 2 files changed, 2 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4037030f0984..9023d4ecf3b3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -11048,12 +11048,6 @@ enum skl_power_gate { #define DC_STATE_DEBUG_MASK_CORES (1 << 0) #define DC_STATE_DEBUG_MASK_MEMORY_UP (1 << 1) -#define BXT_P_CR_MC_BIOS_REQ_0_0_0 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7114) -#define BXT_REQ_DATA_MASK 0x3F -#define BXT_DRAM_CHANNEL_ACTIVE_SHIFT 12 -#define BXT_DRAM_CHANNEL_ACTIVE_MASK (0xF << 12) -#define BXT_MEMORY_FREQ_MULTIPLIER_HZ 133333333 - #define BXT_D_CR_DRP0_DUNIT8 0x1000 #define BXT_D_CR_DRP0_DUNIT9 0x1200 #define BXT_D_CR_DRP0_DUNIT_START 8 @@ -11084,9 +11078,7 @@ enum skl_power_gate { #define BXT_DRAM_TYPE_LPDDR4 (0x2 << 22) #define BXT_DRAM_TYPE_DDR4 (0x4 << 22) -#define SKL_MEMORY_FREQ_MULTIPLIER_HZ 266666666 #define SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5E04) -#define SKL_REQ_DATA_MASK (0xF << 0) #define DG1_GEAR_TYPE REG_BIT(16) #define SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5000) diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 91866520c173..7acce64b0941 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -244,7 +244,6 @@ static int skl_get_dram_info(struct drm_i915_private *i915) { struct dram_info *dram_info = &i915->dram_info; - u32 mem_freq_khz, val; int ret; dram_info->type = skl_get_dram_type(i915); @@ -255,17 +254,6 @@ skl_get_dram_info(struct drm_i915_private *i915) if (ret) return ret; - val = intel_uncore_read(&i915->uncore, - SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU); - mem_freq_khz = DIV_ROUND_UP((val & SKL_REQ_DATA_MASK) * - SKL_MEMORY_FREQ_MULTIPLIER_HZ, 1000); - - if (dram_info->num_channels * mem_freq_khz == 0) { - drm_info(&i915->drm, - "Couldn't get system memory bandwidth\n"); - return -EINVAL; - } - return 0; } @@ -350,24 +338,10 @@ static void bxt_get_dimm_info(struct dram_dimm_info *dimm, u32 val) static int bxt_get_dram_info(struct drm_i915_private *i915) { struct dram_info *dram_info = &i915->dram_info; - u32 dram_channels; - u32 mem_freq_khz, val; - u8 num_active_channels, valid_ranks = 0; + u32 val; + u8 valid_ranks = 0; int i; - val = intel_uncore_read(&i915->uncore, BXT_P_CR_MC_BIOS_REQ_0_0_0); - mem_freq_khz = DIV_ROUND_UP((val & BXT_REQ_DATA_MASK) * - BXT_MEMORY_FREQ_MULTIPLIER_HZ, 1000); - - dram_channels = val & BXT_DRAM_CHANNEL_ACTIVE_MASK; - num_active_channels = hweight32(dram_channels); - - if (mem_freq_khz * num_active_channels == 0) { - drm_info(&i915->drm, - "Couldn't get system memory bandwidth\n"); - return -EINVAL; - } - /* * Now read each DUNIT8/9/10/11 to check the rank of each dimms. */ -- cgit v1.2.3 From 9d417cbe36eee7afdd85c2e871685f8dab7c2dba Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 8 Sep 2021 19:25:59 +0100 Subject: ARM: 9122/1: select HAVE_FUTEX_CMPXCHG tglx notes: This function [futex_detect_cmpxchg] is only needed when an architecture has to runtime discover whether the CPU supports it or not. ARM has unconditional support for this, so the obvious thing to do is the below. Fixes linkage failure from Clang randconfigs: kernel/futex.o:(.text.fixup+0x5c): relocation truncated to fit: R_ARM_JUMP24 against `.init.text' and boot failures for CONFIG_THUMB2_KERNEL. Link: https://github.com/ClangBuiltLinux/linux/issues/325 Comments from Nick Desaulniers: See-also: 03b8c7b623c8 ("futex: Allow architectures to skip futex_atomic_cmpxchg_inatomic() test") Reported-by: Arnd Bergmann Reported-by: Nathan Chancellor Suggested-by: Thomas Gleixner Signed-off-by: Nick Desaulniers Reviewed-by: Thomas Gleixner Tested-by: Nathan Chancellor Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # v3.14+ Reviewed-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fc196421b2ce..b760dd45b734 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -92,6 +92,7 @@ config ARM select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_IRQ_TIME_ACCOUNTING -- cgit v1.2.3 From 00d43d13da6c3424101aebabefab9e97e6318bee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Sep 2021 15:31:23 +0100 Subject: ARM: 9125/1: fix incorrect use of get_kernel_nofault() Commit 344179fc7ef4 ("ARM: 9106/1: traps: use get_kernel_nofault instead of set_fs()") replaced an occurrence of __get_user() with get_kernel_nofault(), but inverted the sense of the conditional in the process, resulting in no values to be printed at all. I.e., every exception stack now looks like this: Exception stack(0xc18d1fb0 to 0xc18d1ff8) 1fa0: ???????? ???????? ???????? ???????? 1fc0: ???????? ???????? ???????? ???????? ???????? ???????? ???????? ???????? 1fe0: ???????? ???????? ???????? ???????? ???????? ???????? which is rather unhelpful. Fixes: 344179fc7ef4 ("ARM: 9106/1: traps: use get_kernel_nofault instead of set_fs()") Signed-off-by: Ard Biesheuvel Reviewed-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 4a7edc6e848f..195dff58bafc 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -136,7 +136,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, for (p = first, i = 0; i < 8 && p < top; i++, p += 4) { if (p >= bottom && p < top) { unsigned long val; - if (get_kernel_nofault(val, (unsigned long *)p)) + if (!get_kernel_nofault(val, (unsigned long *)p)) sprintf(str + i * 9, " %08lx", val); else sprintf(str + i * 9, " ????????"); -- cgit v1.2.3 From df909df0770779f1a5560c2bb641a2809655ef28 Mon Sep 17 00:00:00 2001 From: Lexi Shao Date: Thu, 23 Sep 2021 03:41:25 +0100 Subject: ARM: 9132/1: Fix __get_user_check failure with ARM KASAN images ARM: kasan: Fix __get_user_check failure with kasan In macro __get_user_check defined in arch/arm/include/asm/uaccess.h, error code is store in register int __e(r0). When kasan is enabled, assigning value to kernel address might trigger kasan check, which unexpectedly overwrites r0 and causes undefined behavior on arm kasan images. One example is failure in do_futex and results in process soft lockup. Log: watchdog: BUG: soft lockup - CPU#0 stuck for 62946ms! [rs:main Q:Reg:1151] ... (__asan_store4) from (futex_wait_setup+0xf8/0x2b4) (futex_wait_setup) from (futex_wait+0x138/0x394) (futex_wait) from (do_futex+0x164/0xe40) (do_futex) from (sys_futex_time32+0x178/0x230) (sys_futex_time32) from (ret_fast_syscall+0x0/0x50) The soft lockup happens in function futex_wait_setup. The reason is function get_futex_value_locked always return EINVAL, thus pc jump back to retry label and causes looping. This line in function get_futex_value_locked ret = __get_user(*dest, from); is expanded to *dest = (typeof(*(p))) __r2; , in macro __get_user_check. Writing to pointer dest triggers kasan check and overwrites the return value of __get_user_x function. The assembly code of get_futex_value_locked in kernel/futex.c: ... c01f6dc8: eb0b020e bl c04b7608 <__get_user_4> // "x = (typeof(*(p))) __r2;" triggers kasan check and r0 is overwritten c01f6dCc: e1a00007 mov r0, r7 c01f6dd0: e1a05002 mov r5, r2 c01f6dd4: eb04f1e6 bl c0333574 <__asan_store4> c01f6dd8: e5875000 str r5, [r7] // save ret value of __get_user(*dest, from), which is dest address now c01f6ddc: e1a05000 mov r5, r0 ... // checking return value of __get_user failed c01f6e00: e3550000 cmp r5, #0 ... c01f6e0c: 01a00005 moveq r0, r5 // assign return value to EINVAL c01f6e10: 13e0000d mvnne r0, #13 Return value is the destination address of get_user thus certainly non-zero, so get_futex_value_locked always return EINVAL. Fix it by using a tmp vairable to store the error code before the assignment. This fix has no effects to non-kasan images thanks to compiler optimization. It only affects cases that overwrite r0 due to kasan check. This should fix bug discussed in Link: [1] https://lore.kernel.org/linux-arm-kernel/0ef7c2a5-5d8b-c5e0-63fa-31693fd4495c@gmail.com/ Fixes: 421015713b30 ("ARM: 9017/2: Enable KASan for ARM") Signed-off-by: Lexi Shao Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/uaccess.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 084d1c07c2d0..36fbc3329252 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -176,6 +176,7 @@ extern int __get_user_64t_4(void *); register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ unsigned int __ua_flags = uaccess_save_and_enable(); \ + int __tmp_e; \ switch (sizeof(*(__p))) { \ case 1: \ if (sizeof((x)) >= 8) \ @@ -203,9 +204,10 @@ extern int __get_user_64t_4(void *); break; \ default: __e = __get_user_bad(); break; \ } \ + __tmp_e = __e; \ uaccess_restore(__ua_flags); \ x = (typeof(*(p))) __r2; \ - __e; \ + __tmp_e; \ }) #define get_user(x, p) \ -- cgit v1.2.3 From e6a0c958bdf9b2e1b57501fc9433a461f0a6aadd Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 4 Oct 2021 18:03:28 +0100 Subject: ARM: 9133/1: mm: proc-macros: ensure *_tlb_fns are 4B aligned A kernel built with CONFIG_THUMB2_KERNEL=y and using clang as the assembler could generate non-naturally-aligned v7wbi_tlb_fns which results in a boot failure. The original commit adding the macro missed the .align directive on this data. Link: https://github.com/ClangBuiltLinux/linux/issues/1447 Link: https://lore.kernel.org/all/0699da7b-354f-aecc-a62f-e25693209af4@linaro.org/ Debugged-by: Ard Biesheuvel Debugged-by: Nathan Chancellor Debugged-by: Richard Henderson Fixes: 66a625a88174 ("ARM: mm: proc-macros: Add generic proc/cache/tlb struct definition macros") Suggested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Signed-off-by: Nick Desaulniers Tested-by: Nathan Chancellor Signed-off-by: Russell King (Oracle) --- arch/arm/mm/proc-macros.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index e2c743aa2eb2..d9f7dfe2a7ed 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -340,6 +340,7 @@ ENTRY(\name\()_cache_fns) .macro define_tlb_functions name:req, flags_up:req, flags_smp .type \name\()_tlb_fns, #object + .align 2 ENTRY(\name\()_tlb_fns) .long \name\()_flush_user_tlb_range .long \name\()_flush_kern_tlb_range -- cgit v1.2.3 From eaf6cc7165c9c5aa3c2f9faa03a98598123d0afb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Oct 2021 15:30:04 +0100 Subject: ARM: 9134/1: remove duplicate memcpy() definition Both the decompressor code and the kasan logic try to override the memcpy() and memmove() definitions, which leading to a clash in a KASAN-enabled kernel with XZ decompression: arch/arm/boot/compressed/decompress.c:50:9: error: 'memmove' macro redefined [-Werror,-Wmacro-redefined] #define memmove memmove ^ arch/arm/include/asm/string.h:59:9: note: previous definition is here #define memmove(dst, src, len) __memmove(dst, src, len) ^ arch/arm/boot/compressed/decompress.c:51:9: error: 'memcpy' macro redefined [-Werror,-Wmacro-redefined] #define memcpy memcpy ^ arch/arm/include/asm/string.h:58:9: note: previous definition is here #define memcpy(dst, src, len) __memcpy(dst, src, len) ^ Here we want the set of functions from the decompressor, so undefine the other macros before the override. Link: https://lore.kernel.org/linux-arm-kernel/CACRpkdZYJogU_SN3H9oeVq=zJkRgRT1gDz3xp59gdqWXxw-B=w@mail.gmail.com/ Link: https://lore.kernel.org/lkml/202105091112.F5rmd4By-lkp@intel.com/ Fixes: d6d51a96c7d6 ("ARM: 9014/2: Replace string mem* functions for KASan") Fixes: a7f464f3db93 ("ARM: 7001/2: Wire up support for the XZ decompressor") Reported-by: kernel test robot Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/boot/compressed/decompress.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index aa075d8372ea..74255e819831 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -47,7 +47,10 @@ extern char * strchrnul(const char *, int); #endif #ifdef CONFIG_KERNEL_XZ +/* Prevent KASAN override of string helpers in decompressor */ +#undef memmove #define memmove memmove +#undef memcpy #define memcpy memcpy #include "../../../../lib/decompress_unxz.c" #endif -- cgit v1.2.3 From 44cc6412e66b2b84544eaf2e14cf1764301e2a80 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Oct 2021 15:30:08 +0100 Subject: ARM: 9138/1: fix link warning with XIP + frame-pointer When frame pointers are used instead of the ARM unwinder, and the kernel is built using clang with an external assembler and CONFIG_XIP_KERNEL, every file produces two warnings like: arm-linux-gnueabi-ld: warning: orphan section `.ARM.extab' from `net/mac802154/util.o' being placed in section `.ARM.extab' arm-linux-gnueabi-ld: warning: orphan section `.ARM.exidx' from `net/mac802154/util.o' being placed in section `.ARM.exidx' The same fix was already merged for the normal (non-XIP) linker script, with a longer description. Fixes: c39866f268f8 ("arm/build: Always handle .ARM.exidx and .ARM.extab sections") Reviewed-by: Kees Cook Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/vmlinux-xip.lds.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 50136828f5b5..e0c00986487f 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -40,6 +40,10 @@ SECTIONS ARM_DISCARD *(.alt.smp.init) *(.pv_table) +#ifndef CONFIG_ARM_UNWIND + *(.ARM.exidx) *(.ARM.exidx.*) + *(.ARM.extab) *(.ARM.extab.*) +#endif } . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); -- cgit v1.2.3 From 1f323127cab086e4fd618981b1e5edc396eaf0f4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Oct 2021 15:30:09 +0100 Subject: ARM: 9139/1: kprobes: fix arch_init_kprobes() prototype With extra warnings enabled, gcc complains about this function definition: arch/arm/probes/kprobes/core.c: In function 'arch_init_kprobes': arch/arm/probes/kprobes/core.c:465:12: warning: old-style function definition [-Wold-style-definition] 465 | int __init arch_init_kprobes() Link: https://lore.kernel.org/all/20201027093057.c685a14b386acacb3c449e3d@kernel.org/ Fixes: 24ba613c9d6c ("ARM kprobes: core code") Acked-by: Masami Hiramatsu Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/probes/kprobes/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 27e0af78e88b..9d8634e2f12f 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -439,7 +439,7 @@ static struct undef_hook kprobes_arm_break_hook = { #endif /* !CONFIG_THUMB2_KERNEL */ -int __init arch_init_kprobes() +int __init arch_init_kprobes(void) { arm_probes_decode_init(); #ifdef CONFIG_THUMB2_KERNEL -- cgit v1.2.3 From 48ccc8edf5b90622cdc4f8878e0042ab5883e2ca Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Oct 2021 15:30:37 +0100 Subject: ARM: 9141/1: only warn about XIP address when not compile testing In randconfig builds, we sometimes come across this warning: arm-linux-gnueabi-ld: XIP start address may cause MPU programming issues While this is helpful for actual systems to figure out why it fails, the warning does not provide any benefit for build testing, so guard it in a check for CONFIG_COMPILE_TEST, which is usually set on randconfig builds. Fixes: 216218308cfb ("ARM: 8713/1: NOMMU: Support MPU in XIP configuration") Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/vmlinux-xip.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index e0c00986487f..f14c2360ea0b 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -176,7 +176,7 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") #endif -#ifdef CONFIG_ARM_MPU +#if defined(CONFIG_ARM_MPU) && !defined(CONFIG_COMPILE_TEST) /* * Due to PMSAv7 restriction on base address and size we have to * enforce minimal alignment restrictions. It was seen that weaker -- cgit v1.2.3 From 9af372dc70e9fdcbb70939dac75365e7b88580b4 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Fri, 15 Oct 2021 10:00:36 +0800 Subject: mmc: sdhci-esdhc-imx: clear the buffer_read_ready to reset standard tuning circuit To reset standard tuning circuit completely, after clear ESDHC_MIX_CTRL_EXE_TUNE, also need to clear bit buffer_read_ready, this operation will finally clear the USDHC IP internal logic flag execute_tuning_with_clr_buf, make sure the following normal data transfer will not be impacted by standard tuning logic used before. Find this issue when do quick SD card insert/remove stress test. During standard tuning prodedure, if remove SD card, USDHC standard tuning logic can't clear the internal flag execute_tuning_with_clr_buf. Next time when insert SD card, all data related commands can't get any data related interrupts, include data transfer complete interrupt, data timeout interrupt, data CRC interrupt, data end bit interrupt. Always trigger software timeout issue. Even reset the USDHC through bits in register SYS_CTRL (0x2C, bit28 reset tuning, bit26 reset data, bit 25 reset command, bit 24 reset all) can't recover this. From the user's point of view, USDHC stuck, SD can't be recognized any more. Fixes: d9370424c948 ("mmc: sdhci-esdhc-imx: reset tuning circuit when power on mmc card") Signed-off-by: Haibo Chen Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1634263236-6111-1-git-send-email-haibo.chen@nxp.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index f18d169bc8ff..e658f0174242 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1187,6 +1187,7 @@ static void esdhc_reset_tuning(struct sdhci_host *host) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host); u32 ctrl; + int ret; /* Reset the tuning circuit */ if (esdhc_is_usdhc(imx_data)) { @@ -1199,7 +1200,22 @@ static void esdhc_reset_tuning(struct sdhci_host *host) } else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) { ctrl = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS); ctrl &= ~ESDHC_MIX_CTRL_SMPCLK_SEL; + ctrl &= ~ESDHC_MIX_CTRL_EXE_TUNE; writel(ctrl, host->ioaddr + SDHCI_AUTO_CMD_STATUS); + /* Make sure ESDHC_MIX_CTRL_EXE_TUNE cleared */ + ret = readl_poll_timeout(host->ioaddr + SDHCI_AUTO_CMD_STATUS, + ctrl, !(ctrl & ESDHC_MIX_CTRL_EXE_TUNE), 1, 50); + if (ret == -ETIMEDOUT) + dev_warn(mmc_dev(host->mmc), + "Warning! clear execute tuning bit failed\n"); + /* + * SDHCI_INT_DATA_AVAIL is W1C bit, set this bit will clear the + * usdhc IP internal logic flag execute_tuning_with_clr_buf, which + * will finally make sure the normal data transfer logic correct. + */ + ctrl = readl(host->ioaddr + SDHCI_INT_STATUS); + ctrl |= SDHCI_INT_DATA_AVAIL; + writel(ctrl, host->ioaddr + SDHCI_INT_STATUS); } } } -- cgit v1.2.3 From 162079f2dccd02cb4b6654defd32ca387dd6d4d4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 17 Oct 2021 10:59:49 -0700 Subject: mmc: winbond: don't build on M68K The Winbond MMC driver fails to build on ARCH=m68k so prevent that build config. Silences these build errors: ../drivers/mmc/host/wbsd.c: In function 'wbsd_request_end': ../drivers/mmc/host/wbsd.c:212:28: error: implicit declaration of function 'claim_dma_lock' [-Werror=implicit-function-declaration] 212 | dmaflags = claim_dma_lock(); ../drivers/mmc/host/wbsd.c:215:17: error: implicit declaration of function 'release_dma_lock'; did you mean 'release_task'? [-Werror=implicit-function-declaration] 215 | release_dma_lock(dmaflags); Signed-off-by: Randy Dunlap Cc: Pierre Ossman Cc: Geert Uytterhoeven Cc: Arnd Bergmann Link: https://lore.kernel.org/r/20211017175949.23838-1-rdunlap@infradead.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 95b3511b0560..ccc148cdb5ee 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -506,7 +506,7 @@ config MMC_OMAP_HS config MMC_WBSD tristate "Winbond W83L51xD SD/MMC Card Interface support" - depends on ISA_DMA_API + depends on ISA_DMA_API && !M68K help This selects the Winbond(R) W83L51xD Secure digital and Multimedia card Interface. -- cgit v1.2.3 From e211210098cb7490db2183d725f5c0f10463a704 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 18 Oct 2021 14:20:38 +0000 Subject: mISDN: Fix return values of the probe function During the process of driver probing, the probe function should return < 0 for failure, otherwise, the kernel will treat value > 0 as success. Signed-off-by: Zheyu Ma Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcpci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index e501cb03f211..bd087cca1c1d 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -1994,14 +1994,14 @@ setup_hw(struct hfc_pci *hc) pci_set_master(hc->pdev); if (!hc->irq) { printk(KERN_WARNING "HFC-PCI: No IRQ for PCI card found\n"); - return 1; + return -EINVAL; } hc->hw.pci_io = (char __iomem *)(unsigned long)hc->pdev->resource[1].start; if (!hc->hw.pci_io) { printk(KERN_WARNING "HFC-PCI: No IO-Mem for PCI card found\n"); - return 1; + return -ENOMEM; } /* Allocate memory for FIFOS */ /* the memory needs to be on a 32k boundary within the first 4G */ @@ -2012,7 +2012,7 @@ setup_hw(struct hfc_pci *hc) if (!buffer) { printk(KERN_WARNING "HFC-PCI: Error allocating memory for FIFO!\n"); - return 1; + return -ENOMEM; } hc->hw.fifos = buffer; pci_write_config_dword(hc->pdev, 0x80, hc->hw.dmahandle); @@ -2022,7 +2022,7 @@ setup_hw(struct hfc_pci *hc) "HFC-PCI: Error in ioremap for PCI!\n"); dma_free_coherent(&hc->pdev->dev, 0x8000, hc->hw.fifos, hc->hw.dmahandle); - return 1; + return -ENOMEM; } printk(KERN_INFO -- cgit v1.2.3 From c69b2f46876825c726bd8a97c7fa852d8932bc32 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 18 Oct 2021 14:32:57 +0000 Subject: cavium: Fix return values of the probe function During the process of driver probing, the probe function should return < 0 for failure, otherwise, the kernel will treat value > 0 as success. Signed-off-by: Zheyu Ma Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index d1667b759522..a27227aeae88 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1224,7 +1224,7 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic) if (ret < 0) { netdev_err(nic->netdev, "Req for #%d msix vectors failed\n", nic->num_vec); - return 1; + return ret; } sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); @@ -1243,7 +1243,7 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic) if (!nicvf_check_pf_ready(nic)) { nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); nicvf_unregister_interrupts(nic); - return 1; + return -EIO; } return 0; -- cgit v1.2.3 From 63acd42c0d4942f74710b11c38602fb14dea7320 Mon Sep 17 00:00:00 2001 From: Woody Lin Date: Tue, 12 Oct 2021 16:35:21 +0800 Subject: sched/scs: Reset the shadow stack when idle_task_exit Commit f1a0a376ca0c ("sched/core: Initialize the idle task with preemption disabled") removed the init_idle() call from idle_thread_get(). This was the sole call-path on hotplug that resets the Shadow Call Stack (scs) Stack Pointer (sp). Not resetting the scs-sp leads to scs overflow after enough hotplug cycles. Therefore add an explicit scs_task_reset() to the hotplug code to make sure the scs-sp does get reset on hotplug. Fixes: f1a0a376ca0c ("sched/core: Initialize the idle task with preemption disabled") Signed-off-by: Woody Lin [peterz: Changelog] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lore.kernel.org/r/20211012083521.973587-1-woodylin@google.com --- kernel/sched/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1bba4128a3e6..f21714ea3db8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8795,6 +8795,7 @@ void idle_task_exit(void) finish_arch_post_lock_switch(); } + scs_task_reset(current); /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ } -- cgit v1.2.3 From 629715adc62b0ad27ab04d0aa73a71927f886910 Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Sat, 16 Oct 2021 12:30:00 -0500 Subject: ucounts: Pair inc_rlimit_ucounts with dec_rlimit_ucoutns in commit_creds The purpose of inc_rlimit_ucounts and dec_rlimit_ucounts in commit_creds is to change which rlimit counter is used to track a process when the credentials changes. Use the same test for both to guarantee the tracking is correct. Cc: stable@vger.kernel.org Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") Link: https://lkml.kernel.org/r/87v91us0w4.fsf_-_@disp2133 Tested-by: Yu Zhao Reviewed-by: Alexey Gladkov Signed-off-by: "Eric W. Biederman" --- kernel/cred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cred.c b/kernel/cred.c index f784e08c2fbd..3d163bfd64a9 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -501,7 +501,7 @@ int commit_creds(struct cred *new) inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1); rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); - if (new->user != old->user) + if (new->user != old->user || new->user_ns != old->user_ns) dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1); alter_cred_subscribers(old, -2); -- cgit v1.2.3 From 34dc2fd6e6908499b669c7b45320cddf38b332e1 Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Sat, 16 Oct 2021 12:47:51 -0500 Subject: ucounts: Proper error handling in set_cred_ucounts Instead of leaking the ucounts in new if alloc_ucounts fails, store the result of alloc_ucounts into a temporary variable, which is later assigned to new->ucounts. Cc: stable@vger.kernel.org Fixes: 905ae01c4ae2 ("Add a reference to ucounts for each cred") Link: https://lkml.kernel.org/r/87pms2s0v8.fsf_-_@disp2133 Tested-by: Yu Zhao Reviewed-by: Alexey Gladkov Signed-off-by: "Eric W. Biederman" --- kernel/cred.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/cred.c b/kernel/cred.c index 3d163bfd64a9..16c05dfbec4d 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -669,7 +669,7 @@ int set_cred_ucounts(struct cred *new) { struct task_struct *task = current; const struct cred *old = task->real_cred; - struct ucounts *old_ucounts = new->ucounts; + struct ucounts *new_ucounts, *old_ucounts = new->ucounts; if (new->user == old->user && new->user_ns == old->user_ns) return 0; @@ -681,9 +681,10 @@ int set_cred_ucounts(struct cred *new) if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid)) return 0; - if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid))) + if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid))) return -EAGAIN; + new->ucounts = new_ucounts; if (old_ucounts) put_ucounts(old_ucounts); -- cgit v1.2.3 From bc28368596436e6e81ffc48c815b8225d96121c0 Mon Sep 17 00:00:00 2001 From: Rafael J. Wysocki Date: Fri, 15 Oct 2021 19:12:21 +0200 Subject: ACPI: PM: Do not turn off power resources in unknown state Commit 6381195ad7d0 ("ACPI: power: Rework turning off unused power resources") caused power resources in unknown state with reference counters equal to zero to be turned off too, but that caused issues to appear in the field, so modify the code to only turn off power resources that are known to be "on". Link: https://lore.kernel.org/linux-acpi/6faf4b92-78d5-47a4-63df-cc2bab7769d0@molgen.mpg.de/ Fixes: 6381195ad7d0 ("ACPI: power: Rework turning off unused power resources") Reported-by: Andreas K. Huettel Tested-by: Andreas K. Huettel Signed-off-by: Rafael J. Wysocki Cc: 5.14+ # 5.14+ --- drivers/acpi/power.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index b9863e22b952..f0ed4414edb1 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -1035,13 +1035,8 @@ void acpi_turn_off_unused_power_resources(void) list_for_each_entry_reverse(resource, &acpi_power_resource_list, list_node) { mutex_lock(&resource->resource_lock); - /* - * Turn off power resources in an unknown state too, because the - * platform firmware on some system expects the OS to turn off - * power resources without any users unconditionally. - */ if (!resource->ref_count && - resource->state != ACPI_POWER_RESOURCE_STATE_OFF) { + resource->state == ACPI_POWER_RESOURCE_STATE_ON) { acpi_handle_debug(resource->device.handle, "Turning OFF\n"); __acpi_power_off(resource); } -- cgit v1.2.3 From ba69fd9101f20a6d05a96ab743341d4e7b1a2178 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 18 Oct 2021 21:59:00 +0200 Subject: net: dsa: Fix an error handling path in 'dsa_switch_parse_ports_of()' If we return before the end of the 'for_each_child_of_node()' iterator, the reference taken on 'port' must be released. Add the missing 'of_node_put()' calls. Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/15d5310d1d55ad51c1af80775865306d92432e03.1634587046.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index da18094b5a04..e9911b18bdbf 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1374,12 +1374,15 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds, for_each_available_child_of_node(ports, port) { err = of_property_read_u32(port, "reg", ®); - if (err) + if (err) { + of_node_put(port); goto out_put_node; + } if (reg >= ds->num_ports) { dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%zu)\n", port, reg, ds->num_ports); + of_node_put(port); err = -EINVAL; goto out_put_node; } @@ -1387,8 +1390,10 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds, dp = dsa_to_port(ds, reg); err = dsa_port_parse_of(dp, port); - if (err) + if (err) { + of_node_put(port); goto out_put_node; + } } out_put_node: -- cgit v1.2.3 From bc369921d6708542eb93da33478762f1162a5805 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 19 Oct 2021 20:31:26 +0100 Subject: io-wq: max_worker fixes First, fix nr_workers checks against max_workers, with max_worker registration, it may pretty easily happen that nr_workers > max_workers. Also, synchronise writing to acct->max_worker with wqe->lock. It's not an actual problem, but as we don't care about io_wqe_create_worker(), it's better than WRITE_ONCE()/READ_ONCE(). Fixes: 2e480058ddc2 ("io-wq: provide a way to limit max number of workers") Reported-by: Beld Zhang Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/11f90e6b49410b7d1a88f5d04fb8d95bb86b8cf3.1634671835.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io-wq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 5bf8aa81715e..422a7ed6a9bd 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -253,7 +253,7 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) pr_warn_once("io-wq is not configured for unbound workers"); raw_spin_lock(&wqe->lock); - if (acct->nr_workers == acct->max_workers) { + if (acct->nr_workers >= acct->max_workers) { raw_spin_unlock(&wqe->lock); return true; } @@ -1291,15 +1291,18 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count) rcu_read_lock(); for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; struct io_wqe_acct *acct; + raw_spin_lock(&wqe->lock); for (i = 0; i < IO_WQ_ACCT_NR; i++) { - acct = &wq->wqes[node]->acct[i]; + acct = &wqe->acct[i]; prev = max_t(int, acct->max_workers, prev); if (new_count[i]) acct->max_workers = new_count[i]; new_count[i] = prev; } + raw_spin_unlock(&wqe->lock); } rcu_read_unlock(); return 0; -- cgit v1.2.3 From 60fab1076636493cfa2686e712446595fe43bc16 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Tue, 19 Oct 2021 11:22:12 +0300 Subject: rdma/qedr: Fix crash due to redundant release of device's qp memory Device's QP memory should only be allocated and released by IB layer. This patch removes the redundant release of the device's qp memory and uses completion APIs to make sure that .destroy_qp() only return, when qp reference becomes 0. Fixes: 514aee660df4 ("RDMA: Globally allocate and release QP memory") Link: https://lore.kernel.org/r/20211019082212.7052-1-pkushwaha@marvell.com Acked-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: Alok Prasad Signed-off-by: Prabhakar Kushwaha Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 1 + drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 5 ++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 3cb4febaad0f..8def88cfa300 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -455,6 +455,7 @@ struct qedr_qp { /* synchronization objects used with iwarp ep */ struct kref refcnt; struct completion iwarp_cm_comp; + struct completion qp_rel_comp; unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */ }; diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 1715fbe0719d..a51fc6854984 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -83,7 +83,7 @@ static void qedr_iw_free_qp(struct kref *ref) { struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt); - kfree(qp); + complete(&qp->qp_rel_comp); } static void diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 3fbf172dbbef..dcb3653db72d 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1357,6 +1357,7 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, if (rdma_protocol_iwarp(&dev->ibdev, 1)) { kref_init(&qp->refcnt); init_completion(&qp->iwarp_cm_comp); + init_completion(&qp->qp_rel_comp); } qp->pd = pd; @@ -2857,8 +2858,10 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) qedr_free_qp_resources(dev, qp, udata); - if (rdma_protocol_iwarp(&dev->ibdev, 1)) + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { qedr_iw_qp_rem_ref(&qp->ibqp); + wait_for_completion(&qp->qp_rel_comp); + } return 0; } -- cgit v1.2.3 From 5508546631a0f555d7088203dec2614e41b5106e Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 19 Oct 2021 14:30:10 +0300 Subject: RDMA/mlx5: Initialize the ODP xarray when creating an ODP MR Normally the zero fill would hide the missing initialization, but an errant set to desc_size in reg_create() causes a crash: BUG: unable to handle page fault for address: 0000000800000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 5 PID: 890 Comm: ib_write_bw Not tainted 5.15.0-rc4+ #47 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5_ib_dereg_mr+0x14/0x3b0 [mlx5_ib] Code: 48 63 cd 4c 89 f7 48 89 0c 24 e8 37 30 03 e1 48 8b 0c 24 eb a0 90 0f 1f 44 00 00 41 56 41 55 41 54 55 53 48 89 fb 48 83 ec 30 <48> 8b 2f 65 48 8b 04 25 28 00 00 00 48 89 44 24 28 31 c0 8b 87 c8 RSP: 0018:ffff88811afa3a60 EFLAGS: 00010286 RAX: 000000000000001c RBX: 0000000800000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000800000000 RBP: 0000000800000000 R08: 0000000000000000 R09: c0000000fffff7ff R10: ffff88811afa38f8 R11: ffff88811afa38f0 R12: ffffffffa02c7ac0 R13: 0000000000000000 R14: ffff88811afa3cd8 R15: ffff88810772fa00 FS: 00007f47b9080740(0000) GS:ffff88852cd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000800000000 CR3: 000000010761e003 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: mlx5_ib_free_odp_mr+0x95/0xc0 [mlx5_ib] mlx5_ib_dereg_mr+0x128/0x3b0 [mlx5_ib] ib_dereg_mr_user+0x45/0xb0 [ib_core] ? xas_load+0x8/0x80 destroy_hw_idr_uobject+0x1a/0x50 [ib_uverbs] uverbs_destroy_uobject+0x2f/0x150 [ib_uverbs] uobj_destroy+0x3c/0x70 [ib_uverbs] ib_uverbs_cmd_verbs+0x467/0xb00 [ib_uverbs] ? uverbs_finalize_object+0x60/0x60 [ib_uverbs] ? ttwu_queue_wakelist+0xa9/0xe0 ? pty_write+0x85/0x90 ? file_tty_write.isra.33+0x214/0x330 ? process_echoes+0x60/0x60 ib_uverbs_ioctl+0xa7/0x110 [ib_uverbs] __x64_sys_ioctl+0x10d/0x8e0 ? vfs_write+0x17f/0x260 do_syscall_64+0x3c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Add the missing xarray initialization and remove the desc_size set. Fixes: a639e66703ee ("RDMA/mlx5: Zero out ODP related items in the mlx5_ib_mr") Link: https://lore.kernel.org/r/a4846a11c9de834663e521770da895007f9f0d30.1634642730.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3be36ebbf67a..22e2f4d79743 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1339,7 +1339,6 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, goto err_2; } mr->mmkey.type = MLX5_MKEY_MR; - mr->desc_size = sizeof(struct mlx5_mtt); mr->umem = umem; set_mr_fields(dev, mr, umem->length, access_flags); kvfree(in); @@ -1533,6 +1532,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, ib_umem_release(&odp->umem); return ERR_CAST(mr); } + xa_init(&mr->implicit_children); odp->private = mr; err = mlx5r_store_odp_mkey(dev, &mr->mmkey); -- cgit v1.2.3 From cc07b73ef11d11d4359fb104d0199b22451dd3d8 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 19 Oct 2021 10:16:53 -0500 Subject: RDMA/irdma: Set VLAN in UD work completion correctly Currently VLAN is reported in UD work completion when VLAN id is zero, i.e. no VLAN case. Report VLAN in UD work completion only when VLAN id is non-zero. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20211019151654.1943-1-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 7110ebf834f9..102dc9342f2a 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3399,9 +3399,13 @@ static void irdma_process_cqe(struct ib_wc *entry, } if (cq_poll_info->ud_vlan_valid) { - entry->vlan_id = cq_poll_info->ud_vlan & VLAN_VID_MASK; - entry->wc_flags |= IB_WC_WITH_VLAN; + u16 vlan = cq_poll_info->ud_vlan & VLAN_VID_MASK; + entry->sl = cq_poll_info->ud_vlan >> VLAN_PRIO_SHIFT; + if (vlan) { + entry->vlan_id = vlan; + entry->wc_flags |= IB_WC_WITH_VLAN; + } } else { entry->sl = 0; } -- cgit v1.2.3 From 2dace185caa580720c7cd67fec9efc5ee26108ac Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 19 Oct 2021 10:16:54 -0500 Subject: RDMA/irdma: Do not hold qos mutex twice on QP resume When irdma_ws_add fails, irdma_ws_remove is used to cleanup the leaf node. This lead to holding the qos mutex twice in the QP resume path. Fix this by avoiding the call to irdma_ws_remove and unwinding the error in irdma_ws_add. This skips the call to irdma_tc_in_use function which is not needed in the error unwind cases. Fixes: 3ae331c75128 ("RDMA/irdma: Add QoS definitions") Link: https://lore.kernel.org/r/20211019151654.1943-2-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ws.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c index b68c575eb78e..b0d6ee0739f5 100644 --- a/drivers/infiniband/hw/irdma/ws.c +++ b/drivers/infiniband/hw/irdma/ws.c @@ -330,8 +330,10 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) tc_node->enable = true; ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_MODIFY_NODE); - if (ret) + if (ret) { + vsi->unregister_qset(vsi, tc_node); goto reg_err; + } } ibdev_dbg(to_ibdev(vsi->dev), "WS: Using node %d which represents VSI %d TC %d\n", @@ -350,6 +352,10 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) } goto exit; +reg_err: + irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE); + list_del(&tc_node->siblings); + irdma_free_node(vsi, tc_node); leaf_add_err: if (list_empty(&vsi_node->child_list_head)) { if (irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE)) @@ -369,11 +375,6 @@ vsi_add_err: exit: mutex_unlock(&vsi->dev->ws_mutex); return ret; - -reg_err: - mutex_unlock(&vsi->dev->ws_mutex); - irdma_ws_remove(vsi, user_pri); - return ret; } /** -- cgit v1.2.3 From f09f6dfef8ce7b70a240cf83811e2b1909c3e47b Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Mon, 18 Oct 2021 17:24:01 -0700 Subject: spi: altera: Change to dynamic allocation of spi id The spi-altera driver has two flavors: platform and dfl. I'm seeing a case where I have both device types in the same machine, and they are conflicting on the SPI ID: ... kernel: couldn't get idr ... kernel: WARNING: CPU: 28 PID: 912 at drivers/spi/spi.c:2920 spi_register_controller.cold+0x84/0xc0a Both the platform and dfl drivers use the parent's driver ID as the SPI ID. In the error case, the parent devices are dfl_dev.4 and subdev_spi_altera.4.auto. When the second spi-master is created, the failure occurs because the SPI ID of 4 has already been allocated. Change the ID allocation to dynamic (by initializing bus_num to -1) to avoid duplicate SPI IDs. Signed-off-by: Russ Weight Link: https://lore.kernel.org/r/20211019002401.24041-1-russell.h.weight@intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-altera-dfl.c | 2 +- drivers/spi/spi-altera-platform.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-altera-dfl.c b/drivers/spi/spi-altera-dfl.c index 44fc9ee13fc7..ca40923258af 100644 --- a/drivers/spi/spi-altera-dfl.c +++ b/drivers/spi/spi-altera-dfl.c @@ -134,7 +134,7 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev) if (!master) return -ENOMEM; - master->bus_num = dfl_dev->id; + master->bus_num = -1; hw = spi_master_get_devdata(master); diff --git a/drivers/spi/spi-altera-platform.c b/drivers/spi/spi-altera-platform.c index f7a7c14e3679..65147aae82a1 100644 --- a/drivers/spi/spi-altera-platform.c +++ b/drivers/spi/spi-altera-platform.c @@ -48,7 +48,7 @@ static int altera_spi_probe(struct platform_device *pdev) return err; /* setup the master state. */ - master->bus_num = pdev->id; + master->bus_num = -1; if (pdata) { if (pdata->num_chipselect > ALTERA_SPI_MAX_CS) { -- cgit v1.2.3 From 34061d6b76a41b1e43c19e1e50d98e5d77f77d4e Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Sat, 16 Oct 2021 08:39:54 +0900 Subject: ksmbd: validate OutputBufferLength of QUERY_DIR, QUERY_INFO, IOCTL requests Validate OutputBufferLength of QUERY_DIR, QUERY_INFO, IOCTL requests and check the free size of response buffer for these requests. Acked-by: Namjae Jeon Signed-off-by: Hyunchul Lee Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 68 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 7999d8bc6892..e0f3a44e1599 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -3762,6 +3762,24 @@ static int verify_info_level(int info_level) return 0; } +static int smb2_calc_max_out_buf_len(struct ksmbd_work *work, + unsigned short hdr2_len, + unsigned int out_buf_len) +{ + int free_len; + + if (out_buf_len > work->conn->vals->max_trans_size) + return -EINVAL; + + free_len = (int)(work->response_sz - + (get_rfc1002_len(work->response_buf) + 4)) - + hdr2_len; + if (free_len < 0) + return -EINVAL; + + return min_t(int, out_buf_len, free_len); +} + int smb2_query_dir(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; @@ -3838,9 +3856,13 @@ int smb2_query_dir(struct ksmbd_work *work) memset(&d_info, 0, sizeof(struct ksmbd_dir_info)); d_info.wptr = (char *)rsp->Buffer; d_info.rptr = (char *)rsp->Buffer; - d_info.out_buf_len = (work->response_sz - (get_rfc1002_len(rsp_org) + 4)); - d_info.out_buf_len = min_t(int, d_info.out_buf_len, le32_to_cpu(req->OutputBufferLength)) - - sizeof(struct smb2_query_directory_rsp); + d_info.out_buf_len = + smb2_calc_max_out_buf_len(work, 8, + le32_to_cpu(req->OutputBufferLength)); + if (d_info.out_buf_len < 0) { + rc = -EINVAL; + goto err_out; + } d_info.flags = srch_flag; /* @@ -4074,12 +4096,11 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, le32_to_cpu(req->Flags)); } - buf_free_len = work->response_sz - - (get_rfc1002_len(rsp_org) + 4) - - sizeof(struct smb2_query_info_rsp); - - if (le32_to_cpu(req->OutputBufferLength) < buf_free_len) - buf_free_len = le32_to_cpu(req->OutputBufferLength); + buf_free_len = + smb2_calc_max_out_buf_len(work, 8, + le32_to_cpu(req->OutputBufferLength)); + if (buf_free_len < 0) + return -EINVAL; rc = ksmbd_vfs_listxattr(path->dentry, &xattr_list); if (rc < 0) { @@ -4390,6 +4411,8 @@ static void get_file_stream_info(struct ksmbd_work *work, struct path *path = &fp->filp->f_path; ssize_t xattr_list_len; int nbytes = 0, streamlen, stream_name_len, next, idx = 0; + int buf_free_len; + struct smb2_query_info_req *req = ksmbd_req_buf_next(work); generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp), &stat); @@ -4403,6 +4426,12 @@ static void get_file_stream_info(struct ksmbd_work *work, goto out; } + buf_free_len = + smb2_calc_max_out_buf_len(work, 8, + le32_to_cpu(req->OutputBufferLength)); + if (buf_free_len < 0) + goto out; + while (idx < xattr_list_len) { stream_name = xattr_list + idx; streamlen = strlen(stream_name); @@ -4427,6 +4456,10 @@ static void get_file_stream_info(struct ksmbd_work *work, streamlen = snprintf(stream_buf, streamlen + 1, ":%s", &stream_name[XATTR_NAME_STREAM_LEN]); + next = sizeof(struct smb2_file_stream_info) + streamlen * 2; + if (next > buf_free_len) + break; + file_info = (struct smb2_file_stream_info *)&rsp->Buffer[nbytes]; streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName, stream_buf, streamlen, @@ -4437,12 +4470,13 @@ static void get_file_stream_info(struct ksmbd_work *work, file_info->StreamSize = cpu_to_le64(stream_name_len); file_info->StreamAllocationSize = cpu_to_le64(stream_name_len); - next = sizeof(struct smb2_file_stream_info) + streamlen; nbytes += next; + buf_free_len -= next; file_info->NextEntryOffset = cpu_to_le32(next); } - if (!S_ISDIR(stat.mode)) { + if (!S_ISDIR(stat.mode) && + buf_free_len >= sizeof(struct smb2_file_stream_info) + 7 * 2) { file_info = (struct smb2_file_stream_info *) &rsp->Buffer[nbytes]; streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName, @@ -7453,11 +7487,13 @@ int smb2_ioctl(struct ksmbd_work *work) } cnt_code = le32_to_cpu(req->CntCode); - out_buf_len = le32_to_cpu(req->MaxOutputResponse); - out_buf_len = - min_t(u32, work->response_sz - work->next_smb2_rsp_hdr_off - - (offsetof(struct smb2_ioctl_rsp, Buffer) - 4), - out_buf_len); + ret = smb2_calc_max_out_buf_len(work, 48, + le32_to_cpu(req->MaxOutputResponse)); + if (ret < 0) { + rsp->hdr.Status = STATUS_INVALID_PARAMETER; + goto out; + } + out_buf_len = (unsigned int)ret; in_buf_len = le32_to_cpu(req->InputCount); switch (cnt_code) { -- cgit v1.2.3 From 621be84a9d1fbf0097fd058e249ec5cc4f35f3c5 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 13 Oct 2021 17:28:31 +0900 Subject: ksmbd: throttle session setup failures to avoid dictionary attacks To avoid dictionary attacks (repeated session setups rapidly sent) to connect to server, ksmbd make a delay of a 5 seconds on session setup failure to make it harder to send enough random connection requests to break into a server if a user insert the wrong password 10 times in a row. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/ksmbd_netlink.h | 2 ++ fs/ksmbd/mgmt/user_config.c | 2 +- fs/ksmbd/mgmt/user_config.h | 1 + fs/ksmbd/smb2pdu.c | 27 ++++++++++++++++++++++++--- fs/ksmbd/transport_ipc.c | 3 ++- fs/ksmbd/transport_ipc.h | 2 +- 6 files changed, 31 insertions(+), 6 deletions(-) diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h index 2fbe2bc1e093..c6718a05d347 100644 --- a/fs/ksmbd/ksmbd_netlink.h +++ b/fs/ksmbd/ksmbd_netlink.h @@ -211,6 +211,7 @@ struct ksmbd_tree_disconnect_request { */ struct ksmbd_logout_request { __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */ + __u32 account_flags; }; /* @@ -317,6 +318,7 @@ enum KSMBD_TREE_CONN_STATUS { #define KSMBD_USER_FLAG_BAD_UID BIT(2) #define KSMBD_USER_FLAG_BAD_USER BIT(3) #define KSMBD_USER_FLAG_GUEST_ACCOUNT BIT(4) +#define KSMBD_USER_FLAG_DELAY_SESSION BIT(5) /* * Share config flags. diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/ksmbd/mgmt/user_config.c index d21629ae5c89..1019d3677d55 100644 --- a/fs/ksmbd/mgmt/user_config.c +++ b/fs/ksmbd/mgmt/user_config.c @@ -55,7 +55,7 @@ struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp) void ksmbd_free_user(struct ksmbd_user *user) { - ksmbd_ipc_logout_request(user->name); + ksmbd_ipc_logout_request(user->name, user->flags); kfree(user->name); kfree(user->passkey); kfree(user); diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/ksmbd/mgmt/user_config.h index b2bb074a0150..aff80b029579 100644 --- a/fs/ksmbd/mgmt/user_config.h +++ b/fs/ksmbd/mgmt/user_config.h @@ -18,6 +18,7 @@ struct ksmbd_user { size_t passkey_sz; char *passkey; + unsigned int failed_login_count; }; static inline bool user_guest(struct ksmbd_user *user) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index e0f3a44e1599..cf7db5f71f9b 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -1779,9 +1779,30 @@ out_err: conn->mechToken = NULL; } - if (rc < 0 && sess) { - ksmbd_session_destroy(sess); - work->sess = NULL; + if (rc < 0) { + /* + * SecurityBufferOffset should be set to zero + * in session setup error response. + */ + rsp->SecurityBufferOffset = 0; + + if (sess) { + bool try_delay = false; + + /* + * To avoid dictionary attacks (repeated session setups rapidly sent) to + * connect to server, ksmbd make a delay of a 5 seconds on session setup + * failure to make it harder to send enough random connection requests + * to break into a server. + */ + if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION) + try_delay = true; + + ksmbd_session_destroy(sess); + work->sess = NULL; + if (try_delay) + ssleep(5); + } } return rc; diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c index 44aea33a67fa..1acf1892a466 100644 --- a/fs/ksmbd/transport_ipc.c +++ b/fs/ksmbd/transport_ipc.c @@ -601,7 +601,7 @@ int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id, return ret; } -int ksmbd_ipc_logout_request(const char *account) +int ksmbd_ipc_logout_request(const char *account, int flags) { struct ksmbd_ipc_msg *msg; struct ksmbd_logout_request *req; @@ -616,6 +616,7 @@ int ksmbd_ipc_logout_request(const char *account) msg->type = KSMBD_EVENT_LOGOUT_REQUEST; req = (struct ksmbd_logout_request *)msg->payload; + req->account_flags = flags; strscpy(req->account, account, KSMBD_REQ_MAX_ACCOUNT_NAME_SZ); ret = ipc_msg_send(msg); diff --git a/fs/ksmbd/transport_ipc.h b/fs/ksmbd/transport_ipc.h index 9eacc895ffdb..5e5b90a0c187 100644 --- a/fs/ksmbd/transport_ipc.h +++ b/fs/ksmbd/transport_ipc.h @@ -25,7 +25,7 @@ ksmbd_ipc_tree_connect_request(struct ksmbd_session *sess, struct sockaddr *peer_addr); int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id, unsigned long long connect_id); -int ksmbd_ipc_logout_request(const char *account); +int ksmbd_ipc_logout_request(const char *account, int flags); struct ksmbd_share_config_response * ksmbd_ipc_share_config_request(const char *name); struct ksmbd_spnego_authen_response * -- cgit v1.2.3 From 0d994cd482ee4e8e851388a70869beee51be1c54 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Tue, 19 Oct 2021 17:39:38 +0200 Subject: ksmbd: add buffer validation in session setup Make sure the security buffer's length/offset are valid with regards to the packet length. Acked-by: Namjae Jeon Signed-off-by: Marios Makassikis Signed-off-by: Steve French --- fs/ksmbd/auth.c | 16 +++++++++------- fs/ksmbd/smb2pdu.c | 51 +++++++++++++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 71c989f1568d..30a92ddc1817 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -298,8 +298,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, int blob_len, struct ksmbd_session *sess) { char *domain_name; - unsigned int lm_off, nt_off; - unsigned short nt_len; + unsigned int nt_off, dn_off; + unsigned short nt_len, dn_len; int ret; if (blob_len < sizeof(struct authenticate_message)) { @@ -314,15 +314,17 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, return -EINVAL; } - lm_off = le32_to_cpu(authblob->LmChallengeResponse.BufferOffset); nt_off = le32_to_cpu(authblob->NtChallengeResponse.BufferOffset); nt_len = le16_to_cpu(authblob->NtChallengeResponse.Length); + dn_off = le32_to_cpu(authblob->DomainName.BufferOffset); + dn_len = le16_to_cpu(authblob->DomainName.Length); + + if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len) + return -EINVAL; /* TODO : use domain name that imported from configuration file */ - domain_name = smb_strndup_from_utf16((const char *)authblob + - le32_to_cpu(authblob->DomainName.BufferOffset), - le16_to_cpu(authblob->DomainName.Length), true, - sess->conn->local_nls); + domain_name = smb_strndup_from_utf16((const char *)authblob + dn_off, + dn_len, true, sess->conn->local_nls); if (IS_ERR(domain_name)) return PTR_ERR(domain_name); diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index cf7db5f71f9b..7e448df3f847 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -1257,19 +1257,13 @@ static int generate_preauth_hash(struct ksmbd_work *work) return 0; } -static int decode_negotiation_token(struct ksmbd_work *work, - struct negotiate_message *negblob) +static int decode_negotiation_token(struct ksmbd_conn *conn, + struct negotiate_message *negblob, + size_t sz) { - struct ksmbd_conn *conn = work->conn; - struct smb2_sess_setup_req *req; - int sz; - if (!conn->use_spnego) return -EINVAL; - req = work->request_buf; - sz = le16_to_cpu(req->SecurityBufferLength); - if (ksmbd_decode_negTokenInit((char *)negblob, sz, conn)) { if (ksmbd_decode_negTokenTarg((char *)negblob, sz, conn)) { conn->auth_mechs |= KSMBD_AUTH_NTLMSSP; @@ -1281,9 +1275,9 @@ static int decode_negotiation_token(struct ksmbd_work *work, } static int ntlm_negotiate(struct ksmbd_work *work, - struct negotiate_message *negblob) + struct negotiate_message *negblob, + size_t negblob_len) { - struct smb2_sess_setup_req *req = work->request_buf; struct smb2_sess_setup_rsp *rsp = work->response_buf; struct challenge_message *chgblob; unsigned char *spnego_blob = NULL; @@ -1292,8 +1286,7 @@ static int ntlm_negotiate(struct ksmbd_work *work, int sz, rc; ksmbd_debug(SMB, "negotiate phase\n"); - sz = le16_to_cpu(req->SecurityBufferLength); - rc = ksmbd_decode_ntlmssp_neg_blob(negblob, sz, work->sess); + rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->sess); if (rc) return rc; @@ -1361,12 +1354,23 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn, struct authenticate_message *authblob; struct ksmbd_user *user; char *name; - int sz; + unsigned int auth_msg_len, name_off, name_len, secbuf_len; + secbuf_len = le16_to_cpu(req->SecurityBufferLength); + if (secbuf_len < sizeof(struct authenticate_message)) { + ksmbd_debug(SMB, "blob len %d too small\n", secbuf_len); + return NULL; + } authblob = user_authblob(conn, req); - sz = le32_to_cpu(authblob->UserName.BufferOffset); - name = smb_strndup_from_utf16((const char *)authblob + sz, - le16_to_cpu(authblob->UserName.Length), + name_off = le32_to_cpu(authblob->UserName.BufferOffset); + name_len = le16_to_cpu(authblob->UserName.Length); + auth_msg_len = le16_to_cpu(req->SecurityBufferOffset) + secbuf_len; + + if (auth_msg_len < (u64)name_off + name_len) + return NULL; + + name = smb_strndup_from_utf16((const char *)authblob + name_off, + name_len, true, conn->local_nls); if (IS_ERR(name)) { @@ -1612,6 +1616,7 @@ int smb2_sess_setup(struct ksmbd_work *work) struct smb2_sess_setup_rsp *rsp = work->response_buf; struct ksmbd_session *sess; struct negotiate_message *negblob; + unsigned int negblob_len, negblob_off; int rc = 0; ksmbd_debug(SMB, "Received request for session setup\n"); @@ -1692,10 +1697,16 @@ int smb2_sess_setup(struct ksmbd_work *work) if (sess->state == SMB2_SESSION_EXPIRED) sess->state = SMB2_SESSION_IN_PROGRESS; + negblob_off = le16_to_cpu(req->SecurityBufferOffset); + negblob_len = le16_to_cpu(req->SecurityBufferLength); + if (negblob_off < (offsetof(struct smb2_sess_setup_req, Buffer) - 4) || + negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) + return -EINVAL; + negblob = (struct negotiate_message *)((char *)&req->hdr.ProtocolId + - le16_to_cpu(req->SecurityBufferOffset)); + negblob_off); - if (decode_negotiation_token(work, negblob) == 0) { + if (decode_negotiation_token(conn, negblob, negblob_len) == 0) { if (conn->mechToken) negblob = (struct negotiate_message *)conn->mechToken; } @@ -1719,7 +1730,7 @@ int smb2_sess_setup(struct ksmbd_work *work) sess->Preauth_HashValue = NULL; } else if (conn->preferred_auth_mech == KSMBD_AUTH_NTLMSSP) { if (negblob->MessageType == NtLmNegotiate) { - rc = ntlm_negotiate(work, negblob); + rc = ntlm_negotiate(work, negblob, negblob_len); if (rc) goto out_err; rsp->hdr.Status = -- cgit v1.2.3 From 55161e67d44fdd23900be166a81e996abd6e3be9 Mon Sep 17 00:00:00 2001 From: Eugene Crosser Date: Mon, 18 Oct 2021 20:22:50 +0200 Subject: vrf: Revert "Reset skb conntrack connection..." This reverts commit 09e856d54bda5f288ef8437a90ab2b9b3eab83d1. When an interface is enslaved in a VRF, prerouting conntrack hook is called twice: once in the context of the original input interface, and once in the context of the VRF interface. If no special precausions are taken, this leads to creation of two conntrack entries instead of one, and breaks SNAT. Commit above was intended to avoid creation of extra conntrack entries when input interface is enslaved in a VRF. It did so by resetting conntrack related data associated with the skb when it enters VRF context. However it breaks netfilter operation. Imagine a use case when conntrack zone must be assigned based on the original input interface, rather than VRF interface (that would make original interfaces indistinguishable). One could create netfilter rules similar to these: chain rawprerouting { type filter hook prerouting priority raw; iif realiface1 ct zone set 1 return iif realiface2 ct zone set 2 return } This works before the mentioned commit, but not after: zone assignment is "forgotten", and any subsequent NAT or filtering that is dependent on the conntrack zone does not work. Here is a reproducer script that demonstrates the difference in behaviour. ========== #!/bin/sh # This script demonstrates unexpected change of nftables behaviour # caused by commit 09e856d54bda5f28 ""vrf: Reset skb conntrack # connection on VRF rcv" # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=09e856d54bda5f288ef8437a90ab2b9b3eab83d1 # # Before the commit, it was possible to assign conntrack zone to a # packet (or mark it for `notracking`) in the prerouting chanin, raw # priority, based on the `iif` (interface from which the packet # arrived). # After the change, # if the interface is enslaved in a VRF, such # assignment is lost. Instead, assignment based on the `iif` matching # the VRF master interface is honored. Thus it is impossible to # distinguish packets based on the original interface. # # This script demonstrates this change of behaviour: conntrack zone 1 # or 2 is assigned depending on the match with the original interface # or the vrf master interface. It can be observed that conntrack entry # appears in different zone in the kernel versions before and after # the commit. IPIN=172.30.30.1 IPOUT=172.30.30.2 PFXL=30 ip li sh vein >/dev/null 2>&1 && ip li del vein ip li sh tvrf >/dev/null 2>&1 && ip li del tvrf nft list table testct >/dev/null 2>&1 && nft delete table testct ip li add vein type veth peer veout ip li add tvrf type vrf table 9876 ip li set veout master tvrf ip li set vein up ip li set veout up ip li set tvrf up /sbin/sysctl -w net.ipv4.conf.veout.accept_local=1 /sbin/sysctl -w net.ipv4.conf.veout.rp_filter=0 ip addr add $IPIN/$PFXL dev vein ip addr add $IPOUT/$PFXL dev veout nft -f - <<__END__ table testct { chain rawpre { type filter hook prerouting priority raw; iif { veout, tvrf } meta nftrace set 1 iif veout ct zone set 1 return iif tvrf ct zone set 2 return notrack } chain rawout { type filter hook output priority raw; notrack } } __END__ uname -rv conntrack -F ping -W 1 -c 1 -I vein $IPOUT conntrack -L Signed-off-by: Eugene Crosser Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index bf2fac913942..662e26117353 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1360,8 +1360,6 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); - nf_reset_ct(skb); - /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. * For strict packets with a source LLA, determine the dst using the @@ -1424,8 +1422,6 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; - nf_reset_ct(skb); - if (ipv4_is_multicast(ip_hdr(skb)->daddr)) goto out; -- cgit v1.2.3 From 496c5fe25c377ddb7815c4ce8ecfb676f051e9b6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 20 Oct 2021 20:48:26 +1100 Subject: powerpc/idle: Don't corrupt back chain when going idle In isa206_idle_insn_mayloss() we store various registers into the stack red zone, which is allowed. However inside the IDLE_STATE_ENTER_SEQ_NORET macro we save r2 again, to 0(r1), which corrupts the stack back chain. We used to do the same in isa206_idle_insn_mayloss() itself, but we fixed that in 73287caa9210 ("powerpc64/idle: Fix SP offsets when saving GPRs"), however we missed that the macro also corrupts the back chain. Corrupting the back chain is bad for debuggability but doesn't necessarily cause a bug. However we recently changed the stack handling in some KVM code, and it now relies on the stack back chain being valid when it returns. The corruption causes that code to return with r1 pointing somewhere in kernel data, at some point LR is restored from the stack and we branch to NULL or somewhere else invalid. Only affects Power8 hosts running KVM guests, with dynamic_mt_modes enabled (which it is by default). The fixes tag below points to the commit that changed the KVM stack handling, exposing this bug. The actual corruption of the back chain has always existed since 948cf67c4726 ("powerpc: Add NAP mode support on Power7 in HV mode"). Fixes: 9b4416c5095c ("KVM: PPC: Book3S HV: Fix stack handling in idle_kvm_start_guest()") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211020094826.3222052-1-mpe@ellerman.id.au --- arch/powerpc/kernel/idle_book3s.S | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index abb719b21cae..3d97fb833834 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -126,14 +126,16 @@ _GLOBAL(idle_return_gpr_loss) /* * This is the sequence required to execute idle instructions, as * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0. - * - * The 0(r1) slot is used to save r2 in isa206, so use that here. + * We have to store a GPR somewhere, ptesync, then reload it, and create + * a false dependency on the result of the load. It doesn't matter which + * GPR we store, or where we store it. We have already stored r2 to the + * stack at -8(r1) in isa206_idle_insn_mayloss, so use that. */ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r2,0(r1); \ + std r2,-8(r1); \ ptesync; \ - ld r2,0(r1); \ + ld r2,-8(r1); \ 236: cmpd cr0,r2,r2; \ bne 236b; \ IDLE_INST; \ -- cgit v1.2.3 From 787252a10d9422f3058df9a4821f389e5326c440 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 15 Oct 2021 12:39:02 -0500 Subject: powerpc/smp: do not decrement idle task preempt count in CPU offline With PREEMPT_COUNT=y, when a CPU is offlined and then onlined again, we get: BUG: scheduling while atomic: swapper/1/0/0x00000000 no locks held by swapper/1/0. CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.15.0-rc2+ #100 Call Trace: dump_stack_lvl+0xac/0x108 __schedule_bug+0xac/0xe0 __schedule+0xcf8/0x10d0 schedule_idle+0x3c/0x70 do_idle+0x2d8/0x4a0 cpu_startup_entry+0x38/0x40 start_secondary+0x2ec/0x3a0 start_secondary_prolog+0x10/0x14 This is because powerpc's arch_cpu_idle_dead() decrements the idle task's preempt count, for reasons explained in commit a7c2bb8279d2 ("powerpc: Re-enable preemption before cpu_die()"), specifically "start_secondary() expects a preempt_count() of 0." However, since commit 2c669ef6979c ("powerpc/preempt: Don't touch the idle task's preempt_count during hotplug") and commit f1a0a376ca0c ("sched/core: Initialize the idle task with preemption disabled"), that justification no longer holds. The idle task isn't supposed to re-enable preemption, so remove the vestigial preempt_enable() from the CPU offline path. Tested with pseries and powernv in qemu, and pseries on PowerVM. Fixes: 2c669ef6979c ("powerpc/preempt: Don't touch the idle task's preempt_count during hotplug") Signed-off-by: Nathan Lynch Reviewed-by: Valentin Schneider Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211015173902.2278118-1-nathanl@linux.ibm.com --- arch/powerpc/kernel/smp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9cc7d3dbf439..605bab448f84 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1730,8 +1730,6 @@ void __cpu_die(unsigned int cpu) void arch_cpu_idle_dead(void) { - sched_preempt_enable_no_resched(); - /* * Disable on the down path. This will be re-enabled by * start_secondary() via start_secondary_resume() below -- cgit v1.2.3 From 60484103d5c387df49bd60de4b16c88022747048 Mon Sep 17 00:00:00 2001 From: Jiaran Zhang Date: Tue, 19 Oct 2021 22:16:28 +0800 Subject: net: hns3: Add configuration of TM QCN error event Add configuration of interrupt type and fifo interrupt enable of TM QCN error event if enabled, otherwise this event will not be reported when there is error. Fixes: d914971df022 ("net: hns3: remove redundant query in hclge_config_tm_hw_err_int()") Signed-off-by: Jiaran Zhang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 5 ++++- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index bb9b026ae88e..93aa7f2bdc13 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1560,8 +1560,11 @@ static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en) /* configure TM QCN hw errors */ hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false); - if (en) + desc.data[0] = cpu_to_le32(HCLGE_TM_QCN_ERR_INT_TYPE); + if (en) { + desc.data[0] |= cpu_to_le32(HCLGE_TM_QCN_FIFO_INT_EN); desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN); + } ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 07987fb8332e..d811eeefe2c0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -50,6 +50,8 @@ #define HCLGE_PPP_MPF_ECC_ERR_INT3_EN 0x003F #define HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK 0x003F #define HCLGE_TM_SCH_ECC_ERR_INT_EN 0x3 +#define HCLGE_TM_QCN_ERR_INT_TYPE 0x29 +#define HCLGE_TM_QCN_FIFO_INT_EN 0xFFFF00 #define HCLGE_TM_QCN_MEM_ERR_INT_EN 0xFFFFFF #define HCLGE_NCSI_ERR_INT_EN 0x3 #define HCLGE_NCSI_ERR_INT_TYPE 0x9 -- cgit v1.2.3 From b63fcaab959807282e9822e659034edf95fc8bd1 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Tue, 19 Oct 2021 22:16:29 +0800 Subject: net: hns3: reset DWRR of unused tc to zero Currently, DWRR of tc will be initialized to a fixed value when this tc is enabled, but it is not been reset to 0 when this tc is disabled. It cause a problem that the DWRR of unused tc is not 0 after using tc tool to add and delete multi-tc parameters. For examples, after enabling 4 TCs and restoring to 1 TC by follow tc commands: $ tc qdisc add dev eth0 root mqprio num_tc 4 map 0 1 2 3 0 1 2 3 queues \ 8@0 8@8 8@16 8@24 hw 1 mode channel $ tc qdisc del dev eth0 root Now there is just one TC is enabled for eth0, but the tc info querying by debugfs is shown as follow: $ cat /mnt/hns3/0000:7d:00.0/tm/tc_sch_info enabled tc number: 1 weight_offset: 14 TC MODE WEIGHT 0 dwrr 100 1 dwrr 100 2 dwrr 100 3 dwrr 100 4 dwrr 0 5 dwrr 0 6 dwrr 0 7 dwrr 0 This patch fixes it by resetting DWRR of tc to 0 when tc is disabled. Fixes: 848440544b41 ("net: hns3: Add support of TX Scheduler & Shaper to HNS3 driver") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index f314dbd3ce11..95074e91a846 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -752,6 +752,8 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) hdev->tm_info.pg_info[i].tc_bit_map = hdev->hw_tc_map; for (k = 0; k < hdev->tm_info.num_tc; k++) hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT; + for (; k < HNAE3_MAX_TC; k++) + hdev->tm_info.pg_info[i].tc_dwrr[k] = 0; } } -- cgit v1.2.3 From 731797fdffa3d083db536e2fdd07ceb050bb40b1 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Tue, 19 Oct 2021 22:16:30 +0800 Subject: net: hns3: add limit ets dwrr bandwidth cannot be 0 If ets dwrr bandwidth of tc is set to 0, the hardware will switch to SP mode. In this case, this tc may occupy all the tx bandwidth if it has huge traffic, so it violates the purpose of the user setting. To fix this problem, limit the ets dwrr bandwidth must greater than 0. Fixes: cacde272dd00 ("net: hns3: Add hclge_dcb module for the support of DCB feature") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 307c9e830510..91cb578f56b8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -137,6 +137,15 @@ static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev, *changed = true; break; case IEEE_8021QAZ_TSA_ETS: + /* The hardware will switch to sp mode if bandwidth is + * 0, so limit ets bandwidth must be greater than 0. + */ + if (!ets->tc_tx_bw[i]) { + dev_err(&hdev->pdev->dev, + "tc%u ets bw cannot be 0\n", i); + return -EINVAL; + } + if (hdev->tm_info.tc_info[i].tc_sch_mode != HCLGE_SCH_MODE_DWRR) *changed = true; -- cgit v1.2.3 From adfb7b4966c0c4c63a791f202b8b3837b07a9ece Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 19 Oct 2021 22:16:31 +0800 Subject: net: hns3: fix the max tx size according to user manual Currently the max tx size supported by the hw is calculated by using the max BD num supported by the hw. According to the hw user manual, the max tx size is fixed value for both non-TSO and TSO skb. This patch updates the max tx size according to the manual. Fixes: 8ae10cfb5089("net: hns3: support tx-scatter-gather-fraglist feature") Signed-off-by: Yunsheng Lin Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 7 ++----- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 6 ++---- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 468b8f07bf47..ea89772e8952 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1847,7 +1847,6 @@ void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size) static int hns3_skb_linearize(struct hns3_enet_ring *ring, struct sk_buff *skb, - u8 max_non_tso_bd_num, unsigned int bd_num) { /* 'bd_num == UINT_MAX' means the skb' fraglist has a @@ -1864,8 +1863,7 @@ static int hns3_skb_linearize(struct hns3_enet_ring *ring, * will not help. */ if (skb->len > HNS3_MAX_TSO_SIZE || - (!skb_is_gso(skb) && skb->len > - HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num))) { + (!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE)) { u64_stats_update_begin(&ring->syncp); ring->stats.hw_limitation++; u64_stats_update_end(&ring->syncp); @@ -1900,8 +1898,7 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, goto out; } - if (hns3_skb_linearize(ring, skb, max_non_tso_bd_num, - bd_num)) + if (hns3_skb_linearize(ring, skb, bd_num)) return -ENOMEM; bd_num = hns3_tx_bd_count(skb->len); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 6162d9f88e37..9d9be3665bb1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -186,11 +186,9 @@ enum hns3_nic_state { #define HNS3_MAX_BD_SIZE 65535 #define HNS3_MAX_TSO_BD_NUM 63U -#define HNS3_MAX_TSO_SIZE \ - (HNS3_MAX_BD_SIZE * HNS3_MAX_TSO_BD_NUM) +#define HNS3_MAX_TSO_SIZE 1048576U +#define HNS3_MAX_NON_TSO_SIZE 9728U -#define HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num) \ - (HNS3_MAX_BD_SIZE * (max_non_tso_bd_num)) #define HNS3_VECTOR_GL0_OFFSET 0x100 #define HNS3_VECTOR_GL1_OFFSET 0x200 -- cgit v1.2.3 From 9f9f0f19994b42b3e5e8735d41b9c5136828a76c Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 19 Oct 2021 22:16:32 +0800 Subject: net: hns3: fix for miscalculation of rx unused desc rx unused desc is the desc that need attatching new buffer before refilling to hw to receive new packet, the number of desc need attatching new buffer is calculated using next_to_use and next_to_clean. when next_to_use == next_to_clean, currently hns3 driver assumes that all the desc has the buffer attatched, but 'next_to_use == next_to_clean' also means all the desc need attatching new buffer if hw has comsumed all the desc and the driver has not attatched any buffer to the desc yet. This patch adds 'refill' in desc_cb to indicate whether a new buffer has been refilled to a desc. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Yunsheng Lin Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 8 ++++++++ drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index ea89772e8952..2ecc9abc02d6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3255,6 +3255,7 @@ static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i) { hns3_unmap_buffer(ring, &ring->desc_cb[i]); ring->desc[i].addr = 0; + ring->desc_cb[i].refill = 0; } static void hns3_free_buffer_detach(struct hns3_enet_ring *ring, int i, @@ -3333,6 +3334,7 @@ static int hns3_alloc_and_attach_buffer(struct hns3_enet_ring *ring, int i) ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + ring->desc_cb[i].page_offset); + ring->desc_cb[i].refill = 1; return 0; } @@ -3362,6 +3364,7 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, { hns3_unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; + ring->desc_cb[i].refill = 1; ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + ring->desc_cb[i].page_offset); ring->desc[i].rx.bd_base_info = 0; @@ -3370,6 +3373,7 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i) { ring->desc_cb[i].reuse_flag = 0; + ring->desc_cb[i].refill = 1; ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + ring->desc_cb[i].page_offset); ring->desc[i].rx.bd_base_info = 0; @@ -3476,6 +3480,9 @@ static int hns3_desc_unused(struct hns3_enet_ring *ring) int ntc = ring->next_to_clean; int ntu = ring->next_to_use; + if (unlikely(ntc == ntu && !ring->desc_cb[ntc].refill)) + return ring->desc_num; + return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu; } @@ -3821,6 +3828,7 @@ static void hns3_rx_ring_move_fw(struct hns3_enet_ring *ring) { ring->desc[ring->next_to_clean].rx.bd_base_info &= cpu_to_le32(~BIT(HNS3_RXD_VLD_B)); + ring->desc_cb[ring->next_to_clean].refill = 0; ring->next_to_clean += 1; if (unlikely(ring->next_to_clean == ring->desc_num)) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 9d9be3665bb1..f09a61d9c626 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -330,6 +330,7 @@ struct hns3_desc_cb { u32 length; /* length of the buffer */ u16 reuse_flag; + u16 refill; /* desc type, used by the ring user to mark the type of the priv data */ u16 type; -- cgit v1.2.3 From 68752b24f51a71d4f350a764d890b670f59062c5 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 19 Oct 2021 22:16:33 +0800 Subject: net: hns3: schedule the polling again when allocation fails Currently when there is a rx page allocation failure, it is possible that polling may be stopped if there is no more packet to be reveiced, which may cause queue stall problem under memory pressure. This patch makes sure polling is scheduled again when there is any rx page allocation failure, and polling will try to allocate receive buffers until it succeeds. Now the allocation retry is added, it is unnecessary to do the rx page allocation at the end of rx cleaning, so remove it. And reset the unused_count to zero after calling hns3_nic_alloc_rx_buffers() to avoid calling hns3_nic_alloc_rx_buffers() repeatedly under memory pressure. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Yunsheng Lin Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 2ecc9abc02d6..4b886a13e079 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3486,7 +3486,8 @@ static int hns3_desc_unused(struct hns3_enet_ring *ring) return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu; } -static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, +/* Return true if there is any allocation failure */ +static bool hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, int cleand_count) { struct hns3_desc_cb *desc_cb; @@ -3511,7 +3512,10 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, hns3_rl_err(ring_to_netdev(ring), "alloc rx buffer failed: %d\n", ret); - break; + + writel(i, ring->tqp->io_base + + HNS3_RING_RX_RING_HEAD_REG); + return true; } hns3_replace_buffer(ring, ring->next_to_use, &res_cbs); @@ -3524,6 +3528,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, } writel(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG); + return false; } static bool hns3_can_reuse_page(struct hns3_desc_cb *cb) @@ -4175,6 +4180,7 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, { #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16 int unused_count = hns3_desc_unused(ring); + bool failure = false; int recv_pkts = 0; int err; @@ -4183,9 +4189,9 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, while (recv_pkts < budget) { /* Reuse or realloc buffers */ if (unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) { - hns3_nic_alloc_rx_buffers(ring, unused_count); - unused_count = hns3_desc_unused(ring) - - ring->pending_buf; + failure = failure || + hns3_nic_alloc_rx_buffers(ring, unused_count); + unused_count = 0; } /* Poll one pkt */ @@ -4204,11 +4210,7 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, } out: - /* Make all data has been write before submit */ - if (unused_count > 0) - hns3_nic_alloc_rx_buffers(ring, unused_count); - - return recv_pkts; + return failure ? budget : recv_pkts; } static void hns3_update_rx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector) -- cgit v1.2.3 From 1385cc81baeb3bd8cbbbcdc1557f038ac1712529 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Tue, 19 Oct 2021 22:16:34 +0800 Subject: net: hns3: fix vf reset workqueue cannot exit The task of VF reset is performed through the workqueue. It checks the value of hdev->reset_pending to determine whether to exit the loop. However, the value of hdev->reset_pending may also be assigned by the interrupt function hclgevf_misc_irq_handle(), which may cause the loop fail to exit and keep occupying the workqueue. This loop is not necessary, so remove it and the workqueue will be rescheduled if the reset needs to be retried or a new reset occurs. Fixes: 1cc9bc6e5867 ("net: hns3: split hclgevf_reset() into preparing and rebuilding part") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 5fdac8685f95..bef6b98e2f50 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2273,9 +2273,9 @@ static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) hdev->reset_attempts = 0; hdev->last_reset_time = jiffies; - while ((hdev->reset_type = - hclgevf_get_reset_level(hdev, &hdev->reset_pending)) - != HNAE3_NONE_RESET) + hdev->reset_type = + hclgevf_get_reset_level(hdev, &hdev->reset_pending); + if (hdev->reset_type != HNAE3_NONE_RESET) hclgevf_reset(hdev); } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state)) { -- cgit v1.2.3 From 0dd8a25f355b4df2d41c08df1716340854c7d4c5 Mon Sep 17 00:00:00 2001 From: Peng Li Date: Tue, 19 Oct 2021 22:16:35 +0800 Subject: net: hns3: disable sriov before unload hclge layer HNS3 driver includes hns3.ko, hnae3.ko and hclge.ko. hns3.ko includes network stack and pci_driver, hclge.ko includes HW device action, algo_ops and timer task, hnae3.ko includes some register function. When SRIOV is enable and hclge.ko is removed, HW device is unloaded but VF still exists, PF will not reply VF mbx messages, and cause errors. This patch fix it by disable SRIOV before remove hclge.ko. Fixes: e2cb1dec9779 ("net: hns3: Add HNS3 VF HCL(Hardware Compatibility Layer) Support") Signed-off-by: Peng Li Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.c | 21 +++++++++++++++++++++ drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 + 3 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index eef1b2764d34..67b0bf310daa 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -10,6 +10,27 @@ static LIST_HEAD(hnae3_ae_algo_list); static LIST_HEAD(hnae3_client_list); static LIST_HEAD(hnae3_ae_dev_list); +void hnae3_unregister_ae_algo_prepare(struct hnae3_ae_algo *ae_algo) +{ + const struct pci_device_id *pci_id; + struct hnae3_ae_dev *ae_dev; + + if (!ae_algo) + return; + + list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) { + if (!hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B)) + continue; + + pci_id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev); + if (!pci_id) + continue; + if (IS_ENABLED(CONFIG_PCI_IOV)) + pci_disable_sriov(ae_dev->pdev); + } +} +EXPORT_SYMBOL(hnae3_unregister_ae_algo_prepare); + /* we are keeping things simple and using single lock for all the * list. This is a non-critical code so other updations, if happen * in parallel, can wait. diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 8ba21d6dc220..d701451596c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -853,6 +853,7 @@ struct hnae3_handle { int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev); void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev); +void hnae3_unregister_ae_algo_prepare(struct hnae3_ae_algo *ae_algo); void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo); void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index f5b8d1fee0f1..dcd40cc73082 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -13065,6 +13065,7 @@ static int hclge_init(void) static void hclge_exit(void) { + hnae3_unregister_ae_algo_prepare(&ae_algo); hnae3_unregister_ae_algo(&ae_algo); destroy_workqueue(hclge_wq); } -- cgit v1.2.3 From 9b57e9d5010bbed7c0d9d445085840f7025e6f9a Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 19 Oct 2021 19:53:59 +0200 Subject: KVM: s390: clear kicked_mask before sleeping again The idea behind kicked mask is that we should not re-kick a vcpu that is already in the "kick" process, i.e. that was kicked and is is about to be dispatched if certain conditions are met. The problem with the current implementation is, that it assumes the kicked vcpu is going to enter SIE shortly. But under certain circumstances, the vcpu we just kicked will be deemed non-runnable and will remain in wait state. This can happen, if the interrupt(s) this vcpu got kicked to deal with got already cleared (because the interrupts got delivered to another vcpu). In this case kvm_arch_vcpu_runnable() would return false, and the vcpu would remain in kvm_vcpu_block(), but this time with its kicked_mask bit set. So next time around we wouldn't kick the vcpu form __airqs_kick_single_vcpu(), but would assume that we just kicked it. Let us make sure the kicked_mask is cleared before we give up on re-dispatching the vcpu. Fixes: 9f30f6216378 ("KVM: s390: add gib_alert_irq_handler()") Reported-by: Matthew Rosato Signed-off-by: Halil Pasic Reviewed-by: Christian Borntraeger Reviewed-by: Michael Mueller Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20211019175401.3757927-2-pasic@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6a6dd5e1daf6..1c97493d21e1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3363,6 +3363,7 @@ out_free_sie_block: int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); return kvm_s390_vcpu_has_irq(vcpu, 0); } -- cgit v1.2.3 From 0e9ff65f455dfd0a8aea5e7843678ab6fe097e21 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 19 Oct 2021 19:54:00 +0200 Subject: KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu Changing the deliverable mask in __airqs_kick_single_vcpu() is a bug. If one idle vcpu can't take the interrupts we want to deliver, we should look for another vcpu that can, instead of saying that we don't want to deliver these interrupts by clearing the bits from the deliverable_mask. Fixes: 9f30f6216378 ("KVM: s390: add gib_alert_irq_handler()") Signed-off-by: Halil Pasic Reviewed-by: Christian Borntraeger Reviewed-by: Michael Mueller Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20211019175401.3757927-3-pasic@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 10722455fd02..2245f4b8d362 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3053,13 +3053,14 @@ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_vcpu *vcpu; + u8 vcpu_isc_mask; for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { vcpu = kvm_get_vcpu(kvm, vcpu_idx); if (psw_ioint_disabled(vcpu)) continue; - deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); - if (deliverable_mask) { + vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24); + if (deliverable_mask & vcpu_isc_mask) { /* lately kicked but not yet running */ if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) return; -- cgit v1.2.3 From 641e3fd1a038c68045bbf89d78502f4b4bbc7284 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 19 Oct 2021 22:49:16 +0200 Subject: nfc: st95hf: Make spi remove() callback return zero If something goes wrong in the remove callback, returning an error code just results in an error message. The device still disappears. So don't skip disabling the regulator in st95hf_remove() if resetting the controller via spi fails. Also don't return an error code which just results in two error messages. Signed-off-by: Uwe Kleine-König Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/st95hf/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index d16cf3ff644e..b23f47936473 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -1226,11 +1226,9 @@ static int st95hf_remove(struct spi_device *nfc_spi_dev) &reset_cmd, ST95HF_RESET_CMD_LEN, ASYNC); - if (result) { + if (result) dev_err(&spictx->spidev->dev, "ST95HF reset failed in remove() err = %d\n", result); - return result; - } /* wait for 3 ms to complete the controller reset process */ usleep_range(3000, 4000); @@ -1239,7 +1237,7 @@ static int st95hf_remove(struct spi_device *nfc_spi_dev) if (stcontext->st95hf_supply) regulator_disable(stcontext->st95hf_supply); - return result; + return 0; } /* Register as SPI protocol driver */ -- cgit v1.2.3 From 3cb958027cb8b78d3ee639ce9af54c2ef1bf964f Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Wed, 20 Oct 2021 09:04:33 +0200 Subject: net: stmmac: Fix E2E delay mechanism When utilizing End to End delay mechanism, the following error messages show up: |root@ehl1:~# ptp4l --tx_timestamp_timeout=50 -H -i eno2 -E -m |ptp4l[950.573]: selected /dev/ptp3 as PTP clock |ptp4l[950.586]: port 1: INITIALIZING to LISTENING on INIT_COMPLETE |ptp4l[950.586]: port 0: INITIALIZING to LISTENING on INIT_COMPLETE |ptp4l[952.879]: port 1: new foreign master 001395.fffe.4897b4-1 |ptp4l[956.879]: selected best master clock 001395.fffe.4897b4 |ptp4l[956.879]: port 1: assuming the grand master role |ptp4l[956.879]: port 1: LISTENING to GRAND_MASTER on RS_GRAND_MASTER |ptp4l[962.017]: port 1: received DELAY_REQ without timestamp |ptp4l[962.273]: port 1: received DELAY_REQ without timestamp |ptp4l[963.090]: port 1: received DELAY_REQ without timestamp Commit f2fb6b6275eb ("net: stmmac: enable timestamp snapshot for required PTP packets in dwmac v5.10a") already addresses this problem for the dwmac v5.10. However, same holds true for all dwmacs above version v4.10. Correct the check accordingly. Afterwards everything works as expected. Tested on Intel Atom(R) x6414RE Processor. Fixes: 14f347334bf2 ("net: stmmac: Correctly take timestamp for PTPv2") Fixes: f2fb6b6275eb ("net: stmmac: enable timestamp snapshot for required PTP packets in dwmac v5.10a") Suggested-by: Ong Boon Leong Signed-off-by: Kurt Kanzenbach Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index eb3b7bf771d7..3d67d1fa3690 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -736,7 +736,7 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; snap_type_sel = PTP_TCR_SNAPTYPSEL_1; - if (priv->synopsys_id != DWMAC_CORE_5_10) + if (priv->synopsys_id < DWMAC_CORE_4_10) ts_event_en = PTP_TCR_TSEVNTENA; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; -- cgit v1.2.3 From 4225fea1cb28370086e17e82c0f69bec2779dca0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 20 Oct 2021 16:18:34 +0800 Subject: ptp: Fix possible memory leak in ptp_clock_register() I got memory leak as follows when doing fault injection test: unreferenced object 0xffff88800906c618 (size 8): comm "i2c-idt82p33931", pid 4421, jiffies 4294948083 (age 13.188s) hex dump (first 8 bytes): 70 74 70 30 00 00 00 00 ptp0.... backtrace: [<00000000312ed458>] __kmalloc_track_caller+0x19f/0x3a0 [<0000000079f6e2ff>] kvasprintf+0xb5/0x150 [<0000000026aae54f>] kvasprintf_const+0x60/0x190 [<00000000f323a5f7>] kobject_set_name_vargs+0x56/0x150 [<000000004e35abdd>] dev_set_name+0xc0/0x100 [<00000000f20cfe25>] ptp_clock_register+0x9f4/0xd30 [ptp] [<000000008bb9f0de>] idt82p33_probe.cold+0x8b6/0x1561 [ptp_idt82p33] When posix_clock_register() returns an error, the name allocated in dev_set_name() will be leaked, the put_device() should be used to give up the device reference, then the name will be freed in kobject_cleanup() and other memory will be freed in ptp_clock_release(). Reported-by: Hulk Robot Fixes: a33121e5487b ("ptp: fix the race between the release of ptp_clock and cdev") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 4dfc52e06704..7fd02aabd79a 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -283,15 +283,22 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, /* Create a posix clock and link it to the device. */ err = posix_clock_register(&ptp->clock, &ptp->dev); if (err) { + if (ptp->pps_source) + pps_unregister_source(ptp->pps_source); + + kfree(ptp->vclock_index); + + if (ptp->kworker) + kthread_destroy_worker(ptp->kworker); + + put_device(&ptp->dev); + pr_err("failed to create posix clock\n"); - goto no_clock; + return ERR_PTR(err); } return ptp; -no_clock: - if (ptp->pps_source) - pps_unregister_source(ptp->pps_source); no_pps: ptp_cleanup_pin_groups(ptp); no_pin_groups: -- cgit v1.2.3 From 5ebcbe342b1c12fae44b4f83cbeae1520e09857e Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Sat, 16 Oct 2021 12:17:30 -0500 Subject: ucounts: Move get_ucounts from cred_alloc_blank to key_change_session_keyring Setting cred->ucounts in cred_alloc_blank does not make sense. The uid and user_ns are deliberately not set in cred_alloc_blank but instead the setting is delayed until key_change_session_keyring. So move dealing with ucounts into key_change_session_keyring as well. Unfortunately that movement of get_ucounts adds a new failure mode to key_change_session_keyring. I do not see anything stopping the parent process from calling setuid and changing the relevant part of it's cred while keyctl_session_to_parent is running making it fundamentally necessary to call get_ucounts in key_change_session_keyring. Which means that the new failure mode cannot be avoided. A failure of key_change_session_keyring results in a single threaded parent keeping it's existing credentials. Which results in the parent process not being able to access the session keyring and whichever keys are in the new keyring. Further get_ucounts is only expected to fail if the number of bits in the refernece count for the structure is too few. Since the code has no other way to report the failure of get_ucounts and because such failures are not expected to be common add a WARN_ONCE to report this problem to userspace. Between the WARN_ONCE and the parent process not having access to the keys in the new session keyring I expect any failure of get_ucounts will be noticed and reported and we can find another way to handle this condition. (Possibly by just making ucounts->count an atomic_long_t). Cc: stable@vger.kernel.org Fixes: 905ae01c4ae2 ("Add a reference to ucounts for each cred") Link: https://lkml.kernel.org/r/7k0ias0uf.fsf_-_@disp2133 Tested-by: Yu Zhao Reviewed-by: Alexey Gladkov Signed-off-by: "Eric W. Biederman" --- kernel/cred.c | 2 -- security/keys/process_keys.c | 8 ++++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/cred.c b/kernel/cred.c index 16c05dfbec4d..1ae0b4948a5a 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -225,8 +225,6 @@ struct cred *cred_alloc_blank(void) #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; #endif - new->ucounts = get_ucounts(&init_ucounts); - if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index e3d79a7b6db6..b5d5333ab330 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -918,6 +918,13 @@ void key_change_session_keyring(struct callback_head *twork) return; } + /* If get_ucounts fails more bits are needed in the refcount */ + if (unlikely(!get_ucounts(old->ucounts))) { + WARN_ONCE(1, "In %s get_ucounts failed\n", __func__); + put_cred(new); + return; + } + new-> uid = old-> uid; new-> euid = old-> euid; new-> suid = old-> suid; @@ -927,6 +934,7 @@ void key_change_session_keyring(struct callback_head *twork) new-> sgid = old-> sgid; new->fsgid = old->fsgid; new->user = get_uid(old->user); + new->ucounts = old->ucounts; new->user_ns = get_user_ns(old->user_ns); new->group_info = get_group_info(old->group_info); -- cgit v1.2.3 From 280db5d420090a24e4e41f9ddcbf37920a598572 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Wed, 22 Sep 2021 09:54:49 +0300 Subject: e1000e: Separate TGP board type from SPT We have the same LAN controller on different PCHs. Separate TGP board type from SPT which will allow for specific fixes to be applied for TGP platforms. Suggested-by: Kai-Heng Feng Signed-off-by: Sasha Neftin Reviewed-by: Paul Menzel Tested-by: Mark Pearson Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/e1000.h | 4 ++- drivers/net/ethernet/intel/e1000e/ich8lan.c | 20 +++++++++++++ drivers/net/ethernet/intel/e1000e/netdev.c | 45 +++++++++++++++-------------- 3 files changed, 46 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 5b2143f4b1f8..3178efd98006 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -113,7 +113,8 @@ enum e1000_boards { board_pch2lan, board_pch_lpt, board_pch_spt, - board_pch_cnp + board_pch_cnp, + board_pch_tgp }; struct e1000_ps_page { @@ -499,6 +500,7 @@ extern const struct e1000_info e1000_pch2_info; extern const struct e1000_info e1000_pch_lpt_info; extern const struct e1000_info e1000_pch_spt_info; extern const struct e1000_info e1000_pch_cnp_info; +extern const struct e1000_info e1000_pch_tgp_info; extern const struct e1000_info e1000_es2_info; void e1000e_ptp_init(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 60c582a16821..66d7196310e2 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -5992,3 +5992,23 @@ const struct e1000_info e1000_pch_cnp_info = { .phy_ops = &ich8_phy_ops, .nvm_ops = &spt_nvm_ops, }; + +const struct e1000_info e1000_pch_tgp_info = { + .mac = e1000_pch_tgp, + .flags = FLAG_IS_ICH + | FLAG_HAS_WOL + | FLAG_HAS_HW_TIMESTAMP + | FLAG_HAS_CTRLEXT_ON_LOAD + | FLAG_HAS_AMT + | FLAG_HAS_FLASH + | FLAG_HAS_JUMBO_FRAMES + | FLAG_APME_IN_WUC, + .flags2 = FLAG2_HAS_PHY_STATS + | FLAG2_HAS_EEE, + .pba = 26, + .max_hw_frame_size = 9022, + .get_variants = e1000_get_variants_ich8lan, + .mac_ops = &ich8_mac_ops, + .phy_ops = &ich8_phy_ops, + .nvm_ops = &spt_nvm_ops, +}; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 900b3ab998bd..ebcb2a30add0 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -51,6 +51,7 @@ static const struct e1000_info *e1000_info_tbl[] = { [board_pch_lpt] = &e1000_pch_lpt_info, [board_pch_spt] = &e1000_pch_spt_info, [board_pch_cnp] = &e1000_pch_cnp_info, + [board_pch_tgp] = &e1000_pch_tgp_info, }; struct e1000_reg_info { @@ -7896,28 +7897,28 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V11), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM12), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V12), board_pch_spt }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM13), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V13), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM14), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM13), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V13), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM14), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_tgp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; -- cgit v1.2.3 From e139a1ec92f8dbaaa7380d7e7ea17e148d473d06 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 19 Oct 2021 23:43:46 +0100 Subject: io_uring: apply max_workers limit to all future users Currently, IORING_REGISTER_IOWQ_MAX_WORKERS applies only to the task that issued it, it's unexpected for users. If one task creates a ring, limits workers and then passes it to another task the limit won't be applied to the other task. Another pitfall is that a task should either create a ring or submit at least one request for IORING_REGISTER_IOWQ_MAX_WORKERS to work at all, furher complicating the picture. Change the API, save the limits and apply to all future users. Note, it should be done first before giving away the ring or submitting new requests otherwise the result is not guaranteed. Fixes: 2e480058ddc2 ("io-wq: provide a way to limit max number of workers") Link: https://github.com/axboe/liburing/issues/460 Reported-by: Beld Zhang Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/51d0bae97180e08ab722c0d5c93e7439cfb6f697.1634683237.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e68d27829bb2..e8b71f14ac8b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -456,6 +456,8 @@ struct io_ring_ctx { struct work_struct exit_work; struct list_head tctx_list; struct completion ref_comp; + u32 iowq_limits[2]; + bool iowq_limits_set; }; }; @@ -9638,7 +9640,16 @@ static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) ret = io_uring_alloc_task_context(current, ctx); if (unlikely(ret)) return ret; + tctx = current->io_uring; + if (ctx->iowq_limits_set) { + unsigned int limits[2] = { ctx->iowq_limits[0], + ctx->iowq_limits[1], }; + + ret = io_wq_max_workers(tctx->io_wq, limits); + if (ret) + return ret; + } } if (!xa_load(&tctx->xa, (unsigned long)ctx)) { node = kmalloc(sizeof(*node), GFP_KERNEL); @@ -10674,13 +10685,19 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, tctx = current->io_uring; } - ret = -EINVAL; - if (!tctx || !tctx->io_wq) - goto err; + BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); - ret = io_wq_max_workers(tctx->io_wq, new_count); - if (ret) - goto err; + memcpy(ctx->iowq_limits, new_count, sizeof(new_count)); + ctx->iowq_limits_set = true; + + ret = -EINVAL; + if (tctx && tctx->io_wq) { + ret = io_wq_max_workers(tctx->io_wq, new_count); + if (ret) + goto err; + } else { + memset(new_count, 0, sizeof(new_count)); + } if (sqd) { mutex_unlock(&sqd->lock); -- cgit v1.2.3 From 4ea672ab694c23886b52e97cee10dea056e43e62 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 20 Oct 2021 09:53:02 +0100 Subject: io_uring: fix ltimeout unprep io_unprep_linked_timeout() is broken, first it needs to return back REQ_F_ARM_LTIMEOUT, so the linked timeout is enqueued and disarmed. But now we refcounted it, and linked timeouts may get not executed at all, leaking a request. Just kill the unprep optimisation. Fixes: 906c6caaf586 ("io_uring: optimise io_prep_linked_timeout()") Reported-by: Beld Zhang Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/51b8e2bfc4bea8ee625cf2ba62b2a350cc9be031.1634719585.git.asml.silence@gmail.com Link: https://github.com/axboe/liburing/issues/460 Reported-by: Beld Zhang Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e8b71f14ac8b..d5cc103224f1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1370,11 +1370,6 @@ static void io_req_track_inflight(struct io_kiocb *req) } } -static inline void io_unprep_linked_timeout(struct io_kiocb *req) -{ - req->flags &= ~REQ_F_LINK_TIMEOUT; -} - static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) { if (WARN_ON_ONCE(!req->link)) @@ -6985,7 +6980,7 @@ issue_sqe: switch (io_arm_poll_handler(req)) { case IO_APOLL_READY: if (linked_timeout) - io_unprep_linked_timeout(req); + io_queue_linked_timeout(linked_timeout); goto issue_sqe; case IO_APOLL_ABORTED: /* -- cgit v1.2.3 From 639e298f432fb058a9496ea16863f53b1ce935fe Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Wed, 22 Sep 2021 09:55:42 +0300 Subject: e1000e: Fix packet loss on Tiger Lake and later Update the HW MAC initialization flow. Do not gate DMA clock from the modPHY block. Keeping this clock will prevent dropped packets sent in burst mode on the Kumeran interface. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213651 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213377 Fixes: fb776f5d57ee ("e1000e: Add support for Tiger Lake") Signed-off-by: Sasha Neftin Tested-by: Mark Pearson Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 11 ++++++++++- drivers/net/ethernet/intel/e1000e/ich8lan.h | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 66d7196310e2..5e4fc9b4e2ad 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -4813,7 +4813,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; - u32 ctrl_ext, txdctl, snoop; + u32 ctrl_ext, txdctl, snoop, fflt_dbg; s32 ret_val; u16 i; @@ -4872,6 +4872,15 @@ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) snoop = (u32)~(PCIE_NO_SNOOP_ALL); e1000e_set_pcie_no_snoop(hw, snoop); + /* Enable workaround for packet loss issue on TGP PCH + * Do not gate DMA clock from the modPHY block + */ + if (mac->type >= e1000_pch_tgp) { + fflt_dbg = er32(FFLT_DBG); + fflt_dbg |= E1000_FFLT_DBG_DONT_GATE_WAKE_DMA_CLK; + ew32(FFLT_DBG, fflt_dbg); + } + ctrl_ext = er32(CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ew32(CTRL_EXT, ctrl_ext); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index d6a092e5ee74..2504b11c3169 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -289,6 +289,9 @@ /* Proprietary Latency Tolerance Reporting PCI Capability */ #define E1000_PCI_LTR_CAP_LPT 0xA8 +/* Don't gate wake DMA clock */ +#define E1000_FFLT_DBG_DONT_GATE_WAKE_DMA_CLK 0x1000 + void e1000e_write_protect_nvm_ich8lan(struct e1000_hw *hw); void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, bool state); -- cgit v1.2.3 From 79cc8322b6d82747cb63ea464146c0bf5b5a6bc1 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Thu, 9 Sep 2021 20:49:04 +0300 Subject: igc: Update I226_K device ID The device ID for I226_K was incorrectly assigned, update the device ID to the correct one. Fixes: bfa5e98c9de4 ("igc: Add new device ID") Signed-off-by: Sasha Neftin Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 4461f8b9a864..4e0203336c6b 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -22,8 +22,8 @@ #define IGC_DEV_ID_I220_V 0x15F7 #define IGC_DEV_ID_I225_K 0x3100 #define IGC_DEV_ID_I225_K2 0x3101 +#define IGC_DEV_ID_I226_K 0x3102 #define IGC_DEV_ID_I225_LMVP 0x5502 -#define IGC_DEV_ID_I226_K 0x5504 #define IGC_DEV_ID_I225_IT 0x0D9F #define IGC_DEV_ID_I226_LM 0x125B #define IGC_DEV_ID_I226_V 0x125C -- cgit v1.2.3 From 7dcf78b870be6418d72bb1c4d4924bf0f5ca5052 Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Tue, 19 Oct 2021 13:04:16 -0700 Subject: ice: Add missing E810 device ids As part of support for E810 XXV devices, some device ids were inadvertently left out. Add those missing ids. Fixes: 195fb97766da ("ice: add additional E810 device id") Signed-off-by: Tony Nguyen Acked-by: Jesse Brandeburg Acked-by: Paul Menzel --- drivers/net/ethernet/intel/ice/ice_common.c | 2 ++ drivers/net/ethernet/intel/ice/ice_devids.h | 4 ++++ drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 3 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2fb81e359cdf..df5ad4de1f00 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -25,6 +25,8 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) case ICE_DEV_ID_E810C_BACKPLANE: case ICE_DEV_ID_E810C_QSFP: case ICE_DEV_ID_E810C_SFP: + case ICE_DEV_ID_E810_XXV_BACKPLANE: + case ICE_DEV_ID_E810_XXV_QSFP: case ICE_DEV_ID_E810_XXV_SFP: hw->mac_type = ICE_MAC_E810; break; diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index 9d8194671f6a..ef4392e6e244 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -21,6 +21,10 @@ #define ICE_DEV_ID_E810C_QSFP 0x1592 /* Intel(R) Ethernet Controller E810-C for SFP */ #define ICE_DEV_ID_E810C_SFP 0x1593 +/* Intel(R) Ethernet Controller E810-XXV for backplane */ +#define ICE_DEV_ID_E810_XXV_BACKPLANE 0x1599 +/* Intel(R) Ethernet Controller E810-XXV for QSFP */ +#define ICE_DEV_ID_E810_XXV_QSFP 0x159A /* Intel(R) Ethernet Controller E810-XXV for SFP */ #define ICE_DEV_ID_E810_XXV_SFP 0x159B /* Intel(R) Ethernet Connection E823-C for backplane */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 94037881bfd8..06fa93e597fb 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5020,6 +5020,8 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP), 0 }, -- cgit v1.2.3 From 14fe2471c62816ba82546fb68369d957c3a58b59 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 7 Oct 2021 16:05:38 +0300 Subject: net/mlx5: Lag, change multipath and bonding to be mutually exclusive Both multipath and bonding events are changing the HW LAG state independently. Handling one of the features events while the other is already enabled can cause unwanted behavior, for example handling bonding event while multipath enabled will disable the lag and cause multipath to stop working. Fix it by ignoring bonding event while in multipath and ignoring FIB events while in bonding mode. Fixes: 544fe7c2e654 ("net/mlx5e: Activate HW multipath and handle port affinity based on FIB events") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/lag.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 13 ++++++++----- drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h | 2 ++ include/linux/mlx5/driver.h | 1 - 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index b4e986818794..4a13ef561587 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -10,6 +10,8 @@ #include "en_tc.h" #include "rep/tc.h" #include "rep/neigh.h" +#include "lag.h" +#include "lag_mp.h" struct mlx5e_tc_tun_route_attr { struct net_device *out_dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ba8164792016..129ff7e0d65c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -67,6 +67,8 @@ #include "lib/fs_chains.h" #include "diag/en_tc_tracepoint.h" #include +#include "lag.h" +#include "lag_mp.h" #define nic_chains(priv) ((priv)->fs.tc.chains) #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index ca5690b0a7ab..d2105c1635c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -442,6 +442,10 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) if (!mlx5_lag_is_ready(ldev)) { do_bond = false; } else { + /* VF LAG is in multipath mode, ignore bond change requests */ + if (mlx5_lag_is_multipath(dev0)) + return; + tracker = ldev->tracker; do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index f239b352a58a..21fdaf708f1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -9,20 +9,23 @@ #include "eswitch.h" #include "lib/mlx5.h" +static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH); +} + static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) { if (!mlx5_lag_is_ready(ldev)) return false; + if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev)) + return false; + return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, ldev->pf[MLX5_LAG_P2].dev); } -static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) -{ - return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH); -} - bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h index 729c839397a8..dea199e79bed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h @@ -24,12 +24,14 @@ struct lag_mp { void mlx5_lag_mp_reset(struct mlx5_lag *ldev); int mlx5_lag_mp_init(struct mlx5_lag *ldev); void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev); +bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {}; static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; } static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {} +bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; } #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_LAG_MP_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e23417424373..f17d2101af7a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1138,7 +1138,6 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_roce(struct mlx5_core_dev *dev); bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); -bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); bool mlx5_lag_is_master(struct mlx5_core_dev *dev); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); -- cgit v1.2.3 From a6f74333548f56afb413fc928ae1aefc4fe7608f Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 25 Aug 2021 17:51:26 +0300 Subject: net/mlx5: E-switch, Return correct error code on group creation failure Dan Carpenter report: The patch f47e04eb96e0: "net/mlx5: E-switch, Allow setting share/max tx rate limits of rate groups" from May 31, 2021, leads to the following Smatch static checker warning: drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c:483 esw_qos_create_rate_group() warn: passing zero to 'ERR_PTR' If min rate normalization failed then error code may be overwritten to 0 if scheduling element destruction succeed. Ignore this value and always return initial one. Fixes: f47e04eb96e0 ("net/mlx5: E-switch, Allow setting share/max tx rate limits of rate groups") Reported-by: Dan Carpenter Signed-off-by: Dmytro Linkin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 985e305179d1..c6cc67cb4f6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -473,10 +473,9 @@ esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta err_min_rate: list_del(&group->list); - err = mlx5_destroy_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - group->tsar_ix); - if (err) + if (mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix)) NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); err_sched_elem: kfree(group); -- cgit v1.2.3 From 68e66e1a69cd94f934522348ab232af49863452a Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sat, 2 Oct 2021 11:15:35 +0300 Subject: net/mlx5e: Fix vlan data lost during suspend flow During suspend flow the driver calls mlx5e_destroy_vlan_table() which does not only delete the vlans steering flow rules, but also frees the data on currently active vlans, thus it is not restored during resume flow. This fix keeps the vlan data on suspend flow and frees it only on driver remove flow. Fixes: 6783f0a21a3c ("net/mlx5e: Dynamic alloc vlan table for netdev when needed") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/fs.h | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 28 +++++++++++++---------- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++++++ 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 41684a6c44e9..a88a1a48229f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -199,6 +199,9 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); +int mlx5e_fs_init(struct mlx5e_priv *priv); +void mlx5e_fs_cleanup(struct mlx5e_priv *priv); + int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv); int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index c06b4b938ae7..d226cc5ab1d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -1186,10 +1186,6 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) struct mlx5e_flow_table *ft; int err; - priv->fs.vlan = kvzalloc(sizeof(*priv->fs.vlan), GFP_KERNEL); - if (!priv->fs.vlan) - return -ENOMEM; - ft = &priv->fs.vlan->ft; ft->num_groups = 0; @@ -1198,10 +1194,8 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) ft_attr.prio = MLX5E_NIC_PRIO; ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); - if (IS_ERR(ft->t)) { - err = PTR_ERR(ft->t); - goto err_free_t; - } + if (IS_ERR(ft->t)) + return PTR_ERR(ft->t); ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); if (!ft->g) { @@ -1221,9 +1215,6 @@ err_free_g: kfree(ft->g); err_destroy_vlan_table: mlx5_destroy_flow_table(ft->t); -err_free_t: - kvfree(priv->fs.vlan); - priv->fs.vlan = NULL; return err; } @@ -1232,7 +1223,6 @@ static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) { mlx5e_del_vlan_rules(priv); mlx5e_destroy_flow_table(&priv->fs.vlan->ft); - kvfree(priv->fs.vlan); } static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) @@ -1351,3 +1341,17 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } + +int mlx5e_fs_init(struct mlx5e_priv *priv) +{ + priv->fs.vlan = kvzalloc(sizeof(*priv->fs.vlan), GFP_KERNEL); + if (!priv->fs.vlan) + return -ENOMEM; + return 0; +} + +void mlx5e_fs_cleanup(struct mlx5e_priv *priv) +{ + kvfree(priv->fs.vlan); + priv->fs.vlan = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 09c8b71b186c..41ef6eb70a58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4578,6 +4578,12 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5e_timestamp_init(priv); + err = mlx5e_fs_init(priv); + if (err) { + mlx5_core_err(mdev, "FS initialization failed, %d\n", err); + return err; + } + err = mlx5e_ipsec_init(priv); if (err) mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err); @@ -4595,6 +4601,7 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) mlx5e_health_destroy_reporters(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); + mlx5e_fs_cleanup(priv); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) -- cgit v1.2.3 From d10457f85d4ae4d32c0df0cd65358a78c577fbe6 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Mon, 18 Oct 2021 15:30:09 +0300 Subject: net/mlx5e: IPsec: Fix a misuse of the software parser's fields IPsec crypto offload current Software Parser (SWP) fields settings in the ethernet segment (eseg) are not aligned with PRM/HW expectations. Among others in case of IP|ESP|TCP packet, current driver sets the offsets for inner_l3 and inner_l4 although there is no inner l3/l4 headers relative to ESP header in such packets. SWP provides the offsets for HW ,so it can be used to find csum fields to offload the checksum, however these are not necessarily used by HW and are used as fallback in case HW fails to parse the packet, e.g when performing IPSec Transport Aware (IP | ESP | TCP) there is no need to add SW parse on inner packet. So in some cases packets csum was calculated correctly , whereas in other cases it failed. The later faced csum errors (caused by wrong packet length calculations) which led to lots of packet drops hence the low throughput. Fix by setting the SWP fields as expected in a IP|ESP|TCP packet. the following describe the expected SWP offsets: * Tunnel Mode: * SWP: OutL3 InL3 InL4 * Pkt: MAC IP ESP IP L4 * * Transport Mode: * SWP: OutL3 OutL4 * Pkt: MAC IP ESP L4 * * Tunnel(VXLAN TCP/UDP) over Transport Mode * SWP: OutL3 InL3 InL4 * Pkt: MAC IP ESP UDP VXLAN IP L4 Fixes: f1267798c980 ("net/mlx5: Fix checksum issue of VXLAN and IPsec crypto offload") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 51 ++++++++++++---------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 33de8f0092a6..fb5397324aa4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -141,8 +141,7 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, * Pkt: MAC IP ESP IP L4 * * Transport Mode: - * SWP: OutL3 InL4 - * InL3 + * SWP: OutL3 OutL4 * Pkt: MAC IP ESP L4 * * Tunnel(VXLAN TCP/UDP) over Transport Mode @@ -171,31 +170,35 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, return; if (!xo->inner_ipproto) { - eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; - eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; - if (skb->protocol == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - if (xo->proto == IPPROTO_UDP) + switch (xo->proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + /* IP | ESP | TCP */ + eseg->swp_outer_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + default: + break; + } + } else { + /* Tunnel(VXLAN TCP/UDP) over Transport Mode */ + switch (xo->inner_ipproto) { + case IPPROTO_UDP: eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; - return; - } - - /* Tunnel(VXLAN TCP/UDP) over Transport Mode */ - switch (xo->inner_ipproto) { - case IPPROTO_UDP: - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; - fallthrough; - case IPPROTO_TCP: - eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; - eseg->swp_inner_l4_offset = (skb->csum_start + skb->head - skb->data) / 2; - if (skb->protocol == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - break; - default: - break; + fallthrough; + case IPPROTO_TCP: + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + eseg->swp_inner_l4_offset = + (skb->csum_start + skb->head - skb->data) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + break; + default: + break; + } } - return; } void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, -- cgit v1.2.3 From 1d000323940137332d4d62c6332b6daf5f07aba7 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Mon, 18 Oct 2021 15:31:19 +0300 Subject: net/mlx5e: IPsec: Fix work queue entry ethernet segment checksum flags Current Work Queue Entry (WQE) checksum (csum) flags in the ethernet segment (eseg) in case of IPsec crypto offload datapath are not aligned with PRM/HW expectations. Currently the driver always sets the l3_inner_csum flag in case of IPsec because of the wrong usage of skb->encapsulation as indicator for inner IPsec header since skb->encapsulation is always ON for IPsec packets since IPsec itself is an encapsulation protocol. The above forced a failing attempts of calculating csum of non-existing segments (like in the IP|ESP|TCP packet case which does not have an l3_inner) which led to lots of packet drops hence the low throughput. Fix by using xo->inner_ipproto as indicator for inner IPsec header instead of skb->encapsulation in addition to setting the csum flags as following: * Tunnel Mode: * Pkt: MAC IP ESP IP L4 * CSUM: l3_cs | l3_inner_cs | l4_inner_cs * * Transport Mode: * Pkt: MAC IP ESP L4 * CSUM: l3_cs [ | l4_cs (checksum partial case)] * * Tunnel(VXLAN TCP/UDP) over Transport Mode * Pkt: MAC IP ESP UDP VXLAN IP L4 * CSUM: l3_cs | l3_inner_cs | l4_inner_cs Fixes: f1267798c980 ("net/mlx5: Fix checksum issue of VXLAN and IPsec crypto offload") Signed-off-by: Emeel Hakim Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index c63d78eda606..188994d091c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -213,19 +213,18 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); } -/* If packet is not IP's CHECKSUM_PARTIAL (e.g. icmd packet), - * need to set L3 checksum flag for IPsec - */ static void ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { + struct xfrm_offload *xo = xfrm_offload(skb); + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; - if (skb->encapsulation) { - eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + if (xo->inner_ipproto) { + eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM; + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial_inner++; - } else { - sq->stats->csum_partial++; } } @@ -234,6 +233,11 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, struct mlx5_wqe_eth_seg *eseg) { + if (unlikely(mlx5e_ipsec_eseg_meta(eseg))) { + ipsec_txwqe_build_eseg_csum(sq, skb, eseg); + return; + } + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; if (skb->encapsulation) { @@ -249,8 +253,6 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial++; #endif - } else if (unlikely(mlx5e_ipsec_eseg_meta(eseg))) { - ipsec_txwqe_build_eseg_csum(sq, skb, eseg); } else sq->stats->csum_none++; } -- cgit v1.2.3 From 5afa7898ab7a0ec9c28556a91df714bf3c2f725e Mon Sep 17 00:00:00 2001 From: Thelford Williams Date: Wed, 13 Oct 2021 16:04:13 -0400 Subject: drm/amdgpu: fix out of bounds write Size can be any value and is user controlled resulting in overwriting the 40 byte array wr_buf with an arbitrary length of data from buf. Signed-off-by: Thelford Williams Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 87daa78a32b8..17f2756a64dc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -263,7 +263,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, if (!wr_buf) return -ENOSPC; - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { -- cgit v1.2.3 From c21b105380cf86e829c68586ca1315cfc253ad8c Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Tue, 28 Sep 2021 22:43:52 -0400 Subject: drm/amd/display: Limit display scaling to up to true 4k for DCN 3.1 [why] The requirement is that image width up to 4096 shall be supported Reviewed-by: Aric Cyr Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Nikola Cornij Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 0006bbac466c..1e3d50da1f93 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -928,7 +928,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .vsr_support = true, .performance_trace = false, - .max_downscale_src_width = 3840,/*upto 4K*/ + .max_downscale_src_width = 4096,/*upto true 4K*/ .disable_pplib_wm_range = false, .scl_reset_length10 = true, .sanity_checks = false, -- cgit v1.2.3 From c938aed88f8259dc913b717a32319101c66e87a9 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 29 Sep 2021 11:37:33 -0400 Subject: drm/amd/display: Fix prefetch bandwidth calculation for DCN3.1 [Why] Prefetch BW calculated is lower than the DML reference because of a porting error that's excluding cursor and row bandwidth from the pixel data bandwidth. [How] Change the dml_max4 to dml_max3 and include cursor and row bandwidth in the same calculation as the rest of the pixel data during vactive. Reviewed-by: Dmytro Laktyushkin Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index ce55c9caf9a2..d58925cff420 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -5398,9 +5398,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch - + dml_max4( - v->VActivePixelBandwidth[i][j][k], - v->VActiveCursorBandwidth[i][j][k] + + dml_max3( + v->VActivePixelBandwidth[i][j][k] + + v->VActiveCursorBandwidth[i][j][k] + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]), -- cgit v1.2.3 From 672437486ee9da3ed0e774937e6d0dd570921b39 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 29 Sep 2021 16:22:53 -0400 Subject: drm/amd/display: Require immediate flip support for DCN3.1 planes [Why] Immediate flip can be enabled dynamically and has higher BW requirements when validating which voltage mode to use. If we validate when it's not set then potentially DCFCLK will be too low and we will underflow. [How] DM always requires support so always require it as part of DML input parameters. This can't be enabled unconditionally on older ASIC because it blocks some expected modes so only target DCN3.1 for now. Reviewed-by: Dmytro Laktyushkin Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 1e3d50da1f93..df6abe2b1f9e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1590,6 +1590,13 @@ static int dcn31_populate_dml_pipes_from_context( pipe = &res_ctx->pipe_ctx[i]; timing = &pipe->stream->timing; + /* + * Immediate flip can be set dynamically after enabling the plane. + * We need to require support for immediate flip or underflow can be + * intermittently experienced depending on peak b/w requirements. + */ + pipes[pipe_cnt].pipe.src.immediate_flip = true; + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; -- cgit v1.2.3 From 4835ea6c173a8d8dfbfdbb21c4cd987d12681610 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Thu, 30 Sep 2021 13:46:45 -0400 Subject: drm/amd/display: increase Z9 latency to workaround underflow in Z9 [Why] Z9 latency is higher than when we originally tuned the watermark parameters, causing underflow. Increasing the value until the latency issues is resolved. Reviewed-by: Nicholas Kazlauskas Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Eric Yang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index df6abe2b1f9e..79e92ecca96c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -217,8 +217,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { .num_states = 5, .sr_exit_time_us = 9.0, .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 402.0, - .sr_enter_plus_exit_z8_time_us = 520.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, .writeback_latency_us = 12.0, .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, -- cgit v1.2.3 From dd8cb18906d97b2916fde42d32d915ae363c7e55 Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Fri, 1 Oct 2021 13:26:05 -0400 Subject: drm/amd/display: Increase watermark latencies for DCN3.1 [why] The original latencies were causing underflow in some modes [how] Replace with the up-to-date watermark values based on new measurments Reviewed-by: Ahmad Othman Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Nikola Cornij Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 4a4894e9d9c9..035ba0ef6369 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -366,32 +366,32 @@ static struct wm_table lpddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 5.32, - .sr_enter_plus_exit_time_us = 6.38, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, + .sr_exit_time_us = 11.5, + .sr_enter_plus_exit_time_us = 14.5, .valid = true, }, } -- cgit v1.2.3 From 2ef8ea23942f4c2569930c34e7689a0cb1b232cc Mon Sep 17 00:00:00 2001 From: Jake Wang Date: Fri, 1 Oct 2021 17:14:21 -0400 Subject: drm/amd/display: Moved dccg init to after bios golden init [Why] bios_golden_init will override dccg_init during init_hw. [How] Move dccg_init to after bios_golden_init. Reviewed-by: Aric Cyr Reviewed-by: Eric Yang Acked-by: Agustin Gutierrez Sanchez Signed-off-by: Jake Wang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 3f2333ec67e2..3afa1159a5f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -76,10 +76,6 @@ void dcn31_init_hw(struct dc *dc) if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); - // Initialize the dccg - if (res_pool->dccg->funcs->dccg_init) - res_pool->dccg->funcs->dccg_init(res_pool->dccg); - if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { REG_WRITE(REFCLK_CNTL, 0); @@ -106,6 +102,9 @@ void dcn31_init_hw(struct dc *dc) hws->funcs.bios_golden_init(dc); hws->funcs.disable_vga(dc->hwseq); } + // Initialize the dccg + if (res_pool->dccg->funcs->dccg_init) + res_pool->dccg->funcs->dccg_init(res_pool->dccg); if (dc->debug.enable_mem_low_power.bits.dmcu) { // Force ERAM to shutdown if DMCU is not enabled -- cgit v1.2.3 From 53c2ff8bcb06acd07e24a62e7f5a0247bd7c6f67 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 19 Oct 2021 11:13:25 +0800 Subject: drm/amdgpu: support B0&B1 external revision id for yellow carp B0 internal rev_id is 0x01, B1 internal rev_id is 0x02. The external rev_id for B0 and B1 is 0x20. The original expression is not suitable for B1. v2: squash in fix for display code (Alex) Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/nv.c | 2 +- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index ff80786e3918..01efda4398e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1257,7 +1257,7 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; if (adev->pdev->device == 0x1681) - adev->external_rev_id = adev->rev_id + 0x19; + adev->external_rev_id = 0x20; else adev->external_rev_id = adev->rev_id + 0x01; break; diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 5adc471bef57..3d2f0817e40a 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -227,7 +227,7 @@ enum { #define FAMILY_YELLOW_CARP 146 #define YELLOW_CARP_A0 0x01 -#define YELLOW_CARP_B0 0x1A +#define YELLOW_CARP_B0 0x20 #define YELLOW_CARP_UNKNOWN 0xFF #ifndef ASICREV_IS_YELLOW_CARP -- cgit v1.2.3 From 25f54d08f12feb593e62cc2193fedefaf7825301 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 23 Sep 2021 15:13:39 +0800 Subject: autofs: fix wait name hash calculation in autofs_wait() There's a mistake in commit 2be7828c9fefc ("get rid of autofs_getpath()") that affects kernels from v5.13.0, basically missed because of me not fully testing the change for Al. The problem is that the hash calculation for the wait name qstr hasn't been updated to account for the change to use dentry_path_raw(). This prevents the correct matching an existing wait resulting in multiple notifications being sent to the daemon for the same mount which must not occur. The problem wasn't discovered earlier because it only occurs when multiple processes trigger a request for the same mount concurrently so it only shows up in more aggressive testing. Fixes: 2be7828c9fefc ("get rid of autofs_getpath()") Cc: stable@vger.kernel.org Signed-off-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs/waitq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index 16b5fca0626e..54c1f8b8b075 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -358,7 +358,7 @@ int autofs_wait(struct autofs_sb_info *sbi, qstr.len = strlen(p); offset = p - name; } - qstr.hash = full_name_hash(dentry, name, qstr.len); + qstr.hash = full_name_hash(dentry, qstr.name, qstr.len); if (mutex_lock_interruptible(&sbi->wq_mutex)) { kfree(name); -- cgit v1.2.3 From e20f80b9b163dc402dca115eed0affba6df5ebb5 Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 19 Oct 2021 10:21:29 -0500 Subject: scsi: ibmvfc: Fix up duplicate response detection Commit a264cf5e81c7 ("scsi: ibmvfc: Fix command state accounting and stale response detection") introduced a regression in detecting duplicate responses. This was observed in test where a command was sent to the VIOS and completed before ibmvfc_send_event() set the active flag to 1, which resulted in the atomic_dec_if_positive() call in ibmvfc_handle_crq() thinking this was a duplicate response, which resulted in scsi_done() not getting called, so we then hit a SCSI command timeout for this command once the timeout expires. This simply ensures the active flag gets set prior to making the hcall to send the command to the VIOS, in order to close this window. Link: https://lore.kernel.org/r/20211019152129.16558-1-brking@linux.vnet.ibm.com Fixes: a264cf5e81c7 ("scsi: ibmvfc: Fix command state accounting and stale response detection") Cc: stable@vger.kernel.org Acked-by: Tyrel Datwyler Signed-off-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 1f1586ad48fe..01f79991bf4a 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1696,6 +1696,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, spin_lock_irqsave(&evt->queue->l_lock, flags); list_add_tail(&evt->queue_list, &evt->queue->sent); + atomic_set(&evt->active, 1); mb(); @@ -1710,6 +1711,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, be64_to_cpu(crq_as_u64[1])); if (rc) { + atomic_set(&evt->active, 0); list_del(&evt->queue_list); spin_unlock_irqrestore(&evt->queue->l_lock, flags); del_timer(&evt->timer); @@ -1737,7 +1739,6 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, evt->done(evt); } else { - atomic_set(&evt->active, 1); spin_unlock_irqrestore(&evt->queue->l_lock, flags); ibmvfc_trc_start(evt); } -- cgit v1.2.3 From 282da7cef078a87b6d5e8ceba8b17e428cf0e37c Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Mon, 18 Oct 2021 15:28:41 +0900 Subject: scsi: ufs: ufs-exynos: Correct timeout value setting registers PA_PWRMODEUSERDATA0 -> DL_FC0PROTTIMEOUTVAL PA_PWRMODEUSERDATA1 -> DL_TC0REPLAYTIMEOUTVAL PA_PWRMODEUSERDATA2 -> DL_AFC0REQTIMEOUTVAL Link: https://lore.kernel.org/r/20211018062841.18226-1-chanho61.park@samsung.com Fixes: a967ddb22d94 ("scsi: ufs: ufs-exynos: Apply vendor-specific values for three timeouts") Cc: Alim Akhtar Cc: Kiwoong Kim Cc: Krzysztof Kozlowski Reviewed-by: Alim Akhtar Reviewed-by: Avri Altman Signed-off-by: Chanho Park Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-exynos.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufs-exynos.c b/drivers/scsi/ufs/ufs-exynos.c index a14dd8ce56d4..bb2dd79a1bcd 100644 --- a/drivers/scsi/ufs/ufs-exynos.c +++ b/drivers/scsi/ufs/ufs-exynos.c @@ -642,9 +642,9 @@ static int exynos_ufs_pre_pwr_mode(struct ufs_hba *hba, } /* setting for three timeout values for traffic class #0 */ - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA0), 8064); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA1), 28224); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA2), 20160); + ufshcd_dme_set(hba, UIC_ARG_MIB(DL_FC0PROTTIMEOUTVAL), 8064); + ufshcd_dme_set(hba, UIC_ARG_MIB(DL_TC0REPLAYTIMEOUTVAL), 28224); + ufshcd_dme_set(hba, UIC_ARG_MIB(DL_AFC0REQTIMEOUTVAL), 20160); return 0; out: -- cgit v1.2.3 From de7cd3f6761f49bef044ec49493d88737a70f1a6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 Oct 2021 06:27:36 -0400 Subject: KVM: x86: check for interrupts before deciding whether to exit the fast path The kvm_x86_sync_pir_to_irr callback can sometimes set KVM_REQ_EVENT. If that happens exactly at the time that an exit is handled as EXIT_FASTPATH_REENTER_GUEST, vcpu_enter_guest will go incorrectly through the loop that calls kvm_x86_run, instead of processing the request promptly. Fixes: 379a3c8ee444 ("KVM: VMX: Optimize posted-interrupt delivery for timer fastpath") Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c8b5129effd..381384a54790 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9643,14 +9643,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) break; - if (unlikely(kvm_vcpu_exit_request(vcpu))) { + if (vcpu->arch.apicv_active) + static_call(kvm_x86_sync_pir_to_irr)(vcpu); + + if (unlikely(kvm_vcpu_exit_request(vcpu))) { exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; break; } - - if (vcpu->arch.apicv_active) - static_call(kvm_x86_sync_pir_to_irr)(vcpu); - } + } /* * Do this here before restoring debug registers on the host. And -- cgit v1.2.3 From 3a25dfa67fe40f3a2690af2c562e0947a78bd6a0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 Oct 2021 06:22:59 -0400 Subject: KVM: nVMX: promptly process interrupts delivered while in guest mode Since commit c300ab9f08df ("KVM: x86: Replace late check_nested_events() hack with more precise fix") there is no longer the certainty that check_nested_events() tries to inject an external interrupt vmexit to L1 on every call to vcpu_enter_guest. Therefore, even in that case we need to set KVM_REQ_EVENT. This ensures that inject_pending_event() is called, and from there kvm_check_nested_events(). Fixes: c300ab9f08df ("KVM: x86: Replace late check_nested_events() hack with more precise fix") Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7fb2a3a1ca46..7d595effb66f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6305,18 +6305,13 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) /* * If we are running L2 and L1 has a new pending interrupt - * which can be injected, we should re-evaluate - * what should be done with this new L1 interrupt. - * If L1 intercepts external-interrupts, we should - * exit from L2 to L1. Otherwise, interrupt should be - * delivered directly to L2. + * which can be injected, this may cause a vmexit or it may + * be injected into L2. Either way, this interrupt will be + * processed via KVM_REQ_EVENT, not RVI, because we do not use + * virtual interrupt delivery to inject L1 interrupts into L2. */ - if (is_guest_mode(vcpu) && max_irr_updated) { - if (nested_exit_on_intr(vcpu)) - kvm_vcpu_exiting_guest_mode(vcpu); - else - kvm_make_request(KVM_REQ_EVENT, vcpu); - } + if (is_guest_mode(vcpu) && max_irr_updated) + kvm_make_request(KVM_REQ_EVENT, vcpu); } else { max_irr = kvm_lapic_find_highest_irr(vcpu); } -- cgit v1.2.3 From d534d31d6a45d71de61db22090b4820afb68fddc Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:38 +0200 Subject: fuse: check s_root when destroying sb Checking "fm" works because currently sb->s_fs_info is cleared on error paths; however, sb->s_root is what generic_shutdown_super() checks to determine whether the sb was fully initialized or not. This change will allow cleanup of sb setup error paths. Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 2 +- fs/fuse/virtio_fs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 36cd03114b6d..60ceaf332840 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1747,7 +1747,7 @@ static void fuse_sb_destroy(struct super_block *sb) struct fuse_mount *fm = get_fuse_mount_super(sb); bool last; - if (fm) { + if (sb->s_root) { last = fuse_mount_remove(fm); if (last) fuse_conn_destroy(fm); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 0ad89c6629d7..32fd138c621e 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1394,7 +1394,7 @@ static void virtio_kill_sb(struct super_block *sb) bool last; /* If mount failed, we can still be called without any fc */ - if (fm) { + if (sb->s_root) { last = fuse_mount_remove(fm); if (last) virtio_fs_conn_destroy(fm); -- cgit v1.2.3 From a27c061a49afd7ad2d935e6ac734e2a9f62861b8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:38 +0200 Subject: fuse: get rid of fuse_put_super() The ->put_super callback is called from generic_shutdown_super() in case of a fully initialized sb. This is called from kill_***_super(), which is called from ->kill_sb instances. Fuse uses ->put_super to destroy the fs specific fuse_mount and drop the reference to the fuse_conn, while it does the same on each error case during sb setup. This patch moves the destruction from fuse_put_super() to fuse_mount_destroy(), called at the end of all ->kill_sb instances. A follup patch will clean up the error paths. Signed-off-by: Miklos Szeredi --- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 20 +++++++++++--------- fs/fuse/virtio_fs.c | 1 + 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 319596df5dc6..f55f9f94b1a4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1121,6 +1121,9 @@ int fuse_init_fs_context_submount(struct fs_context *fsc); */ void fuse_conn_destroy(struct fuse_mount *fm); +/* Drop the connection and free the fuse mount */ +void fuse_mount_destroy(struct fuse_mount *fm); + /** * Add connection to control filesystem */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 60ceaf332840..30ca1f71d777 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -457,14 +457,6 @@ static void fuse_send_destroy(struct fuse_mount *fm) } } -static void fuse_put_super(struct super_block *sb) -{ - struct fuse_mount *fm = get_fuse_mount_super(sb); - - fuse_conn_put(fm->fc); - kfree(fm); -} - static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) { stbuf->f_type = FUSE_SUPER_MAGIC; @@ -1003,7 +995,6 @@ static const struct super_operations fuse_super_operations = { .evict_inode = fuse_evict_inode, .write_inode = fuse_write_inode, .drop_inode = generic_delete_inode, - .put_super = fuse_put_super, .umount_begin = fuse_umount_begin, .statfs = fuse_statfs, .sync_fs = fuse_sync_fs, @@ -1754,10 +1745,20 @@ static void fuse_sb_destroy(struct super_block *sb) } } +void fuse_mount_destroy(struct fuse_mount *fm) +{ + if (fm) { + fuse_conn_put(fm->fc); + kfree(fm); + } +} +EXPORT_SYMBOL(fuse_mount_destroy); + static void fuse_kill_sb_anon(struct super_block *sb) { fuse_sb_destroy(sb); kill_anon_super(sb); + fuse_mount_destroy(get_fuse_mount_super(sb)); } static struct file_system_type fuse_fs_type = { @@ -1775,6 +1776,7 @@ static void fuse_kill_sb_blk(struct super_block *sb) { fuse_sb_destroy(sb); kill_block_super(sb); + fuse_mount_destroy(get_fuse_mount_super(sb)); } static struct file_system_type fuseblk_fs_type = { diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 32fd138c621e..fa80d250c802 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1400,6 +1400,7 @@ static void virtio_kill_sb(struct super_block *sb) virtio_fs_conn_destroy(fm); } kill_anon_super(sb); + fuse_mount_destroy(fm); } static int virtio_fs_test_super(struct super_block *sb, -- cgit v1.2.3 From c191cd07ee948c93081d8e4cba43d23b18b2f3da Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:39 +0200 Subject: fuse: clean up fuse_mount destruction 1. call fuse_mount_destroy() for open coded variants 2. before deactivate_locked_super() don't need fuse_mount destruction since that will now be done (if ->s_fs_info is not cleared) 3. rearrange fuse_mount setup in fuse_get_tree_submount() so that the regular pattern can be used Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 17 +++++------------ fs/fuse/virtio_fs.c | 9 ++------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 30ca1f71d777..308daee54c12 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1415,20 +1415,17 @@ static int fuse_get_tree_submount(struct fs_context *fsc) if (!fm) return -ENOMEM; + fm->fc = fuse_conn_get(fc); fsc->s_fs_info = fm; sb = sget_fc(fsc, NULL, set_anon_super_fc); - if (IS_ERR(sb)) { - kfree(fm); + if (fsc->s_fs_info) + fuse_mount_destroy(fm); + if (IS_ERR(sb)) return PTR_ERR(sb); - } - fm->fc = fuse_conn_get(fc); /* Initialize superblock, making @mp_fi its root */ err = fuse_fill_super_submount(sb, mp_fi); if (err) { - fuse_conn_put(fc); - kfree(fm); - sb->s_fs_info = NULL; deactivate_locked_super(sb); return err; } @@ -1595,16 +1592,12 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) err = fuse_fill_super_common(sb, ctx); if (err) - goto err_put_conn; + goto err; /* file->private_data shall be visible on all CPUs after this */ smp_mb(); fuse_send_init(get_fuse_mount_super(sb)); return 0; - err_put_conn: - fuse_conn_put(fc); - kfree(fm); - sb->s_fs_info = NULL; err: return err; } diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index fa80d250c802..94fc874f5de7 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1456,19 +1456,14 @@ static int virtio_fs_get_tree(struct fs_context *fsc) fsc->s_fs_info = fm; sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc); - if (fsc->s_fs_info) { - fuse_conn_put(fc); - kfree(fm); - } + if (fsc->s_fs_info) + fuse_mount_destroy(fm); if (IS_ERR(sb)) return PTR_ERR(sb); if (!sb->s_root) { err = virtio_fs_fill_super(sb, fsc); if (err) { - fuse_conn_put(fc); - kfree(fm); - sb->s_fs_info = NULL; deactivate_locked_super(sb); return err; } -- cgit v1.2.3 From 80019f1138324b6f35ae728b4f25eeb08899b452 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:39 +0200 Subject: fuse: always initialize sb->s_fs_info Syzkaller reports a null pointer dereference in fuse_test_super() that is caused by sb->s_fs_info being NULL. This is due to the fact that fuse_fill_super() is initializing s_fs_info, which is too late, it's already on the fs_supers list. The initialization needs to be done in sget_fc() with the sb_lock held. Move allocation of fuse_mount and fuse_conn from fuse_fill_super() into fuse_get_tree(). After this ->kill_sb() will always be called with non-NULL ->s_fs_info, hence fuse_mount_destroy() can drop the test for non-NULL "fm". Reported-by: syzbot+74a15f02ccb51f398601@syzkaller.appspotmail.com Fixes: 5d5b74aa9c76 ("fuse: allow sharing existing sb") Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 308daee54c12..b468f55634f2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1557,8 +1557,6 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; int err; - struct fuse_conn *fc; - struct fuse_mount *fm; if (!ctx->file || !ctx->rootmode_present || !ctx->user_id_present || !ctx->group_id_present) @@ -1574,22 +1572,6 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) goto err; ctx->fudptr = &ctx->file->private_data; - fc = kmalloc(sizeof(*fc), GFP_KERNEL); - err = -ENOMEM; - if (!fc) - goto err; - - fm = kzalloc(sizeof(*fm), GFP_KERNEL); - if (!fm) { - kfree(fc); - goto err; - } - - fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); - fc->release = fuse_free_conn; - - sb->s_fs_info = fm; - err = fuse_fill_super_common(sb, ctx); if (err) goto err; @@ -1621,22 +1603,40 @@ static int fuse_get_tree(struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; struct fuse_dev *fud; + struct fuse_conn *fc; + struct fuse_mount *fm; struct super_block *sb; int err; + fc = kmalloc(sizeof(*fc), GFP_KERNEL); + if (!fc) + return -ENOMEM; + + fm = kzalloc(sizeof(*fm), GFP_KERNEL); + if (!fm) { + kfree(fc); + return -ENOMEM; + } + + fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL); + fc->release = fuse_free_conn; + + fsc->s_fs_info = fm; + if (ctx->fd_present) ctx->file = fget(ctx->fd); if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) { err = get_tree_bdev(fsc, fuse_fill_super); - goto out_fput; + goto out; } /* * While block dev mount can be initialized with a dummy device fd * (found by device name), normal fuse mounts can't */ + err = -EINVAL; if (!ctx->file) - return -EINVAL; + goto out; /* * Allow creating a fuse mount with an already initialized fuse @@ -1652,7 +1652,9 @@ static int fuse_get_tree(struct fs_context *fsc) } else { err = get_tree_nodev(fsc, fuse_fill_super); } -out_fput: +out: + if (fsc->s_fs_info) + fuse_mount_destroy(fm); if (ctx->file) fput(ctx->file); return err; @@ -1740,10 +1742,8 @@ static void fuse_sb_destroy(struct super_block *sb) void fuse_mount_destroy(struct fuse_mount *fm) { - if (fm) { - fuse_conn_put(fm->fc); - kfree(fm); - } + fuse_conn_put(fm->fc); + kfree(fm); } EXPORT_SYMBOL(fuse_mount_destroy); -- cgit v1.2.3 From 964d32e512670c7b87870e30cfed2303da86d614 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:39 +0200 Subject: fuse: clean up error exits in fuse_fill_super() Instead of "goto err", return error directly, since there's no error cleanup to do now. Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index b468f55634f2..12d49a1914e8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1566,22 +1566,18 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) * Require mount to happen from the same user namespace which * opened /dev/fuse to prevent potential attacks. */ - err = -EINVAL; if ((ctx->file->f_op != &fuse_dev_operations) || (ctx->file->f_cred->user_ns != sb->s_user_ns)) - goto err; + return -EINVAL; ctx->fudptr = &ctx->file->private_data; err = fuse_fill_super_common(sb, ctx); if (err) - goto err; + return err; /* file->private_data shall be visible on all CPUs after this */ smp_mb(); fuse_send_init(get_fuse_mount_super(sb)); return 0; - - err: - return err; } /* -- cgit v1.2.3 From 3cfc183052c3dbf8eae57b6c1685dab00ed3db4a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 16 Oct 2021 23:04:46 +0200 Subject: drm: mxsfb: Fix NULL pointer dereference crash on unload The mxsfb->crtc.funcs may already be NULL when unloading the driver, in which case calling mxsfb_irq_disable() via drm_irq_uninstall() from mxsfb_unload() leads to NULL pointer dereference. Since all we care about is masking the IRQ and mxsfb->base is still valid, just use that to clear and mask the IRQ. Fixes: ae1ed00932819 ("drm: mxsfb: Stop using DRM simple display pipeline helper") Signed-off-by: Marek Vasut Cc: Daniel Abrecht Cc: Emil Velikov Cc: Laurent Pinchart Cc: Sam Ravnborg Cc: Stefan Agner Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20211016210446.171616-1-marex@denx.de Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/mxsfb/mxsfb_drv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index ec0432fe1bdf..86d78634a979 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -173,7 +173,11 @@ static void mxsfb_irq_disable(struct drm_device *drm) struct mxsfb_drm_private *mxsfb = drm->dev_private; mxsfb_enable_axi_clk(mxsfb); - mxsfb->crtc.funcs->disable_vblank(&mxsfb->crtc); + + /* Disable and clear VBLANK IRQ */ + writel(CTRL1_CUR_FRAME_DONE_IRQ_EN, mxsfb->base + LCDC_CTRL1 + REG_CLR); + writel(CTRL1_CUR_FRAME_DONE_IRQ, mxsfb->base + LCDC_CTRL1 + REG_CLR); + mxsfb_disable_axi_clk(mxsfb); } -- cgit v1.2.3 From 772970620a839141835eaf2bc507d957b10adcca Mon Sep 17 00:00:00 2001 From: Dan Johansen Date: Wed, 18 Aug 2021 23:48:18 +0200 Subject: drm/panel: ilitek-ili9881c: Fix sync for Feixin K101-IM2BYL02 panel This adjusts sync values according to the datasheet Fixes: 1c243751c095 ("drm/panel: ilitek-ili9881c: add support for Feixin K101-IM2BYL02 panel") Co-developed-by: Marius Gripsgard Signed-off-by: Dan Johansen Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20210818214818.298089-1-strit@manjaro.org Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/panel/panel-ilitek-ili9881c.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c index 0145129d7c66..534dd7414d42 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c @@ -590,14 +590,14 @@ static const struct drm_display_mode k101_im2byl02_default_mode = { .clock = 69700, .hdisplay = 800, - .hsync_start = 800 + 6, - .hsync_end = 800 + 6 + 15, - .htotal = 800 + 6 + 15 + 16, + .hsync_start = 800 + 52, + .hsync_end = 800 + 52 + 8, + .htotal = 800 + 52 + 8 + 48, .vdisplay = 1280, - .vsync_start = 1280 + 8, - .vsync_end = 1280 + 8 + 48, - .vtotal = 1280 + 8 + 48 + 52, + .vsync_start = 1280 + 16, + .vsync_end = 1280 + 16 + 6, + .vtotal = 1280 + 16 + 6 + 15, .width_mm = 135, .height_mm = 217, -- cgit v1.2.3 From 3e4c31e8f70251732529a10934355084c7fab0ac Mon Sep 17 00:00:00 2001 From: Anitha Chrisanthus Date: Tue, 15 Dec 2020 11:13:09 -0800 Subject: drm/kmb: Work around for higher system clock Use a different value for system clock offset in the ppl/llp ratio calculations for clocks higher than 500 Mhz. Fixes: 98521f4d4b4c ("drm/kmb: Mipi DSI part of the display driver") Signed-off-by: Anitha Chrisanthus Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20211013233632.471892-1-anitha.chrisanthus@intel.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/kmb/kmb_dsi.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/kmb/kmb_dsi.c b/drivers/gpu/drm/kmb/kmb_dsi.c index 1793cd31b117..86e8e7943e89 100644 --- a/drivers/gpu/drm/kmb/kmb_dsi.c +++ b/drivers/gpu/drm/kmb/kmb_dsi.c @@ -482,6 +482,10 @@ static u32 mipi_tx_fg_section_cfg(struct kmb_dsi *kmb_dsi, return 0; } +#define CLK_DIFF_LOW 50 +#define CLK_DIFF_HI 60 +#define SYSCLK_500 500 + static void mipi_tx_fg_cfg_regs(struct kmb_dsi *kmb_dsi, u8 frame_gen, struct mipi_tx_frame_timing_cfg *fg_cfg) { @@ -492,7 +496,12 @@ static void mipi_tx_fg_cfg_regs(struct kmb_dsi *kmb_dsi, u8 frame_gen, /* 500 Mhz system clock minus 50 to account for the difference in * MIPI clock speed in RTL tests */ - sysclk = kmb_dsi->sys_clk_mhz - 50; + if (kmb_dsi->sys_clk_mhz == SYSCLK_500) { + sysclk = kmb_dsi->sys_clk_mhz - CLK_DIFF_LOW; + } else { + /* 700 Mhz clk*/ + sysclk = kmb_dsi->sys_clk_mhz - CLK_DIFF_HI; + } /* PPL-Pixel Packing Layer, LLP-Low Level Protocol * Frame genartor timing parameters are clocked on the system clock, -- cgit v1.2.3 From a79f40cccd4644c32f6d5ae1ccf091a262e1dc57 Mon Sep 17 00:00:00 2001 From: Anitha Chrisanthus Date: Fri, 8 Jan 2021 14:34:13 -0800 Subject: drm/kmb: Limit supported mode to 1080p KMB only supports single resolution(1080p), this commit checks for 1920x1080x60 or 1920x1080x59 in crtc_mode_valid. Also, modes with vfp < 4 are not supported in KMB display. This change prunes display modes with vfp < 4. v2: added vfp check Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display") Co-developed-by: Edmund Dea Signed-off-by: Edmund Dea Signed-off-by: Anitha Chrisanthus Acked-by: Sam Ravnborg Link:https://patchwork.freedesktop.org/patch/msgid/20211013233632.471892-2-anitha.chrisanthus@intel.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/kmb/kmb_crtc.c | 34 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/kmb/kmb_drv.h | 9 ++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/kmb/kmb_crtc.c b/drivers/gpu/drm/kmb/kmb_crtc.c index 44327bc629ca..08a45e813db7 100644 --- a/drivers/gpu/drm/kmb/kmb_crtc.c +++ b/drivers/gpu/drm/kmb/kmb_crtc.c @@ -185,11 +185,45 @@ static void kmb_crtc_atomic_flush(struct drm_crtc *crtc, spin_unlock_irq(&crtc->dev->event_lock); } +static enum drm_mode_status + kmb_crtc_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) +{ + int refresh; + struct drm_device *dev = crtc->dev; + int vfp = mode->vsync_start - mode->vdisplay; + + if (mode->vdisplay < KMB_CRTC_MAX_HEIGHT) { + drm_dbg(dev, "height = %d less than %d", + mode->vdisplay, KMB_CRTC_MAX_HEIGHT); + return MODE_BAD_VVALUE; + } + if (mode->hdisplay < KMB_CRTC_MAX_WIDTH) { + drm_dbg(dev, "width = %d less than %d", + mode->hdisplay, KMB_CRTC_MAX_WIDTH); + return MODE_BAD_HVALUE; + } + refresh = drm_mode_vrefresh(mode); + if (refresh < KMB_MIN_VREFRESH || refresh > KMB_MAX_VREFRESH) { + drm_dbg(dev, "refresh = %d less than %d or greater than %d", + refresh, KMB_MIN_VREFRESH, KMB_MAX_VREFRESH); + return MODE_BAD; + } + + if (vfp < KMB_CRTC_MIN_VFP) { + drm_dbg(dev, "vfp = %d less than %d", vfp, KMB_CRTC_MIN_VFP); + return MODE_BAD; + } + + return MODE_OK; +} + static const struct drm_crtc_helper_funcs kmb_crtc_helper_funcs = { .atomic_begin = kmb_crtc_atomic_begin, .atomic_enable = kmb_crtc_atomic_enable, .atomic_disable = kmb_crtc_atomic_disable, .atomic_flush = kmb_crtc_atomic_flush, + .mode_valid = kmb_crtc_mode_valid, }; int kmb_setup_crtc(struct drm_device *drm) diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h index 69a62e2d03ff..4f7a98667626 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.h +++ b/drivers/gpu/drm/kmb/kmb_drv.h @@ -20,11 +20,18 @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 1 +/* Platform definitions */ +#define KMB_CRTC_MIN_VFP 4 +#define KMB_CRTC_MAX_WIDTH 1920 /* max width in pixels */ +#define KMB_CRTC_MAX_HEIGHT 1080 /* max height in pixels */ +#define KMB_CRTC_MIN_WIDTH 1920 +#define KMB_CRTC_MIN_HEIGHT 1080 #define KMB_FB_MAX_WIDTH 1920 #define KMB_FB_MAX_HEIGHT 1080 #define KMB_FB_MIN_WIDTH 1 #define KMB_FB_MIN_HEIGHT 1 - +#define KMB_MIN_VREFRESH 59 /*vertical refresh in Hz */ +#define KMB_MAX_VREFRESH 60 /*vertical refresh in Hz */ #define KMB_LCD_DEFAULT_CLK 200000000 #define KMB_SYS_CLK_MHZ 500 -- cgit v1.2.3 From 13047a092c6d3f23b7d684b5b3fe46b2b50423b9 Mon Sep 17 00:00:00 2001 From: Edmund Dea Date: Tue, 20 Apr 2021 15:31:53 -0700 Subject: drm/kmb: Remove clearing DPHY regs Don't clear the shared DPHY registers common to MIPI Rx and MIPI Tx during DSI initialization since this was causing MIPI Rx reset. Rest of the writes are bitwise, so will not affect Mipi Rx side. Fixes: 98521f4d4b4c ("drm/kmb: Mipi DSI part of the display driver") Signed-off-by: Edmund Dea Signed-off-by: Anitha Chrisanthus Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20211013233632.471892-3-anitha.chrisanthus@intel.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/kmb/kmb_dsi.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/kmb/kmb_dsi.c b/drivers/gpu/drm/kmb/kmb_dsi.c index 86e8e7943e89..a0669b842ff5 100644 --- a/drivers/gpu/drm/kmb/kmb_dsi.c +++ b/drivers/gpu/drm/kmb/kmb_dsi.c @@ -1393,11 +1393,6 @@ int kmb_dsi_mode_set(struct kmb_dsi *kmb_dsi, struct drm_display_mode *mode, mipi_tx_init_cfg.lane_rate_mbps = data_rate; } - kmb_write_mipi(kmb_dsi, DPHY_ENABLE, 0); - kmb_write_mipi(kmb_dsi, DPHY_INIT_CTRL0, 0); - kmb_write_mipi(kmb_dsi, DPHY_INIT_CTRL1, 0); - kmb_write_mipi(kmb_dsi, DPHY_INIT_CTRL2, 0); - /* Initialize mipi controller */ mipi_tx_init_cntrl(kmb_dsi, &mipi_tx_init_cfg); -- cgit v1.2.3 From 982f8ad666a1123028a077b6b009871a0dc9df26 Mon Sep 17 00:00:00 2001 From: Edmund Dea Date: Wed, 6 Oct 2021 16:03:48 -0700 Subject: drm/kmb: Disable change of plane parameters Due to HW limitations, KMB cannot change height, width, or pixel format after initial plane configuration. v2: removed memset disp_cfg as it is already zero. Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display") Signed-off-by: Edmund Dea Signed-off-by: Anitha Chrisanthus Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20211013233632.471892-4-anitha.chrisanthus@intel.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/kmb/kmb_drv.h | 1 + drivers/gpu/drm/kmb/kmb_plane.c | 43 ++++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/kmb/kmb_plane.h | 6 ++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h index 4f7a98667626..bf085e95b28f 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.h +++ b/drivers/gpu/drm/kmb/kmb_drv.h @@ -57,6 +57,7 @@ struct kmb_drm_private { spinlock_t irq_lock; int irq_lcd; int sys_clk_mhz; + struct disp_cfg init_disp_cfg[KMB_MAX_PLANES]; struct layer_status plane_status[KMB_MAX_PLANES]; int kmb_under_flow; int kmb_flush_done; diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c index 06b0c42c9e91..00404ba4126d 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.c +++ b/drivers/gpu/drm/kmb/kmb_plane.c @@ -67,8 +67,21 @@ static const u32 kmb_formats_v[] = { static unsigned int check_pixel_format(struct drm_plane *plane, u32 format) { + struct kmb_drm_private *kmb; + struct kmb_plane *kmb_plane = to_kmb_plane(plane); int i; + int plane_id = kmb_plane->id; + struct disp_cfg init_disp_cfg; + kmb = to_kmb(plane->dev); + init_disp_cfg = kmb->init_disp_cfg[plane_id]; + /* Due to HW limitations, changing pixel format after initial + * plane configuration is not supported. + */ + if (init_disp_cfg.format && init_disp_cfg.format != format) { + drm_dbg(&kmb->drm, "Cannot change format after initial plane configuration"); + return -EINVAL; + } for (i = 0; i < plane->format_count; i++) { if (plane->format_types[i] == format) return 0; @@ -81,11 +94,17 @@ static int kmb_plane_atomic_check(struct drm_plane *plane, { struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct kmb_drm_private *kmb; + struct kmb_plane *kmb_plane = to_kmb_plane(plane); + int plane_id = kmb_plane->id; + struct disp_cfg init_disp_cfg; struct drm_framebuffer *fb; int ret; struct drm_crtc_state *crtc_state; bool can_position; + kmb = to_kmb(plane->dev); + init_disp_cfg = kmb->init_disp_cfg[plane_id]; fb = new_plane_state->fb; if (!fb || !new_plane_state->crtc) return 0; @@ -99,6 +118,16 @@ static int kmb_plane_atomic_check(struct drm_plane *plane, new_plane_state->crtc_w < KMB_FB_MIN_WIDTH || new_plane_state->crtc_h < KMB_FB_MIN_HEIGHT) return -EINVAL; + + /* Due to HW limitations, changing plane height or width after + * initial plane configuration is not supported. + */ + if ((init_disp_cfg.width && init_disp_cfg.height) && + (init_disp_cfg.width != fb->width || + init_disp_cfg.height != fb->height)) { + drm_dbg(&kmb->drm, "Cannot change plane height or width after initial configuration"); + return -EINVAL; + } can_position = (plane->type == DRM_PLANE_TYPE_OVERLAY); crtc_state = drm_atomic_get_existing_crtc_state(state, @@ -335,6 +364,7 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, unsigned char plane_id; int num_planes; static dma_addr_t addr[MAX_SUB_PLANES]; + struct disp_cfg *init_disp_cfg; if (!plane || !new_plane_state || !old_plane_state) return; @@ -357,7 +387,8 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, } spin_unlock_irq(&kmb->irq_lock); - src_w = (new_plane_state->src_w >> 16); + init_disp_cfg = &kmb->init_disp_cfg[plane_id]; + src_w = new_plane_state->src_w >> 16; src_h = new_plane_state->src_h >> 16; crtc_x = new_plane_state->crtc_x; crtc_y = new_plane_state->crtc_y; @@ -500,6 +531,16 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, /* Enable DMA */ kmb_write_lcd(kmb, LCD_LAYERn_DMA_CFG(plane_id), dma_cfg); + + /* Save initial display config */ + if (!init_disp_cfg->width || + !init_disp_cfg->height || + !init_disp_cfg->format) { + init_disp_cfg->width = width; + init_disp_cfg->height = height; + init_disp_cfg->format = fb->format->format; + } + drm_dbg(&kmb->drm, "dma_cfg=0x%x LCD_DMA_CFG=0x%x\n", dma_cfg, kmb_read_lcd(kmb, LCD_LAYERn_DMA_CFG(plane_id))); diff --git a/drivers/gpu/drm/kmb/kmb_plane.h b/drivers/gpu/drm/kmb/kmb_plane.h index 6e8d22cf8819..b51144044fe8 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.h +++ b/drivers/gpu/drm/kmb/kmb_plane.h @@ -63,6 +63,12 @@ struct layer_status { u32 ctrl; }; +struct disp_cfg { + unsigned int width; + unsigned int height; + unsigned int format; +}; + struct kmb_plane *kmb_plane_init(struct drm_device *drm); void kmb_plane_destroy(struct drm_plane *plane); #endif /* __KMB_PLANE_H__ */ -- cgit v1.2.3 From 004d2719806fb8e355c1bccd538e82c04319d391 Mon Sep 17 00:00:00 2001 From: Anitha Chrisanthus Date: Mon, 19 Jul 2021 16:28:51 -0700 Subject: drm/kmb: Corrected typo in handle_lcd_irq Check for Overflow bits for layer3 in the irq handler. Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display") Signed-off-by: Anitha Chrisanthus Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20211013233632.471892-5-anitha.chrisanthus@intel.com Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/kmb/kmb_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index 12ce669650cc..961ac6fb5fcf 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -380,7 +380,7 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev) if (val & LAYER3_DMA_FIFO_UNDERFLOW) drm_dbg(&kmb->drm, "LAYER3:GL1 DMA UNDERFLOW val = 0x%lx", val); - if (val & LAYER3_DMA_FIFO_UNDERFLOW) + if (val & LAYER3_DMA_FIFO_OVERFLOW) drm_dbg(&kmb->drm, "LAYER3:GL1 DMA OVERFLOW val = 0x%lx", val); } -- cgit v1.2.3 From 74056092ff415e7e20ce2544689b32ee811c4f0b Mon Sep 17 00:00:00 2001 From: Anitha Chrisanthus Date: Mon, 7 Jun 2021 14:17:11 -0700 Subject: drm/kmb: Enable ADV bridge after modeset On KMB, ADV bridge must be programmed and powered on prior to MIPI DSI HW initialization. v2: changed to atomic_bridge_chain_enable (Sam) Fixes: 98521f4d4b4c ("drm/kmb: Mipi DSI part of the display driver") Co-developed-by: Edmund Dea Signed-off-by: Edmund Dea Signed-off-by: Anitha Chrisanthus Link: https://patchwork.freedesktop.org/patch/msgid/20211019230719.789958-1-anitha.chrisanthus@intel.com Acked-by: Sam Ravnborg Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/kmb/kmb_crtc.c | 7 ++++--- drivers/gpu/drm/kmb/kmb_dsi.c | 9 +++++---- drivers/gpu/drm/kmb/kmb_dsi.h | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/kmb/kmb_crtc.c b/drivers/gpu/drm/kmb/kmb_crtc.c index 08a45e813db7..06613ffeaaf8 100644 --- a/drivers/gpu/drm/kmb/kmb_crtc.c +++ b/drivers/gpu/drm/kmb/kmb_crtc.c @@ -66,7 +66,8 @@ static const struct drm_crtc_funcs kmb_crtc_funcs = { .disable_vblank = kmb_crtc_disable_vblank, }; -static void kmb_crtc_set_mode(struct drm_crtc *crtc) +static void kmb_crtc_set_mode(struct drm_crtc *crtc, + struct drm_atomic_state *old_state) { struct drm_device *dev = crtc->dev; struct drm_display_mode *m = &crtc->state->adjusted_mode; @@ -75,7 +76,7 @@ static void kmb_crtc_set_mode(struct drm_crtc *crtc) unsigned int val = 0; /* Initialize mipi */ - kmb_dsi_mode_set(kmb->kmb_dsi, m, kmb->sys_clk_mhz); + kmb_dsi_mode_set(kmb->kmb_dsi, m, kmb->sys_clk_mhz, old_state); drm_info(dev, "vfp= %d vbp= %d vsync_len=%d hfp=%d hbp=%d hsync_len=%d\n", m->crtc_vsync_start - m->crtc_vdisplay, @@ -138,7 +139,7 @@ static void kmb_crtc_atomic_enable(struct drm_crtc *crtc, struct kmb_drm_private *kmb = crtc_to_kmb_priv(crtc); clk_prepare_enable(kmb->kmb_clk.clk_lcd); - kmb_crtc_set_mode(crtc); + kmb_crtc_set_mode(crtc, state); drm_crtc_vblank_on(crtc); } diff --git a/drivers/gpu/drm/kmb/kmb_dsi.c b/drivers/gpu/drm/kmb/kmb_dsi.c index a0669b842ff5..f6071882054c 100644 --- a/drivers/gpu/drm/kmb/kmb_dsi.c +++ b/drivers/gpu/drm/kmb/kmb_dsi.c @@ -1331,7 +1331,8 @@ static u32 mipi_tx_init_dphy(struct kmb_dsi *kmb_dsi, return 0; } -static void connect_lcd_to_mipi(struct kmb_dsi *kmb_dsi) +static void connect_lcd_to_mipi(struct kmb_dsi *kmb_dsi, + struct drm_atomic_state *old_state) { struct regmap *msscam; @@ -1340,7 +1341,7 @@ static void connect_lcd_to_mipi(struct kmb_dsi *kmb_dsi) dev_dbg(kmb_dsi->dev, "failed to get msscam syscon"); return; } - + drm_atomic_bridge_chain_enable(adv_bridge, old_state); /* DISABLE MIPI->CIF CONNECTION */ regmap_write(msscam, MSS_MIPI_CIF_CFG, 0); @@ -1351,7 +1352,7 @@ static void connect_lcd_to_mipi(struct kmb_dsi *kmb_dsi) } int kmb_dsi_mode_set(struct kmb_dsi *kmb_dsi, struct drm_display_mode *mode, - int sys_clk_mhz) + int sys_clk_mhz, struct drm_atomic_state *old_state) { u64 data_rate; @@ -1399,7 +1400,7 @@ int kmb_dsi_mode_set(struct kmb_dsi *kmb_dsi, struct drm_display_mode *mode, /* Dphy initialization */ mipi_tx_init_dphy(kmb_dsi, &mipi_tx_init_cfg); - connect_lcd_to_mipi(kmb_dsi); + connect_lcd_to_mipi(kmb_dsi, old_state); dev_info(kmb_dsi->dev, "mipi hw initialized"); return 0; diff --git a/drivers/gpu/drm/kmb/kmb_dsi.h b/drivers/gpu/drm/kmb/kmb_dsi.h index 66b7c500d9bc..09dc88743d77 100644 --- a/drivers/gpu/drm/kmb/kmb_dsi.h +++ b/drivers/gpu/drm/kmb/kmb_dsi.h @@ -380,7 +380,7 @@ int kmb_dsi_host_bridge_init(struct device *dev); struct kmb_dsi *kmb_dsi_init(struct platform_device *pdev); void kmb_dsi_host_unregister(struct kmb_dsi *kmb_dsi); int kmb_dsi_mode_set(struct kmb_dsi *kmb_dsi, struct drm_display_mode *mode, - int sys_clk_mhz); + int sys_clk_mhz, struct drm_atomic_state *old_state); int kmb_dsi_map_mmio(struct kmb_dsi *kmb_dsi); int kmb_dsi_clk_init(struct kmb_dsi *kmb_dsi); int kmb_dsi_encoder_init(struct drm_device *dev, struct kmb_dsi *kmb_dsi); -- cgit v1.2.3 From c62041c5baa9ded3bc6fd38d3f724de70683b489 Mon Sep 17 00:00:00 2001 From: Erik Ekman Date: Tue, 19 Oct 2021 23:13:32 +0200 Subject: sfc: Export fibre-specific supported link modes The 1/10GbaseT modes were set up for cards with SFP+ cages in 3497ed8c852a5 ("sfc: report supported link speeds on SFP connections"). 10GbaseT was likely used since no 10G fibre mode existed. The missing fibre modes for 1/10G were added to ethtool.h in 5711a9822144 ("net: ethtool: add support for 1000BaseX and missing 10G link modes") shortly thereafter. The user guide available at https://support-nic.xilinx.com/wp/drivers lists support for the following cable and transceiver types in section 2.9: - QSFP28 100G Direct Attach Cables - QSFP28 100G SR Optical Transceivers (with SR4 modules listed) - SFP28 25G Direct Attach Cables - SFP28 25G SR Optical Transceivers - QSFP+ 40G Direct Attach Cables - QSFP+ 40G Active Optical Cables - QSFP+ 40G SR4 Optical Transceivers - QSFP+ to SFP+ Breakout Direct Attach Cables - QSFP+ to SFP+ Breakout Active Optical Cables - SFP+ 10G Direct Attach Cables - SFP+ 10G SR Optical Transceivers - SFP+ 10G LR Optical Transceivers - SFP 1000BASE‐T Transceivers - 1G Optical Transceivers (From user guide issue 28. Issue 16 which also includes older cards like SFN5xxx/SFN6xxx has matching lists for 1/10/40G transceiver types.) Regarding SFP+ 10GBASE‐T transceivers the latest guide says: "Solarflare adapters do not support 10GBASE‐T transceiver modules." Tested using SFN5122F-R7 (with 2 SFP+ ports). Supported link modes do not change depending on module used (tested with 1000BASE-T, 1000BASE-BX10, 10GBASE-LR). Before: $ ethtool ext Settings for ext: Supported ports: [ FIBRE ] Supported link modes: 1000baseT/Full 10000baseT/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: No Supported FEC modes: Not reported Advertised link modes: Not reported Advertised pause frame use: No Advertised auto-negotiation: No Advertised FEC modes: Not reported Link partner advertised link modes: Not reported Link partner advertised pause frame use: No Link partner advertised auto-negotiation: No Link partner advertised FEC modes: Not reported Speed: 1000Mb/s Duplex: Full Auto-negotiation: off Port: FIBRE PHYAD: 255 Transceiver: internal Current message level: 0x000020f7 (8439) drv probe link ifdown ifup rx_err tx_err hw Link detected: yes After: $ ethtool ext Settings for ext: Supported ports: [ FIBRE ] Supported link modes: 1000baseT/Full 1000baseX/Full 10000baseCR/Full 10000baseSR/Full 10000baseLR/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: No Supported FEC modes: Not reported Advertised link modes: Not reported Advertised pause frame use: No Advertised auto-negotiation: No Advertised FEC modes: Not reported Link partner advertised link modes: Not reported Link partner advertised pause frame use: No Link partner advertised auto-negotiation: No Link partner advertised FEC modes: Not reported Speed: 1000Mb/s Duplex: Full Auto-negotiation: off Port: FIBRE PHYAD: 255 Transceiver: internal Supports Wake-on: g Wake-on: d Current message level: 0x000020f7 (8439) drv probe link ifdown ifup rx_err tx_err hw Link detected: yes Signed-off-by: Erik Ekman Acked-by: Martin Habets Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi_port_common.c | 37 ++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c index 4bd3ef8f3384..c4fe3c48ac46 100644 --- a/drivers/net/ethernet/sfc/mcdi_port_common.c +++ b/drivers/net/ethernet/sfc/mcdi_port_common.c @@ -132,16 +132,27 @@ void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset) case MC_CMD_MEDIA_SFP_PLUS: case MC_CMD_MEDIA_QSFP_PLUS: SET_BIT(FIBRE); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) { SET_BIT(1000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + SET_BIT(1000baseX_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) { + SET_BIT(10000baseCR_Full); + SET_BIT(10000baseLR_Full); + SET_BIT(10000baseSR_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) { SET_BIT(40000baseCR4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) + SET_BIT(40000baseSR4_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) { SET_BIT(100000baseCR4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) + SET_BIT(100000baseSR4_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) { SET_BIT(25000baseCR_Full); + SET_BIT(25000baseSR_Full); + } if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN)) SET_BIT(50000baseCR2_Full); break; @@ -192,15 +203,19 @@ u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset) result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN); if (TEST_BIT(1000baseT_Half)) result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN); - if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full)) + if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full) || + TEST_BIT(1000baseX_Full)) result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN); - if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full)) + if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full) || + TEST_BIT(10000baseCR_Full) || TEST_BIT(10000baseLR_Full) || + TEST_BIT(10000baseSR_Full)) result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN); - if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full)) + if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full) || + TEST_BIT(40000baseSR4_Full)) result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN); - if (TEST_BIT(100000baseCR4_Full)) + if (TEST_BIT(100000baseCR4_Full) || TEST_BIT(100000baseSR4_Full)) result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN); - if (TEST_BIT(25000baseCR_Full)) + if (TEST_BIT(25000baseCR_Full) || TEST_BIT(25000baseSR_Full)) result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN); if (TEST_BIT(50000baseCR2_Full)) result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN); -- cgit v1.2.3 From bf6abf345dfa77786aca554bc58c64bd428ecb1d Mon Sep 17 00:00:00 2001 From: Erik Ekman Date: Wed, 20 Oct 2021 00:40:16 +0200 Subject: sfc: Don't use netif_info before net_device setup Use pci_info instead to avoid unnamed/uninitialized noise: [197088.688729] sfc 0000:01:00.0: Solarflare NIC detected [197088.690333] sfc 0000:01:00.0: Part Number : SFN5122F [197088.729061] sfc 0000:01:00.0 (unnamed net_device) (uninitialized): no SR-IOV VFs probed [197088.729071] sfc 0000:01:00.0 (unnamed net_device) (uninitialized): no PTP support Inspired by fa44821a4ddd ("sfc: don't use netif_info et al before net_device is registered") from Heiner Kallweit. Signed-off-by: Erik Ekman Acked-by: Martin Habets Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ptp.c | 4 ++-- drivers/net/ethernet/sfc/siena_sriov.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index a39c5143b386..797e51802ccb 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -648,7 +648,7 @@ static int efx_ptp_get_attributes(struct efx_nic *efx) } else if (rc == -EINVAL) { fmt = MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS; } else if (rc == -EPERM) { - netif_info(efx, probe, efx->net_dev, "no PTP support\n"); + pci_info(efx->pci_dev, "no PTP support\n"); return rc; } else { efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), @@ -824,7 +824,7 @@ static int efx_ptp_disable(struct efx_nic *efx) * should only have been called during probe. */ if (rc == -ENOSYS || rc == -EPERM) - netif_info(efx, probe, efx->net_dev, "no PTP support\n"); + pci_info(efx->pci_dev, "no PTP support\n"); else if (rc) efx_mcdi_display_error(efx, MC_CMD_PTP, MC_CMD_PTP_IN_DISABLE_LEN, diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index 83dcfcae3d4b..441e7f3e5375 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -1057,7 +1057,7 @@ void efx_siena_sriov_probe(struct efx_nic *efx) return; if (efx_siena_sriov_cmd(efx, false, &efx->vi_scale, &count)) { - netif_info(efx, probe, efx->net_dev, "no SR-IOV VFs probed\n"); + pci_info(efx->pci_dev, "no SR-IOV VFs probed\n"); return; } if (count > 0 && count > max_vfs) -- cgit v1.2.3 From b6b19a71c8bbde2abaa99ba29e6c76ddc84984f8 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 21 Oct 2021 17:13:53 +0800 Subject: ptp: free 'vclock_index' in ptp_clock_release() 'vclock_index' is accessed from sysfs, it shouled be freed in release function, so move it from ptp_clock_unregister() to ptp_clock_release(). Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 7fd02aabd79a..f9b2d66b0443 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -170,6 +170,7 @@ static void ptp_clock_release(struct device *dev) struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev); ptp_cleanup_pin_groups(ptp); + kfree(ptp->vclock_index); mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); mutex_destroy(&ptp->n_vclocks_mux); @@ -286,8 +287,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, if (ptp->pps_source) pps_unregister_source(ptp->pps_source); - kfree(ptp->vclock_index); - if (ptp->kworker) kthread_destroy_worker(ptp->kworker); @@ -328,8 +327,6 @@ int ptp_clock_unregister(struct ptp_clock *ptp) ptp->defunct = 1; wake_up_interruptible(&ptp->tsev_wq); - kfree(ptp->vclock_index); - if (ptp->kworker) { kthread_cancel_delayed_work_sync(&ptp->aux_work); kthread_destroy_worker(ptp->kworker); -- cgit v1.2.3 From 0a30896fc5025e71c350449760b240fba5581b42 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Oct 2021 23:08:16 +0200 Subject: MAINTAINERS: Add Dave Hansen to the x86 maintainer team Dave is already listed as x86/mm maintainer, has a profund knowledge of the x86 architecture in general and a good taste in terms of kernel programming in general. Add him as a full x86 maintainer with all rights and duties. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Borislav Petkov Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/87zgr3flq7.ffs@tglx --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d118d7957d2..f26920f0fa65 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20336,6 +20336,7 @@ X86 ARCHITECTURE (32-BIT AND 64-BIT) M: Thomas Gleixner M: Ingo Molnar M: Borislav Petkov +M: Dave Hansen M: x86@kernel.org R: "H. Peter Anvin" L: linux-kernel@vger.kernel.org -- cgit v1.2.3 From 0db55f9a1bafbe3dac750ea669de9134922389b5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 20 Oct 2021 19:19:46 +0200 Subject: drm/ttm: fix memleak in ttm_transfered_destroy We need to cleanup the fences for ghost objects as well. Signed-off-by: Christian König Reported-by: Erhard F. Tested-by: Erhard F. Reviewed-by: Huang Rui Bug: https://bugzilla.kernel.org/show_bug.cgi?id=214029 Bug: https://bugzilla.kernel.org/show_bug.cgi?id=214447 CC: Link: https://patchwork.freedesktop.org/patch/msgid/20211020173211.2247-1-christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo_util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 1c5ffe2935af..abf2d7a4fdf1 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -190,6 +190,7 @@ static void ttm_transfered_destroy(struct ttm_buffer_object *bo) struct ttm_transfer_obj *fbo; fbo = container_of(bo, struct ttm_transfer_obj, base); + dma_resv_fini(&fbo->base.base._resv); ttm_bo_put(fbo->bo); kfree(fbo); } -- cgit v1.2.3 From fb8dc5fc8cbdfd62ecd16493848aee2f42ed84d9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 19:52:06 +0300 Subject: net: enetc: fix ethtool counter name for PM0_TERR There are two counters named "MAC tx frames", one of them is actually incorrect. The correct name for that counter should be "MAC tx error frames", which is symmetric to the existing "MAC rx error frames". Fixes: 16eb4c85c964 ("enetc: Add ethtool statistics") Signed-off-by: Vladimir Oltean Reviewed-by: Link: https://lore.kernel.org/r/20211020165206.1069889-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 9690e36e9e85..910b9f722504 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -157,7 +157,7 @@ static const struct { { ENETC_PM0_TFRM, "MAC tx frames" }, { ENETC_PM0_TFCS, "MAC tx fcs errors" }, { ENETC_PM0_TVLAN, "MAC tx VLAN frames" }, - { ENETC_PM0_TERR, "MAC tx frames" }, + { ENETC_PM0_TERR, "MAC tx frame errors" }, { ENETC_PM0_TUCA, "MAC tx unicast frames" }, { ENETC_PM0_TMCA, "MAC tx multicast frames" }, { ENETC_PM0_TBCA, "MAC tx broadcast frames" }, -- cgit v1.2.3 From e378f4967c8edd64c680f2e279cb646ee06b6f2d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 20:33:40 +0300 Subject: net: enetc: make sure all traffic classes can send large frames The enetc driver does not implement .ndo_change_mtu, instead it configures the MAC register field PTC{Traffic Class}MSDUR[MAXSDU] statically to a large value during probe time. The driver used to configure only the max SDU for traffic class 0, and that was fine while the driver could only use traffic class 0. But with the introduction of mqprio, sending a large frame into any other TC than 0 is broken. This patch fixes that by replicating per traffic class the static configuration done in enetc_configure_port_mac(). Fixes: cbe9e835946f ("enetc: Enable TC offloading with mqprio") Reported-by: Richie Pearn Signed-off-by: Vladimir Oltean Reviewed-by: Link: https://lore.kernel.org/r/20211020173340.1089992-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 4c977dfc44f0..d522bd5c90b4 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -517,10 +517,13 @@ static void enetc_port_si_configure(struct enetc_si *si) static void enetc_configure_port_mac(struct enetc_hw *hw) { + int tc; + enetc_port_wr(hw, ENETC_PM0_MAXFRM, ENETC_SET_MAXFRM(ENETC_RX_MAXFRM_SIZE)); - enetc_port_wr(hw, ENETC_PTCMSDUR(0), ENETC_MAC_MAXFRM_SIZE); + for (tc = 0; tc < 8; tc++) + enetc_port_wr(hw, ENETC_PTCMSDUR(tc), ENETC_MAC_MAXFRM_SIZE); enetc_port_wr(hw, ENETC_PM0_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN | ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC); -- cgit v1.2.3 From 397430b50a363d8b7bdda00522123f82df6adc5e Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 21 Oct 2021 14:29:44 +0200 Subject: usbnet: sanity check for maxpacket maxpacket of 0 makes no sense and oopses as we need to divide by it. Give up. V2: fixed typo in log and stylistic issues Signed-off-by: Oliver Neukum Reported-by: syzbot+76bb1d34ffa0adc03baa@syzkaller.appspotmail.com Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20211021122944.21816-1-oneukum@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 840c1c2ab16a..80432ee0ce69 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1788,6 +1788,10 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) if (!dev->rx_urb_size) dev->rx_urb_size = dev->hard_mtu; dev->maxpacket = usb_maxpacket (dev->udev, dev->out, 1); + if (dev->maxpacket == 0) { + /* that is a broken device */ + goto out4; + } /* let userspace know we have a random address */ if (ether_addr_equal(net->dev_addr, node_id)) -- cgit v1.2.3 From a3ca5281bb771d8103ea16f0a6a8a5df9a7fb4f3 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Thu, 21 Oct 2021 15:10:22 +0800 Subject: KVM: MMU: Reset mmu->pkru_mask to avoid stale data When updating mmu->pkru_mask, the value can only be added but it isn't reset in advance. This will make mmu->pkru_mask keep the stale data. Fix this issue. Fixes: 2d344105f57c ("KVM, pkeys: introduce pkru_mask to cache conditions") Signed-off-by: Chenyi Qiang Message-Id: <20211021071022.1140-1-chenyi.qiang@intel.com> Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1a64ba5b9437..0cc58901bf7a 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4596,10 +4596,10 @@ static void update_pkru_bitmask(struct kvm_mmu *mmu) unsigned bit; bool wp; - if (!is_cr4_pke(mmu)) { - mmu->pkru_mask = 0; + mmu->pkru_mask = 0; + + if (!is_cr4_pke(mmu)) return; - } wp = is_cr0_wp(mmu); -- cgit v1.2.3 From c8c340a9b4149fe5caa433f3b62463a1c8e07a46 Mon Sep 17 00:00:00 2001 From: Masahiro Kozuka Date: Tue, 14 Sep 2021 14:09:51 -0700 Subject: KVM: SEV: Flush cache on non-coherent systems before RECEIVE_UPDATE_DATA Flush the destination page before invoking RECEIVE_UPDATE_DATA, as the PSP encrypts the data with the guest's key when writing to guest memory. If the target memory was not previously encrypted, the cache may contain dirty, unecrypted data that will persist on non-coherent systems. Fixes: 15fb7de1a7f5 ("KVM: SVM: Add KVM_SEV_RECEIVE_UPDATE_DATA command") Cc: stable@vger.kernel.org Cc: Peter Gonda Cc: Marc Orr Cc: Tom Lendacky Cc: Brijesh Singh Signed-off-by: Masahiro Kozuka [sean: converted bug report to changelog] Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Message-Id: <20210914210951.2994260-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0d21d59936e5..2e4916be290e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1484,6 +1484,13 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) goto e_free_trans; } + /* + * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP + * encrypts the written data with the guest's key, and the cache may + * contain dirty, unencrypted data. + */ + sev_clflush_pages(guest_page, n); + /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */ data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; data.guest_address |= sev_me_mask; -- cgit v1.2.3 From b22fa62a35d7f2029d757a518d78041822b7c7c1 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 21 Oct 2021 13:20:29 +0100 Subject: io_uring: apply worker limits to previous users Another change to the API io-wq worker limitation API added in 5.15, apply the limit to all prior users that already registered a tctx. It may be confusing as it's now, in particular the change covers the following 2 cases: TASK1 | TASK2 _________________________________________________ ring = create() | | limit_iowq_workers() *not limited* | TASK1 | TASK2 _________________________________________________ ring = create() | | issue_requests() limit_iowq_workers() | | *not limited* A note on locking, it's safe to traverse ->tctx_list as we hold ->uring_lock, but do that after dropping sqd->lock to avoid possible problems. It's also safe to access tctx->io_wq there because tasks kill it only after removing themselves from tctx_list, see io_uring_cancel_generic() -> io_uring_clean_tctx() Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/d6e09ecc3545e4dc56e43c906ee3d71b7ae21bed.1634818641.git.asml.silence@gmail.com Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io_uring.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index d5cc103224f1..bc18af5e0a93 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -10649,7 +10649,9 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx) static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, void __user *arg) + __must_hold(&ctx->uring_lock) { + struct io_tctx_node *node; struct io_uring_task *tctx = NULL; struct io_sq_data *sqd = NULL; __u32 new_count[2]; @@ -10702,6 +10704,22 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (copy_to_user(arg, new_count, sizeof(new_count))) return -EFAULT; + /* that's it for SQPOLL, only the SQPOLL task creates requests */ + if (sqd) + return 0; + + /* now propagate the restriction to all registered users */ + list_for_each_entry(node, &ctx->tctx_list, ctx_node) { + struct io_uring_task *tctx = node->task->io_uring; + + if (WARN_ON_ONCE(!tctx->io_wq)) + continue; + + for (i = 0; i < ARRAY_SIZE(new_count); i++) + new_count[i] = ctx->iowq_limits[i]; + /* ignore errors, it always returns zero anyway */ + (void)io_wq_max_workers(tctx->io_wq, new_count); + } return 0; err: if (sqd) { -- cgit v1.2.3 From def0c3697287f6e85d5ac68b21302966c95474f9 Mon Sep 17 00:00:00 2001 From: Bryant Mairs Date: Tue, 19 Oct 2021 09:24:33 -0500 Subject: drm: panel-orientation-quirks: Add quirk for Aya Neo 2021 Fixes screen orientation for the Aya Neo 2021 handheld gaming console. Signed-off-by: Bryant Mairs Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20211019142433.4295-1-bryant@mai.rs --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index f6bdec7fa925..30c17a76f49a 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -134,6 +134,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T103HAF"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* AYA NEO 2021 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AYADEVICE"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "AYA NEO 2021"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* GPD MicroPC (generic strings, also match on bios date) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), -- cgit v1.2.3 From 595cb5e0b832a3e100cbbdefef797b0c27bf725a Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 21 Oct 2021 10:30:06 -0500 Subject: Revert "drm/ast: Add detect function support" This reverts commit aae74ff9caa8de9a45ae2e46068c417817392a26, since it prevents my AMD Milan system from booting, with: [ 27.189558] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 27.197506] #PF: supervisor write access in kernel mode [ 27.203333] #PF: error_code(0x0002) - not-present page [ 27.209064] PGD 0 P4D 0 [ 27.211885] Oops: 0002 [#1] PREEMPT SMP NOPTI [ 27.216744] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.15.0-rc6+ #15 [ 27.223928] Hardware name: AMD Corporation ETHANOL_X/ETHANOL_X, BIOS RXM1006B 08/20/2021 [ 27.232955] RIP: 0010:run_timer_softirq+0x38b/0x4a0 [ 27.238397] Code: 4c 89 f7 e8 37 27 ac 00 49 c7 46 08 00 00 00 00 49 8b 04 24 48 85 c0 74 71 4d 8b 3c 24 4d 89 7e 08 66 90 49 8b 07 49 8b 57 08 <48> 89 02 48 85 c0 74 04 48 89 50 08 49 8b 77 18 41 f6 47 22 20 4c [ 27.259350] RSP: 0018:ffffc42d00003ee8 EFLAGS: 00010086 [ 27.265176] RAX: dead000000000122 RBX: 0000000000000000 RCX: 0000000000000101 [ 27.273134] RDX: 0000000000000000 RSI: 0000000000000087 RDI: 0000000000000001 [ 27.281084] RBP: ffffc42d00003f70 R08: 0000000000000000 R09: 00000000000003eb [ 27.289043] R10: ffffa0860cb300d0 R11: ffffa0c44de290b0 R12: ffffc42d00003ef8 [ 27.297002] R13: 00000000fffef200 R14: ffffa0c44de18dc0 R15: ffffa0867a882350 [ 27.304961] FS: 0000000000000000(0000) GS:ffffa0c44de00000(0000) knlGS:0000000000000000 [ 27.313988] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 27.320396] CR2: 0000000000000000 CR3: 000000014569c001 CR4: 0000000000770ef0 [ 27.328346] PKRU: 55555554 [ 27.331359] Call Trace: [ 27.334073] [ 27.336314] ? __queue_work+0x420/0x420 [ 27.340589] ? lapic_next_event+0x21/0x30 [ 27.345060] ? clockevents_program_event+0x8f/0xe0 [ 27.350402] __do_softirq+0xfb/0x2db [ 27.354388] irq_exit_rcu+0x98/0xd0 [ 27.358275] sysvec_apic_timer_interrupt+0xac/0xd0 [ 27.363620] [ 27.365955] asm_sysvec_apic_timer_interrupt+0x12/0x20 [ 27.371685] RIP: 0010:cpuidle_enter_state+0xcc/0x390 [ 27.377292] Code: 3d 01 79 0a 50 e8 44 ed 77 ff 49 89 c6 0f 1f 44 00 00 31 ff e8 f5 f8 77 ff 80 7d d7 00 0f 85 e6 01 00 00 fb 66 0f 1f 44 00 00 <45> 85 ff 0f 88 17 01 00 00 49 63 c7 4c 2b 75 c8 48 8d 14 40 48 8d [ 27.398243] RSP: 0018:ffffffffb0e03dc8 EFLAGS: 00000246 [ 27.404069] RAX: ffffa0c44de00000 RBX: 0000000000000001 RCX: 000000000000001f [ 27.412028] RDX: 0000000000000000 RSI: ffffffffb0bafc1f RDI: ffffffffb0bbdb81 [ 27.419986] RBP: ffffffffb0e03e00 R08: 00000006549f8f3f R09: ffffffffb1065200 [ 27.427935] R10: ffffa0c44de27ae4 R11: ffffa0c44de27ac4 R12: ffffa0c5634cb000 [ 27.435894] R13: ffffffffb1065200 R14: 00000006549f8f3f R15: 0000000000000001 [ 27.443854] ? cpuidle_enter_state+0xbb/0x390 [ 27.448712] cpuidle_enter+0x2e/0x40 [ 27.452695] call_cpuidle+0x23/0x40 [ 27.456584] do_idle+0x1f0/0x270 [ 27.460181] cpu_startup_entry+0x20/0x30 [ 27.464553] rest_init+0xd4/0xe0 [ 27.468149] arch_call_rest_init+0xe/0x1b [ 27.472619] start_kernel+0x6bc/0x6e2 [ 27.476764] x86_64_start_reservations+0x24/0x26 [ 27.481912] x86_64_start_kernel+0x75/0x79 [ 27.486477] secondary_startup_64_no_verify+0xb0/0xbb [ 27.492111] Modules linked in: kvm_amd(+) kvm ipmi_si(+) ipmi_devintf rapl wmi_bmof ipmi_msghandler input_leds ccp k10temp mac_hid sch_fq_codel msr ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear ast i2c_algo_bit drm_vram_helper drm_ttm_helper ttm drm_kms_helper crct10dif_pclmul crc32_pclmul ghash_clmulni_intel syscopyarea aesni_intel sysfillrect crypto_simd sysimgblt fb_sys_fops cryptd hid_generic cec nvme ahci usbhid drm e1000e nvme_core hid libahci i2c_piix4 wmi [ 27.551789] CR2: 0000000000000000 [ 27.555482] ---[ end trace 897987dfe93dccc6 ]--- [ 27.560630] RIP: 0010:run_timer_softirq+0x38b/0x4a0 [ 27.566069] Code: 4c 89 f7 e8 37 27 ac 00 49 c7 46 08 00 00 00 00 49 8b 04 24 48 85 c0 74 71 4d 8b 3c 24 4d 89 7e 08 66 90 49 8b 07 49 8b 57 08 <48> 89 02 48 85 c0 74 04 48 89 50 08 49 8b 77 18 41 f6 47 22 20 4c [ 27.587021] RSP: 0018:ffffc42d00003ee8 EFLAGS: 00010086 [ 27.592848] RAX: dead000000000122 RBX: 0000000000000000 RCX: 0000000000000101 [ 27.600808] RDX: 0000000000000000 RSI: 0000000000000087 RDI: 0000000000000001 [ 27.608765] RBP: ffffc42d00003f70 R08: 0000000000000000 R09: 00000000000003eb [ 27.616716] R10: ffffa0860cb300d0 R11: ffffa0c44de290b0 R12: ffffc42d00003ef8 [ 27.624673] R13: 00000000fffef200 R14: ffffa0c44de18dc0 R15: ffffa0867a882350 [ 27.632624] FS: 0000000000000000(0000) GS:ffffa0c44de00000(0000) knlGS:0000000000000000 [ 27.641650] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 27.648159] CR2: 0000000000000000 CR3: 000000014569c001 CR4: 0000000000770ef0 [ 27.656119] PKRU: 55555554 [ 27.659133] Kernel panic - not syncing: Fatal exception in interrupt [ 29.030411] Shutting down cpus with NMI [ 29.034699] Kernel Offset: 0x2e600000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 29.046790] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Since unreliable, found by bisecting for KASAN's use-after-free in enqueue_timer+0x4f/0x1e0, where the timer callback is called. Reported-by: Kim Phillips Signed-off-by: Kim Phillips Fixes: aae74ff9caa8 ("drm/ast: Add detect function support") Link: https://lore.kernel.org/lkml/0f7871be-9ca6-5ae4-3a40-5db9a8fb2365@amd.com/ Cc: Ainux Cc: Thomas Zimmermann Cc: David Airlie Cc: David Airlie Cc: Daniel Vetter Cc: sterlingteng@gmail.com Cc: chenhuacai@kernel.org Cc: Chuck Lever III Cc: Borislav Petkov Cc: Borislav Petkov Cc: Jon Grimm Cc: dri-devel Cc: linux-kernel Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20211021153006.92983-1-kim.phillips@amd.com --- drivers/gpu/drm/ast/ast_mode.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 6bfaefa01818..1e30eaeb0e1b 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1300,18 +1300,6 @@ static enum drm_mode_status ast_mode_valid(struct drm_connector *connector, return flags; } -static enum drm_connector_status ast_connector_detect(struct drm_connector - *connector, bool force) -{ - int r; - - r = ast_get_modes(connector); - if (r <= 0) - return connector_status_disconnected; - - return connector_status_connected; -} - static void ast_connector_destroy(struct drm_connector *connector) { struct ast_connector *ast_connector = to_ast_connector(connector); @@ -1327,7 +1315,6 @@ static const struct drm_connector_helper_funcs ast_connector_helper_funcs = { static const struct drm_connector_funcs ast_connector_funcs = { .reset = drm_atomic_helper_connector_reset, - .detect = ast_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = ast_connector_destroy, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, @@ -1355,8 +1342,7 @@ static int ast_connector_init(struct drm_device *dev) connector->interlace_allowed = 0; connector->doublescan_allowed = 0; - connector->polled = DRM_CONNECTOR_POLL_CONNECT | - DRM_CONNECTOR_POLL_DISCONNECT; + connector->polled = DRM_CONNECTOR_POLL_CONNECT; drm_connector_attach_encoder(connector, encoder); @@ -1425,8 +1411,6 @@ int ast_mode_config_init(struct ast_private *ast) drm_mode_config_reset(dev); - drm_kms_helper_poll_init(dev); - return 0; } -- cgit v1.2.3 From 6c9a54551977ddf2d6e22c21354b4fb88946f96e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 21 Oct 2021 10:09:28 +0300 Subject: Revert "memblock: exclude NOMAP regions from kmemleak" Commit 6e44bd6d34d6 ("memblock: exclude NOMAP regions from kmemleak") breaks boot on EFI systems with kmemleak and VM_DEBUG enabled: efi: Processing EFI memory map: efi: 0x000090000000-0x000091ffffff [Conventional| | | | | | | | | | |WB|WT|WC|UC] efi: 0x000092000000-0x0000928fffff [Runtime Data|RUN| | | | | | | | | |WB|WT|WC|UC] ------------[ cut here ]------------ kernel BUG at mm/kmemleak.c:1140! Internal error: Oops - BUG: 0 [#1] SMP Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.15.0-rc6-next-20211019+ #104 pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : kmemleak_free_part_phys+0x64/0x8c lr : kmemleak_free_part_phys+0x38/0x8c sp : ffff800011eafbc0 x29: ffff800011eafbc0 x28: 1fffff7fffb41c0d x27: fffffbfffda0e068 x26: 0000000092000000 x25: 1ffff000023d5f94 x24: ffff800011ed84d0 x23: ffff800011ed84c0 x22: ffff800011ed83d8 x21: 0000000000900000 x20: ffff800011782000 x19: 0000000092000000 x18: ffff800011ee0730 x17: 0000000000000000 x16: 0000000000000000 x15: 1ffff0000233252c x14: ffff800019a905a0 x13: 0000000000000001 x12: ffff7000023d5ed7 x11: 1ffff000023d5ed6 x10: ffff7000023d5ed6 x9 : dfff800000000000 x8 : ffff800011eaf6b7 x7 : 0000000000000001 x6 : ffff800011eaf6b0 x5 : 00008ffffdc2a12a x4 : ffff7000023d5ed7 x3 : 1ffff000023dbf99 x2 : 1ffff000022f0463 x1 : 0000000000000000 x0 : ffffffffffffffff Call trace: kmemleak_free_part_phys+0x64/0x8c memblock_mark_nomap+0x5c/0x78 reserve_regions+0x294/0x33c efi_init+0x2d0/0x490 setup_arch+0x80/0x138 start_kernel+0xa0/0x3ec __primary_switched+0xc0/0xc8 Code: 34000041 97d526e7 f9418e80 36000040 (d4210000) random: get_random_bytes called from print_oops_end_marker+0x34/0x80 with crng_init=0 ---[ end trace 0000000000000000 ]--- The crash happens because kmemleak_free_part_phys() tries to use __va() before memstart_addr is initialized and this triggers a VM_BUG_ON() in arch/arm64/include/asm/memory.h: Revert 6e44bd6d34d6 ("memblock: exclude NOMAP regions from kmemleak"), the issue it is fixing will be fixed differently. Reported-by: Qian Cai Signed-off-by: Mike Rapoport Acked-by: Catalin Marinas Signed-off-by: Linus Torvalds --- mm/memblock.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index b91df5cf54d3..103b052c016e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -936,12 +936,7 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size) */ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size) { - int ret = memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP); - - if (!ret) - kmemleak_free_part_phys(base, size); - - return ret; + return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP); } /** -- cgit v1.2.3 From 658aafc8139c23a6a23f6f4d9a0c4c95476838d4 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 21 Oct 2021 10:09:29 +0300 Subject: memblock: exclude MEMBLOCK_NOMAP regions from kmemleak Vladimir Zapolskiy reports: Commit a7259df76702 ("memblock: make memblock_find_in_range method private") invokes a kernel panic while running kmemleak on OF platforms with nomaped regions: Unable to handle kernel paging request at virtual address fff000021e00000 [...] scan_block+0x64/0x170 scan_gray_list+0xe8/0x17c kmemleak_scan+0x270/0x514 kmemleak_write+0x34c/0x4ac The memory allocated from memblock is registered with kmemleak, but if it is marked MEMBLOCK_NOMAP it won't have linear map entries so an attempt to scan such areas will fault. Ideally, memblock_mark_nomap() would inform kmemleak to ignore MEMBLOCK_NOMAP memory, but it can be called before kmemleak interfaces operating on physical addresses can use __va() conversion. Make sure that functions that mark allocated memory as MEMBLOCK_NOMAP take care of informing kmemleak to ignore such memory. Link: https://lore.kernel.org/all/8ade5174-b143-d621-8c8e-dc6a1898c6fb@linaro.org Link: https://lore.kernel.org/all/c30ff0a2-d196-c50d-22f0-bd50696b1205@quicinc.com Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private") Reported-by: Vladimir Zapolskiy Signed-off-by: Mike Rapoport Reviewed-by: Catalin Marinas Tested-by: Vladimir Zapolskiy Tested-by: Qian Cai Signed-off-by: Linus Torvalds --- drivers/acpi/tables.c | 3 +++ drivers/of/of_reserved_mem.c | 2 ++ mm/memblock.c | 3 +++ 3 files changed, 8 insertions(+) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index f9383736fa0f..71419eb16e09 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "internal.h" #ifdef CONFIG_ACPI_CUSTOM_DSDT @@ -601,6 +602,8 @@ void __init acpi_table_upgrade(void) */ arch_reserve_mem_area(acpi_tables_addr, all_tables_size); + kmemleak_ignore_phys(acpi_tables_addr); + /* * early_ioremap only can remap 256k one time. If we map all * tables one time, we will hit the limit. Need to map chunks diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 59c1390cdf42..9da8835ba5a5 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "of_private.h" @@ -46,6 +47,7 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, err = memblock_mark_nomap(base, size); if (err) memblock_free(base, size); + kmemleak_ignore_phys(base); } return err; diff --git a/mm/memblock.c b/mm/memblock.c index 103b052c016e..5096500b2647 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -932,6 +932,9 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size) * covered by the memory map. The struct page representing NOMAP memory * frames in the memory map will be PageReserved() * + * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from + * memblock, the caller must inform kmemleak to ignore that memory + * * Return: 0 on success, -errno on failure. */ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size) -- cgit v1.2.3 From ee71fb6c4d99c51f2d82a32c503c872b7e40e7f7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 21 Oct 2021 22:20:48 +0200 Subject: drm/i915/selftests: Properly reset mock object propers for each test I forgot to do this properly in commit 6f11f37459d8f9f74ff1c299c0bedd50b458057a Author: Daniel Vetter Date: Fri Jul 23 10:34:55 2021 +0200 drm/plane: remove drm_helper_get_plane_damage_clips intel-gfx CI didn't spot this because we run each selftest in each own invocations, which means reloading i915.ko. But if you just run all the selftests in one go at boot-up, then it falls apart and eventually we cross over the hardcoded limited of how many properties can be attached to a single object. Fix this by resetting the property count. Nothing else to clean up since it's all static storage anyway. Reported-and-tested-by: Sebastian Andrzej Siewior Cc: Sebastian Andrzej Siewior Fixes: 6f11f37459d8 ("drm/plane: remove drm_helper_get_plane_damage_clips") Cc: José Roberto de Souza Cc: Ville Syrjälä Cc: Gwan-gyeong Mun Cc: Hans de Goede Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Acked-by: Thomas Zimmermann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20211021202048.2638668-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/selftests/test-drm_damage_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/selftests/test-drm_damage_helper.c b/drivers/gpu/drm/selftests/test-drm_damage_helper.c index 1c19a5d3eefb..8d8d8e214c28 100644 --- a/drivers/gpu/drm/selftests/test-drm_damage_helper.c +++ b/drivers/gpu/drm/selftests/test-drm_damage_helper.c @@ -30,6 +30,7 @@ static void mock_setup(struct drm_plane_state *state) mock_device.driver = &mock_driver; mock_device.mode_config.prop_fb_damage_clips = &mock_prop; mock_plane.dev = &mock_device; + mock_obj_props.count = 0; mock_plane.base.properties = &mock_obj_props; mock_prop.base.id = 1; /* 0 is an invalid id */ mock_prop.dev = &mock_device; -- cgit v1.2.3 From 1394103fd72ce9c67d20f882a93d59403c8da057 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Thu, 23 Sep 2021 15:57:22 +0800 Subject: vduse: Disallow injecting interrupt before DRIVER_OK is set The interrupt callback should not be triggered before DRIVER_OK is set. Otherwise, it might break the virtio device driver. So let's add a check to avoid the unexpected behavior. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210923075722.98-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 26e3d90d1e7c..cefb301b2ee4 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -966,6 +966,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, break; } case VDUSE_DEV_INJECT_CONFIG_IRQ: + ret = -EINVAL; + if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + break; + ret = 0; queue_work(vduse_irq_wq, &dev->inject); break; @@ -1045,6 +1049,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, case VDUSE_VQ_INJECT_IRQ: { u32 index; + ret = -EINVAL; + if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + break; + ret = -EFAULT; if (get_user(index, (u32 __user *)argp)) break; -- cgit v1.2.3 From 0943aacf5ae10471b68a702c022b42c89e91ba9e Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 29 Sep 2021 16:30:50 +0800 Subject: vduse: Fix race condition between resetting and irq injecting The interrupt might be triggered after a reset since there is no synchronization between resetting and irq injecting. And it might break something if the interrupt is delayed until a new round of device initialization. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210929083050.88-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index cefb301b2ee4..841667a896dd 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -80,6 +80,7 @@ struct vduse_dev { struct vdpa_callback config_cb; struct work_struct inject; spinlock_t irq_lock; + struct rw_semaphore rwsem; int minor; bool broken; bool connected; @@ -410,6 +411,8 @@ static void vduse_dev_reset(struct vduse_dev *dev) if (domain->bounce_map) vduse_domain_reset_bounce_map(domain); + down_write(&dev->rwsem); + dev->status = 0; dev->driver_features = 0; dev->generation++; @@ -443,6 +446,8 @@ static void vduse_dev_reset(struct vduse_dev *dev) flush_work(&vq->inject); flush_work(&vq->kick); } + + up_write(&dev->rwsem); } static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx, @@ -885,6 +890,23 @@ static void vduse_vq_irq_inject(struct work_struct *work) spin_unlock_irq(&vq->irq_lock); } +static int vduse_dev_queue_irq_work(struct vduse_dev *dev, + struct work_struct *irq_work) +{ + int ret = -EINVAL; + + down_read(&dev->rwsem); + if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto unlock; + + ret = 0; + queue_work(vduse_irq_wq, irq_work); +unlock: + up_read(&dev->rwsem); + + return ret; +} + static long vduse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -966,12 +988,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, break; } case VDUSE_DEV_INJECT_CONFIG_IRQ: - ret = -EINVAL; - if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) - break; - - ret = 0; - queue_work(vduse_irq_wq, &dev->inject); + ret = vduse_dev_queue_irq_work(dev, &dev->inject); break; case VDUSE_VQ_SETUP: { struct vduse_vq_config config; @@ -1049,10 +1066,6 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, case VDUSE_VQ_INJECT_IRQ: { u32 index; - ret = -EINVAL; - if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) - break; - ret = -EFAULT; if (get_user(index, (u32 __user *)argp)) break; @@ -1061,9 +1074,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, if (index >= dev->vq_num) break; - ret = 0; index = array_index_nospec(index, dev->vq_num); - queue_work(vduse_irq_wq, &dev->vqs[index].inject); + ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject); break; } default: @@ -1144,6 +1156,7 @@ static struct vduse_dev *vduse_dev_create(void) INIT_LIST_HEAD(&dev->send_list); INIT_LIST_HEAD(&dev->recv_list); spin_lock_init(&dev->irq_lock); + init_rwsem(&dev->rwsem); INIT_WORK(&dev->inject, vduse_dev_irq_inject); init_waitqueue_head(&dev->waitq); -- cgit v1.2.3 From f8690a4b5a1b64f74ae5c4f7c4ea880d8a8e1a0d Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Fri, 15 Oct 2021 11:47:33 +0800 Subject: crypto: x86/sm4 - Fix invalid section entry size This fixes the following warning: vmlinux.o: warning: objtool: elf_update: invalid section entry size The size of the rodata section is 164 bytes, directly using the entry_size of 164 bytes will cause errors in some versions of the gcc compiler, while using 16 bytes directly will cause errors in the clang compiler. This patch correct it by filling the size of rodata to a 16-byte boundary. Fixes: a7ee22ee1445 ("crypto: x86/sm4 - add AES-NI/AVX/x86_64 implementation") Fixes: 5b2efa2bb865 ("crypto: x86/sm4 - add AES-NI/AVX2/x86_64 implementation") Reported-by: Peter Zijlstra Reported-by: Abaci Robot Signed-off-by: Tianjia Zhang Tested-by: Heyuan Shi Signed-off-by: Herbert Xu --- arch/x86/crypto/sm4-aesni-avx-asm_64.S | 6 +++++- arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S index 18d2f5199194..1cc72b4804fa 100644 --- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -78,7 +78,7 @@ vpxor tmp0, x, x; -.section .rodata.cst164, "aM", @progbits, 164 +.section .rodata.cst16, "aM", @progbits, 16 .align 16 /* @@ -133,6 +133,10 @@ .L0f0f0f0f: .long 0x0f0f0f0f +/* 12 bytes, only for padding */ +.Lpadding_deadbeef: + .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .text .align 16 diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S index d2ffd7f76ee2..9c5d3f3ad45a 100644 --- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -93,7 +93,7 @@ vpxor tmp0, x, x; -.section .rodata.cst164, "aM", @progbits, 164 +.section .rodata.cst16, "aM", @progbits, 16 .align 16 /* @@ -148,6 +148,10 @@ .L0f0f0f0f: .long 0x0f0f0f0f +/* 12 bytes, only for padding */ +.Lpadding_deadbeef: + .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .text .align 16 -- cgit v1.2.3 From b5998402e3de429b5e5f9bdea08ddf77c5fd661e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 10:22:34 -0400 Subject: KVM: SEV-ES: rename guest_ins_data to sev_pio_data We will be using this field for OUTS emulation as well, in case the data that is pushed via OUTS spans more than one page. In that case, there will be a need to save the data pointer across exits to userspace. So, change the name to something that refers to any kind of PIO. Also spell out what it is used for, namely SEV-ES. No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/x86.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f8f48a7ec577..6bed6c416c6c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -702,7 +702,7 @@ struct kvm_vcpu_arch { struct kvm_pio_request pio; void *pio_data; - void *guest_ins_data; + void *sev_pio_data; u8 event_exit_inst_len; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 381384a54790..379175b725a1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12370,7 +12370,7 @@ EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) { - memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data, + memcpy(vcpu->arch.sev_pio_data, vcpu->arch.pio_data, vcpu->arch.pio.count * vcpu->arch.pio.size); vcpu->arch.pio.count = 0; @@ -12402,7 +12402,7 @@ static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, if (ret) { vcpu->arch.pio.count = 0; } else { - vcpu->arch.guest_ins_data = data; + vcpu->arch.sev_pio_data = data; vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; } -- cgit v1.2.3 From 0d33b1baeb6ca7165d5ed4fdd1a8f969985e35b9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 13 Oct 2021 12:29:42 -0400 Subject: KVM: x86: leave vcpu->arch.pio.count alone in emulator_pio_in_out Currently emulator_pio_in clears vcpu->arch.pio.count twice if emulator_pio_in_out performs kernel PIO. Move the clear into emulator_pio_out where it is actually necessary. No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 379175b725a1..dff28a4fbb21 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6914,10 +6914,8 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, vcpu->arch.pio.count = count; vcpu->arch.pio.size = size; - if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { - vcpu->arch.pio.count = 0; + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) return 1; - } vcpu->run->exit_reason = KVM_EXIT_IO; vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; @@ -6963,9 +6961,16 @@ static int emulator_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port, const void *val, unsigned int count) { + int ret; + memcpy(vcpu->arch.pio_data, val, size * count); trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data); - return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); + ret = emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); + if (ret) + vcpu->arch.pio.count = 0; + + return ret; + } static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, -- cgit v1.2.3 From ea724ea420aac58b41bc822d1aed6940b136b78d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 10:51:55 -0400 Subject: KVM: SEV-ES: clean up kvm_sev_es_ins/outs A few very small cleanups to the functions, smushed together because the patch is already very small like this: - inline emulator_pio_in_emulated and emulator_pio_out_emulated, since we already have the vCPU - remove the data argument and pull setting vcpu->arch.sev_pio_data into the caller - remove unnecessary clearing of vcpu->arch.pio.count when emulation is done by the kernel (and therefore vcpu->arch.pio.count is already clear on exit from emulator_pio_in and emulator_pio_out). No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dff28a4fbb21..78ed0fe9fa1e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12383,34 +12383,32 @@ static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) } static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, - unsigned int port, void *data, unsigned int count) + unsigned int port, unsigned int count) { - int ret; + int ret = emulator_pio_out(vcpu, size, port, + vcpu->arch.sev_pio_data, count); - ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port, - data, count); - if (ret) + if (ret) { + /* Emulation done by the kernel. */ return ret; + } vcpu->arch.pio.count = 0; - return 0; } static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, - unsigned int port, void *data, unsigned int count) + unsigned int port, unsigned int count) { - int ret; + int ret = emulator_pio_in(vcpu, size, port, + vcpu->arch.sev_pio_data, count); - ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port, - data, count); if (ret) { - vcpu->arch.pio.count = 0; - } else { - vcpu->arch.sev_pio_data = data; - vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; + /* Emulation done by the kernel. */ + return ret; } + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; return 0; } @@ -12418,8 +12416,9 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, unsigned int port, void *data, unsigned int count, int in) { - return in ? kvm_sev_es_ins(vcpu, size, port, data, count) - : kvm_sev_es_outs(vcpu, size, port, data, count); + vcpu->arch.sev_pio_data = data; + return in ? kvm_sev_es_ins(vcpu, size, port, count) + : kvm_sev_es_outs(vcpu, size, port, count); } EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); -- cgit v1.2.3 From 3b27de27183911d461afedf50c6fa30c59740c07 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 13 Oct 2021 12:32:02 -0400 Subject: KVM: x86: split the two parts of emulator_pio_in emulator_pio_in handles both the case where the data is pending in vcpu->arch.pio.count, and the case where I/O has to be done via either an in-kernel device or a userspace exit. For SEV-ES we would like to split these, to identify clearly the moment at which the sev_pio_data is consumed. To this end, create two different functions: __emulator_pio_in fills in vcpu->arch.pio.count, while complete_emulator_pio_in clears it and releases vcpu->arch.pio.data. Because this patch has to be backported, things are left a bit messy. kernel_pio() operates on vcpu->arch.pio, which leads to emulator_pio_in() having with two calls to complete_emulator_pio_in(). It will be fixed in the next release. While at it, remove the unused void* val argument of emulator_pio_in_out. The function currently hardcodes vcpu->arch.pio_data as the source/destination buffer, which sucks but will be fixed after the more severe SEV-ES buffer overflow. No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 78ed0fe9fa1e..c51ea81019e3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6906,7 +6906,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) } static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, - unsigned short port, void *val, + unsigned short port, unsigned int count, bool in) { vcpu->arch.pio.port = port; @@ -6927,26 +6927,38 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, return 0; } -static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, - unsigned short port, void *val, unsigned int count) +static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port, unsigned int count) { - int ret; + WARN_ON(vcpu->arch.pio.count); + memset(vcpu->arch.pio_data, 0, size * count); + return emulator_pio_in_out(vcpu, size, port, count, true); +} - if (vcpu->arch.pio.count) - goto data_avail; +static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port, void *val) +{ + memcpy(val, vcpu->arch.pio_data, size * vcpu->arch.pio.count); + trace_kvm_pio(KVM_PIO_IN, port, size, vcpu->arch.pio.count, vcpu->arch.pio_data); + vcpu->arch.pio.count = 0; +} - memset(vcpu->arch.pio_data, 0, size * count); +static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port, void *val, unsigned int count) +{ + if (vcpu->arch.pio.count) { + /* Complete previous iteration. */ + } else { + int r = __emulator_pio_in(vcpu, size, port, count); + if (!r) + return r; - ret = emulator_pio_in_out(vcpu, size, port, val, count, true); - if (ret) { -data_avail: - memcpy(val, vcpu->arch.pio_data, size * count); - trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data); - vcpu->arch.pio.count = 0; - return 1; + /* Results already available, fall through. */ } - return 0; + WARN_ON(count != vcpu->arch.pio.count); + complete_emulator_pio_in(vcpu, size, port, val); + return 1; } static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, @@ -6965,12 +6977,11 @@ static int emulator_pio_out(struct kvm_vcpu *vcpu, int size, memcpy(vcpu->arch.pio_data, val, size * count); trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data); - ret = emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); + ret = emulator_pio_in_out(vcpu, size, port, count, false); if (ret) vcpu->arch.pio.count = 0; return ret; - } static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, -- cgit v1.2.3 From 6b5efc930bbc8c97e4a1fe2ccb9a6f286365a56d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 12:35:20 -0400 Subject: KVM: x86: remove unnecessary arguments from complete_emulator_pio_in complete_emulator_pio_in can expect that vcpu->arch.pio has been filled in, and therefore does not need the size and count arguments. This makes things nicer when the function is called directly from a complete_userspace_io callback. No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c51ea81019e3..63f9cb33cc19 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6935,11 +6935,12 @@ static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size, return emulator_pio_in_out(vcpu, size, port, count, true); } -static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, int size, - unsigned short port, void *val) +static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val) { - memcpy(val, vcpu->arch.pio_data, size * vcpu->arch.pio.count); - trace_kvm_pio(KVM_PIO_IN, port, size, vcpu->arch.pio.count, vcpu->arch.pio_data); + int size = vcpu->arch.pio.size; + unsigned count = vcpu->arch.pio.count; + memcpy(val, vcpu->arch.pio_data, size * count); + trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data); vcpu->arch.pio.count = 0; } @@ -6957,7 +6958,7 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, } WARN_ON(count != vcpu->arch.pio.count); - complete_emulator_pio_in(vcpu, size, port, val); + complete_emulator_pio_in(vcpu, val); return 1; } -- cgit v1.2.3 From 4fa4b38dae6fc6a3695695add8c18fa8b6a05a1a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 11:25:45 -0400 Subject: KVM: SEV-ES: keep INS functions together Make the diff a little nicer when we actually get to fixing the bug. No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63f9cb33cc19..23e772412134 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12385,15 +12385,6 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, } EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); -static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) -{ - memcpy(vcpu->arch.sev_pio_data, vcpu->arch.pio_data, - vcpu->arch.pio.count * vcpu->arch.pio.size); - vcpu->arch.pio.count = 0; - - return 1; -} - static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, unsigned int port, unsigned int count) { @@ -12409,6 +12400,15 @@ static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, return 0; } +static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) +{ + memcpy(vcpu->arch.sev_pio_data, vcpu->arch.pio_data, + vcpu->arch.pio.count * vcpu->arch.pio.size); + vcpu->arch.pio.count = 0; + + return 1; +} + static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, unsigned int port, unsigned int count) { -- cgit v1.2.3 From 95e16b4792b0429f1933872f743410f00e590c55 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 11:33:03 -0400 Subject: KVM: SEV-ES: go over the sev_pio_data buffer in multiple passes if needed The PIO scratch buffer is larger than a single page, and therefore it is not possible to copy it in a single step to vcpu->arch/pio_data. Bound each call to emulator_pio_in/out to a single page; keep track of how many I/O operations are left in vcpu->arch.sev_pio_count, so that the operation can be restarted in the complete_userspace_io callback. For OUT, this means that the previous kvm_sev_es_outs implementation becomes an iterator of the loop, and we can consume the sev_pio_data buffer before leaving to userspace. For IN, instead, consuming the buffer and decreasing sev_pio_count is always done in the complete_userspace_io callback, because that is when the memcpy is done into sev_pio_data. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reported-by: Felix Wilhelm Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 72 ++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6bed6c416c6c..5a0298aa56ba 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -703,6 +703,7 @@ struct kvm_vcpu_arch { struct kvm_pio_request pio; void *pio_data; void *sev_pio_data; + unsigned sev_pio_count; u8 event_exit_inst_len; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 23e772412134..b26647a5ea22 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12386,38 +12386,77 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, - unsigned int port, unsigned int count) + unsigned int port); + +static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu) { - int ret = emulator_pio_out(vcpu, size, port, - vcpu->arch.sev_pio_data, count); + int size = vcpu->arch.pio.size; + int port = vcpu->arch.pio.port; + + vcpu->arch.pio.count = 0; + if (vcpu->arch.sev_pio_count) + return kvm_sev_es_outs(vcpu, size, port); + return 1; +} + +static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port) +{ + for (;;) { + unsigned int count = + min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count); + int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count); + + /* memcpy done already by emulator_pio_out. */ + vcpu->arch.sev_pio_count -= count; + vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size; + if (!ret) + break; - if (ret) { /* Emulation done by the kernel. */ - return ret; + if (!vcpu->arch.sev_pio_count) + return 1; } - vcpu->arch.pio.count = 0; + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs; return 0; } +static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port); + +static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu) +{ + unsigned count = vcpu->arch.pio.count; + complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data); + vcpu->arch.sev_pio_count -= count; + vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size; +} + static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) { - memcpy(vcpu->arch.sev_pio_data, vcpu->arch.pio_data, - vcpu->arch.pio.count * vcpu->arch.pio.size); - vcpu->arch.pio.count = 0; + int size = vcpu->arch.pio.size; + int port = vcpu->arch.pio.port; + advance_sev_es_emulated_ins(vcpu); + if (vcpu->arch.sev_pio_count) + return kvm_sev_es_ins(vcpu, size, port); return 1; } static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, - unsigned int port, unsigned int count) + unsigned int port) { - int ret = emulator_pio_in(vcpu, size, port, - vcpu->arch.sev_pio_data, count); + for (;;) { + unsigned int count = + min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count); + if (!__emulator_pio_in(vcpu, size, port, count)) + break; - if (ret) { /* Emulation done by the kernel. */ - return ret; + advance_sev_es_emulated_ins(vcpu); + if (!vcpu->arch.sev_pio_count) + return 1; } vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; @@ -12429,8 +12468,9 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, int in) { vcpu->arch.sev_pio_data = data; - return in ? kvm_sev_es_ins(vcpu, size, port, count) - : kvm_sev_es_outs(vcpu, size, port, count); + vcpu->arch.sev_pio_count = count; + return in ? kvm_sev_es_ins(vcpu, size, port) + : kvm_sev_es_outs(vcpu, size, port); } EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); -- cgit v1.2.3 From 8017c99680fa65e1e8d999df1583de476a187830 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Oct 2021 15:19:08 +0200 Subject: hyperv/vmbus: include linux/bitops.h On arm64 randconfig builds, hyperv sometimes fails with this error: In file included from drivers/hv/hv_trace.c:3: In file included from drivers/hv/hyperv_vmbus.h:16: In file included from arch/arm64/include/asm/sync_bitops.h:5: arch/arm64/include/asm/bitops.h:11:2: error: only can be included directly In file included from include/asm-generic/bitops/hweight.h:5: include/asm-generic/bitops/arch_hweight.h:9:9: error: implicit declaration of function '__sw_hweight32' [-Werror,-Wimplicit-function-declaration] include/asm-generic/bitops/atomic.h:17:7: error: implicit declaration of function 'BIT_WORD' [-Werror,-Wimplicit-function-declaration] Include the correct header first. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211018131929.2260087-1-arnd@kernel.org Signed-off-by: Wei Liu --- drivers/hv/hyperv_vmbus.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 42f3d9d123a1..d030577ad6a2 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -13,6 +13,7 @@ #define _HYPERV_VMBUS_H #include +#include #include #include #include -- cgit v1.2.3 From 7f678def99d29c520418607509bb19c7fc96a6db Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 22 Oct 2021 13:28:37 +0300 Subject: skb_expand_head() adjust skb->truesize incorrectly Christoph Paasch reports [1] about incorrect skb->truesize after skb_expand_head() call in ip6_xmit. This may happen because of two reasons: - skb_set_owner_w() for newly cloned skb is called too early, before pskb_expand_head() where truesize is adjusted for (!skb-sk) case. - pskb_expand_head() does not adjust truesize in (skb->sk) case. In this case sk->sk_wmem_alloc should be adjusted too. [1] https://lkml.org/lkml/2021/8/20/1082 Fixes: f1260ff15a71 ("skbuff: introduce skb_expand_head()") Fixes: 2d85a1b31dde ("ipv6: ip6_finish_output2: set sk into newly allocated nskb") Reported-by: Christoph Paasch Signed-off-by: Vasily Averin Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/644330dd-477e-0462-83bf-9f514c41edd1@virtuozzo.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 36 +++++++++++++++++++++++------------- net/core/sock_destructor.h | 12 ++++++++++++ 2 files changed, 35 insertions(+), 13 deletions(-) create mode 100644 net/core/sock_destructor.h diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2170bea2c7de..fe9358437380 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -80,6 +80,7 @@ #include #include "datagram.h" +#include "sock_destructor.h" struct kmem_cache *skbuff_head_cache __ro_after_init; static struct kmem_cache *skbuff_fclone_cache __ro_after_init; @@ -1804,30 +1805,39 @@ EXPORT_SYMBOL(skb_realloc_headroom); struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom) { int delta = headroom - skb_headroom(skb); + int osize = skb_end_offset(skb); + struct sock *sk = skb->sk; if (WARN_ONCE(delta <= 0, "%s is expecting an increase in the headroom", __func__)) return skb; - /* pskb_expand_head() might crash, if skb is shared */ - if (skb_shared(skb)) { + delta = SKB_DATA_ALIGN(delta); + /* pskb_expand_head() might crash, if skb is shared. */ + if (skb_shared(skb) || !is_skb_wmem(skb)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); - if (likely(nskb)) { - if (skb->sk) - skb_set_owner_w(nskb, skb->sk); - consume_skb(skb); - } else { - kfree_skb(skb); - } + if (unlikely(!nskb)) + goto fail; + + if (sk) + skb_set_owner_w(nskb, sk); + consume_skb(skb); skb = nskb; } - if (skb && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(skb); - skb = NULL; + if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) + goto fail; + + if (sk && is_skb_wmem(skb)) { + delta = skb_end_offset(skb) - osize; + refcount_add(delta, &sk->sk_wmem_alloc); + skb->truesize += delta; } return skb; + +fail: + kfree_skb(skb); + return NULL; } EXPORT_SYMBOL(skb_expand_head); diff --git a/net/core/sock_destructor.h b/net/core/sock_destructor.h new file mode 100644 index 000000000000..2f396e6bfba5 --- /dev/null +++ b/net/core/sock_destructor.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_CORE_SOCK_DESTRUCTOR_H +#define _NET_CORE_SOCK_DESTRUCTOR_H +#include + +static inline bool is_skb_wmem(const struct sk_buff *skb) +{ + return skb->destructor == sock_wfree || + skb->destructor == __sock_wfree || + (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree); +} +#endif -- cgit v1.2.3 From 4f7019c7eb33967eb87766e0e4602b5576873680 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:41 -0400 Subject: sctp: use init_tag from inithdr for ABORT chunk Currently Linux SCTP uses the verification tag of the existing SCTP asoc when failing to process and sending the packet with the ABORT chunk. This will result in the peer accepting the ABORT chunk and removing the SCTP asoc. One could exploit this to terminate a SCTP asoc. This patch is to fix it by always using the initiate tag of the received INIT chunk for the ABORT chunk to be sent. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 32df65f68c12..7f8306968c39 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6348,6 +6348,7 @@ static struct sctp_packet *sctp_ootb_pkt_new( * yet. */ switch (chunk->chunk_hdr->type) { + case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: { struct sctp_initack_chunk *initack; -- cgit v1.2.3 From eae5783908042a762c24e1bd11876edb91d314b1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:42 -0400 Subject: sctp: fix the processing for INIT chunk This patch fixes the problems below: 1. In non-shutdown_ack_sent states: in sctp_sf_do_5_1B_init() and sctp_sf_do_5_2_2_dupinit(): chunk length check should be done before any checks that may cause to send abort, as making packet for abort will access the init_tag from init_hdr in sctp_ootb_pkt_new(). 2. In shutdown_ack_sent state: in sctp_sf_do_9_2_reshutack(): The same checks as does in sctp_sf_do_5_2_2_dupinit() is needed for sctp_sf_do_9_2_reshutack(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 72 +++++++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7f8306968c39..9bfa8cca9974 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -156,6 +156,12 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( void *arg, struct sctp_cmd_seq *commands); +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands); + /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument * is set to be the size of a specific chunk we are testing. @@ -337,6 +343,14 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. + * Normally, this would cause an ABORT with a Protocol Violation + * error, but since we don't have an association, we'll + * just discard the packet. + */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -351,14 +365,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * Normally, this would cause an ABORT with a Protocol Violation - * error, but since we don't have an association, we'll - * just discard the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* If the INIT is coming toward a closing socket, we'll send back * and ABORT. Essentially, this catches the race of INIT being * backloged to the socket at the same time as the user issues close(). @@ -1524,20 +1530,16 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * In this case, we generate a protocol violation since we have - * an association established. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); - if (SCTP_INPUT_CB(chunk->skb)->encap_port != chunk->transport->encap_port) return sctp_sf_new_encap_port(net, ep, asoc, type, arg, commands); @@ -1882,9 +1884,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( * its peer. */ if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) { - disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc, - SCTP_ST_CHUNK(chunk->chunk_hdr->type), - chunk, commands); + disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc, + SCTP_ST_CHUNK(chunk->chunk_hdr->type), + chunk, commands); if (SCTP_DISPOSITION_NOMEM == disposition) goto nomem; @@ -2970,13 +2972,11 @@ enum sctp_disposition sctp_sf_do_9_2_shut_ctsn( * that belong to this association, it should discard the INIT chunk and * retransmit the SHUTDOWN ACK chunk. */ -enum sctp_disposition sctp_sf_do_9_2_reshutack( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *commands) +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; @@ -3010,6 +3010,26 @@ nomem: return SCTP_DISPOSITION_NOMEM; } +enum sctp_disposition +sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *chunk = arg; + + if (!chunk->singleton) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (chunk->sctp_hdr->vtag != 0) + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); + + return __sctp_sf_do_9_2_reshutack(net, ep, asoc, type, arg, commands); +} + /* * sctp_sf_do_ecn_cwr * -- cgit v1.2.3 From 438b95a7c98f77d51cbf4db021f41b602d750a3f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:43 -0400 Subject: sctp: fix the processing for INIT_ACK chunk Currently INIT_ACK chunk in non-cookie_echoed state is processed in sctp_sf_discard_chunk() to send an abort with the existent asoc's vtag if the chunk length is not valid. But the vtag in the chunk's sctphdr is not verified, which may be exploited by one to cook a malicious chunk to terminal a SCTP asoc. sctp_sf_discard_chunk() also is called in many other places to send an abort, and most of those have this problem. This patch is to fix it by sending abort with the existent asoc's vtag only if the vtag from the chunk's sctphdr is verified in sctp_sf_discard_chunk(). Note on sctp_sf_do_9_1_abort() and sctp_sf_shutdown_pending_abort(), the chunk length has been verified before sctp_sf_discard_chunk(), so replace it with sctp_sf_discard(). On sctp_sf_do_asconf_ack() and sctp_sf_do_asconf(), move the sctp_chunk_length_valid check ahead of sctp_sf_discard_chunk(), then replace it with sctp_sf_discard(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9bfa8cca9974..672e5308839b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2343,7 +2343,7 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2389,7 +2389,7 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2659,7 +2659,7 @@ enum sctp_disposition sctp_sf_do_9_1_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -3865,6 +3865,11 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + /* Make sure that the ASCONF ADDIP chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* ADD-IP: Section 4.1.1 * This chunk MUST be sent in an authenticated way by using * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk @@ -3873,13 +3878,7 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, */ if (!asoc->peer.asconf_capable || (!net->sctp.addip_noauth && !chunk->auth)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, - commands); - - /* Make sure that the ASCONF ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); hdr = (struct sctp_addiphdr *)chunk->skb->data; serial = ntohl(hdr->serial); @@ -4008,6 +4007,12 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + /* Make sure that the ADDIP chunk has a valid length. */ + if (!sctp_chunk_length_valid(asconf_ack, + sizeof(struct sctp_addip_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* ADD-IP, Section 4.1.2: * This chunk MUST be sent in an authenticated way by using * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk @@ -4016,14 +4021,7 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, */ if (!asoc->peer.asconf_capable || (!net->sctp.addip_noauth && !asconf_ack->auth)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, - commands); - - /* Make sure that the ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(asconf_ack, - sizeof(struct sctp_addip_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data; rcvd_serial = ntohl(addip_hdr->serial); @@ -4595,6 +4593,9 @@ enum sctp_disposition sctp_sf_discard_chunk(struct net *net, { struct sctp_chunk *chunk = arg; + if (asoc && !sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the chunk has a valid length. * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. -- cgit v1.2.3 From a64b341b8695e1c744dd972b39868371b4f68f83 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:44 -0400 Subject: sctp: fix the processing for COOKIE_ECHO chunk 1. In closed state: in sctp_sf_do_5_1D_ce(): When asoc is NULL, making packet for abort will use chunk's vtag in sctp_ootb_pkt_new(). But when asoc exists, vtag from the chunk should be verified before using peer.i.init_tag to make packet for abort in sctp_ootb_pkt_new(), and just discard it if vtag is not correct. 2. In the other states: in sctp_sf_do_5_2_4_dupcook(): asoc always exists, but duplicate cookie_echo's vtag will be handled by sctp_tietags_compare() and then take actions, so before that we only verify the vtag for the abort sent for invalid chunk length. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 672e5308839b..96a069d725e9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -710,6 +710,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, struct sock *sk; int error = 0; + if (asoc && !sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -724,7 +727,8 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, * in sctp_unpack_cookie(). */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); /* If the endpoint is not listening or if the number of associations * on the TCP-style socket exceed the max backlog, respond with an @@ -2204,9 +2208,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( * enough for the chunk header. Cookie length verification is * done later. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) { + if (!sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + } /* "Decode" the chunk. We have no optional parameters so we * are in good shape. -- cgit v1.2.3 From aa0f697e45286a6b5f0ceca9418acf54b9099d99 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:45 -0400 Subject: sctp: add vtag check in sctp_sf_violation sctp_sf_violation() is called when processing HEARTBEAT_ACK chunk in cookie_wait state, and some other places are also using it. The vtag in the chunk's sctphdr should be verified, otherwise, as later in chunk length check, it may send abort with the existent asoc's vtag, which can be exploited by one to cook a malicious chunk to terminate a SCTP asoc. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 96a069d725e9..36328ab88bdd 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4669,6 +4669,9 @@ enum sctp_disposition sctp_sf_violation(struct net *net, { struct sctp_chunk *chunk = arg; + if (!sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, -- cgit v1.2.3 From ef16b1734f0a176277b7bb9c71a6d977a6ef3998 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:46 -0400 Subject: sctp: add vtag check in sctp_sf_do_8_5_1_E_sa sctp_sf_do_8_5_1_E_sa() is called when processing SHUTDOWN_ACK chunk in cookie_wait and cookie_echoed state. The vtag in the chunk's sctphdr should be verified, otherwise, as later in chunk length check, it may send abort with the existent asoc's vtag, which can be exploited by one to cook a malicious chunk to terminate a SCTP asoc. Note that when fails to verify the vtag from SHUTDOWN-ACK chunk, SHUTDOWN COMPLETE message will still be sent back to peer, but with the vtag from SHUTDOWN-ACK chunk, as said in 5) of rfc4960#section-8.4. While at it, also remove the unnecessary chunk length check from sctp_sf_shut_8_4_5(), as it's already done in both places where it calls sctp_sf_shut_8_4_5(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 36328ab88bdd..a3545498a038 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3803,12 +3803,6 @@ static enum sctp_disposition sctp_sf_shut_8_4_5( SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); - /* If the chunk length is invalid, we don't want to process - * the reset of the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* We need to discard the rest of the packet to prevent * potential boomming attacks from additional bundled chunks. * This is documented in SCTP Threats ID. @@ -3836,6 +3830,9 @@ enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net, { struct sctp_chunk *chunk = arg; + if (!sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, -- cgit v1.2.3 From 9d02831e517aa36ee6bdb453a0eb47bd49923fe3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:47 -0400 Subject: sctp: add vtag check in sctp_sf_ootb sctp_sf_ootb() is called when processing DATA chunk in closed state, and many other places are also using it. The vtag in the chunk's sctphdr should be verified, otherwise, as later in chunk length check, it may send abort with the existent asoc's vtag, which can be exploited by one to cook a malicious chunk to terminate a SCTP asoc. When fails to verify the vtag from the chunk, this patch sets asoc to NULL, so that the abort will be made with the vtag from the received chunk later. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a3545498a038..fb3da4d8f4a3 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3688,6 +3688,9 @@ enum sctp_disposition sctp_sf_ootb(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); + if (asoc && !sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + ch = (struct sctp_chunkhdr *)chunk->chunk_hdr; do { /* Report violation if the chunk is less then minimal */ -- cgit v1.2.3 From 1f83b835a3eaa5ae4bd825fb07182698bfc243ba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Oct 2021 16:02:47 +0200 Subject: fcnal-test: kill hanging ping/nettest binaries on cleanup On my box I see a bunch of ping/nettest processes hanging around after fcntal-test.sh is done. Clean those up before netns deletion. Signed-off-by: Florian Westphal Acked-by: David Ahern Link: https://lore.kernel.org/r/20211021140247.29691-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 8e67a252b672..3313566ce906 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -445,10 +445,13 @@ cleanup() ip -netns ${NSA} link set dev ${NSA_DEV} down ip -netns ${NSA} link del dev ${NSA_DEV} + ip netns pids ${NSA} | xargs kill 2>/dev/null ip netns del ${NSA} fi + ip netns pids ${NSB} | xargs kill 2>/dev/null ip netns del ${NSB} + ip netns pids ${NSC} | xargs kill 2>/dev/null ip netns del ${NSC} >/dev/null 2>&1 } -- cgit v1.2.3 From 8f04db78e4e36a5d4858ce841a3e9cc3d69bde36 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 14 Oct 2021 15:25:51 +0100 Subject: bpf: Define bpf_jit_alloc_exec_limit for riscv JIT Expose the maximum amount of useable memory from the riscv JIT. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Luke Nelson Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20211014142554.53120-2-lmb@cloudflare.com --- arch/riscv/net/bpf_jit_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index fed86f42dfbe..0fee2cbaaf53 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -166,6 +166,11 @@ out: return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, -- cgit v1.2.3 From 5d63ae908242f028bd10860cba98450d11c079b8 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 14 Oct 2021 15:25:52 +0100 Subject: bpf: Define bpf_jit_alloc_exec_limit for arm64 JIT Expose the maximum amount of useable memory from the arm64 JIT. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211014142554.53120-3-lmb@cloudflare.com --- arch/arm64/net/bpf_jit_comp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 41c23f474ea6..803e7773fa86 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1136,6 +1136,11 @@ out: return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, -- cgit v1.2.3 From fadb7ff1a6c2c565af56b4aacdd086b067eed440 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 14 Oct 2021 15:25:53 +0100 Subject: bpf: Prevent increasing bpf_jit_limit above max Restrict bpf_jit_limit to the maximum supported by the arch's JIT. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211014142554.53120-4-lmb@cloudflare.com --- include/linux/filter.h | 1 + kernel/bpf/core.c | 4 +++- net/core/sysctl_net_core.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 4a93c12543ee..ef03ff34234d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1051,6 +1051,7 @@ extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; extern long bpf_jit_limit; +extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d6b7dfdd8066..c1e7eb3f1876 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -524,6 +524,7 @@ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_harden __read_mostly; long bpf_jit_limit __read_mostly; +long bpf_jit_limit_max __read_mostly; static void bpf_prog_ksym_set_addr(struct bpf_prog *prog) @@ -817,7 +818,8 @@ u64 __weak bpf_jit_alloc_exec_limit(void) static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ - bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, + bpf_jit_limit_max = bpf_jit_alloc_exec_limit(); + bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2, PAGE_SIZE), LONG_MAX); return 0; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index c8496c1142c9..5f88526ad61c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -419,7 +419,7 @@ static struct ctl_table net_core_table[] = { .mode = 0600, .proc_handler = proc_dolongvec_minmax_bpf_restricted, .extra1 = &long_one, - .extra2 = &long_max, + .extra2 = &bpf_jit_limit_max, }, #endif { -- cgit v1.2.3 From fda7a38714f40b635f5502ec4855602c6b33dad2 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 19 Oct 2021 03:29:34 +0000 Subject: bpf: Fix error usage of map_fd and fdget() in generic_map_update_batch() 1. The ufd in generic_map_update_batch() should be read from batch.map_fd; 2. A call to fdget() should be followed by a symmetric call to fdput(). Fixes: aa2e93b8e58e ("bpf: Add generic support for update and delete batch ops") Signed-off-by: Xu Kuohai Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211019032934.1210517-1-xukuohai@huawei.com --- kernel/bpf/syscall.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4e50c0bfdb7d..9dab49d3f394 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1337,12 +1337,11 @@ int generic_map_update_batch(struct bpf_map *map, void __user *values = u64_to_user_ptr(attr->batch.values); void __user *keys = u64_to_user_ptr(attr->batch.keys); u32 value_size, cp, max_count; - int ufd = attr->map_fd; + int ufd = attr->batch.map_fd; void *key, *value; struct fd f; int err = 0; - f = fdget(ufd); if (attr->batch.elem_flags & ~BPF_F_LOCK) return -EINVAL; @@ -1367,6 +1366,7 @@ int generic_map_update_batch(struct bpf_map *map, return -ENOMEM; } + f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */ for (cp = 0; cp < max_count; cp++) { err = -EFAULT; if (copy_from_user(key, keys + cp * map->key_size, @@ -1386,6 +1386,7 @@ int generic_map_update_batch(struct bpf_map *map, kvfree(value); kvfree(key); + fdput(f); return err; } -- cgit v1.2.3 From 04f8ef5643bcd8bcde25dfdebef998aea480b2ba Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Mon, 18 Oct 2021 15:56:23 +0800 Subject: cgroup: Fix memory leak caused by missing cgroup_bpf_offline When enabling CONFIG_CGROUP_BPF, kmemleak can be observed by running the command as below: $mount -t cgroup -o none,name=foo cgroup cgroup/ $umount cgroup/ unreferenced object 0xc3585c40 (size 64): comm "mount", pid 425, jiffies 4294959825 (age 31.990s) hex dump (first 32 bytes): 01 00 00 80 84 8c 28 c0 00 00 00 00 00 00 00 00 ......(......... 00 00 00 00 00 00 00 00 6c 43 a0 c3 00 00 00 00 ........lC...... backtrace: [] cgroup_bpf_inherit+0x44/0x24c [<1f03679c>] cgroup_setup_root+0x174/0x37c [] cgroup1_get_tree+0x2c0/0x4a0 [] vfs_get_tree+0x24/0x108 [] path_mount+0x384/0x988 [] do_mount+0x64/0x9c [<208c9cfe>] sys_mount+0xfc/0x1f4 [<06dd06e0>] ret_fast_syscall+0x0/0x48 [] 0xbeb4daa8 This is because that since the commit 2b0d3d3e4fcf ("percpu_ref: reduce memory footprint of percpu_ref in fast path") root_cgrp->bpf.refcnt.data is allocated by the function percpu_ref_init in cgroup_bpf_inherit which is called by cgroup_setup_root when mounting, but not freed along with root_cgrp when umounting. Adding cgroup_bpf_offline which calls percpu_ref_kill to cgroup_kill_sb can free root_cgrp->bpf.refcnt.data in umount path. This patch also fixes the commit 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself"). A cgroup_bpf_offline is needed to do a cleanup that frees the resources which are allocated by cgroup_bpf_inherit in cgroup_setup_root. And inside cgroup_bpf_offline, cgroup_get() is at the beginning and cgroup_put is at the end of cgroup_bpf_release which is called by cgroup_bpf_offline. So cgroup_bpf_offline can keep the balance of cgroup's refcount. Fixes: 2b0d3d3e4fcf ("percpu_ref: reduce memory footprint of percpu_ref in fast path") Fixes: 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself") Signed-off-by: Quanyang Wang Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211018075623.26884-1-quanyang.wang@windriver.com --- kernel/cgroup/cgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 570b0c97392a..ea08f01d0111 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2187,8 +2187,10 @@ static void cgroup_kill_sb(struct super_block *sb) * And don't kill the default root. */ if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root && - !percpu_ref_is_dying(&root->cgrp.self.refcnt)) + !percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + cgroup_bpf_offline(&root->cgrp); percpu_ref_kill(&root->cgrp.self.refcnt); + } cgroup_put(&root->cgrp); kernfs_kill_sb(sb); } -- cgit v1.2.3 From d6423d2ec39cce2bfca418c81ef51792891576bc Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Fri, 22 Oct 2021 11:13:53 -0400 Subject: net: ethernet: microchip: lan743x: Fix driver crash when lan743x_pm_resume fails The driver needs to clean up and return when the initialization fails on resume. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 9e8561cdc32a..3ac968b1c6ce 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3019,6 +3019,8 @@ static int lan743x_pm_resume(struct device *dev) if (ret) { netif_err(adapter, probe, adapter->netdev, "lan743x_hardware_init returned %d\n", ret); + lan743x_pci_cleanup(adapter); + return ret; } /* open netdev when netdev is at running state while resume. -- cgit v1.2.3 From 95a359c9553342d36d408d35331ff0bfce75272f Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Fri, 22 Oct 2021 11:53:43 -0400 Subject: net: ethernet: microchip: lan743x: Fix dma allocation failure by using dma_set_mask_and_coherent The dma failure was reported in the raspberry pi github (issue #4117). https://github.com/raspberrypi/linux/issues/4117 The use of dma_set_mask_and_coherent fixes the issue. Tested on 32/64-bit raspberry pi CM4 and 64-bit ubuntu x86 PC with EVB-LAN7430. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 3ac968b1c6ce..c4f32c7e0db1 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1743,6 +1743,16 @@ static int lan743x_tx_ring_init(struct lan743x_tx *tx) ret = -EINVAL; goto cleanup; } + if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev, + DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev, + DMA_BIT_MASK(32))) { + dev_warn(&tx->adapter->pdev->dev, + "lan743x_: No suitable DMA available\n"); + ret = -ENOMEM; + goto cleanup; + } + } ring_allocation_size = ALIGN(tx->ring_size * sizeof(struct lan743x_tx_descriptor), PAGE_SIZE); @@ -2276,6 +2286,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) ret = -EINVAL; goto cleanup; } + if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev, + DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev, + DMA_BIT_MASK(32))) { + dev_warn(&rx->adapter->pdev->dev, + "lan743x_: No suitable DMA available\n"); + ret = -ENOMEM; + goto cleanup; + } + } ring_allocation_size = ALIGN(rx->ring_size * sizeof(struct lan743x_rx_descriptor), PAGE_SIZE); -- cgit v1.2.3 From 87066fdd2e30fe9dd531125d95257c118a74617e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Oct 2021 09:48:33 -1000 Subject: Revert "mm/secretmem: use refcount_t instead of atomic_t" This reverts commit 110860541f443f950c1274f217a1a3e298670a33. Converting the "secretmem_users" counter to a refcount is incorrect, because a refcount is special in zero and can't just be incremented (but a count of users is not, and "no users" is actually perfectly valid and not a sign of a free'd resource). Reported-by: syzbot+75639e6a0331cd61d3e2@syzkaller.appspotmail.com Cc: Jordy Zomer Cc: Kees Cook , Cc: Jordy Zomer Cc: James Bottomley Cc: Mike Rapoport Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/secretmem.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/secretmem.c b/mm/secretmem.c index 1fea68b8d5a6..030f02ddc7c1 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -18,7 +18,6 @@ #include #include #include -#include #include @@ -41,11 +40,11 @@ module_param_named(enable, secretmem_enable, bool, 0400); MODULE_PARM_DESC(secretmem_enable, "Enable secretmem and memfd_secret(2) system call"); -static refcount_t secretmem_users; +static atomic_t secretmem_users; bool secretmem_active(void) { - return !!refcount_read(&secretmem_users); + return !!atomic_read(&secretmem_users); } static vm_fault_t secretmem_fault(struct vm_fault *vmf) @@ -104,7 +103,7 @@ static const struct vm_operations_struct secretmem_vm_ops = { static int secretmem_release(struct inode *inode, struct file *file) { - refcount_dec(&secretmem_users); + atomic_dec(&secretmem_users); return 0; } @@ -218,7 +217,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) file->f_flags |= O_LARGEFILE; fd_install(fd, file); - refcount_inc(&secretmem_users); + atomic_inc(&secretmem_users); return fd; err_put_fd: -- cgit v1.2.3 From a0023bb9dd9bc439d44604eeec62426a990054cd Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Fri, 22 Oct 2021 09:12:26 +0000 Subject: ata: sata_mv: Fix the error handling of mv_chip_id() mv_init_host() propagates the value returned by mv_chip_id() which in turn gets propagated by mv_pci_init_one() and hits local_pci_probe(). During the process of driver probing, the probe function should return < 0 for failure, otherwise, the kernel will treat value > 0 as success. Since this is a bug rather than a recoverable runtime error we should use dev_alert() instead of dev_err(). Signed-off-by: Zheyu Ma Signed-off-by: Damien Le Moal --- drivers/ata/sata_mv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 9d86203e1e7a..c53633d47bfb 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -3896,8 +3896,8 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx) break; default: - dev_err(host->dev, "BUG: invalid board index %u\n", board_idx); - return 1; + dev_alert(host->dev, "BUG: invalid board index %u\n", board_idx); + return -EINVAL; } hpriv->hp_flags = hp_flags; -- cgit v1.2.3 From 41ee7232fa5f3c034f22baa52bc287e494e2129a Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 21 Oct 2021 00:23:13 +1100 Subject: powerpc/pseries/iommu: Use correct vfree for it_map The it_map array is vzalloc'ed so use vfree() for it when creating a huge DMA window failed for whatever reason. While at this, write zero to it_map. Fixes: 381ceda88c4c ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211020132315.2287178-3-aik@ozlabs.ru --- arch/powerpc/platforms/pseries/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index dab5c56ffd0e..c003f698eedd 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1440,7 +1440,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) /* Keep default DMA window stuct if removed */ if (default_win_removed) { tbl->it_size = 0; - kfree(tbl->it_map); + vfree(tbl->it_map); + tbl->it_map = NULL; } set_iommu_table_base(&dev->dev, newtbl); -- cgit v1.2.3 From 92fe01b7c655b9767724e7d62bdded0761d232ff Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 21 Oct 2021 00:23:14 +1100 Subject: powerpc/pseries/iommu: Check if the default window in use before removing it At the moment this check is performed after we remove the default window which is late and disallows to revert whatever changes enable_ddw() has made to DMA windows. This moves the check and error exit before removing the window. This raised the message severity from "debug" to "warning" as this should not happen in practice and cannot be triggered by the userspace. Fixes: 381ceda88c4c ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211020132315.2287178-4-aik@ozlabs.ru --- arch/powerpc/platforms/pseries/iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index c003f698eedd..8f921e38ce74 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1302,6 +1302,12 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct property *default_win; int reset_win_ext; + /* DDW + IOMMU on single window may fail if there is any allocation */ + if (iommu_table_in_use(tbl)) { + dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); + goto out_failed; + } + default_win = of_find_property(pdn, "ibm,dma-window", NULL); if (!default_win) goto out_failed; @@ -1356,12 +1362,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) query.largest_available_block, 1ULL << page_shift); - /* DDW + IOMMU on single window may fail if there is any allocation */ - if (default_win_removed && iommu_table_in_use(tbl)) { - dev_dbg(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); - goto out_failed; - } - len = order_base_2(query.largest_available_block << page_shift); win_name = DMA64_PROPNAME; } else { -- cgit v1.2.3 From d853adc7adf601d7d6823afe3ed396065a3e08d1 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 21 Oct 2021 00:23:15 +1100 Subject: powerpc/pseries/iommu: Create huge DMA window if no MMIO32 is present The iommu_init_table() helper takes an address range to reserve in the IOMMU table being initialized to exclude MMIO addresses, this is useful if the window stretches far beyond 4GB (although wastes some TCEs). At the moment the code searches for such MMIO32 range and fails if none found which is considered a problem while it really is not: it is actually better as this says there is no MMIO32 to reserve and we can use usually wasted TCEs. Furthermore PHYP never actually allows creating windows starting at busaddress=0 so this MMIO32 range is never useful. This removes error exit and initializes the table with zero range if no MMIO32 is detected. Fixes: 381ceda88c4c ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211020132315.2287178-5-aik@ozlabs.ru --- arch/powerpc/platforms/pseries/iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 8f921e38ce74..a52af8fbf571 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1411,18 +1411,19 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) } else { struct iommu_table *newtbl; int i; + unsigned long start = 0, end = 0; for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; /* Look for MMIO32 */ - if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) + if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { + start = pci->phb->mem_resources[i].start; + end = pci->phb->mem_resources[i].end; break; + } } - if (i == ARRAY_SIZE(pci->phb->mem_resources)) - goto out_del_list; - /* New table for using DDW instead of the default DMA window */ newtbl = iommu_pseries_alloc_table(pci->phb->node); if (!newtbl) { @@ -1432,8 +1433,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr, 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); - iommu_init_table(newtbl, pci->phb->node, pci->phb->mem_resources[i].start, - pci->phb->mem_resources[i].end); + iommu_init_table(newtbl, pci->phb->node, start, end); pci->table_group->tables[1] = newtbl; -- cgit v1.2.3 From 85fe6415c146d5d42ce300c12f1ecf4d4af47d40 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 14 Oct 2021 14:33:42 +0200 Subject: gpio: xgs-iproc: fix parsing of ngpios property of_property_read_u32 returns 0 on success, not true, so we need to invert the check to actually take over the provided ngpio value. Fixes: 6a41b6c5fc20 ("gpio: Add xgs-iproc driver") Signed-off-by: Jonas Gorski Reviewed-by: Chris Packham Reviewed-by: Florian Fainelli Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-xgs-iproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-xgs-iproc.c b/drivers/gpio/gpio-xgs-iproc.c index fa9b4d8c3ff5..43ca52fa6f9a 100644 --- a/drivers/gpio/gpio-xgs-iproc.c +++ b/drivers/gpio/gpio-xgs-iproc.c @@ -224,7 +224,7 @@ static int iproc_gpio_probe(struct platform_device *pdev) } chip->gc.label = dev_name(dev); - if (of_property_read_u32(dn, "ngpios", &num_gpios)) + if (!of_property_read_u32(dn, "ngpios", &num_gpios)) chip->gc.ngpio = num_gpios; irq = platform_get_irq(pdev, 0); -- cgit v1.2.3 From c0eee6fbfa2b3377f1efed10dad539abeb7312aa Mon Sep 17 00:00:00 2001 From: Asmaa Mnebhi Date: Fri, 22 Oct 2021 09:44:38 -0400 Subject: gpio: mlxbf2.c: Add check for bgpio_init failure Add a check if bgpio_init fails. Signed-off-by: Asmaa Mnebhi Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mlxbf2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c index 177d03ef4529..40a052bc6784 100644 --- a/drivers/gpio/gpio-mlxbf2.c +++ b/drivers/gpio/gpio-mlxbf2.c @@ -256,6 +256,11 @@ mlxbf2_gpio_probe(struct platform_device *pdev) NULL, 0); + if (ret) { + dev_err(dev, "bgpio_init failed\n"); + return ret; + } + gc->direction_input = mlxbf2_gpio_direction_input; gc->direction_output = mlxbf2_gpio_direction_output; gc->ngpio = npins; -- cgit v1.2.3 From 00568b8a6364e15009b345b462e927e0b9fc2bb9 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Thu, 21 Oct 2021 10:26:57 +0100 Subject: ARM: 9148/1: handle CONFIG_CPU_ENDIAN_BE32 in arch/arm/kernel/head.S My intel-ixp42x-welltech-epbx100 no longer boot since 4.14. This is due to commit 463dbba4d189 ("ARM: 9104/2: Fix Keystone 2 kernel mapping regression") which forgot to handle CONFIG_CPU_ENDIAN_BE32 as possible BE config. Suggested-by: Krzysztof Hałasa Fixes: 463dbba4d189 ("ARM: 9104/2: Fix Keystone 2 kernel mapping regression") Signed-off-by: Corentin Labbe Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 29070eb8df7d..3fc7f9750ce4 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -253,7 +253,7 @@ __create_page_tables: add r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ORDER) ldr r6, =(_end - 1) adr_l r5, kernel_sec_start @ _pa(kernel_sec_start) -#ifdef CONFIG_CPU_ENDIAN_BE8 +#if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32 str r8, [r5, #4] @ Save physical start of kernel (BE) #else str r8, [r5] @ Save physical start of kernel (LE) @@ -266,7 +266,7 @@ __create_page_tables: bls 1b eor r3, r3, r7 @ Remove the MMU flags adr_l r5, kernel_sec_end @ _pa(kernel_sec_end) -#ifdef CONFIG_CPU_ENDIAN_BE8 +#if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32 str r3, [r5, #4] @ Save physical end of kernel (BE) #else str r3, [r5] @ Save physical end of kernel (LE) -- cgit v1.2.3 From 8228c77d8b56e3f735baf71fefb1b548c23691a7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 23 Oct 2021 21:29:22 +0100 Subject: KVM: x86: switch pvclock_gtod_sync_lock to a raw spinlock On the preemption path when updating a Xen guest's runstate times, this lock is taken inside the scheduler rq->lock, which is a raw spinlock. This was shown in a lockdep warning: [ 89.138354] ============================= [ 89.138356] [ BUG: Invalid wait context ] [ 89.138358] 5.15.0-rc5+ #834 Tainted: G S I E [ 89.138360] ----------------------------- [ 89.138361] xen_shinfo_test/2575 is trying to lock: [ 89.138363] ffffa34a0364efd8 (&kvm->arch.pvclock_gtod_sync_lock){....}-{3:3}, at: get_kvmclock_ns+0x1f/0x130 [kvm] [ 89.138442] other info that might help us debug this: [ 89.138444] context-{5:5} [ 89.138445] 4 locks held by xen_shinfo_test/2575: [ 89.138447] #0: ffff972bdc3b8108 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0x77/0x6f0 [kvm] [ 89.138483] #1: ffffa34a03662e90 (&kvm->srcu){....}-{0:0}, at: kvm_arch_vcpu_ioctl_run+0xdc/0x8b0 [kvm] [ 89.138526] #2: ffff97331fdbac98 (&rq->__lock){-.-.}-{2:2}, at: __schedule+0xff/0xbd0 [ 89.138534] #3: ffffa34a03662e90 (&kvm->srcu){....}-{0:0}, at: kvm_arch_vcpu_put+0x26/0x170 [kvm] ... [ 89.138695] get_kvmclock_ns+0x1f/0x130 [kvm] [ 89.138734] kvm_xen_update_runstate+0x14/0x90 [kvm] [ 89.138783] kvm_xen_update_runstate_guest+0x15/0xd0 [kvm] [ 89.138830] kvm_arch_vcpu_put+0xe6/0x170 [kvm] [ 89.138870] kvm_sched_out+0x2f/0x40 [kvm] [ 89.138900] __schedule+0x5de/0xbd0 Cc: stable@vger.kernel.org Reported-by: syzbot+b282b65c2c68492df769@syzkaller.appspotmail.com Fixes: 30b5c851af79 ("KVM: x86/xen: Add support for vCPU runstate information") Signed-off-by: David Woodhouse Message-Id: <1b02a06421c17993df337493a68ba923f3bd5c0f.camel@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/x86.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f8f48a7ec577..70771376e246 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1097,7 +1097,7 @@ struct kvm_arch { u64 cur_tsc_generation; int nr_vcpus_matched_tsc; - spinlock_t pvclock_gtod_sync_lock; + raw_spinlock_t pvclock_gtod_sync_lock; bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c8b5129effd..5c82eff19e4a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2542,7 +2542,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) kvm_vcpu_write_tsc_offset(vcpu, offset); raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); - spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags); if (!matched) { kvm->arch.nr_vcpus_matched_tsc = 0; } else if (!already_matched) { @@ -2550,7 +2550,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) } kvm_track_tsc_matching(vcpu); - spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags); } static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, @@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) kvm_make_mclock_inprogress_request(kvm); /* no guest entries from this point */ - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); pvclock_update_vm_gtod_copy(kvm); - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm) unsigned long flags; u64 ret; - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); if (!ka->use_master_clock) { - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); return get_kvmclock_base_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); @@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) * If the host uses TSC clock, then passthrough TSC as stable * to the guest. */ - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); use_master_clock = ka->use_master_clock; if (use_master_clock) { host_tsc = ka->master_cycle_now; kernel_ns = ka->master_kernel_ns; } - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); @@ -6100,13 +6100,13 @@ set_pit2_out: * is slightly ahead) here we risk going negative on unsigned * 'system_time' when 'user_ns.clock' is very small. */ - spin_lock_irq(&ka->pvclock_gtod_sync_lock); + raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock); if (kvm->arch.use_master_clock) now_ns = ka->master_kernel_ns; else now_ns = get_kvmclock_base_ns(); ka->kvmclock_offset = user_ns.clock - now_ns; - spin_unlock_irq(&ka->pvclock_gtod_sync_lock); + raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock); kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); break; @@ -8139,9 +8139,9 @@ static void kvm_hyperv_tsc_notifier(void) list_for_each_entry(kvm, &vm_list, vm_list) { struct kvm_arch *ka = &kvm->arch; - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); pvclock_update_vm_gtod_copy(kvm); - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); kvm_for_each_vcpu(cpu, vcpu, kvm) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -11182,7 +11182,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) raw_spin_lock_init(&kvm->arch.tsc_write_lock); mutex_init(&kvm->arch.apic_map_lock); - spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); + raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); kvm->arch.kvmclock_offset = -get_kvmclock_base_ns(); pvclock_update_vm_gtod_copy(kvm); -- cgit v1.2.3 From c10a485c3de5ccbf1fff65a382cebcb2730c6b06 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:02 +0200 Subject: phy: phy_ethtool_ksettings_get: Lock the phy for consistency The PHY structure should be locked while copying information out if it, otherwise there is no guarantee of self consistency. Without the lock the PHY state machine could be updating the structure. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index f124a8a58bd4..8457b829667e 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -299,6 +299,7 @@ EXPORT_SYMBOL(phy_ethtool_ksettings_set); void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd) { + mutex_lock(&phydev->lock); linkmode_copy(cmd->link_modes.supported, phydev->supported); linkmode_copy(cmd->link_modes.advertising, phydev->advertising); linkmode_copy(cmd->link_modes.lp_advertising, phydev->lp_advertising); @@ -317,6 +318,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, cmd->base.autoneg = phydev->autoneg; cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl; cmd->base.eth_tp_mdix = phydev->mdix; + mutex_unlock(&phydev->lock); } EXPORT_SYMBOL(phy_ethtool_ksettings_get); -- cgit v1.2.3 From 64cd92d5e8180c2ded3fdea76862de6f596ae2c9 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:03 +0200 Subject: phy: phy_ethtool_ksettings_set: Move after phy_start_aneg This allows it to make use of a helper which assume the PHY is already locked. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 106 +++++++++++++++++++++++++------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 8457b829667e..ee584fa8b76d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -243,59 +243,6 @@ static void phy_sanitize_settings(struct phy_device *phydev) } } -int phy_ethtool_ksettings_set(struct phy_device *phydev, - const struct ethtool_link_ksettings *cmd) -{ - __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); - u8 autoneg = cmd->base.autoneg; - u8 duplex = cmd->base.duplex; - u32 speed = cmd->base.speed; - - if (cmd->base.phy_address != phydev->mdio.addr) - return -EINVAL; - - linkmode_copy(advertising, cmd->link_modes.advertising); - - /* We make sure that we don't pass unsupported values in to the PHY */ - linkmode_and(advertising, advertising, phydev->supported); - - /* Verify the settings we care about. */ - if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) - return -EINVAL; - - if (autoneg == AUTONEG_ENABLE && linkmode_empty(advertising)) - return -EINVAL; - - if (autoneg == AUTONEG_DISABLE && - ((speed != SPEED_1000 && - speed != SPEED_100 && - speed != SPEED_10) || - (duplex != DUPLEX_HALF && - duplex != DUPLEX_FULL))) - return -EINVAL; - - phydev->autoneg = autoneg; - - if (autoneg == AUTONEG_DISABLE) { - phydev->speed = speed; - phydev->duplex = duplex; - } - - linkmode_copy(phydev->advertising, advertising); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - phydev->advertising, autoneg == AUTONEG_ENABLE); - - phydev->master_slave_set = cmd->base.master_slave_cfg; - phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; - - /* Restart the PHY */ - phy_start_aneg(phydev); - - return 0; -} -EXPORT_SYMBOL(phy_ethtool_ksettings_set); - void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd) { @@ -802,6 +749,59 @@ static int phy_poll_aneg_done(struct phy_device *phydev) return ret < 0 ? ret : 0; } +int phy_ethtool_ksettings_set(struct phy_device *phydev, + const struct ethtool_link_ksettings *cmd) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); + u8 autoneg = cmd->base.autoneg; + u8 duplex = cmd->base.duplex; + u32 speed = cmd->base.speed; + + if (cmd->base.phy_address != phydev->mdio.addr) + return -EINVAL; + + linkmode_copy(advertising, cmd->link_modes.advertising); + + /* We make sure that we don't pass unsupported values in to the PHY */ + linkmode_and(advertising, advertising, phydev->supported); + + /* Verify the settings we care about. */ + if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (autoneg == AUTONEG_ENABLE && linkmode_empty(advertising)) + return -EINVAL; + + if (autoneg == AUTONEG_DISABLE && + ((speed != SPEED_1000 && + speed != SPEED_100 && + speed != SPEED_10) || + (duplex != DUPLEX_HALF && + duplex != DUPLEX_FULL))) + return -EINVAL; + + phydev->autoneg = autoneg; + + if (autoneg == AUTONEG_DISABLE) { + phydev->speed = speed; + phydev->duplex = duplex; + } + + linkmode_copy(phydev->advertising, advertising); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->advertising, autoneg == AUTONEG_ENABLE); + + phydev->master_slave_set = cmd->base.master_slave_cfg; + phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; + + /* Restart the PHY */ + phy_start_aneg(phydev); + + return 0; +} +EXPORT_SYMBOL(phy_ethtool_ksettings_set); + /** * phy_speed_down - set speed to lowest speed supported by both link partners * @phydev: the phy_device struct -- cgit v1.2.3 From 707293a56f95f8e7e0cfae008010c7933fb68973 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:04 +0200 Subject: phy: phy_start_aneg: Add an unlocked version Split phy_start_aneg into a wrapper which takes the PHY lock, and a helper doing the real work. This will be needed when phy_ethtook_ksettings_set takes the lock. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ee584fa8b76d..d845afab1af7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -700,7 +700,7 @@ static int phy_check_link_status(struct phy_device *phydev) } /** - * phy_start_aneg - start auto-negotiation for this PHY device + * _phy_start_aneg - start auto-negotiation for this PHY device * @phydev: the phy_device struct * * Description: Sanitizes the settings (if we're not autonegotiating @@ -708,25 +708,43 @@ static int phy_check_link_status(struct phy_device *phydev) * If the PHYCONTROL Layer is operating, we change the state to * reflect the beginning of Auto-negotiation or forcing. */ -int phy_start_aneg(struct phy_device *phydev) +static int _phy_start_aneg(struct phy_device *phydev) { int err; + lockdep_assert_held(&phydev->lock); + if (!phydev->drv) return -EIO; - mutex_lock(&phydev->lock); - if (AUTONEG_DISABLE == phydev->autoneg) phy_sanitize_settings(phydev); err = phy_config_aneg(phydev); if (err < 0) - goto out_unlock; + return err; if (phy_is_started(phydev)) err = phy_check_link_status(phydev); -out_unlock: + + return err; +} + +/** + * phy_start_aneg - start auto-negotiation for this PHY device + * @phydev: the phy_device struct + * + * Description: Sanitizes the settings (if we're not autonegotiating + * them), and then calls the driver's config_aneg function. + * If the PHYCONTROL Layer is operating, we change the state to + * reflect the beginning of Auto-negotiation or forcing. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + mutex_lock(&phydev->lock); + err = _phy_start_aneg(phydev); mutex_unlock(&phydev->lock); return err; -- cgit v1.2.3 From af1a02aa23c37045e6adfcf074cf7dbac167a403 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:05 +0200 Subject: phy: phy_ethtool_ksettings_set: Lock the PHY while changing settings There is a race condition where the PHY state machine can change members of the phydev structure at the same time userspace requests a change via ethtool. To prevent this, have phy_ethtool_ksettings_set take the PHY lock. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Reported-by: Walter Stoll Suggested-by: Walter Stoll Tested-by: Walter Stoll Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d845afab1af7..a3bfb156c83d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -798,6 +798,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, duplex != DUPLEX_FULL))) return -EINVAL; + mutex_lock(&phydev->lock); phydev->autoneg = autoneg; if (autoneg == AUTONEG_DISABLE) { @@ -814,8 +815,9 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; /* Restart the PHY */ - phy_start_aneg(phydev); + _phy_start_aneg(phydev); + mutex_unlock(&phydev->lock); return 0; } EXPORT_SYMBOL(phy_ethtool_ksettings_set); -- cgit v1.2.3 From 0985dba842eaa391858972cfe2724c3c174a2827 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 23 Oct 2021 20:47:19 +0100 Subject: KVM: x86/xen: Fix kvm_xen_has_interrupt() sleeping in kvm_vcpu_block() In kvm_vcpu_block, the current task is set to TASK_INTERRUPTIBLE before making a final check whether the vCPU should be woken from HLT by any incoming interrupt. This is a problem for the get_user() in __kvm_xen_has_interrupt(), which really shouldn't be sleeping when the task state has already been set. I think it's actually harmless as it would just manifest itself as a spurious wakeup, but it's causing a debug warning: [ 230.963649] do not call blocking ops when !TASK_RUNNING; state=1 set at [<00000000b6bcdbc9>] prepare_to_swait_exclusive+0x30/0x80 Fix the warning by turning it into an *explicit* spurious wakeup. When invoked with !task_is_running(current) (and we might as well add in_atomic() there while we're at it), just return 1 to indicate that an IRQ is pending, which will cause a wakeup and then something will call it again in a context that *can* sleep so it can fault the page back in. Cc: stable@vger.kernel.org Fixes: 40da8ccd724f ("KVM: x86/xen: Add event channel interrupt vector upcall") Signed-off-by: David Woodhouse Message-Id: <168bf8c689561da904e48e2ff5ae4713eaef9e2d.camel@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9ea9c3dabe37..8f62baebd028 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -190,6 +190,7 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) int __kvm_xen_has_interrupt(struct kvm_vcpu *v) { + int err; u8 rc = 0; /* @@ -216,13 +217,29 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) if (likely(slots->generation == ghc->generation && !kvm_is_error_hva(ghc->hva) && ghc->memslot)) { /* Fast path */ - __get_user(rc, (u8 __user *)ghc->hva + offset); - } else { - /* Slow path */ - kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset, - sizeof(rc)); + pagefault_disable(); + err = __get_user(rc, (u8 __user *)ghc->hva + offset); + pagefault_enable(); + if (!err) + return rc; } + /* Slow path */ + + /* + * This function gets called from kvm_vcpu_block() after setting the + * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately + * from a HLT. So we really mustn't sleep. If the page ended up absent + * at that point, just return 1 in order to trigger an immediate wake, + * and we'll end up getting called again from a context where we *can* + * fault in the page and wait for it. + */ + if (in_atomic() || !task_is_running(current)) + return 1; + + kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset, + sizeof(rc)); + return rc; } -- cgit v1.2.3 From 09b1d5dc6ce1c9151777f6c4e128a59457704c97 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Oct 2021 13:31:12 +0200 Subject: cfg80211: fix management registrations locking The management registrations locking was broken, the list was locked for each wdev, but cfg80211_mgmt_registrations_update() iterated it without holding all the correct spinlocks, causing list corruption. Rather than trying to fix it with fine-grained locking, just move the lock to the wiphy/rdev (still need the list on each wdev), we already need to hold the wdev lock to change it, so there's no contention on the lock in any case. This trivially fixes the bug since we hold one wdev's lock already, and now will hold the lock that protects all lists. Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Fixes: 6cd536fe62ef ("cfg80211: change internal management frame registration API") Link: https://lore.kernel.org/r/20211025133111.5cf733eab0f4.I7b0abb0494ab712f74e2efcd24bb31ac33f7eee9@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 -- net/wireless/core.c | 2 +- net/wireless/core.h | 2 ++ net/wireless/mlme.c | 26 ++++++++++++++------------ 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 62dd8422e0dc..27336fc70467 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5376,7 +5376,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface * @mgmt_registrations: list of registrations for management frames - * @mgmt_registrations_lock: lock for the list * @mgmt_registrations_need_update: mgmt registrations were updated, * need to propagate the update to the driver * @mtx: mutex used to lock data in this struct, may be used by drivers @@ -5423,7 +5422,6 @@ struct wireless_dev { u32 identifier; struct list_head mgmt_registrations; - spinlock_t mgmt_registrations_lock; u8 mgmt_registrations_need_update:1; struct mutex mtx; diff --git a/net/wireless/core.c b/net/wireless/core.c index 03323121ca50..aaba847d79eb 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -524,6 +524,7 @@ use_default_name: INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk); INIT_WORK(&rdev->mgmt_registrations_update_wk, cfg80211_mgmt_registrations_update_wk); + spin_lock_init(&rdev->mgmt_registrations_lock); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -1279,7 +1280,6 @@ void cfg80211_init_wdev(struct wireless_dev *wdev) INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); - spin_lock_init(&wdev->mgmt_registrations_lock); INIT_LIST_HEAD(&wdev->pmsr_list); spin_lock_init(&wdev->pmsr_lock); INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk); diff --git a/net/wireless/core.h b/net/wireless/core.h index b35d0db12f1d..1720abf36f92 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -100,6 +100,8 @@ struct cfg80211_registered_device { struct work_struct propagate_cac_done_wk; struct work_struct mgmt_registrations_update_wk; + /* lock for all wdev lists */ + spinlock_t mgmt_registrations_lock; /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 3aa69b375a10..783acd2c4211 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -452,9 +452,9 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) lockdep_assert_held(&rdev->wiphy.mtx); - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); if (!wdev->mgmt_registrations_need_update) { - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); return; } @@ -479,7 +479,7 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) rcu_read_unlock(); wdev->mgmt_registrations_need_update = 0; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); rdev_update_mgmt_frame_registrations(rdev, wdev, &upd); } @@ -503,6 +503,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, int match_len, bool multicast_rx, struct netlink_ext_ack *extack) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; u16 mgmt_type; @@ -548,7 +549,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, if (!nreg) return -ENOMEM; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry(reg, &wdev->mgmt_registrations, list) { int mlen = min(match_len, reg->match_len); @@ -583,7 +584,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, list_add(&nreg->list, &wdev->mgmt_registrations); } wdev->mgmt_registrations_need_update = 1; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); @@ -591,7 +592,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, out: kfree(nreg); - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); return err; } @@ -602,7 +603,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { if (reg->nlportid != nlportid) @@ -615,7 +616,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) schedule_work(&rdev->mgmt_registrations_update_wk); } - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); if (nlportid && rdev->crit_proto_nlportid == nlportid) { rdev->crit_proto_nlportid = 0; @@ -628,15 +629,16 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { list_del(®->list); kfree(reg); } wdev->mgmt_registrations_need_update = 1; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); } @@ -784,7 +786,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, data = buf + ieee80211_hdrlen(mgmt->frame_control); data_len = len - ieee80211_hdrlen(mgmt->frame_control); - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry(reg, &wdev->mgmt_registrations, list) { if (reg->frame_type != ftype) @@ -808,7 +810,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, break; } - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); trace_cfg80211_return_bool(result); return result; -- cgit v1.2.3 From 689a0a9f505f7bffdefe6f17fddb41c8ab6344f6 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Sun, 24 Oct 2021 22:15:46 +0200 Subject: cfg80211: correct bridge/4addr mode check Without the patch we fail: $ sudo brctl addbr br0 $ sudo brctl addif br0 wlp1s0 $ sudo iw wlp1s0 set 4addr on command failed: Device or resource busy (-16) Last command failed but iface was already in 4addr mode. Fixes: ad4bb6f8883a ("cfg80211: disallow bridging managed/adhoc interfaces") Signed-off-by: Janusz Dziedzic Link: https://lore.kernel.org/r/20211024201546.614379-1-janusz.dziedzic@gmail.com [add fixes tag, fix indentation, edit commit log] Signed-off-by: Johannes Berg --- net/wireless/util.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 18dba3d7c638..a1a99a574984 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1028,14 +1028,14 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, !(rdev->wiphy.interface_modes & (1 << ntype))) return -EOPNOTSUPP; - /* if it's part of a bridge, reject changing type to station/ibss */ - if (netif_is_bridge_port(dev) && - (ntype == NL80211_IFTYPE_ADHOC || - ntype == NL80211_IFTYPE_STATION || - ntype == NL80211_IFTYPE_P2P_CLIENT)) - return -EBUSY; - if (ntype != otype) { + /* if it's part of a bridge, reject changing type to station/ibss */ + if (netif_is_bridge_port(dev) && + (ntype == NL80211_IFTYPE_ADHOC || + ntype == NL80211_IFTYPE_STATION || + ntype == NL80211_IFTYPE_P2P_CLIENT)) + return -EBUSY; + dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); -- cgit v1.2.3 From ace19b992436a257d9a793672e57abc28fe83e2e Mon Sep 17 00:00:00 2001 From: Trevor Woerner Date: Sun, 24 Oct 2021 13:50:02 -0400 Subject: net: nxp: lpc_eth.c: avoid hang when bringing interface down A hard hang is observed whenever the ethernet interface is brought down. If the PHY is stopped before the LPC core block is reset, the SoC will hang. Comparing lpc_eth_close() and lpc_eth_open() I re-arranged the ordering of the functions calls in lpc_eth_close() to reset the hardware before stopping the PHY. Fixes: b7370112f519 ("lpc32xx: Added ethernet driver") Signed-off-by: Trevor Woerner Acked-by: Vladimir Zapolskiy Signed-off-by: David S. Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index d29fe562b3de..c910fa2f40a4 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1015,9 +1015,6 @@ static int lpc_eth_close(struct net_device *ndev) napi_disable(&pldat->napi); netif_stop_queue(ndev); - if (ndev->phydev) - phy_stop(ndev->phydev); - spin_lock_irqsave(&pldat->lock, flags); __lpc_eth_reset(pldat); netif_carrier_off(ndev); @@ -1025,6 +1022,8 @@ static int lpc_eth_close(struct net_device *ndev) writel(0, LPC_ENET_MAC2(pldat->net_base)); spin_unlock_irqrestore(&pldat->lock, flags); + if (ndev->phydev) + phy_stop(ndev->phydev); clk_disable_unprepare(pldat->clk); return 0; -- cgit v1.2.3 From 64733956ebba7cc629856f4a6ee35a52bc9c023f Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 24 Oct 2021 09:08:20 +0300 Subject: RDMA/sa_query: Use strscpy_pad instead of memcpy to copy a string When copying the device name, the length of the data memcpy copied exceeds the length of the source buffer, which cause the KASAN issue below. Use strscpy_pad() instead. BUG: KASAN: slab-out-of-bounds in ib_nl_set_path_rec_attrs+0x136/0x320 [ib_core] Read of size 64 at addr ffff88811a10f5e0 by task rping/140263 CPU: 3 PID: 140263 Comm: rping Not tainted 5.15.0-rc1+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x7d print_address_description.constprop.0+0x1d/0xa0 kasan_report+0xcb/0x110 kasan_check_range+0x13d/0x180 memcpy+0x20/0x60 ib_nl_set_path_rec_attrs+0x136/0x320 [ib_core] ib_nl_make_request+0x1c6/0x380 [ib_core] send_mad+0x20a/0x220 [ib_core] ib_sa_path_rec_get+0x3e3/0x800 [ib_core] cma_query_ib_route+0x29b/0x390 [rdma_cm] rdma_resolve_route+0x308/0x3e0 [rdma_cm] ucma_resolve_route+0xe1/0x150 [rdma_ucm] ucma_write+0x17b/0x1f0 [rdma_ucm] vfs_write+0x142/0x4d0 ksys_write+0x133/0x160 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f26499aa90f Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 29 fd ff ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 5c fd ff ff 48 RSP: 002b:00007f26495f2dc0 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00000000000007d0 RCX: 00007f26499aa90f RDX: 0000000000000010 RSI: 00007f26495f2e00 RDI: 0000000000000003 RBP: 00005632a8315440 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000293 R12: 00007f26495f2e00 R13: 00005632a83154e0 R14: 00005632a8315440 R15: 00005632a830a810 Allocated by task 131419: kasan_save_stack+0x1b/0x40 __kasan_kmalloc+0x7c/0x90 proc_self_get_link+0x8b/0x100 pick_link+0x4f1/0x5c0 step_into+0x2eb/0x3d0 walk_component+0xc8/0x2c0 link_path_walk+0x3b8/0x580 path_openat+0x101/0x230 do_filp_open+0x12e/0x240 do_sys_openat2+0x115/0x280 __x64_sys_openat+0xce/0x140 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 2ca546b92a02 ("IB/sa: Route SA pathrecord query through netlink") Link: https://lore.kernel.org/r/72ede0f6dab61f7f23df9ac7a70666e07ef314b0.1635055496.git.leonro@nvidia.com Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a20b8108e160..c00f8e28aab7 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -706,8 +706,9 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, /* Construct the family header first */ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); - memcpy(header->device_name, dev_name(&query->port->agent->device->dev), - LS_DEVICE_NAME_MAX); + strscpy_pad(header->device_name, + dev_name(&query->port->agent->device->dev), + LS_DEVICE_NAME_MAX); header->port_num = query->port->port_num; if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && -- cgit v1.2.3 From 0c57eeecc559ca6bc18b8c4e2808bc78dbe769b0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Oct 2021 05:05:28 -0400 Subject: net: Prevent infinite while loop in skb_tx_hash() Drivers call netdev_set_num_tc() and then netdev_set_tc_queue() to set the queue count and offset for each TC. So the queue count and offset for the TCs may be zero for a short period after dev->num_tc has been set. If a TX packet is being transmitted at this time in the code path netdev_pick_tx() -> skb_tx_hash(), skb_tx_hash() may see nonzero dev->num_tc but zero qcount for the TC. The while loop that keeps looping while hash >= qcount will not end. Fix it by checking the TC's qcount to be nonzero before using it. Fixes: eadec877ce9c ("net: Add support for subordinate traffic classes to netdev_pick_tx") Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 7ee9fecd3aff..ea2366497697 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3163,6 +3163,12 @@ static u16 skb_tx_hash(const struct net_device *dev, qoffset = sb_dev->tc_to_txq[tc].offset; qcount = sb_dev->tc_to_txq[tc].count; + if (unlikely(!qcount)) { + net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n", + sb_dev->name, qoffset, tc); + qoffset = 0; + qcount = dev->real_num_tx_queues; + } } if (skb_rx_queue_recorded(skb)) { -- cgit v1.2.3 From 042b2046d0f05cf8124c26ff65dbb6148a4404fb Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Fri, 22 Oct 2021 16:31:39 -0700 Subject: xen/netfront: stop tx queues during live migration The tx queues are not stopped during the live migration. As a result, the ndo_start_xmit() may access netfront_info->queues which is freed by talk_to_netback()->xennet_destroy_queues(). This patch is to netif_device_detach() at the beginning of xen-netfront resuming, and netif_device_attach() at the end of resuming. CPU A CPU B talk_to_netback() -> if (info->queues) xennet_destroy_queues(info); to free netfront_info->queues xennet_start_xmit() to access netfront_info->queues -> err = xennet_create_queues(info, &num_queues); The idea is borrowed from virtio-net. Cc: Joe Jin Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e31b98403f31..fc41ba95f81d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1730,6 +1730,10 @@ static int netfront_resume(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + netif_tx_lock_bh(info->netdev); + netif_device_detach(info->netdev); + netif_tx_unlock_bh(info->netdev); + xennet_disconnect_backend(info); return 0; } @@ -2349,6 +2353,10 @@ static int xennet_connect(struct net_device *dev) * domain a kick because we've probably just requeued some * packets. */ + netif_tx_lock_bh(np->netdev); + netif_device_attach(np->netdev); + netif_tx_unlock_bh(np->netdev); + netif_carrier_on(np->netdev); for (j = 0; j < num_queues; ++j) { queue = &np->queues[j]; -- cgit v1.2.3 From f7a1e76d0f608961cc2fc681f867a834f2746bce Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 25 Oct 2021 02:31:48 -0400 Subject: net-sysfs: initialize uid and gid before calling net_ns_get_ownership Currently in net_ns_get_ownership() it may not be able to set uid or gid if make_kuid or make_kgid returns an invalid value, and an uninit-value issue can be triggered by this. This patch is to fix it by initializing the uid and gid before calling net_ns_get_ownership(), as it does in kobject_get_ownership() Fixes: e6dee9f3893c ("net-sysfs: add netdev_change_owner()") Reported-by: Paolo Abeni Signed-off-by: Xin Long Acked-by: Christian Brauner Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f6197774048b..b2e49eb7001d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1973,9 +1973,9 @@ int netdev_register_kobject(struct net_device *ndev) int netdev_change_owner(struct net_device *ndev, const struct net *net_old, const struct net *net_new) { + kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID; + kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID; struct device *dev = &ndev->dev; - kuid_t old_uid, new_uid; - kgid_t old_gid, new_gid; int error; net_ns_get_ownership(net_old, &old_uid, &old_gid); -- cgit v1.2.3 From ac8a6eba2a117e0fdc04da62ab568d1b7ca4c8f6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Oct 2021 10:46:41 -0700 Subject: spi: Fix tegra20 build with CONFIG_PM=n once again Commit efafec27c565 ("spi: Fix tegra20 build with CONFIG_PM=n") already fixed the build without PM support once. There was an alternative fix by Guenter in commit 2bab94090b01 ("spi: tegra20-slink: Declare runtime suspend and resume functions conditionally"), and Mark then merged the two correctly in ffb1e76f4f32 ("Merge tag 'v5.15-rc2' into spi-5.15"). But for some inexplicable reason, Mark then merged things _again_ in commit 59c4e190b10c ("Merge tag 'v5.15-rc3' into spi-5.15"), and screwed things up at that point, and the __maybe_unused attribute on tegra_slink_runtime_resume() went missing. Reinstate it, so that alpha (and other architectures without PM support) builds cleanly again. Btw, this is another prime example of how random back-merges are not good. Just don't do them. Subsystem developers should not merge my tree in any normal circumstances. Both of those merge commits pointed to above are bad: even the one that got the merge result right doesn't even mention _why_ it was done, and the one that got it wrong is obviously broken. Reported-by: Guenter Roeck Cc: Mark Brown Signed-off-by: Linus Torvalds --- drivers/spi/spi-tegra20-slink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 713292b0c71e..3226c4e1c7c0 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1194,7 +1194,7 @@ static int __maybe_unused tegra_slink_runtime_suspend(struct device *dev) return 0; } -static int tegra_slink_runtime_resume(struct device *dev) +static int __maybe_unused tegra_slink_runtime_resume(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct tegra_slink_data *tspi = spi_master_get_devdata(master); -- cgit v1.2.3 From cb685432398122053f3e1dc6a1d68924e5b77be4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox (Oracle) Date: Mon, 25 Oct 2021 19:16:34 +0100 Subject: secretmem: Prevent secretmem_users from wrapping to zero Commit 110860541f44 ("mm/secretmem: use refcount_t instead of atomic_t") attempted to fix the problem of secretmem_users wrapping to zero and allowing suspend once again. But it was reverted in commit 87066fdd2e30 ("Revert 'mm/secretmem: use refcount_t instead of atomic_t'") because of the problems it caused - a refcount_t was not semantically the right type to use. Instead prevent secretmem_users from wrapping to zero by forbidding new users if the number of users has wrapped from positive to negative. This stops a long way short of reaching the necessary 4 billion users where it wraps to zero again, so there's no need to be clever with special anti-wrap types or checking the return value from atomic_inc(). Signed-off-by: Matthew Wilcox (Oracle) Cc: Jordy Zomer Cc: Kees Cook , Cc: James Bottomley Cc: Mike Rapoport Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/secretmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/secretmem.c b/mm/secretmem.c index 030f02ddc7c1..c2dda408bb36 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -203,6 +203,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) return -EINVAL; + if (atomic_read(&secretmem_users) < 0) + return -ENFILE; fd = get_unused_fd_flags(flags & O_CLOEXEC); if (fd < 0) -- cgit v1.2.3 From 3906fe9bb7f1a2c8667ae54e967dc8690824f4ea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Oct 2021 11:30:31 -0700 Subject: Linux 5.15-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 91297670da8e..30c7c81d0437 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Opossums on Parade # *DOCUMENTATION* -- cgit v1.2.3 From e091b836a3baee4c8b1423a969589196b88a9e06 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 14 Oct 2021 16:54:10 +0300 Subject: Revert "arm64: dts: qcom: sm8250: remove bus clock from the mdss node for sm8250 target" This reverts commit 001ce9785c0674d913531345e86222c965fc8bf4. This upstream commit broke AOSP (post Android 12 merge) build on RB5. The device either silently crashes into USB crash mode after android boot animation or we see a blank blue screen with following dpu errors in dmesg: [ T444] hw recovery is not complete for ctl:3 [ T444] [drm:dpu_encoder_phys_vid_prepare_for_kickoff:539] [dpu error]enc31 intf1 ctl 3 reset failure: -22 [ T444] [drm:dpu_encoder_phys_vid_wait_for_commit_done:513] [dpu error]vblank timeout [ T444] [drm:dpu_kms_wait_for_commit_done:454] [dpu error]wait for commit done returned -110 [ C7] [drm:dpu_encoder_frame_done_timeout:2127] [dpu error]enc31 frame done timeout [ T444] [drm:dpu_encoder_phys_vid_wait_for_commit_done:513] [dpu error]vblank timeout [ T444] [drm:dpu_kms_wait_for_commit_done:454] [dpu error]wait for commit done returned -110 Fixes: 001ce9785c06 ("arm64: dts: qcom: sm8250: remove bus clock from the mdss node for sm8250 target") Signed-off-by: Amit Pundir Signed-off-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211014135410.4136412-1-dmitry.baryshkov@linaro.org --- arch/arm64/boot/dts/qcom/sm8250.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 8c15d9fed08f..d12e4cbfc852 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -2590,9 +2590,10 @@ power-domains = <&dispcc MDSS_GDSC>; clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>, <&gcc GCC_DISP_SF_AXI_CLK>, <&dispcc DISP_CC_MDSS_MDP_CLK>; - clock-names = "iface", "nrt_bus", "core"; + clock-names = "iface", "bus", "nrt_bus", "core"; assigned-clocks = <&dispcc DISP_CC_MDSS_MDP_CLK>; assigned-clock-rates = <460000000>; -- cgit v1.2.3 From 6a8b357278f5f8b9817147277ab8f12879dce8a8 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Thu, 7 Oct 2021 08:40:31 -0700 Subject: ice: Respond to a NETDEV_UNREGISTER event for LAG When the PF is a member of a link aggregate, and the driver is removed, the process will hang unless we respond to the NETDEV_UNREGISTER event that is sent to the event_handler for LAG. Add a case statement for the ice_lag_event_handler to unlink the PF from the link aggregate. Also remove code that was incorrectly applying a dev_hold to peer_netdevs that were associated with the ice driver. Fixes: df006dd4b1dc ("ice: Add initial support framework for LAG") Signed-off-by: Dave Ertman Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lag.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 37c18c66b5c7..e375ac849aec 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -100,9 +100,9 @@ static void ice_display_lag_info(struct ice_lag *lag) */ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) { - struct net_device *event_netdev, *netdev_tmp; struct netdev_notifier_bonding_info *info; struct netdev_bonding_info *bonding_info; + struct net_device *event_netdev; const char *lag_netdev_name; event_netdev = netdev_notifier_info_to_dev(ptr); @@ -123,19 +123,6 @@ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) goto lag_out; } - rcu_read_lock(); - for_each_netdev_in_bond_rcu(lag->upper_netdev, netdev_tmp) { - if (!netif_is_ice(netdev_tmp)) - continue; - - if (netdev_tmp && netdev_tmp != lag->netdev && - lag->peer_netdev != netdev_tmp) { - dev_hold(netdev_tmp); - lag->peer_netdev = netdev_tmp; - } - } - rcu_read_unlock(); - if (bonding_info->slave.state) ice_lag_set_backup(lag); else @@ -319,6 +306,9 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, case NETDEV_BONDING_INFO: ice_lag_info_event(lag, ptr); break; + case NETDEV_UNREGISTER: + ice_lag_unlink(lag, ptr); + break; default: break; } -- cgit v1.2.3 From fd1b5beb177a8372cea2a0d014835491e4886f77 Mon Sep 17 00:00:00 2001 From: Yongxin Liu Date: Mon, 11 Oct 2021 15:02:16 +0800 Subject: ice: check whether PTP is initialized in ice_ptp_release() PTP is currently only supported on E810 devices, it is checked in ice_ptp_init(). However, there is no check in ice_ptp_release(). For other E800 series devices, ice_ptp_release() will be wrongly executed. Fix the following calltrace. INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. Workqueue: ice ice_service_task [ice] Call Trace: dump_stack_lvl+0x5b/0x82 dump_stack+0x10/0x12 register_lock_class+0x495/0x4a0 ? find_held_lock+0x3c/0xb0 __lock_acquire+0x71/0x1830 lock_acquire+0x1e6/0x330 ? ice_ptp_release+0x3c/0x1e0 [ice] ? _raw_spin_lock+0x19/0x70 ? ice_ptp_release+0x3c/0x1e0 [ice] _raw_spin_lock+0x38/0x70 ? ice_ptp_release+0x3c/0x1e0 [ice] ice_ptp_release+0x3c/0x1e0 [ice] ice_prepare_for_reset+0xcb/0xe0 [ice] ice_do_reset+0x38/0x110 [ice] ice_service_task+0x138/0xf10 [ice] ? __this_cpu_preempt_check+0x13/0x20 process_one_work+0x26a/0x650 worker_thread+0x3f/0x3b0 ? __kthread_parkme+0x51/0xb0 ? process_one_work+0x650/0x650 kthread+0x161/0x190 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x1f/0x30 Fixes: 4dd0d5c33c3e ("ice: add lock around Tx timestamp tracker flush") Signed-off-by: Yongxin Liu Reviewed-by: Jacob Keller Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 80380aed8882..d1ef3d48a4b0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1571,6 +1571,9 @@ err_kworker: */ void ice_ptp_release(struct ice_pf *pf) { + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + return; + /* Disable timestamping for both Tx and Rx */ ice_ptp_cfg_timestamp(pf, false); -- cgit v1.2.3 From 3dd60fb9d95db9c78fec86ba4df20852a7b974ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Oct 2021 09:36:40 +0200 Subject: nvdimm/pmem: stop using q_usage_count as external pgmap refcount Originally all DAX access when through block_device operations and thus needed a queue reference. But since commit cccbce671582 ("filesystem-dax: convert to dax_direct_access()") all this happens at the DAX device level which uses its own refcounting. Having the external refcount thus wasn't needed but has otherwise been harmless for long time. But now that "block: drain file system I/O on del_gendisk" waits for q_usage_count to reach 0 in del_gendisk this whole scheme can't work anymore (and pmem is the only driver abusing q_usage_count like that). So switch to the internal reference and remove the unbalanced blk_freeze_queue_start that is taken care of by del_gendisk. Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk") Reported-by: Yi Zhang Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211019073641.2323410-2-hch@lst.de Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index ef4950f80832..054154c22899 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -333,26 +333,6 @@ static const struct attribute_group *pmem_attribute_groups[] = { NULL, }; -static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap) -{ - struct pmem_device *pmem = pgmap->owner; - - blk_cleanup_disk(pmem->disk); -} - -static void pmem_release_queue(void *pgmap) -{ - pmem_pagemap_cleanup(pgmap); -} - -static void pmem_pagemap_kill(struct dev_pagemap *pgmap) -{ - struct request_queue *q = - container_of(pgmap->ref, struct request_queue, q_usage_counter); - - blk_freeze_queue_start(q); -} - static void pmem_release_disk(void *__pmem) { struct pmem_device *pmem = __pmem; @@ -360,12 +340,9 @@ static void pmem_release_disk(void *__pmem) kill_dax(pmem->dax_dev); put_dax(pmem->dax_dev); del_gendisk(pmem->disk); -} -static const struct dev_pagemap_ops fsdax_pagemap_ops = { - .kill = pmem_pagemap_kill, - .cleanup = pmem_pagemap_cleanup, -}; + blk_cleanup_disk(pmem->disk); +} static int pmem_attach_disk(struct device *dev, struct nd_namespace_common *ndns) @@ -427,10 +404,8 @@ static int pmem_attach_disk(struct device *dev, pmem->disk = disk; pmem->pgmap.owner = pmem; pmem->pfn_flags = PFN_DEV; - pmem->pgmap.ref = &q->q_usage_counter; if (is_nd_pfn(dev)) { pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; - pmem->pgmap.ops = &fsdax_pagemap_ops; addr = devm_memremap_pages(dev, &pmem->pgmap); pfn_sb = nd_pfn->pfn_sb; pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); @@ -444,16 +419,12 @@ static int pmem_attach_disk(struct device *dev, pmem->pgmap.range.end = res->end; pmem->pgmap.nr_range = 1; pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; - pmem->pgmap.ops = &fsdax_pagemap_ops; addr = devm_memremap_pages(dev, &pmem->pgmap); pmem->pfn_flags |= PFN_MAP; bb_range = pmem->pgmap.range; } else { addr = devm_memremap(dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); - if (devm_add_action_or_reset(dev, pmem_release_queue, - &pmem->pgmap)) - return -ENOMEM; bb_range.start = res->start; bb_range.end = res->end; } -- cgit v1.2.3 From 759635760a804b0d8ad0cc677b650f1544cae22f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 24 Oct 2021 09:40:14 +0300 Subject: mlxsw: pci: Recycle received packet upon allocation failure When the driver fails to allocate a new Rx buffer, it passes an empty Rx descriptor (contains zero address and size) to the device and marks it as invalid by setting the skb pointer in the descriptor's metadata to NULL. After processing enough Rx descriptors, the driver will try to process the invalid descriptor, but will return immediately seeing that the skb pointer is NULL. Since the driver no longer passes new Rx descriptors to the device, the Rx queue will eventually become full and the device will start to drop packets. Fix this by recycling the received packet if allocation of the new packet failed. This means that allocation is no longer performed at the end of the Rx routine, but at the start, before tearing down the DMA mapping of the received packet. Remove the comment about the descriptor being zeroed as it is no longer correct. This is OK because we either use the descriptor as-is (when recycling) or overwrite its address and size fields with that of the newly allocated Rx buffer. The issue was discovered when a process ("perf") consumed too much memory and put the system under memory pressure. It can be reproduced by injecting slab allocation failures [1]. After the fix, the Rx queue no longer comes to a halt. [1] # echo 10 > /sys/kernel/debug/failslab/times # echo 1000 > /sys/kernel/debug/failslab/interval # echo 100 > /sys/kernel/debug/failslab/probability FAULT_INJECTION: forcing a failure. name failslab, interval 1000, probability 100, space 0, times 8 [...] Call Trace: dump_stack_lvl+0x34/0x44 should_fail.cold+0x32/0x37 should_failslab+0x5/0x10 kmem_cache_alloc_node+0x23/0x190 __alloc_skb+0x1f9/0x280 __netdev_alloc_skb+0x3a/0x150 mlxsw_pci_rdq_skb_alloc+0x24/0x90 mlxsw_pci_cq_tasklet+0x3dc/0x1200 tasklet_action_common.constprop.0+0x9f/0x100 __do_softirq+0xb5/0x252 irq_exit_rcu+0x7a/0xa0 common_interrupt+0x83/0xa0 asm_common_interrupt+0x1e/0x40 RIP: 0010:cpuidle_enter_state+0xc8/0x340 [...] mlxsw_spectrum2 0000:06:00.0: Failed to alloc skb for RDQ Fixes: eda6500a987a ("mlxsw: Add PCI bus implementation") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Link: https://lore.kernel.org/r/20211024064014.1060919-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 13b0259f7ea6..fcace73eae40 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -353,13 +353,10 @@ static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci, struct sk_buff *skb; int err; - elem_info->u.rdq.skb = NULL; skb = netdev_alloc_skb_ip_align(NULL, buf_len); if (!skb) return -ENOMEM; - /* Assume that wqe was previously zeroed. */ - err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data, buf_len, DMA_FROM_DEVICE); if (err) @@ -597,21 +594,26 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, struct pci_dev *pdev = mlxsw_pci->pdev; struct mlxsw_pci_queue_elem_info *elem_info; struct mlxsw_rx_info rx_info = {}; - char *wqe; + char wqe[MLXSW_PCI_WQE_SIZE]; struct sk_buff *skb; u16 byte_count; int err; elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); - skb = elem_info->u.sdq.skb; - if (!skb) - return; - wqe = elem_info->elem; - mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + skb = elem_info->u.rdq.skb; + memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE); if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); + err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); + if (err) { + dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); + goto out; + } + + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) { rx_info.is_lag = true; rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe); @@ -647,10 +649,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); - memset(wqe, 0, q->elem_size); - err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); - if (err) - dev_dbg_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); +out: /* Everything is set up, ring doorbell to pass elem to HW */ q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); -- cgit v1.2.3 From fcf918ffd3b35e288097036c04af7446b2c6f2f1 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 14 Oct 2021 12:09:39 +0300 Subject: drm/i915: Convert unconditional clflush to drm_clflush_virt_range() This one is apparently a "clflush for good measure", so bit more justification (if you can call it that) than some of the others. Convert to drm_clflush_virt_range() again so that machines without clflush will survive the ordeal. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Cc: Thomas Hellström #v1 Fixes: 12ca695d2c1e ("drm/i915: Do not share hwsp across contexts any more, v8.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211014090941.12159-3-ville.syrjala@linux.intel.com Reviewed-by: Dave Airlie (cherry picked from commit af7b6d234eefa30c461cc16912bafb32b9e6141c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_timeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 1257f4f11e66..23d7328892ed 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -225,7 +225,7 @@ void intel_timeline_reset_seqno(const struct intel_timeline *tl) memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno)); WRITE_ONCE(*hwsp_seqno, tl->seqno); - clflush(hwsp_seqno); + drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES); } void intel_timeline_enter(struct intel_timeline *tl) -- cgit v1.2.3 From 9761ffb8f1090289b908590039e2c363cc35cf45 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 14 Oct 2021 12:09:40 +0300 Subject: drm/i915: Catch yet another unconditioal clflush Replace the unconditional clflush() with drm_clflush_virt_range() which does the wbinvd() fallback when clflush is not available. This time no justification is given for the clflush in the offending commit. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Cc: Thomas Hellström Fixes: 2c8ab3339e39 ("drm/i915: Pin timeline map after first timeline pin, v4.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211014090941.12159-4-ville.syrjala@linux.intel.com Reviewed-by: Dave Airlie (cherry picked from commit 9ced12182d0d8401d821e9602e56e276459900fc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_timeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 23d7328892ed..438bbc7b8147 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -64,7 +64,7 @@ intel_timeline_pin_map(struct intel_timeline *timeline) timeline->hwsp_map = vaddr; timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES); - clflush(vaddr + ofs); + drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES); return 0; } -- cgit v1.2.3 From 6e6f96630805874fa80b0067e1a57aafc06225f6 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 18 Oct 2021 12:41:49 +0300 Subject: drm/i915/dp: Skip the HW readout of DPCD on disabled encoders Reading out the DP encoders' DPCD during booting or resume is only required for enabled encoders: such encoders may be modesetted during the initial commit and the link training this involves depends on an initialized DPCD. For DDI encoders reading out the DPCD is skipped, do the same on pre-DDI platforms. Atm, the first DPCD readout without a sink connected - which is a likely scneario if the encoder is disabled - leaves intel_dp->num_common_rates at 0, which resulted in intel_dp_sync_state()->intel_dp_max_common_rate() in a intel_dp->common_rates[-1] access. This by definition results in an undefined behaviour, though to my best knowledge in all HW/compiler configurations it actually results in accessing the array item type value preceding the array. In this case the preceding value happens to be intel_dp->num_common_rates, which is 0, so this issue - by luck - didn't cause a user visible problem. Nevertheless it's still an undefined behaviour and in CONFIG_UBSAN builds leads to a kernel BUG() (which revealed this problem for us), hence CC:stable. A related problem in case the encoder is enabled but the sink is not connected or the DPCD readout fails is fixed by the next patch. v2: Amend the commit message describing the root cause of the CONFIG_UBSAN BUG(). Fixes: a532cde31de3 ("drm/i915/tc: Fix TypeC port init/resume time sanitization") References: https://gitlab.freedesktop.org/drm/intel/-/issues/4297 Reported-and-tested-by: Mat Jonczyk Cc: Mat Jonczyk Cc: José Roberto de Souza Cc: Jani Nikula Cc: Ville Syrjälä Cc: Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20211018094154.1407705-2-imre.deak@intel.com (cherry picked from commit 4ec5ffc341cecbea060739aea1d53398ac2ec3f8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index abe3d61b6243..5cf152be4487 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1916,6 +1916,9 @@ void intel_dp_sync_state(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + if (!crtc_state) + return; + /* * Don't clobber DPCD if it's been already read out during output * setup (eDP) or detect. -- cgit v1.2.3 From 926245c7d22271307606c88b1fbb2539a8550e94 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 15 Oct 2021 10:26:34 +0200 Subject: nvmet-tcp: fix a memory leak when releasing a queue page_frag_free() won't completely release the memory allocated for the commands, the cache page must be explicitly freed by calling __page_frag_cache_drain(). This bug can be easily reproduced by repeatedly executing the following command on the initiator: $echo 1 > /sys/devices/virtual/nvme-fabrics/ctl/nvme0/reset_controller Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Reviewed-by: John Meneghini Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 07ee347ea3f3..c33a0464346f 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1428,6 +1428,7 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) static void nvmet_tcp_release_queue_work(struct work_struct *w) { + struct page *page; struct nvmet_tcp_queue *queue = container_of(w, struct nvmet_tcp_queue, release_work); @@ -1447,6 +1448,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_free_crypto(queue); ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + page = virt_to_head_page(queue->pf_cache.va); + __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); kfree(queue); } -- cgit v1.2.3 From 25e1f67eda4a19c91dc05c84d6d413c53efb447b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 24 Oct 2021 10:43:31 +0300 Subject: nvme-tcp: fix H2CData PDU send accounting (again) We should not access request members after the last send, even to determine if indeed it was the last data payload send. The reason is that a completion could have arrived and trigger a new execution of the request which overridden these members. This was fixed by commit 825619b09ad3 ("nvme-tcp: fix possible use-after-completion"). Commit e371af033c56 broke that assumption again to address cases where multiple r2t pdus are sent per request. To fix it, we need to record the request data_sent and data_len and after the payload network send we reference these counters to determine weather we should advance the request iterator. Fixes: e371af033c56 ("nvme-tcp: fix incorrect h2cdata pdu offset accounting") Reported-by: Keith Busch Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 3c1c29dd3020..0626d14e6d4c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -926,12 +926,14 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req) static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; + int req_data_len = req->data_len; while (true) { struct page *page = nvme_tcp_req_cur_page(req); size_t offset = nvme_tcp_req_cur_offset(req); size_t len = nvme_tcp_req_cur_length(req); bool last = nvme_tcp_pdu_last_send(req, len); + int req_data_sent = req->data_sent; int ret, flags = MSG_DONTWAIT; if (last && !queue->data_digest && !nvme_tcp_queue_more(queue)) @@ -958,7 +960,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) * in the request where we don't want to modify it as we may * compete with the RX path completing the request. */ - if (req->data_sent + ret < req->data_len) + if (req_data_sent + ret < req_data_len) nvme_tcp_advance_req(req, ret); /* fully successful last send in current PDU */ -- cgit v1.2.3 From d81d0e41ed5fe7229a2c9a29d13bad288c7cf2d2 Mon Sep 17 00:00:00 2001 From: Thomas Perrot Date: Fri, 22 Oct 2021 16:21:04 +0200 Subject: spi: spl022: fix Microwire full duplex mode There are missing braces in the function that verify controller parameters, then an error is always returned when the parameter to select Microwire frames operation is used on devices allowing it. Signed-off-by: Thomas Perrot Link: https://lore.kernel.org/r/20211022142104.1386379-1-thomas.perrot@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-pl022.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index feebda66f56e..e4484ace584e 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -1716,12 +1716,13 @@ static int verify_controller_parameters(struct pl022 *pl022, return -EINVAL; } } else { - if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) + if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) { dev_err(&pl022->adev->dev, "Microwire half duplex mode requested," " but this is only available in the" " ST version of PL022\n"); - return -EINVAL; + return -EINVAL; + } } } return 0; -- cgit v1.2.3 From 9122a70a6333705c0c35614ddc51c274ed1d3637 Mon Sep 17 00:00:00 2001 From: Cyril Strejc Date: Sun, 24 Oct 2021 22:14:25 +0200 Subject: net: multicast: calculate csum of looped-back and forwarded packets During a testing of an user-space application which transmits UDP multicast datagrams and utilizes multicast routing to send the UDP datagrams out of defined network interfaces, I've found a multicast router does not fill-in UDP checksum into locally produced, looped-back and forwarded UDP datagrams, if an original output NIC the datagrams are sent to has UDP TX checksum offload enabled. The datagrams are sent malformed out of the NIC the datagrams have been forwarded to. It is because: 1. If TX checksum offload is enabled on the output NIC, UDP checksum is not calculated by kernel and is not filled into skb data. 2. dev_loopback_xmit(), which is called solely by ip_mc_finish_output(), sets skb->ip_summed = CHECKSUM_UNNECESSARY unconditionally. 3. Since 35fc92a9 ("[NET]: Allow forwarding of ip_summed except CHECKSUM_COMPLETE"), the ip_summed value is preserved during forwarding. 4. If ip_summed != CHECKSUM_PARTIAL, checksum is not calculated during a packet egress. The minimum fix in dev_loopback_xmit(): 1. Preserves skb->ip_summed CHECKSUM_PARTIAL. This is the case when the original output NIC has TX checksum offload enabled. The effects are: a) If the forwarding destination interface supports TX checksum offloading, the NIC driver is responsible to fill-in the checksum. b) If the forwarding destination interface does NOT support TX checksum offloading, checksums are filled-in by kernel before skb is submitted to the NIC driver. c) For local delivery, checksum validation is skipped as in the case of CHECKSUM_UNNECESSARY, thanks to skb_csum_unnecessary(). 2. Translates ip_summed CHECKSUM_NONE to CHECKSUM_UNNECESSARY. It means, for CHECKSUM_NONE, the behavior is unmodified and is there to skip a looped-back packet local delivery checksum validation. Signed-off-by: Cyril Strejc Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/udp.h | 5 +++-- net/core/dev.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 360df454356c..909ecf447e0f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -494,8 +494,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial * packets in udp_gro_complete_segment. As does UDP GSO, verified by * udp_send_skb. But when those packets are looped in dev_loopback_xmit - * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this - * specific case, where PARTIAL is both correct and required. + * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY. + * Reset in this specific case, where PARTIAL is both correct and + * required. */ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/net/core/dev.c b/net/core/dev.c index ea2366497697..eb3a366bf212 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3912,7 +3912,8 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); skb->pkt_type = PACKET_LOOPBACK; - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (skb->ip_summed == CHECKSUM_NONE) + skb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(skb)); skb_dst_force(skb); netif_rx_ni(skb); -- cgit v1.2.3 From 2195f2062e4cc93870da8e71c318ef98a1c51cef Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 25 Oct 2021 16:49:36 +0200 Subject: nfc: port100: fix using -ERRNO as command type mask During probing, the driver tries to get a list (mask) of supported command types in port100_get_command_type_mask() function. The value is u64 and 0 is treated as invalid mask (no commands supported). The function however returns also -ERRNO as u64 which will be interpret as valid command mask. Return 0 on every error case of port100_get_command_type_mask(), so the probing will stop. Cc: Fixes: 0347a6ab300a ("NFC: port100: Commands mechanism implementation") Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/port100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 517376c43b86..16ceb763594f 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -1006,11 +1006,11 @@ static u64 port100_get_command_type_mask(struct port100 *dev) skb = port100_alloc_skb(dev, 0); if (!skb) - return -ENOMEM; + return 0; resp = port100_send_cmd_sync(dev, PORT100_CMD_GET_COMMAND_TYPE, skb); if (IS_ERR(resp)) - return PTR_ERR(resp); + return 0; if (resp->len < 8) mask = 0; -- cgit v1.2.3 From fa40d9734a57bcbfa79a280189799f76c88f7bb0 Mon Sep 17 00:00:00 2001 From: Max VA Date: Mon, 25 Oct 2021 17:31:53 +0200 Subject: tipc: fix size validations for the MSG_CRYPTO type The function tipc_crypto_key_rcv is used to parse MSG_CRYPTO messages to receive keys from other nodes in the cluster in order to decrypt any further messages from them. This patch verifies that any supplied sizes in the message body are valid for the received message. Fixes: 1ef6f7c9390f ("tipc: add automatic session key exchange") Signed-off-by: Max VA Acked-by: Ying Xue Signed-off-by: Greg Kroah-Hartman Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/crypto.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index c9391d38de85..dc60c32bb70d 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -2285,43 +2285,53 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr) u16 key_gen = msg_key_gen(hdr); u16 size = msg_data_sz(hdr); u8 *data = msg_data(hdr); + unsigned int keylen; + + /* Verify whether the size can exist in the packet */ + if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) { + pr_debug("%s: message data size is too small\n", rx->name); + goto exit; + } + + keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); + + /* Verify the supplied size values */ + if (unlikely(size != keylen + sizeof(struct tipc_aead_key) || + keylen > TIPC_AEAD_KEY_SIZE_MAX)) { + pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name); + goto exit; + } spin_lock(&rx->lock); if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) { pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name, rx->skey, key_gen, rx->key_gen); - goto exit; + goto exit_unlock; } /* Allocate memory for the key */ skey = kmalloc(size, GFP_ATOMIC); if (unlikely(!skey)) { pr_err("%s: unable to allocate memory for skey\n", rx->name); - goto exit; + goto exit_unlock; } /* Copy key from msg data */ - skey->keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); + skey->keylen = keylen; memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME); memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->keylen); - /* Sanity check */ - if (unlikely(size != tipc_aead_key_size(skey))) { - kfree(skey); - skey = NULL; - goto exit; - } - rx->key_gen = key_gen; rx->skey_mode = msg_key_mode(hdr); rx->skey = skey; rx->nokey = 0; mb(); /* for nokey flag */ -exit: +exit_unlock: spin_unlock(&rx->lock); +exit: /* Schedule the key attaching on this crypto */ if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0))) return true; -- cgit v1.2.3 From 6f68cd634856f8ca93bafd623ba5357e0f648c68 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 24 Oct 2021 16:13:56 +0300 Subject: net: batman-adv: fix error handling Syzbot reported ODEBUG warning in batadv_nc_mesh_free(). The problem was in wrong error handling in batadv_mesh_init(). Before this patch batadv_mesh_init() was calling batadv_mesh_free() in case of any batadv_*_init() calls failure. This approach may work well, when there is some kind of indicator, which can tell which parts of batadv are initialized; but there isn't any. All written above lead to cleaning up uninitialized fields. Even if we hide ODEBUG warning by initializing bat_priv->nc.work, syzbot was able to hit GPF in batadv_nc_purge_paths(), because hash pointer in still NULL. [1] To fix these bugs we can unwind batadv_*_init() calls one by one. It is good approach for 2 reasons: 1) It fixes bugs on error handling path 2) It improves the performance, since we won't call unneeded batadv_*_free() functions. So, this patch makes all batadv_*_init() clean up all allocated memory before returning with an error to no call correspoing batadv_*_free() and open-codes batadv_mesh_free() with proper order to avoid touching uninitialized fields. Link: https://lore.kernel.org/netdev/000000000000c87fbd05cef6bcb0@google.com/ [1] Reported-and-tested-by: syzbot+28b0702ada0bf7381f58@syzkaller.appspotmail.com Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Pavel Skripkin Acked-by: Sven Eckelmann Signed-off-by: David S. Miller --- net/batman-adv/bridge_loop_avoidance.c | 8 +++-- net/batman-adv/main.c | 56 ++++++++++++++++++++++++---------- net/batman-adv/network-coding.c | 4 ++- net/batman-adv/translation-table.c | 4 ++- 4 files changed, 52 insertions(+), 20 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 1669744304c5..17687848daec 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1560,10 +1560,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv) return 0; bat_priv->bla.claim_hash = batadv_hash_new(128); - bat_priv->bla.backbone_hash = batadv_hash_new(32); + if (!bat_priv->bla.claim_hash) + return -ENOMEM; - if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash) + bat_priv->bla.backbone_hash = batadv_hash_new(32); + if (!bat_priv->bla.backbone_hash) { + batadv_hash_destroy(bat_priv->bla.claim_hash); return -ENOMEM; + } batadv_hash_set_lock_class(bat_priv->bla.claim_hash, &batadv_claim_hash_lock_class_key); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 3ddd66e4c29e..5207cd8d6ad8 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -190,29 +190,41 @@ int batadv_mesh_init(struct net_device *soft_iface) bat_priv->gw.generation = 0; - ret = batadv_v_mesh_init(bat_priv); - if (ret < 0) - goto err; - ret = batadv_originator_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_orig; + } ret = batadv_tt_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_tt; + } + + ret = batadv_v_mesh_init(bat_priv); + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_v; + } ret = batadv_bla_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_bla; + } ret = batadv_dat_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_dat; + } ret = batadv_nc_mesh_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_nc; + } batadv_gw_init(bat_priv); batadv_mcast_init(bat_priv); @@ -222,8 +234,20 @@ int batadv_mesh_init(struct net_device *soft_iface) return 0; -err: - batadv_mesh_free(soft_iface); +err_nc: + batadv_dat_free(bat_priv); +err_dat: + batadv_bla_free(bat_priv); +err_bla: + batadv_v_mesh_free(bat_priv); +err_v: + batadv_tt_free(bat_priv); +err_tt: + batadv_originator_free(bat_priv); +err_orig: + batadv_purge_outstanding_packets(bat_priv, NULL); + atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); + return ret; } diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 9f06132e007d..0a7f1d36a6a8 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -152,8 +152,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv) &batadv_nc_coding_hash_lock_class_key); bat_priv->nc.decoding_hash = batadv_hash_new(128); - if (!bat_priv->nc.decoding_hash) + if (!bat_priv->nc.decoding_hash) { + batadv_hash_destroy(bat_priv->nc.coding_hash); goto err; + } batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, &batadv_nc_decoding_hash_lock_class_key); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index e0b3dace2020..4b7ad6684bc4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -4162,8 +4162,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv) return ret; ret = batadv_tt_global_init(bat_priv); - if (ret < 0) + if (ret < 0) { + batadv_tt_local_table_free(bat_priv); return ret; + } batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1, batadv_tt_tvlv_unicast_handler_v1, -- cgit v1.2.3 From db6c3c064f5d55fa9969f33eafca3cdbefbb3541 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 26 Oct 2021 12:36:17 +0200 Subject: net: lan78xx: fix division by zero in send path Add the missing endpoint max-packet sanity check to probe() to avoid division by zero in lan78xx_tx_bh() in case a malicious device has broken descriptors (or when doing descriptor fuzz testing). Note that USB core will reject URBs submitted for endpoints with zero wMaxPacketSize but that drivers doing packet-size calculations still need to handle this (cf. commit 2548288b4fb0 ("USB: Fix: Don't skip endpoint descriptors with maxpacket=0")). Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Cc: stable@vger.kernel.org # 4.3 Cc: Woojung.Huh@microchip.com Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 793f8fbe0069..63cd72c5f580 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -4122,6 +4122,12 @@ static int lan78xx_probe(struct usb_interface *intf, dev->maxpacket = usb_maxpacket(dev->udev, dev->pipe_out, 1); + /* Reject broken descriptors. */ + if (dev->maxpacket == 0) { + ret = -ENODEV; + goto out4; + } + /* driver requires remote-wakeup capability during autosuspend. */ intf->needs_remote_wakeup = 1; -- cgit v1.2.3 From 19fa0887c57d35b57bfb895e6caf8e72d9601ec0 Mon Sep 17 00:00:00 2001 From: Vadym Kochan Date: Tue, 26 Oct 2021 15:19:07 +0300 Subject: MAINTAINERS: please remove myself from the Prestera driver Signed-off-by: Vadym Kochan Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d118d7957d2..086f5c89216b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11278,7 +11278,6 @@ F: Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst F: drivers/net/ethernet/marvell/octeontx2/af/ MARVELL PRESTERA ETHERNET SWITCH DRIVER -M: Vadym Kochan M: Taras Chornyi S: Supported W: https://github.com/Marvell-switching/switchdev-prestera -- cgit v1.2.3 From d308ae0d299a6bb15be4efb91849582d19c23213 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 26 Oct 2021 18:12:04 +0800 Subject: block: drain queue after disk is removed from sysfs Before removing disk from sysfs, userspace still may change queue via sysfs, such as switching elevator or setting wbt latency, both may reinitialize wbt, then the warning in blk_free_queue_stats() will be triggered since rq_qos_exit() is moved to del_gendisk(). Fixes the issue by moving draining queue & tearing down after disk is removed from sysfs, at that time no one can come into queue's store()/show(). Reported-by: Yi Zhang Tested-by: Yi Zhang Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20211026101204.2897166-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/genhd.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index b49858550fa6..ab12ae6e636e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -588,16 +588,6 @@ void del_gendisk(struct gendisk *disk) * Prevent new I/O from crossing bio_queue_enter(). */ blk_queue_start_drain(q); - blk_mq_freeze_queue_wait(q); - - rq_qos_exit(q); - blk_sync_queue(q); - blk_flush_integrity(); - /* - * Allow using passthrough request again after the queue is torn down. - */ - blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); - __blk_mq_unfreeze_queue(q, true); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); @@ -620,6 +610,18 @@ void del_gendisk(struct gendisk *disk) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); + + blk_mq_freeze_queue_wait(q); + + rq_qos_exit(q); + blk_sync_queue(q); + blk_flush_integrity(); + /* + * Allow using passthrough request again after the queue is torn down. + */ + blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); + __blk_mq_unfreeze_queue(q, true); + } EXPORT_SYMBOL(del_gendisk); -- cgit v1.2.3 From 05d5da3cb11c91c39e607066d3313a6ce621796a Mon Sep 17 00:00:00 2001 From: Christoph Niedermaier Date: Mon, 25 Oct 2021 09:37:06 +0200 Subject: MAINTAINERS: Add maintainers for DHCOM i.MX6 and DHCOM/DHCOR STM32MP1 Add maintainers for DH electronics DHCOM i.MX6 and DHCOM/DHCOR STM32MP1 boards. Signed-off-by: Christoph Niedermaier Cc: linux-arm-kernel@lists.infradead.org Cc: kernel@dh-electronics.com Cc: arnd@arndb.de Link: https://lore.kernel.org/r/20211025073706.2794-1-cniedermaier@dh-electronics.com' To: soc@kernel.org To: linux-kernel@vger.kernel.org Signed-off-by: Arnd Bergmann --- MAINTAINERS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d118d7957d2..6fbcb632649f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5458,6 +5458,19 @@ F: include/net/devlink.h F: include/uapi/linux/devlink.h F: net/core/devlink.c +DH ELECTRONICS IMX6 DHCOM BOARD SUPPORT +M: Christoph Niedermaier +L: kernel@dh-electronics.com +S: Maintained +F: arch/arm/boot/dts/imx6*-dhcom-* + +DH ELECTRONICS STM32MP1 DHCOM/DHCOR BOARD SUPPORT +M: Marek Vasut +L: kernel@dh-electronics.com +S: Maintained +F: arch/arm/boot/dts/stm32mp1*-dhcom-* +F: arch/arm/boot/dts/stm32mp1*-dhcor-* + DIALOG SEMICONDUCTOR DRIVERS M: Support Opensource S: Supported -- cgit v1.2.3 From 697542bceae51f7620af333b065dd09d213629fb Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Fri, 22 Oct 2021 17:21:06 +0900 Subject: mmc: dw_mmc: exynos: fix the finding clock sample value Even though there are candiates value if can't find best value, it's returned -EIO. It's not proper behavior. If there is not best value, use a first candiate value to work eMMC. Signed-off-by: Jaehoon Chung Tested-by: Marek Szyprowski Tested-by: Christian Hewitt Cc: stable@vger.kernel.org Fixes: c537a1c5ff63 ("mmc: dw_mmc: exynos: add variable delay tuning sequence") Link: https://lore.kernel.org/r/20211022082106.1557-1-jh80.chung@samsung.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc-exynos.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c index 0c75810812a0..1f8a3c0ddfe1 100644 --- a/drivers/mmc/host/dw_mmc-exynos.c +++ b/drivers/mmc/host/dw_mmc-exynos.c @@ -464,6 +464,18 @@ static s8 dw_mci_exynos_get_best_clksmpl(u8 candiates) } } + /* + * If there is no cadiates value, then it needs to return -EIO. + * If there are candiates values and don't find bset clk sample value, + * then use a first candiates clock sample value. + */ + for (i = 0; i < iter; i++) { + __c = ror8(candiates, i); + if ((__c & 0x1) == 0x1) { + loc = i; + goto out; + } + } out: return loc; } @@ -494,6 +506,8 @@ static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot, u32 opcode) priv->tuned_sample = found; } else { ret = -EIO; + dev_warn(&mmc->class_dev, + "There is no candiates value about clksmpl!\n"); } return ret; -- cgit v1.2.3 From 8c8171929116cc23f74743d99251eedadf62341a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 25 Oct 2021 13:56:08 +0200 Subject: mmc: vub300: fix control-message timeouts USB control-message timeouts are specified in milliseconds and should specifically not vary with CONFIG_HZ. Fixes: 88095e7b473a ("mmc: Add new VUB300 USB-to-SD/SDIO/MMC driver") Cc: stable@vger.kernel.org # 3.0 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20211025115608.5287-1-johan@kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/vub300.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index 4950d10d3a19..97beece62fec 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -576,7 +576,7 @@ static void check_vub300_port_status(struct vub300_mmc_host *vub300) GET_SYSTEM_PORT_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->system_port_status, - sizeof(vub300->system_port_status), HZ); + sizeof(vub300->system_port_status), 1000); if (sizeof(vub300->system_port_status) == retval) new_system_port_status(vub300); } @@ -1241,7 +1241,7 @@ static void __download_offload_pseudocode(struct vub300_mmc_host *vub300, SET_INTERRUPT_PSEUDOCODE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, - xfer_buffer, xfer_length, HZ); + xfer_buffer, xfer_length, 1000); kfree(xfer_buffer); if (retval < 0) goto copy_error_message; @@ -1284,7 +1284,7 @@ static void __download_offload_pseudocode(struct vub300_mmc_host *vub300, SET_TRANSFER_PSEUDOCODE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, - xfer_buffer, xfer_length, HZ); + xfer_buffer, xfer_length, 1000); kfree(xfer_buffer); if (retval < 0) goto copy_error_message; @@ -1991,7 +1991,7 @@ static void __set_clock_speed(struct vub300_mmc_host *vub300, u8 buf[8], usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_CLOCK_SPEED, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x00, 0x00, buf, buf_array_size, HZ); + 0x00, 0x00, buf, buf_array_size, 1000); if (retval != 8) { dev_err(&vub300->udev->dev, "SET_CLOCK_SPEED" " %dkHz failed with retval=%d\n", kHzClock, retval); @@ -2013,14 +2013,14 @@ static void vub300_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_SD_POWER, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0000, 0x0000, NULL, 0, HZ); + 0x0000, 0x0000, NULL, 0, 1000); /* must wait for the VUB300 u-proc to boot up */ msleep(600); } else if ((ios->power_mode == MMC_POWER_UP) && !vub300->card_powered) { usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_SD_POWER, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0001, 0x0000, NULL, 0, HZ); + 0x0001, 0x0000, NULL, 0, 1000); msleep(600); vub300->card_powered = 1; } else if (ios->power_mode == MMC_POWER_ON) { @@ -2275,14 +2275,14 @@ static int vub300_probe(struct usb_interface *interface, GET_HC_INF0, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->hc_info, - sizeof(vub300->hc_info), HZ); + sizeof(vub300->hc_info), 1000); if (retval < 0) goto error5; retval = usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_ROM_WAIT_STATES, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - firmware_rom_wait_states, 0x0000, NULL, 0, HZ); + firmware_rom_wait_states, 0x0000, NULL, 0, 1000); if (retval < 0) goto error5; dev_info(&vub300->udev->dev, @@ -2297,7 +2297,7 @@ static int vub300_probe(struct usb_interface *interface, GET_SYSTEM_PORT_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->system_port_status, - sizeof(vub300->system_port_status), HZ); + sizeof(vub300->system_port_status), 1000); if (retval < 0) { goto error4; } else if (sizeof(vub300->system_port_status) == retval) { -- cgit v1.2.3 From 92b18252b91de567cd875f2e84722b10ab34ee28 Mon Sep 17 00:00:00 2001 From: Wenbin Mei Date: Tue, 26 Oct 2021 15:08:12 +0800 Subject: mmc: cqhci: clear HALT state after CQE enable While mmc0 enter suspend state, we need halt CQE to send legacy cmd(flush cache) and disable cqe, for resume back, we enable CQE and not clear HALT state. In this case MediaTek mmc host controller will keep the value for HALT state after CQE disable/enable flow, so the next CQE transfer after resume will be timeout due to CQE is in HALT state, the log as below: <4>.(4)[318:kworker/4:1H]mmc0: cqhci: timeout for tag 2 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: ============ CQHCI REGISTER DUMP =========== <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Caps: 0x100020b6 | Version: 0x00000510 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Config: 0x00001103 | Control: 0x00000001 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Int stat: 0x00000000 | Int enab: 0x00000006 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Int sig: 0x00000006 | Int Coal: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: TDL base: 0xfd05f000 | TDL up32: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Doorbell: 0x8000203c | TCN: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Dev queue: 0x00000000 | Dev Pend: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Task clr: 0x00000000 | SSC1: 0x00001000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: SSC2: 0x00000001 | DCMD rsp: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: RED mask: 0xfdf9a080 | TERRI: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: Resp idx: 0x00000000 | Resp arg: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: CRNQP: 0x00000000 | CRNQDUN: 0x00000000 <4>.(4)[318:kworker/4:1H]mmc0: cqhci: CRNQIS: 0x00000000 | CRNQIE: 0x00000000 This change check HALT state after CQE enable, if CQE is in HALT state, we will clear it. Signed-off-by: Wenbin Mei Cc: stable@vger.kernel.org Acked-by: Adrian Hunter Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host") Link: https://lore.kernel.org/r/20211026070812.9359-1-wenbin.mei@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/cqhci-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c index 38559a956330..31f841231609 100644 --- a/drivers/mmc/host/cqhci-core.c +++ b/drivers/mmc/host/cqhci-core.c @@ -282,6 +282,9 @@ static void __cqhci_enable(struct cqhci_host *cq_host) cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) + cqhci_writel(cq_host, 0, CQHCI_CTL); + mmc->cqe_on = true; if (cq_host->ops->enable) -- cgit v1.2.3 From 6e7733ef0bb9372d5491168635f6ecba8ac3cb8a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 7 Oct 2021 17:33:02 -0700 Subject: Revert "watchdog: iTCO_wdt: Account for rebooting on second timeout" This reverts commit cb011044e34c ("watchdog: iTCO_wdt: Account for rebooting on second timeout") and commit aec42642d91f ("watchdog: iTCO_wdt: Fix detection of SMI-off case") since those patches cause a regression on certain boards (https://bugzilla.kernel.org/show_bug.cgi?id=213809). While this revert may result in some boards to only reset after twice the configured timeout value, that is still better than a watchdog reset after half the configured value. Fixes: cb011044e34c ("watchdog: iTCO_wdt: Account for rebooting on second timeout") Fixes: aec42642d91f ("watchdog: iTCO_wdt: Fix detection of SMI-off case") Cc: Jan Kiszka Cc: Mantas Mikulėnas Reported-by: Javier S. Pedro Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20211008003302.1461733-1-linux@roeck-us.net Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/iTCO_wdt.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 643c6c2d0b72..ced2fc0deb8c 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -71,8 +71,6 @@ #define TCOBASE(p) ((p)->tco_res->start) /* SMI Control and Enable Register */ #define SMI_EN(p) ((p)->smi_res->start) -#define TCO_EN (1 << 13) -#define GBL_SMI_EN (1 << 0) #define TCO_RLD(p) (TCOBASE(p) + 0x00) /* TCO Timer Reload/Curr. Value */ #define TCOv1_TMR(p) (TCOBASE(p) + 0x01) /* TCOv1 Timer Initial Value*/ @@ -357,12 +355,8 @@ static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t) tmrval = seconds_to_ticks(p, t); - /* - * If TCO SMIs are off, the timer counts down twice before rebooting. - * Otherwise, the BIOS generally reboots when the SMI triggers. - */ - if (p->smi_res && - (inl(SMI_EN(p)) & (TCO_EN | GBL_SMI_EN)) != (TCO_EN | GBL_SMI_EN)) + /* For TCO v1 the timer counts down twice before rebooting */ + if (p->iTCO_version == 1) tmrval /= 2; /* from the specs: */ @@ -527,7 +521,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev) * Disables TCO logic generating an SMI# */ val32 = inl(SMI_EN(p)); - val32 &= ~TCO_EN; /* Turn off SMI clearing watchdog */ + val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */ outl(val32, SMI_EN(p)); } -- cgit v1.2.3 From f31afb502c3151855df3ed40f5974c7884c10d14 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 3 Sep 2021 12:21:01 +0100 Subject: watchdog: sbsa: only use 32-bit accessors SBSA says of the generic watchdog: All registers are 32 bits in size and should be accessed using 32-bit reads and writes. If an access size other than 32 bits is used then the results are IMPLEMENTATION DEFINED. and for qemu, the implementation will only allow 32-bit accesses resulting in a synchronous external abort when configuring the watchdog. Use lo_hi_* accessors rather than a readq/writeq. Fixes: abd3ac7902fb ("watchdog: sbsa: Support architecture version 1") Signed-off-by: Jamie Iles Reviewed-by: Guenter Roeck Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/20210903112101.493552-1-quic_jiles@quicinc.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sbsa_gwdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index ee9ff38929eb..6f4319bdbc50 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -130,7 +130,7 @@ static u64 sbsa_gwdt_reg_read(struct sbsa_gwdt *gwdt) if (gwdt->version == 0) return readl(gwdt->control_base + SBSA_GWDT_WOR); else - return readq(gwdt->control_base + SBSA_GWDT_WOR); + return lo_hi_readq(gwdt->control_base + SBSA_GWDT_WOR); } static void sbsa_gwdt_reg_write(u64 val, struct sbsa_gwdt *gwdt) @@ -138,7 +138,7 @@ static void sbsa_gwdt_reg_write(u64 val, struct sbsa_gwdt *gwdt) if (gwdt->version == 0) writel((u32)val, gwdt->control_base + SBSA_GWDT_WOR); else - writeq(val, gwdt->control_base + SBSA_GWDT_WOR); + lo_hi_writeq(val, gwdt->control_base + SBSA_GWDT_WOR); } /* -- cgit v1.2.3 From bcc3e704f1b73f7f8674364f0bda6593bbc9fd2b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Sep 2021 11:20:23 +0200 Subject: watchdog: sbsa: drop unneeded MODULE_ALIAS The MODULE_DEVICE_TABLE already creates proper alias for platform driver. Having another MODULE_ALIAS causes the alias to be duplicated. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20210917092024.19323-1-krzysztof.kozlowski@canonical.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sbsa_gwdt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index 6f4319bdbc50..9791c74aebd4 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -411,4 +411,3 @@ MODULE_AUTHOR("Suravee Suthikulpanit "); MODULE_AUTHOR("Al Stone "); MODULE_AUTHOR("Timur Tabi "); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:" DRV_NAME); -- cgit v1.2.3 From abd1c6adc16d1b82109a29b570fc545b775049d5 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 10 Sep 2021 21:29:25 -0700 Subject: watchdog: ixp4xx_wdt: Fix address space warning sparse reports the following address space warning. drivers/watchdog/ixp4xx_wdt.c:122:20: sparse: incorrect type in assignment (different address spaces) drivers/watchdog/ixp4xx_wdt.c:122:20: sparse: expected void [noderef] __iomem *base drivers/watchdog/ixp4xx_wdt.c:122:20: sparse: got void *platform_data Add a typecast to solve the problem. Fixes: 21a0a29d16c6 ("watchdog: ixp4xx: Rewrite driver to use core") Cc: Linus Walleij Reviewed-by: Linus Walleij Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20210911042925.556889-1-linux@roeck-us.net Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ixp4xx_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/ixp4xx_wdt.c b/drivers/watchdog/ixp4xx_wdt.c index 2693ffb24ac7..31b03fa71341 100644 --- a/drivers/watchdog/ixp4xx_wdt.c +++ b/drivers/watchdog/ixp4xx_wdt.c @@ -119,7 +119,7 @@ static int ixp4xx_wdt_probe(struct platform_device *pdev) iwdt = devm_kzalloc(dev, sizeof(*iwdt), GFP_KERNEL); if (!iwdt) return -ENOMEM; - iwdt->base = dev->platform_data; + iwdt->base = (void __iomem *)dev->platform_data; /* * Retrieve rate from a fixed clock from the device tree if -- cgit v1.2.3 From cd004d8299f1dc6cfa6a4eea8f94cb45eaedf070 Mon Sep 17 00:00:00 2001 From: Walter Stoll Date: Thu, 14 Oct 2021 12:22:29 +0200 Subject: watchdog: Fix OMAP watchdog early handling TI's implementation does not service the watchdog even if the kernel command line parameter omap_wdt.early_enable is set to 1. This patch fixes the issue. Signed-off-by: Walter Stoll Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/88a8fe5229cd68fa0f1fd22f5d66666c1b7057a0.camel@duagon.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/omap_wdt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index 1616f93dfad7..74d785b2b478 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -268,8 +268,12 @@ static int omap_wdt_probe(struct platform_device *pdev) wdev->wdog.bootstatus = WDIOF_CARDRESET; } - if (!early_enable) + if (early_enable) { + omap_wdt_start(&wdev->wdog); + set_bit(WDOG_HW_RUNNING, &wdev->wdog.status); + } else { omap_wdt_disable(wdev); + } ret = watchdog_register_device(&wdev->wdog); if (ret) { -- cgit v1.2.3 From 61b1d445f3bfe4c3ba4335ceeb7e8ba688fd31e2 Mon Sep 17 00:00:00 2001 From: Mario Date: Tue, 26 Oct 2021 13:27:37 +0200 Subject: drm: panel-orientation-quirks: Add quirk for GPD Win3 Fixes screen orientation for GPD Win 3 handheld gaming console. Signed-off-by: Mario Risoldi Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20211026112737.9181-1-awxkrnl@gmail.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 30c17a76f49a..e1b2ce4921ae 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -191,6 +191,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"), }, .driver_data = (void *)&gpd_win2, + }, { /* GPD Win 3 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1618-03") + }, + .driver_data = (void *)&lcd720x1280_rightside_up, }, { /* I.T.Works TW891 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), -- cgit v1.2.3 From cd9733f5d75c94a32544d6ce5be47e14194cf137 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 12 Oct 2021 13:20:19 +0800 Subject: tcp_bpf: Fix one concurrency problem in the tcp_bpf_send_verdict function With two Msgs, msgA and msgB and a user doing nonblocking sendmsg calls (or multiple cores) on a single socket 'sk' we could get the following flow. msgA, sk msgB, sk ----------- --------------- tcp_bpf_sendmsg() lock(sk) psock = sk->psock tcp_bpf_sendmsg() lock(sk) ... blocking tcp_bpf_send_verdict if (psock->eval == NONE) psock->eval = sk_psock_msg_verdict .. < handle SK_REDIRECT case > release_sock(sk) < lock dropped so grab here > ret = tcp_bpf_sendmsg_redir psock = sk->psock tcp_bpf_send_verdict lock_sock(sk) ... blocking on B if (psock->eval == NONE) <- boom. psock->eval will have msgA state The problem here is we dropped the lock on msgA and grabbed it with msgB. Now we have old state in psock and importantly psock->eval has not been cleared. So msgB will run whatever action was done on A and the verdict program may never see it. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Liu Jian Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211012052019.184398-1-liujian56@huawei.com --- net/ipv4/tcp_bpf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index d3e9386b493e..9d068153c316 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -232,6 +232,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, bool cork = false, enospc = sk_msg_full(msg); struct sock *sk_redir; u32 tosend, delta = 0; + u32 eval = __SK_NONE; int ret; more_data: @@ -275,13 +276,24 @@ more_data: case __SK_REDIRECT: sk_redir = psock->sk_redir; sk_msg_apply_bytes(psock, tosend); + if (!psock->apply_bytes) { + /* Clean up before releasing the sock lock. */ + eval = psock->eval; + psock->eval = __SK_NONE; + psock->sk_redir = NULL; + } if (psock->cork) { cork = true; psock->cork = NULL; } sk_msg_return(sk, msg, tosend); release_sock(sk); + ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); + + if (eval == __SK_REDIRECT) + sock_put(sk_redir); + lock_sock(sk); if (unlikely(ret < 0)) { int free = sk_msg_free_nocharge(sk, msg); -- cgit v1.2.3 From 7b50ecfcc6cdfe87488576bc3ed443dc8d083b90 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 8 Oct 2021 13:33:03 -0700 Subject: net: Rename ->stream_memory_read to ->sock_is_readable The proto ops ->stream_memory_read() is currently only used by TCP to check whether psock queue is empty or not. We need to rename it before reusing it for non-TCP protocols, and adjust the exsiting users accordingly. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-2-xiyou.wangcong@gmail.com --- include/net/sock.h | 8 +++++++- include/net/tls.h | 2 +- net/ipv4/tcp.c | 5 +---- net/ipv4/tcp_bpf.c | 4 ++-- net/tls/tls_main.c | 4 ++-- net/tls/tls_sw.c | 2 +- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index ea6fbc88c8f9..463f390d90b3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1208,7 +1208,7 @@ struct proto { #endif bool (*stream_memory_free)(const struct sock *sk, int wake); - bool (*stream_memory_read)(const struct sock *sk); + bool (*sock_is_readable)(struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); @@ -2820,4 +2820,10 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs); int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); +static inline bool sk_is_readable(struct sock *sk) +{ + if (sk->sk_prot->sock_is_readable) + return sk->sk_prot->sock_is_readable(sk); + return false; +} #endif /* _SOCK_H */ diff --git a/include/net/tls.h b/include/net/tls.h index be4b3e1cac46..01d2e3744393 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -375,7 +375,7 @@ void tls_sw_release_resources_rx(struct sock *sk); void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -bool tls_sw_stream_read(const struct sock *sk); +bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e8b48df73c85..f5c336f8b0c8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -486,10 +486,7 @@ static bool tcp_stream_is_readable(struct sock *sk, int target) { if (tcp_epollin_ready(sk, target)) return true; - - if (sk->sk_prot->stream_memory_read) - return sk->sk_prot->stream_memory_read(sk); - return false; + return sk_is_readable(sk); } /* diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 9d068153c316..7e71e9e278cb 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -150,7 +150,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir); #ifdef CONFIG_BPF_SYSCALL -static bool tcp_bpf_stream_read(const struct sock *sk) +static bool tcp_bpf_sock_is_readable(struct sock *sk) { struct sk_psock *psock; bool empty = true; @@ -491,7 +491,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], prot[TCP_BPF_BASE].unhash = sock_map_unhash; prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; - prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read; + prot[TCP_BPF_BASE].sock_is_readable = tcp_bpf_sock_is_readable; prot[TCP_BPF_TX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index fde56ff49163..9ab81db8a654 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -681,12 +681,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg; - prot[TLS_BASE][TLS_SW].stream_memory_read = tls_sw_stream_read; + prot[TLS_BASE][TLS_SW].sock_is_readable = tls_sw_sock_is_readable; prot[TLS_BASE][TLS_SW].close = tls_sk_proto_close; prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE]; prot[TLS_SW][TLS_SW].recvmsg = tls_sw_recvmsg; - prot[TLS_SW][TLS_SW].stream_memory_read = tls_sw_stream_read; + prot[TLS_SW][TLS_SW].sock_is_readable = tls_sw_sock_is_readable; prot[TLS_SW][TLS_SW].close = tls_sk_proto_close; #ifdef CONFIG_TLS_DEVICE diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 4feb95e34b64..d5d09bd817b7 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2026,7 +2026,7 @@ splice_read_end: return copied ? : err; } -bool tls_sw_stream_read(const struct sock *sk) +bool tls_sw_sock_is_readable(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); -- cgit v1.2.3 From fb4e0a5e73d4bb5ab69b7905abd2ec3b580e9b59 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 8 Oct 2021 13:33:04 -0700 Subject: skmsg: Extract and reuse sk_msg_is_readable() tcp_bpf_sock_is_readable() is pretty much generic, we can extract it and reuse it for non-TCP sockets. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-3-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 1 + net/core/skmsg.c | 14 ++++++++++++++ net/ipv4/tcp_bpf.c | 15 +-------------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 14ab0c0bc924..1ce9a9eb223b 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -128,6 +128,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); +bool sk_msg_is_readable(struct sock *sk); static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) { diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 2d6249b28928..a86ef7e844f8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -474,6 +474,20 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } EXPORT_SYMBOL_GPL(sk_msg_recvmsg); +bool sk_msg_is_readable(struct sock *sk) +{ + struct sk_psock *psock; + bool empty = true; + + rcu_read_lock(); + psock = sk_psock(sk); + if (likely(psock)) + empty = list_empty(&psock->ingress_msg); + rcu_read_unlock(); + return !empty; +} +EXPORT_SYMBOL_GPL(sk_msg_is_readable); + static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, struct sk_buff *skb) { diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 7e71e9e278cb..5f4d6f45d87f 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -150,19 +150,6 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir); #ifdef CONFIG_BPF_SYSCALL -static bool tcp_bpf_sock_is_readable(struct sock *sk) -{ - struct sk_psock *psock; - bool empty = true; - - rcu_read_lock(); - psock = sk_psock(sk); - if (likely(psock)) - empty = list_empty(&psock->ingress_msg); - rcu_read_unlock(); - return !empty; -} - static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, long timeo) { @@ -491,7 +478,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], prot[TCP_BPF_BASE].unhash = sock_map_unhash; prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; - prot[TCP_BPF_BASE].sock_is_readable = tcp_bpf_sock_is_readable; + prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable; prot[TCP_BPF_TX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg; -- cgit v1.2.3 From af493388950b6ea3a86f860cfaffab137e024fc8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 8 Oct 2021 13:33:05 -0700 Subject: net: Implement ->sock_is_readable() for UDP and AF_UNIX Yucong noticed we can't poll() sockets in sockmap even when they are the destination sockets of redirections. This is because we never poll any psock queues in ->poll(), except for TCP. With ->sock_is_readable() now we can overwrite >sock_is_readable(), invoke and implement it for both UDP and AF_UNIX sockets. Reported-by: Yucong Sun Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-4-xiyou.wangcong@gmail.com --- net/ipv4/udp.c | 3 +++ net/ipv4/udp_bpf.c | 1 + net/unix/af_unix.c | 4 ++++ net/unix/unix_bpf.c | 2 ++ 4 files changed, 10 insertions(+) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8536b2a7210b..2fffcf2b54f3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2867,6 +2867,9 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) mask &= ~(EPOLLIN | EPOLLRDNORM); + /* psock ingress_msg queue should not contain any bad checksum frames */ + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; return mask; } diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 7a1d5f473878..bbe6569c9ad3 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -114,6 +114,7 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base) *prot = *base; prot->close = sock_map_close; prot->recvmsg = udp_bpf_recvmsg; + prot->sock_is_readable = sk_msg_is_readable; } static void udp_bpf_check_v6_needs_rebuild(struct proto *ops) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 89f9e85ae970..78e08e82c08c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3052,6 +3052,8 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && @@ -3091,6 +3093,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if (sk->sk_type == SOCK_SEQPACKET) { diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index b927e2baae50..452376c6f419 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -102,6 +102,7 @@ static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *prot = *base; prot->close = sock_map_close; prot->recvmsg = unix_bpf_recvmsg; + prot->sock_is_readable = sk_msg_is_readable; } static void unix_stream_bpf_rebuild_protos(struct proto *prot, @@ -110,6 +111,7 @@ static void unix_stream_bpf_rebuild_protos(struct proto *prot, *prot = *base; prot->close = sock_map_close; prot->recvmsg = unix_bpf_recvmsg; + prot->sock_is_readable = sk_msg_is_readable; prot->unhash = sock_map_unhash; } -- cgit v1.2.3 From 67b821502dbd6c9b23715da79cb9b37fa7d969dc Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Fri, 8 Oct 2021 13:33:06 -0700 Subject: selftests/bpf: Use recv_timeout() instead of retries We use non-blocking sockets in those tests, retrying for EAGAIN is ugly because there is no upper bound for the packet arrival time, at least in theory. After we fix poll() on sockmap sockets, now we can switch to select()+recv(). Signed-off-by: Yucong Sun Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-5-xiyou.wangcong@gmail.com --- .../selftests/bpf/prog_tests/sockmap_listen.c | 75 ++++++---------------- 1 file changed, 20 insertions(+), 55 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 5c5979046523..d88bb65b74cc 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -949,7 +949,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, int err, n; u32 key; char b; - int retries = 100; zero_verdict_count(verd_mapfd); @@ -1002,17 +1001,11 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, goto close_peer1; if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_peer1: xclose(p1); @@ -1571,7 +1564,6 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; int sfd[2]; u32 key; @@ -1606,17 +1598,11 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close: xclose(c1); @@ -1748,7 +1734,6 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; u32 key; char b; @@ -1781,17 +1766,11 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_cli1: xclose(c1); @@ -1841,7 +1820,6 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; int sfd[2]; u32 key; @@ -1876,17 +1854,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_cli1: xclose(c1); @@ -1932,7 +1904,6 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, int sfd[2]; u32 key; char b; - int retries = 100; zero_verdict_count(verd_mapfd); @@ -1963,17 +1934,11 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close: xclose(c1); -- cgit v1.2.3 From 99d0a3831e3500d945162cdb2310e3a5fce90b60 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Oct 2021 08:46:10 -1000 Subject: bpf: Move BPF_MAP_TYPE for INODE_STORAGE and TASK_STORAGE outside of CONFIG_NET bpf_types.h has BPF_MAP_TYPE_INODE_STORAGE and BPF_MAP_TYPE_TASK_STORAGE declared inside #ifdef CONFIG_NET although they are built regardless of CONFIG_NET. So, when CONFIG_BPF_SYSCALL && !CONFIG_NET, they are built without the declarations leading to spurious build failures and not registered to bpf_map_types making them unavailable. Fix it by moving the BPF_MAP_TYPE for the two map types outside of CONFIG_NET. Reported-by: kernel test robot Fixes: a10787e6d58c ("bpf: Enable task local storage for tracing programs") Signed-off-by: Tejun Heo Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/YXG1cuuSJDqHQfRY@slm.duckdns.org --- include/linux/bpf_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 9c81724e4b98..bbe1eefa4c8a 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -101,14 +101,14 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) -#ifdef CONFIG_NET -BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) +#ifdef CONFIG_NET +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) -- cgit v1.2.3 From 54713c85f536048e685258f880bf298a74c3620d Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 26 Oct 2021 13:00:19 +0200 Subject: bpf: Fix potential race in tail call compatibility check Lorenzo noticed that the code testing for program type compatibility of tail call maps is potentially racy in that two threads could encounter a map with an unset type simultaneously and both return true even though they are inserting incompatible programs. The race window is quite small, but artificially enlarging it by adding a usleep_range() inside the check in bpf_prog_array_compatible() makes it trivial to trigger from userspace with a program that does, essentially: map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, 4, 4, 2, 0); pid = fork(); if (pid) { key = 0; value = xdp_fd; } else { key = 1; value = tc_fd; } err = bpf_map_update_elem(map_fd, &key, &value, 0); While the race window is small, it has potentially serious ramifications in that triggering it would allow a BPF program to tail call to a program of a different type. So let's get rid of it by protecting the update with a spinlock. The commit in the Fixes tag is the last commit that touches the code in question. v2: - Use a spinlock instead of an atomic variable and cmpxchg() (Alexei) v3: - Put lock and the members it protects into an embedded 'owner' struct (Daniel) Fixes: 3324b584b6f6 ("ebpf: misc core cleanup") Reported-by: Lorenzo Bianconi Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211026110019.363464-1-toke@redhat.com --- include/linux/bpf.h | 7 +++++-- kernel/bpf/arraymap.c | 1 + kernel/bpf/core.c | 20 +++++++++++++------- kernel/bpf/syscall.c | 6 ++++-- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 020a7d5bf470..3db6f6c95489 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -929,8 +929,11 @@ struct bpf_array_aux { * stored in the map to make sure that all callers and callees have * the same prog type and JITed flag. */ - enum bpf_prog_type type; - bool jited; + struct { + spinlock_t lock; + enum bpf_prog_type type; + bool jited; + } owner; /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; struct bpf_map *map; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index cebd4fb06d19..447def540544 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1072,6 +1072,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) INIT_WORK(&aux->work, prog_array_map_clear_deferred); INIT_LIST_HEAD(&aux->poke_progs); mutex_init(&aux->poke_mutex); + spin_lock_init(&aux->owner.lock); map = array_map_alloc(attr); if (IS_ERR(map)) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c1e7eb3f1876..6e3ae90ad107 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1823,20 +1823,26 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { + bool ret; + if (fp->kprobe_override) return false; - if (!array->aux->type) { + spin_lock(&array->aux->owner.lock); + + if (!array->aux->owner.type) { /* There's no owner yet where we could check for * compatibility. */ - array->aux->type = fp->type; - array->aux->jited = fp->jited; - return true; + array->aux->owner.type = fp->type; + array->aux->owner.jited = fp->jited; + ret = true; + } else { + ret = array->aux->owner.type == fp->type && + array->aux->owner.jited == fp->jited; } - - return array->aux->type == fp->type && - array->aux->jited == fp->jited; + spin_unlock(&array->aux->owner.lock); + return ret; } static int bpf_check_tail_call(const struct bpf_prog *fp) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9dab49d3f394..1cad6979a0d0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -543,8 +543,10 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { array = container_of(map, struct bpf_array, map); - type = array->aux->type; - jited = array->aux->jited; + spin_lock(&array->aux->owner.lock); + type = array->aux->owner.type; + jited = array->aux->owner.jited; + spin_unlock(&array->aux->owner.lock); } seq_printf(m, -- cgit v1.2.3 From 9586e67b911c95ba158fcc247b230e9c2d718623 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 27 Oct 2021 01:51:27 +0900 Subject: block: schedule queue restart after BLK_STS_ZONE_RESOURCE When dispatching a zone append write request to a SCSI zoned block device, if the target zone of the request is already locked, the device driver will return BLK_STS_ZONE_RESOURCE and the request will be pushed back to the hctx dipatch queue. The queue will be marked as RESTART in dd_finish_request() and restarted in __blk_mq_free_request(). However, this restart applies to the hctx of the completed request. If the requeued request is on a different hctx, dispatch will no be retried until another request is submitted or the next periodic queue run triggers, leading to up to 30 seconds latency for the requeued request. Fix this problem by scheduling a queue restart similarly to the BLK_STS_RESOURCE case or when we cannot get the budget. Also, consolidate the checks into the "need_resource" variable to simplify the condition. Signed-off-by: Naohiro Aota Reviewed-by: Christoph Hellwig Cc: Niklas Cassel Link: https://lore.kernel.org/r/20211026165127.4151055-1-naohiro.aota@wdc.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index bc026372de43..652a31fc3bb3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1325,6 +1325,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, int errors, queued; blk_status_t ret = BLK_STS_OK; LIST_HEAD(zone_list); + bool needs_resource = false; if (list_empty(list)) return false; @@ -1370,6 +1371,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, queued++; break; case BLK_STS_RESOURCE: + needs_resource = true; + fallthrough; case BLK_STS_DEV_RESOURCE: blk_mq_handle_dev_resource(rq, list); goto out; @@ -1380,6 +1383,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, * accept. */ blk_mq_handle_zone_resource(rq, &zone_list); + needs_resource = true; break; default: errors++; @@ -1406,7 +1410,6 @@ out: /* For non-shared tags, the RESTART check will suffice */ bool no_tag = prep == PREP_DISPATCH_NO_TAG && (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); - bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET; if (nr_budgets) blk_mq_release_budgets(q, list); @@ -1447,14 +1450,16 @@ out: * If driver returns BLK_STS_RESOURCE and SCHED_RESTART * bit is set, run queue after a delay to avoid IO stalls * that could otherwise occur if the queue is idle. We'll do - * similar if we couldn't get budget and SCHED_RESTART is set. + * similar if we couldn't get budget or couldn't lock a zone + * and SCHED_RESTART is set. */ needs_restart = blk_mq_sched_needs_restart(hctx); + if (prep == PREP_DISPATCH_NO_BUDGET) + needs_resource = true; if (!needs_restart || (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) blk_mq_run_hw_queue(hctx, true); - else if (needs_restart && (ret == BLK_STS_RESOURCE || - no_budget_avail)) + else if (needs_restart && needs_resource) blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); blk_mq_update_dispatch_busy(hctx, true); -- cgit v1.2.3 From ce7723e9cdae4eb3030da082876580f4b2dc0861 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 26 Oct 2021 19:01:55 +0530 Subject: nvme-tcp: fix possible req->offset corruption With commit db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") r2t and response PDU can get processed while send function is executing. Current data digest send code uses req->offset after kernel_sendmsg(), this creates a race condition where req->offset gets reset before it is used in send function. This can happen in two cases - 1. Target sends r2t PDU which resets req->offset. 2. Target send response PDU which completes the req and then req is used for a new command, nvme_tcp_setup_cmd_pdu() resets req->offset. Fix this by storing req->offset in a local variable and using this local variable after kernel_sendmsg(). Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") Signed-off-by: Varun Prakash Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 0626d14e6d4c..1a209f0d7181 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1050,6 +1050,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; + size_t offset = req->offset; int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { @@ -1066,7 +1067,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) if (unlikely(ret <= 0)) return ret; - if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) { + if (offset + ret == NVME_TCP_DIGEST_LENGTH) { nvme_tcp_done_send_req(queue); return 1; } -- cgit v1.2.3 From d89b9f3bbb58e9e378881209756b0723694f22ff Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Mon, 25 Oct 2021 22:47:30 +0530 Subject: nvme-tcp: fix data digest pointer calculation ddgst is of type __le32, &req->ddgst + req->offset increases &req->ddgst by 4 * req->offset, fix this by type casting &req->ddgst to u8 *. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1a209f0d7181..4ae562d30d2b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1054,7 +1054,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &req->ddgst + req->offset, + .iov_base = (u8 *)&req->ddgst + req->offset, .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset }; -- cgit v1.2.3 From e790de54e94a7a15fb725b34724d41d41cbaa60c Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Mon, 25 Oct 2021 22:46:54 +0530 Subject: nvmet-tcp: fix data digest pointer calculation exp_ddgst is of type __le32, &cmd->exp_ddgst + cmd->offset increases &cmd->exp_ddgst by 4 * cmd->offset, fix this by type casting &cmd->exp_ddgst to u8 *. Fixes: 872d26a391da ("nvmet-tcp: add NVMe over TCP target driver") Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c33a0464346f..586ca20837e7 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -702,7 +702,7 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) struct nvmet_tcp_queue *queue = cmd->queue; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &cmd->exp_ddgst + cmd->offset, + .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset, .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset }; int ret; -- cgit v1.2.3 From 86aeda32b887cdaeb0f4b7bfc9971e36377181c7 Mon Sep 17 00:00:00 2001 From: Amit Engel Date: Wed, 27 Oct 2021 09:49:27 +0300 Subject: nvmet-tcp: fix header digest verification Pass the correct length to nvmet_tcp_verify_hdgst, which is the pdu header length. This fixes a wrong behaviour where header digest verification passes although the digest is wrong. Signed-off-by: Amit Engel Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 586ca20837e7..46c3b3be7e03 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1096,7 +1096,7 @@ recv: } if (queue->hdr_digest && - nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) { + nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) { nvmet_tcp_fatal_error(queue); /* fatal */ return -EPROTO; } -- cgit v1.2.3 From 3a60f6537c9adf0828bf1dd868c59f659395257e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Oct 2021 10:39:03 +0200 Subject: Revert "btrfs: compression: drop kmap/kunmap from generic helpers" This reverts commit 4c2bf276b56d8d27ddbafcdf056ef3fc60ae50b0. The kmaps in compression code are still needed and cause crashes on 32bit machines (ARM, x86). Reproducible eg. by running fstest btrfs/004 with enabled LZO or ZSTD compression. Link: https://lore.kernel.org/all/CAJCQCtT+OuemovPO7GZk8Y8=qtOObr0XTDp8jh4OHD6y84AFxw@mail.gmail.com/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=214839 Signed-off-by: David Sterba --- fs/btrfs/compression.c | 3 ++- fs/btrfs/inode.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7869ad12bc6e..0913ee50e6c3 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -172,9 +172,10 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, /* Hash through the page sector by sector */ for (pg_offset = 0; pg_offset < bytes_left; pg_offset += sectorsize) { - kaddr = page_address(page); + kaddr = kmap_atomic(page); crypto_shash_digest(shash, kaddr + pg_offset, sectorsize, csum); + kunmap_atomic(kaddr); if (memcmp(&csum, cb_sum, csum_size) != 0) { btrfs_print_data_csum_error(inode, disk_start, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2aa9646bce56..e6772f55943e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -287,8 +287,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, cur_size = min_t(unsigned long, compressed_size, PAGE_SIZE); - kaddr = page_address(cpage); + kaddr = kmap_atomic(cpage); write_extent_buffer(leaf, kaddr, ptr, cur_size); + kunmap_atomic(kaddr); i++; ptr += cur_size; -- cgit v1.2.3 From e0c60d0102a5ad3475401e1a2faa3d3623eefce4 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 26 Oct 2021 15:01:15 +0900 Subject: block: Fix partition check for host-aware zoned block devices Commit a33df75c6328 ("block: use an xarray for disk->part_tbl") modified the method to check partition existence in host-aware zoned block devices from disk_has_partitions() helper function call to empty check of xarray disk->part_tbl. However, disk->part_tbl always has single entry for disk->part0 and never becomes empty. This resulted in the host-aware zoned devices always judged to have partitions, and it made the sysfs queue/zoned attribute to be "none" instead of "host-aware" regardless of partition existence in the devices. This also caused DEBUG_LOCKS_WARN_ON(lock->magic != lock) for sdkp->rev_mutex in scsi layer when the kernel detects host-aware zoned device. Since block layer handled the host-aware zoned devices as non- zoned devices, scsi layer did not have chance to initialize the mutex for zone revalidation. Therefore, the warning was triggered. To fix the issues, call the helper function disk_has_partitions() in place of disk->part_tbl empty check. Since the function was removed with the commit a33df75c6328, reimplement it to walk through entries in the xarray disk->part_tbl. Fixes: a33df75c6328 ("block: use an xarray for disk->part_tbl") Signed-off-by: Shin'ichiro Kawasaki Cc: stable@vger.kernel.org # v5.14+ Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211026060115.753746-1-shinichiro.kawasaki@wdc.com Signed-off-by: Jens Axboe --- block/blk-settings.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index a7c857ad7d10..b880c70e22e4 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -842,6 +842,24 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); +static bool disk_has_partitions(struct gendisk *disk) +{ + unsigned long idx; + struct block_device *part; + bool ret = false; + + rcu_read_lock(); + xa_for_each(&disk->part_tbl, idx, part) { + if (bdev_is_partition(part)) { + ret = true; + break; + } + } + rcu_read_unlock(); + + return ret; +} + /** * blk_queue_set_zoned - configure a disk queue zoned model. * @disk: the gendisk of the queue to configure @@ -876,7 +894,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model) * we do nothing special as far as the block layer is concerned. */ if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || - !xa_empty(&disk->part_tbl)) + disk_has_partitions(disk)) model = BLK_ZONED_NONE; break; case BLK_ZONED_NONE: -- cgit v1.2.3 From 3bda2e5df476417b6d08967e2d84234a59d57b1c Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:43 +0800 Subject: net: hns3: fix pause config problem after autoneg disabled If a TP port is configured by follow steps: 1.ethtool -s ethx autoneg off speed 100 duplex full 2.ethtool -A ethx rx on tx on 3.ethtool -s ethx autoneg on(rx&tx negotiated pause results are off) 4.ethtool -s ethx autoneg off speed 100 duplex full In step 3, driver will set rx&tx pause parameters of hardware to off as pause parameters negotiated with link partner are off. After step 4, the "ethtool -a ethx" command shows both rx and tx pause parameters are on. However, pause parameters of hardware are still off and port has no flow control function actually. To fix this problem, if autoneg is disabled, driver uses its saved parameters to restore pause of hardware. If the speed is not changed in this case, there is no link state changed for phy, it will cause the pause parameter is not taken effect, so we need to force phy to go down and up. Fixes: aacbe27e82f0 ("net: hns3: modify how pause options is displayed") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 33 ++++++++++++++++------ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 30 ++++++++++++++++++++ .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 1 + 5 files changed, 57 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index d701451596c8..da3a593f6a56 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -568,6 +568,7 @@ struct hnae3_ae_ops { u32 *auto_neg, u32 *rx_en, u32 *tx_en); int (*set_pauseparam)(struct hnae3_handle *handle, u32 auto_neg, u32 rx_en, u32 tx_en); + int (*restore_pauseparam)(struct hnae3_handle *handle); int (*set_autoneg)(struct hnae3_handle *handle, bool enable); int (*get_autoneg)(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 5ebd96f6833d..7d92dd273ed7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -824,6 +824,26 @@ static int hns3_check_ksettings_param(const struct net_device *netdev, return 0; } +static int hns3_set_phy_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + struct hnae3_handle *handle = hns3_get_handle(netdev); + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + int ret; + + if (cmd->base.speed == SPEED_1000 && + cmd->base.autoneg == AUTONEG_DISABLE) + return -EINVAL; + + if (cmd->base.autoneg == AUTONEG_DISABLE && ops->restore_pauseparam) { + ret = ops->restore_pauseparam(handle); + if (ret) + return ret; + } + + return phy_ethtool_ksettings_set(netdev->phydev, cmd); +} + static int hns3_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { @@ -842,16 +862,11 @@ static int hns3_set_link_ksettings(struct net_device *netdev, cmd->base.autoneg, cmd->base.speed, cmd->base.duplex); /* Only support ksettings_set for netdev with phy attached for now */ - if (netdev->phydev) { - if (cmd->base.speed == SPEED_1000 && - cmd->base.autoneg == AUTONEG_DISABLE) - return -EINVAL; - - return phy_ethtool_ksettings_set(netdev->phydev, cmd); - } else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && - ops->set_phy_link_ksettings) { + if (netdev->phydev) + return hns3_set_phy_link_ksettings(netdev, cmd); + else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && + ops->set_phy_link_ksettings) return ops->set_phy_link_ksettings(handle, cmd); - } if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index dcd40cc73082..c6b9806c75a5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11021,6 +11021,35 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, return -EOPNOTSUPP; } +static int hclge_restore_pauseparam(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 auto_neg, rx_pause, tx_pause; + int ret; + + hclge_get_pauseparam(handle, &auto_neg, &rx_pause, &tx_pause); + /* when autoneg is disabled, the pause setting of phy has no effect + * unless the link goes down. + */ + ret = phy_suspend(hdev->hw.mac.phydev); + if (ret) + return ret; + + phy_set_asym_pause(hdev->hw.mac.phydev, rx_pause, tx_pause); + + ret = phy_resume(hdev->hw.mac.phydev); + if (ret) + return ret; + + ret = hclge_mac_pause_setup_hw(hdev); + if (ret) + dev_err(&hdev->pdev->dev, + "restore pauseparam error, ret = %d.\n", ret); + + return ret; +} + static void hclge_get_ksettings_an_result(struct hnae3_handle *handle, u8 *auto_neg, u32 *speed, u8 *duplex) { @@ -12984,6 +13013,7 @@ static const struct hnae3_ae_ops hclge_ops = { .halt_autoneg = hclge_halt_autoneg, .get_pauseparam = hclge_get_pauseparam, .set_pauseparam = hclge_set_pauseparam, + .restore_pauseparam = hclge_restore_pauseparam, .set_mtu = hclge_set_mtu, .reset_queue = hclge_reset_tqp, .get_stats = hclge_get_stats, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 95074e91a846..124791e4bfee 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1435,7 +1435,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) return 0; } -static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) +int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) { bool tx_en, rx_en; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 2ee9b795f71d..4b2c3a788980 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -244,6 +244,7 @@ int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight); int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id, enum hclge_opcode_type cmd, struct hclge_tm_shaper_para *para); +int hclge_mac_pause_setup_hw(struct hclge_dev *hdev); int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id); int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id); int hclge_tm_get_pg_to_pri_map(struct hclge_dev *hdev, u8 pg_id, -- cgit v1.2.3 From f29da4088fb4eeba457219a931327d1d5f45196a Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Wed, 27 Oct 2021 20:11:44 +0800 Subject: net: hns3: change hclge/hclgevf workqueue to WQ_UNBOUND mode Currently, the workqueue of hclge/hclgevf is executed on the CPU that initiates scheduling requests by default. In stress scenarios, the CPU may be busy and workqueue scheduling is completed after a long period of time. To avoid this situation and implement proper scheduling, use the WQ_UNBOUND mode instead. In this way, the workqueue can be performed on a relatively idle CPU. Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 34 ++++------------------ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 1 - .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 3 files changed, 6 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c6b9806c75a5..3dbde0496545 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2847,33 +2847,28 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, 0); + mod_delayed_work(hclge_wq, &hdev->service_task, 0); } static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, 0); + mod_delayed_work(hclge_wq, &hdev->service_task, 0); } static void hclge_errhand_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, 0); + mod_delayed_work(hclge_wq, &hdev->service_task, 0); } void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, - delay_time); + mod_delayed_work(hclge_wq, &hdev->service_task, delay_time); } static int hclge_get_mac_link_status(struct hclge_dev *hdev, int *link_status) @@ -3491,33 +3486,14 @@ static void hclge_get_misc_vector(struct hclge_dev *hdev) hdev->num_msi_used += 1; } -static void hclge_irq_affinity_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) -{ - struct hclge_dev *hdev = container_of(notify, struct hclge_dev, - affinity_notify); - - cpumask_copy(&hdev->affinity_mask, mask); -} - -static void hclge_irq_affinity_release(struct kref *ref) -{ -} - static void hclge_misc_affinity_setup(struct hclge_dev *hdev) { irq_set_affinity_hint(hdev->misc_vector.vector_irq, &hdev->affinity_mask); - - hdev->affinity_notify.notify = hclge_irq_affinity_notify; - hdev->affinity_notify.release = hclge_irq_affinity_release; - irq_set_affinity_notifier(hdev->misc_vector.vector_irq, - &hdev->affinity_notify); } static void hclge_misc_affinity_teardown(struct hclge_dev *hdev) { - irq_set_affinity_notifier(hdev->misc_vector.vector_irq, NULL); irq_set_affinity_hint(hdev->misc_vector.vector_irq, NULL); } @@ -13082,7 +13058,7 @@ static int hclge_init(void) { pr_info("%s is initializing\n", HCLGE_NAME); - hclge_wq = alloc_workqueue("%s", 0, 0, HCLGE_NAME); + hclge_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, HCLGE_NAME); if (!hclge_wq) { pr_err("%s: failed to create workqueue\n", HCLGE_NAME); return -ENOMEM; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index de6afbcbfbac..69cd8f87b4c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -944,7 +944,6 @@ struct hclge_dev { /* affinity mask and notify for misc interrupt */ cpumask_t affinity_mask; - struct irq_affinity_notify affinity_notify; struct hclge_ptp *ptp; struct devlink *devlink; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index bef6b98e2f50..5efa5420297d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3899,7 +3899,7 @@ static int hclgevf_init(void) { pr_info("%s is initializing\n", HCLGEVF_NAME); - hclgevf_wq = alloc_workqueue("%s", 0, 0, HCLGEVF_NAME); + hclgevf_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, HCLGEVF_NAME); if (!hclgevf_wq) { pr_err("%s: failed to create workqueue\n", HCLGEVF_NAME); return -ENOMEM; -- cgit v1.2.3 From 0251d196b0e1a19c870be882e5d4f496de8ab758 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:45 +0800 Subject: net: hns3: ignore reset event before initialization process is done Currently, if there is a reset event triggered by RAS during device in initialization process, driver may run reset process concurrently with initialization process. In this case, it may cause problem. For example, the RSS indirection table may has not been alloc memory in initialization process yet, but it is used in reset process, it will cause a call trace like this: [61228.744836] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... [61228.897677] Workqueue: hclgevf hclgevf_service_task [hclgevf] [61228.911390] pstate: 40400009 (nZcv daif +PAN -UAO -TCO BTYPE=--) [61228.918670] pc : hclgevf_set_rss_indir_table+0xb4/0x190 [hclgevf] [61228.927812] lr : hclgevf_set_rss_indir_table+0x90/0x190 [hclgevf] [61228.937248] sp : ffff8000162ebb50 [61228.941087] x29: ffff8000162ebb50 x28: ffffb77add72dbc0 x27: ffff0820c7dc8080 [61228.949516] x26: 0000000000000000 x25: ffff0820ad4fc880 x24: ffff0820c7dc8080 [61228.958220] x23: ffff0820c7dc8090 x22: 00000000ffffffff x21: 0000000000000040 [61228.966360] x20: ffffb77add72b9c0 x19: 0000000000000000 x18: 0000000000000030 [61228.974646] x17: 0000000000000000 x16: ffffb77ae713feb0 x15: ffff0820ad4fcce8 [61228.982808] x14: ffffffffffffffff x13: ffff8000962eb7f7 x12: 00003834ec70c960 [61228.991990] x11: 00e0fafa8c206982 x10: 9670facc78a8f9a8 x9 : ffffb77add717530 [61229.001123] x8 : ffff0820ad4fd6b8 x7 : 0000000000000000 x6 : 0000000000000011 [61229.010249] x5 : 00000000000cb1b0 x4 : 0000000000002adb x3 : 0000000000000049 [61229.018662] x2 : ffff8000162ebbb8 x1 : 0000000000000000 x0 : 0000000000000480 [61229.027002] Call trace: [61229.030177] hclgevf_set_rss_indir_table+0xb4/0x190 [hclgevf] [61229.039009] hclgevf_rss_init_hw+0x128/0x1b4 [hclgevf] [61229.046809] hclgevf_reset_rebuild+0x17c/0x69c [hclgevf] [61229.053862] hclgevf_reset_service_task+0x4cc/0xa80 [hclgevf] [61229.061306] hclgevf_service_task+0x6c/0x630 [hclgevf] [61229.068491] process_one_work+0x1dc/0x48c [61229.074121] worker_thread+0x15c/0x464 [61229.078562] kthread+0x168/0x16c [61229.082873] ret_from_fork+0x10/0x18 [61229.088221] Code: 7900e7f6 f904a683 d503201f 9101a3e2 (38616b43) [61229.095357] ---[ end trace 153661a538f6768c ]--- To fix this problem, don't schedule reset task before initialization process is done. Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 + drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +++ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 3dbde0496545..269e579762b2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2853,6 +2853,7 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev) static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && + test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state) && !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) mod_delayed_work(hclge_wq, &hdev->service_task, 0); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 5efa5420297d..cf00ad7bb881 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2232,6 +2232,7 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev) void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev) { if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + test_bit(HCLGEVF_STATE_SERVICE_INITED, &hdev->state) && !test_and_set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); @@ -3449,6 +3450,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) hclgevf_init_rxd_adv_layout(hdev); + set_bit(HCLGEVF_STATE_SERVICE_INITED, &hdev->state); + hdev->last_reset_time = jiffies; dev_info(&hdev->pdev->dev, "finished initializing %s driver\n", HCLGEVF_DRIVER_NAME); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 883130a9b48f..28288d7e3303 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -146,6 +146,7 @@ enum hclgevf_states { HCLGEVF_STATE_REMOVING, HCLGEVF_STATE_NIC_REGISTERED, HCLGEVF_STATE_ROCE_REGISTERED, + HCLGEVF_STATE_SERVICE_INITED, /* task states */ HCLGEVF_STATE_RST_SERVICE_SCHED, HCLGEVF_STATE_RST_HANDLING, -- cgit v1.2.3 From 2a21dab594a98c338c4bfbc31864cbca15888549 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 27 Oct 2021 20:11:46 +0800 Subject: net: hns3: fix data endian problem of some functions of debugfs The member data in struct hclge_desc is type of __le32, it needs endian conversion before using it, and some functions of debugfs didn't do that, so this patch fixes it. Fixes: c0ebebb9ccc1 ("net: hns3: Add "dcb register" status information query function") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 30 ++++++++++------------ 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 32f62cd2dd99..9cda8b3562b8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -391,7 +391,7 @@ static int hclge_dbg_dump_mac(struct hclge_dev *hdev, char *buf, int len) static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u16 qset_id, qset_num; int ret; @@ -408,12 +408,12 @@ static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "%04u %#x %#x %#x %#x\n", - qset_id, bitmap->bit0, bitmap->bit1, - bitmap->bit2, bitmap->bit3); + qset_id, req.bit0, req.bit1, req.bit2, + req.bit3); } return 0; @@ -422,7 +422,7 @@ static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len, static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u8 pri_id, pri_num; int ret; @@ -439,12 +439,11 @@ static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "%03u %#x %#x %#x\n", - pri_id, bitmap->bit0, bitmap->bit1, - bitmap->bit2); + pri_id, req.bit0, req.bit1, req.bit2); } return 0; @@ -453,7 +452,7 @@ static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len, static int hclge_dbg_dump_dcb_pg(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u8 pg_id; int ret; @@ -466,12 +465,11 @@ static int hclge_dbg_dump_dcb_pg(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "%03u %#x %#x %#x\n", - pg_id, bitmap->bit0, bitmap->bit1, - bitmap->bit2); + pg_id, req.bit0, req.bit1, req.bit2); } return 0; @@ -511,7 +509,7 @@ static int hclge_dbg_dump_dcb_queue(struct hclge_dev *hdev, char *buf, int len, static int hclge_dbg_dump_dcb_port(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u8 port_id = 0; int ret; @@ -521,12 +519,12 @@ static int hclge_dbg_dump_dcb_port(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "port_mask: %#x\n", - bitmap->bit0); + req.bit0); *pos += scnprintf(buf + *pos, len - *pos, "port_shaping_pass: %#x\n", - bitmap->bit1); + req.bit1); return 0; } -- cgit v1.2.3 From 6754614a787cbcbf87bae8a75619c24a33ea6791 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 27 Oct 2021 20:11:47 +0800 Subject: net: hns3: add more string spaces for dumping packets number of queue info in debugfs As the width of packets number registers is 32 bits, they needs at most 10 characters for decimal data printing, but now the string spaces is not enough, so this patch fixes it. Fixes: e44c495d95e ("net: hns3: refactor queue info of debugfs") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 2b66c59f5eaf..3aaad96d0176 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -462,7 +462,7 @@ static const struct hns3_dbg_item rx_queue_info_items[] = { { "TAIL", 2 }, { "HEAD", 2 }, { "FBDNUM", 2 }, - { "PKTNUM", 2 }, + { "PKTNUM", 5 }, { "COPYBREAK", 2 }, { "RING_EN", 2 }, { "RX_RING_EN", 2 }, @@ -565,7 +565,7 @@ static const struct hns3_dbg_item tx_queue_info_items[] = { { "HEAD", 2 }, { "FBDNUM", 2 }, { "OFFSET", 2 }, - { "PKTNUM", 2 }, + { "PKTNUM", 5 }, { "RING_EN", 2 }, { "TX_RING_EN", 2 }, { "BASE_ADDR", 10 }, -- cgit v1.2.3 From c7a6e3978ea952efb107ecf511c095c3bbb2945f Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:48 +0800 Subject: net: hns3: expand buffer len for some debugfs command The specified buffer length for three debugfs files fd_tcam, uc and tqp is not enough for their maximum needs, so this patch fixes them. Fixes: b5a0b70d77b9 ("net: hns3: refactor dump fd tcam of debugfs") Fixes: 1556ea9120ff ("net: hns3: refactor dump mac list of debugfs") Fixes: d96b0e59468d ("net: hns3: refactor dump reg of debugfs") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 3aaad96d0176..f2ade0446208 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -137,7 +137,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "uc", .cmd = HNAE3_DBG_CMD_MAC_UC, .dentry = HNS3_DBG_DENTRY_MAC, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_128KB, .init = hns3_dbg_common_file_init, }, { @@ -256,7 +256,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "tqp", .cmd = HNAE3_DBG_CMD_REG_TQP, .dentry = HNS3_DBG_DENTRY_REG, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_128KB, .init = hns3_dbg_common_file_init, }, { @@ -298,7 +298,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "fd_tcam", .cmd = HNAE3_DBG_CMD_FD_TCAM, .dentry = HNS3_DBG_DENTRY_FD, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_1MB, .init = hns3_dbg_common_file_init, }, { -- cgit v1.2.3 From 630a6738da82e2c29e1c38eafd6f8328e0a4963c Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:49 +0800 Subject: net: hns3: adjust string spaces of some parameters of tx bd info in debugfs This patch adjusts the string spaces of some parameters of tx bd info in debugfs according to their maximum needs. Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index f2ade0446208..e54f96251fea 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -790,13 +790,13 @@ static int hns3_dbg_rx_bd_info(struct hns3_dbg_data *d, char *buf, int len) } static const struct hns3_dbg_item tx_bd_info_items[] = { - { "BD_IDX", 5 }, - { "ADDRESS", 2 }, + { "BD_IDX", 2 }, + { "ADDRESS", 13 }, { "VLAN_TAG", 2 }, { "SIZE", 2 }, { "T_CS_VLAN_TSO", 2 }, { "OT_VLAN_TAG", 3 }, - { "TV", 2 }, + { "TV", 5 }, { "OLT_VLAN_LEN", 2 }, { "PAYLEN_OL4CS", 2 }, { "BD_FE_SC_VLD", 2 }, -- cgit v1.2.3 From 4a089e95b4d6bb625044d47aed0c442a8f7bd093 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 20 Oct 2021 12:11:16 -0700 Subject: nios2: Make NIOS2_DTB_SOURCE_BOOL depend on !COMPILE_TEST nios2:allmodconfig builds fail with make[1]: *** No rule to make target 'arch/nios2/boot/dts/""', needed by 'arch/nios2/boot/dts/built-in.a'. Stop. make: [Makefile:1868: arch/nios2/boot/dts] Error 2 (ignored) This is seen with compile tests since those enable NIOS2_DTB_SOURCE_BOOL, which in turn enables NIOS2_DTB_SOURCE. This causes the build error because the default value for NIOS2_DTB_SOURCE is an empty string. Disable NIOS2_DTB_SOURCE_BOOL for compile tests to avoid the error. Fixes: 2fc8483fdcde ("nios2: Build infrastructure") Signed-off-by: Guenter Roeck Reviewed-by: Randy Dunlap Signed-off-by: Dinh Nguyen --- arch/nios2/platform/Kconfig.platform | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform index 9e32fb7f3d4c..e849daff6fd1 100644 --- a/arch/nios2/platform/Kconfig.platform +++ b/arch/nios2/platform/Kconfig.platform @@ -37,6 +37,7 @@ config NIOS2_DTB_PHYS_ADDR config NIOS2_DTB_SOURCE_BOOL bool "Compile and link device tree into kernel image" + depends on !COMPILE_TEST help This allows you to specify a dts (device tree source) file which will be compiled and linked into the kernel image. -- cgit v1.2.3 From 9b0971ca7fc75daca80c0bb6c02e96059daea90a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Oct 2021 12:14:31 -0400 Subject: KVM: SEV-ES: fix another issue with string I/O VMGEXITs If the guest requests string I/O from the hypervisor via VMGEXIT, SW_EXITINFO2 will contain the REP count. However, sev_es_string_io was incorrectly treating it as the size of the GHCB buffer in bytes. This fixes the "outsw" test in the experimental SEV tests of kvm-unit-tests. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reported-by: Marc Orr Tested-by: Marc Orr Reviewed-by: Marc Orr Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index e672493b5d8d..efd207fd335e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2579,11 +2579,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) { - if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2)) + int count; + int bytes; + + if (svm->vmcb->control.exit_info_2 > INT_MAX) + return -EINVAL; + + count = svm->vmcb->control.exit_info_2; + if (unlikely(check_mul_overflow(count, size, &bytes))) + return -EINVAL; + + if (!setup_vmgexit_scratch(svm, in, bytes)) return -EINVAL; - return kvm_sev_es_string_io(&svm->vcpu, size, port, - svm->ghcb_sa, svm->ghcb_sa_len / size, in); + return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in); } void sev_es_init_vmcb(struct vcpu_svm *svm) -- cgit v1.2.3 From 4e84dc47bb48accbbeeba4e6bb3f31aa7895323c Mon Sep 17 00:00:00 2001 From: Steven Rostedt (VMware) Date: Wed, 27 Oct 2021 12:51:01 -0400 Subject: ftrace/nds32: Update the proto for ftrace_trace_function to match ftrace_stub The ftrace callback prototype was changed to pass a special ftrace_regs instead of pt_regs as the last parameter, but the static ftrace for nds32 missed updating ftrace_trace_function and this caused a warning when compared to ftrace_stub: ../arch/nds32/kernel/ftrace.c: In function '_mcount': ../arch/nds32/kernel/ftrace.c:24:35: error: comparison of distinct pointer types lacks a cast [-Werror] 24 | if (ftrace_trace_function != ftrace_stub) | ^~ Link: https://lore.kernel.org/all/20211027055554.19372-1-rdunlap@infradead.org/ Link: https://lkml.kernel.org/r/20211027125101.33449969@gandalf.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: stable@vger.kernel.org Fixes: d19ad0775dcd6 ("ftrace: Have the callbacks receive a struct ftrace_regs instead of pt_regs") Reported-by: Randy Dunlap Signed-off-by: Steven Rostedt (VMware) --- arch/nds32/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 0e23e3a8df6b..d55b73b18149 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -6,7 +6,7 @@ #ifndef CONFIG_DYNAMIC_FTRACE extern void (*ftrace_trace_function)(unsigned long, unsigned long, - struct ftrace_ops*, struct pt_regs*); + struct ftrace_ops*, struct ftrace_regs*); extern void ftrace_graph_caller(void); noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, -- cgit v1.2.3 From 6f7c88691191e6c52ef2543d6f1da8d360b27a24 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 26 Oct 2021 20:40:15 +0800 Subject: usbnet: fix error return code in usbnet_probe() Return error code if usb_maxpacket() returns 0 in usbnet_probe() Fixes: 397430b50a36 ("usbnet: sanity check for maxpacket") Reported-by: Hulk Robot Signed-off-by: Wang Hai Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20211026124015.3025136-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 80432ee0ce69..a33d7fb82a00 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1790,6 +1790,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->maxpacket = usb_maxpacket (dev->udev, dev->out, 1); if (dev->maxpacket == 0) { /* that is a broken device */ + status = -ENODEV; goto out4; } -- cgit v1.2.3 From 890d33561337ffeba0d8ba42517e71288cfee2b6 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 26 Oct 2021 15:31:00 +0200 Subject: virtio-ring: fix DMA metadata flags The flags are currently overwritten, leading to the wrong direction being passed to the DMA unmap functions. Fixes: 72b5e8958738aaa4 ("virtio-ring: store DMA metadata in desc_extra for split virtqueue") Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20211026133100.17541-1-vincent.whitchurch@axis.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index dd95dfd85e98..3035bb6f5458 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -576,7 +576,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); if (!indirect && vq->use_dma_api) - vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags = + vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= ~VRING_DESC_F_NEXT; if (indirect) { -- cgit v1.2.3 From 64a19591a2938b170aa736443d5d3bf4c51e1388 Mon Sep 17 00:00:00 2001 From: Chen Lu Date: Mon, 18 Oct 2021 13:22:38 +0800 Subject: riscv: fix misalgned trap vector base address The trap vector marked by label .Lsecondary_park must align on a 4-byte boundary, as the {m,s}tvec is defined to require 4-byte alignment. Signed-off-by: Chen Lu <181250012@smail.nju.edu.cn> Reviewed-by: Anup Patel Fixes: e011995e826f ("RISC-V: Move relocate and few other functions out of __init") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index fce5184b22c3..52c5ff9804c5 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -193,6 +193,7 @@ setup_trap_vector: csrw CSR_SCRATCH, zero ret +.align 2 .Lsecondary_park: /* We lack SMP support or have too many harts, so park this hart */ wfi -- cgit v1.2.3 From f82161516756be2827609ec4845e1e4316df0709 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 24 Oct 2021 09:38:31 -0700 Subject: ptp: Document the PTP_CLK_MAGIC ioctl number Add PTP_CLK_MAGIC to the userspace-api/ioctl/ioctl-number.rst documentation file. Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.") Signed-off-by: Randy Dunlap Cc: Richard Cochran Cc: John Stultz Cc: Jonathan Corbet Link: https://lore.kernel.org/r/20211024163831.10200-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- Documentation/userspace-api/ioctl/ioctl-number.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 2e8134059c87..6655d929a351 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -104,6 +104,7 @@ Code Seq# Include File Comments '8' all SNP8023 advanced NIC card ';' 64-7F linux/vfio.h +'=' 00-3f uapi/linux/ptp_clock.h '@' 00-0F linux/radeonfb.h conflict! '@' 00-0F drivers/video/aty/aty128fb.c conflict! 'A' 00-1F linux/apm_bios.h conflict! -- cgit v1.2.3 From 72f898ca0ab85fde6facf78b14d9f67a4a7b32d1 Mon Sep 17 00:00:00 2001 From: Janghyub Seo Date: Tue, 26 Oct 2021 07:12:42 +0000 Subject: r8169: Add device 10ec:8162 to driver r8169 This patch makes the driver r8169 pick up device Realtek Semiconductor Co. , Ltd. Device [10ec:8162]. Signed-off-by: Janghyub Seo Suggested-by: Rushab Shah Link: https://lore.kernel.org/r/1635231849296.1489250046.441294000@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 46a6ff9a782d..2918947dd57c 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -157,6 +157,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_VDEVICE(REALTEK, 0x8129) }, { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT }, { PCI_VDEVICE(REALTEK, 0x8161) }, + { PCI_VDEVICE(REALTEK, 0x8162) }, { PCI_VDEVICE(REALTEK, 0x8167) }, { PCI_VDEVICE(REALTEK, 0x8168) }, { PCI_VDEVICE(NCUBE, 0x8168) }, -- cgit v1.2.3 From 7fa598f9706d40bd16f2ab286bdf5808e1393d35 Mon Sep 17 00:00:00 2001 From: Steven Rostedt (VMware) Date: Wed, 27 Oct 2021 12:08:54 -0400 Subject: tracing: Do not warn when connecting eprobe to non existing event When the syscall trace points are not configured in, the kselftests for ftrace will try to attach an event probe (eprobe) to one of the system call trace points. This triggered a WARNING, because the failure only expects to see memory issues. But this is not the only failure. The user may attempt to attach to a non existent event, and the kernel must not warn about it. Link: https://lkml.kernel.org/r/20211027120854.0680aa0f@gandalf.local.home Fixes: 7491e2c442781 ("tracing: Add a probe that attaches to trace events") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_eprobe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index c4a15aef36af..5c5f208c15d3 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -904,8 +904,8 @@ static int __trace_eprobe_create(int argc, const char *argv[]) if (IS_ERR(ep)) { ret = PTR_ERR(ep); - /* This must return -ENOMEM, else there is a bug */ - WARN_ON_ONCE(ret != -ENOMEM); + /* This must return -ENOMEM or misssing event, else there is a bug */ + WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV); ep = NULL; goto error; } -- cgit v1.2.3 From 3f4e54bd312d3dafb59daf2b97ffa08abebe60f5 Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Wed, 27 Oct 2021 16:27:30 +0200 Subject: drm/amdgpu: Fix even more out of bound writes from debugfs CVE-2021-42327 was fixed by: commit f23750b5b3d98653b31d4469592935ef6364ad67 Author: Thelford Williams Date: Wed Oct 13 16:04:13 2021 -0400 drm/amdgpu: fix out of bounds write but amdgpu_dm_debugfs.c contains more of the same issue so fix the remaining ones. v2: * Add missing fix in dp_max_bpc_write (Harry Wentland) Fixes: 918698d5c2b5 ("drm/amd/display: Return the number of bytes parsed than allocated") Signed-off-by: Patrik Jakobsson Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 17f2756a64dc..8080bba5b7a7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -487,7 +487,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf, if (!wr_buf) return -ENOSPC; - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -639,7 +639,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us if (!wr_buf) return -ENOSPC; - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -914,7 +914,7 @@ static ssize_t dp_dsc_passthrough_set(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, ¶m, buf, max_param_num, ¶m_nums)) { @@ -1211,7 +1211,7 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -1396,7 +1396,7 @@ static ssize_t dp_dsc_clock_en_write(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -1581,7 +1581,7 @@ static ssize_t dp_dsc_slice_width_write(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -1766,7 +1766,7 @@ static ssize_t dp_dsc_slice_height_write(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -1944,7 +1944,7 @@ static ssize_t dp_dsc_bits_per_pixel_write(struct file *f, const char __user *bu return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { @@ -2382,7 +2382,7 @@ static ssize_t dp_max_bpc_write(struct file *f, const char __user *buf, return -ENOSPC; } - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { -- cgit v1.2.3 From 54149d13f369e1ab02f36b91feee02069184c1d8 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 21 Oct 2021 13:27:16 -0400 Subject: drm/amd/display: Fallback to clocks which meet requested voltage on DCN31 [WHY] On certain configs, SMU clock table voltages don't match which cause parser to behave incorrectly by leaving dcfclk and socclk table entries unpopulated. [HOW] Currently the function that finds the corresponding clock for a given voltage only checks for exact voltage level matches. In the case that no match gets found, parser now falls back to searching for the max clock which meets the requested voltage (i.e. its corresponding voltage is below requested). Signed-off-by: Michael Strauss Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 035ba0ef6369..377c4e53a2b3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -518,14 +518,21 @@ static unsigned int find_clk_for_voltage( unsigned int voltage) { int i; + int max_voltage = 0; + int clock = 0; for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) { - if (clock_table->SocVoltage[i] == voltage) + if (clock_table->SocVoltage[i] == voltage) { return clocks[i]; + } else if (clock_table->SocVoltage[i] >= max_voltage && + clock_table->SocVoltage[i] < voltage) { + max_voltage = clock_table->SocVoltage[i]; + clock = clocks[i]; + } } - ASSERT(0); - return 0; + ASSERT(clock); + return clock; } void dcn31_clk_mgr_helper_populate_bw_params( -- cgit v1.2.3 From ad76744b041d8c87ef1c9adbb04fb7eaa20a179e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 22 Oct 2021 16:14:24 -0400 Subject: drm/amd/display: Fix deadlock when falling back to v2 from v3 [Why] A deadlock in the kernel occurs when we fallback from the V3 to V2 add_topology_to_display or remove_topology_to_display because they both try to acquire the dtm_mutex but recursive locking isn't supported on mutex_lock(). [How] Make the mutex_lock/unlock more fine grained and move them up such that they're only required for the psp invocation itself. Fixes: bf62221e9d0e ("drm/amd/display: Add DCN3.1 HDCP support") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Aric Cyr Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index e9bd84ec027d..be61975f1470 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -105,6 +105,7 @@ static enum mod_hdcp_status remove_display_from_topology_v3( dtm_cmd->dtm_status = TA_DTM_STATUS__GENERIC_FAILURE; psp_dtm_invoke(psp, dtm_cmd->cmd_id); + mutex_unlock(&psp->dtm_context.mutex); if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) { status = remove_display_from_topology_v2(hdcp, index); @@ -115,8 +116,6 @@ static enum mod_hdcp_status remove_display_from_topology_v3( HDCP_TOP_REMOVE_DISPLAY_TRACE(hdcp, display->index); } - mutex_unlock(&psp->dtm_context.mutex); - return status; } @@ -205,6 +204,7 @@ static enum mod_hdcp_status add_display_to_topology_v3( dtm_cmd->dtm_in_message.topology_update_v3.link_hdcp_cap = link->hdcp_supported_informational; psp_dtm_invoke(psp, dtm_cmd->cmd_id); + mutex_unlock(&psp->dtm_context.mutex); if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) { status = add_display_to_topology_v2(hdcp, display); @@ -214,8 +214,6 @@ static enum mod_hdcp_status add_display_to_topology_v3( HDCP_TOP_ADD_DISPLAY_TRACE(hdcp, display->index); } - mutex_unlock(&psp->dtm_context.mutex); - return status; } -- cgit v1.2.3 From 9a4aa3a2f1606a03c220b21049baa4a2b6169626 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Wed, 27 Oct 2021 12:32:55 +0300 Subject: drm/i915: Revert 'guc_id' from i915_request tracepoint Avoid adding backend specific data to the tracepoints outside of the LOW_LEVEL_TRACEPOINTS kernel config protection. These bits of information are bound to change depending on the selected submission method per platform and are not necessarily possible to maintain in the future. Fixes: dbf9da8d55ef ("drm/i915/guc: Add trace point for GuC submit") Signed-off-by: Joonas Lahtinen Cc: John Harrison Cc: Matthew Brost Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Matt Roper Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20211027093255.66489-1-joonas.lahtinen@linux.intel.com (cherry picked from commit 64512a66b67e6546e2db15192b3603cd6d58b75c) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_trace.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 806ad688274b..63fec1c3c132 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -794,7 +794,6 @@ DECLARE_EVENT_CLASS(i915_request, TP_STRUCT__entry( __field(u32, dev) __field(u64, ctx) - __field(u32, guc_id) __field(u16, class) __field(u16, instance) __field(u32, seqno) @@ -805,16 +804,14 @@ DECLARE_EVENT_CLASS(i915_request, __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; - __entry->guc_id = rq->context->guc_id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; __entry->tail = rq->tail; ), - TP_printk("dev=%u, engine=%u:%u, guc_id=%u, ctx=%llu, seqno=%u, tail=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, tail=%u", __entry->dev, __entry->class, __entry->instance, - __entry->guc_id, __entry->ctx, __entry->seqno, - __entry->tail) + __entry->ctx, __entry->seqno, __entry->tail) ); DEFINE_EVENT(i915_request, i915_request_add, -- cgit v1.2.3 From e8a1ff65927080278e6826f797b7c197fb2611a6 Mon Sep 17 00:00:00 2001 From: Wenbin Mei Date: Thu, 28 Oct 2021 10:20:49 +0800 Subject: mmc: mediatek: Move cqhci init behind ungate clock We must enable clock before cqhci init, because crypto needs read information from CQHCI registers, otherwise, it will hang in MediaTek mmc host controller. Signed-off-by: Wenbin Mei Fixes: 88bd652b3c74 ("mmc: mediatek: command queue support") Cc: stable@vger.kernel.org Acked-by: Chaotian Jing Link: https://lore.kernel.org/r/20211028022049.22129-1-wenbin.mei@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 4dfc246c5f95..b06b4dcb7c78 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2577,6 +2577,25 @@ static int msdc_drv_probe(struct platform_device *pdev) host->dma_mask = DMA_BIT_MASK(32); mmc_dev(mmc)->dma_mask = &host->dma_mask; + host->timeout_clks = 3 * 1048576; + host->dma.gpd = dma_alloc_coherent(&pdev->dev, + 2 * sizeof(struct mt_gpdma_desc), + &host->dma.gpd_addr, GFP_KERNEL); + host->dma.bd = dma_alloc_coherent(&pdev->dev, + MAX_BD_NUM * sizeof(struct mt_bdma_desc), + &host->dma.bd_addr, GFP_KERNEL); + if (!host->dma.gpd || !host->dma.bd) { + ret = -ENOMEM; + goto release_mem; + } + msdc_init_gpd_bd(host, &host->dma); + INIT_DELAYED_WORK(&host->req_timeout, msdc_request_timeout); + spin_lock_init(&host->lock); + + platform_set_drvdata(pdev, mmc); + msdc_ungate_clock(host); + msdc_init_hw(host); + if (mmc->caps2 & MMC_CAP2_CQE) { host->cq_host = devm_kzalloc(mmc->parent, sizeof(*host->cq_host), @@ -2597,25 +2616,6 @@ static int msdc_drv_probe(struct platform_device *pdev) mmc->max_seg_size = 64 * 1024; } - host->timeout_clks = 3 * 1048576; - host->dma.gpd = dma_alloc_coherent(&pdev->dev, - 2 * sizeof(struct mt_gpdma_desc), - &host->dma.gpd_addr, GFP_KERNEL); - host->dma.bd = dma_alloc_coherent(&pdev->dev, - MAX_BD_NUM * sizeof(struct mt_bdma_desc), - &host->dma.bd_addr, GFP_KERNEL); - if (!host->dma.gpd || !host->dma.bd) { - ret = -ENOMEM; - goto release_mem; - } - msdc_init_gpd_bd(host, &host->dma); - INIT_DELAYED_WORK(&host->req_timeout, msdc_request_timeout); - spin_lock_init(&host->lock); - - platform_set_drvdata(pdev, mmc); - msdc_ungate_clock(host); - msdc_init_hw(host); - ret = devm_request_irq(&pdev->dev, host->irq, msdc_irq, IRQF_TRIGGER_NONE, pdev->name, host); if (ret) -- cgit v1.2.3 From 9159f102402a64ac85e676b75cc1f9c62c5b4b73 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Tue, 26 Oct 2021 14:50:31 -0700 Subject: vmxnet3: do not stop tx queues after netif_device_detach() The netif_device_detach() conditionally stops all tx queues if the queues are running. There is no need to call netif_tx_stop_all_queues() again. Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 142f70670f5c..8799854bacb2 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3833,7 +3833,6 @@ vmxnet3_suspend(struct device *device) vmxnet3_free_intr_resources(adapter); netif_device_detach(netdev); - netif_tx_stop_all_queues(netdev); /* Create wake-up filters. */ pmConf = adapter->pm_conf; -- cgit v1.2.3 From 90a881fc352a953f1c8beb61977a2db0818157d4 Mon Sep 17 00:00:00 2001 From: Yu Xiao Date: Thu, 28 Oct 2021 12:00:36 +0200 Subject: nfp: bpf: relax prog rejection for mtu check through max_pkt_offset MTU change is refused whenever the value of new MTU is bigger than the max packet bytes that fits in NFP Cluster Target Memory (CTM). However, an eBPF program doesn't always need to access the whole packet data. The maximum direct packet access (DPA) offset has always been caculated by verifier and stored in the max_pkt_offset field of prog aux data. Signed-off-by: Yu Xiao Reviewed-by: Yinjun Zhang Reviewed-by: Niklas Soderlund Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 16 +++++++++++----- drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 ++ drivers/net/ethernet/netronome/nfp/bpf/offload.c | 17 +++++++++++++---- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 11c83a99b014..f469950c7265 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -182,15 +182,21 @@ static int nfp_bpf_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) { struct nfp_net *nn = netdev_priv(netdev); - unsigned int max_mtu; + struct nfp_bpf_vnic *bv; + struct bpf_prog *prog; if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) return 0; - max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - if (new_mtu > max_mtu) { - nn_info(nn, "BPF offload active, MTU over %u not supported\n", - max_mtu); + if (nn->xdp_hw.prog) { + prog = nn->xdp_hw.prog; + } else { + bv = nn->app_priv; + prog = bv->tc_prog; + } + + if (nfp_bpf_offload_check_mtu(nn, prog, new_mtu)) { + nn_info(nn, "BPF offload active, potential packet access beyond hardware packet boundary"); return -EBUSY; } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index d0e17eebddd9..16841bb750b7 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -560,6 +560,8 @@ bool nfp_is_subprog_start(struct nfp_insn_meta *meta); void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog); int nfp_bpf_jit(struct nfp_prog *prog); bool nfp_bpf_supported_opcode(u8 code); +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu); int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 53851853562c..9d97cd281f18 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -481,19 +481,28 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, return 0; } +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu) +{ + unsigned int fw_mtu, pkt_off; + + fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + pkt_off = min(prog->aux->max_pkt_offset, mtu); + + return fw_mtu < pkt_off; +} + static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, struct netlink_ext_ack *extack) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - unsigned int fw_mtu, pkt_off, max_stack, max_prog_len; + unsigned int max_stack, max_prog_len; dma_addr_t dma_addr; void *img; int err; - fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - pkt_off = min(prog->aux->max_pkt_offset, nn->dp.netdev->mtu); - if (fw_mtu < pkt_off) { + if (nfp_bpf_offload_check_mtu(nn, prog, nn->dp.netdev->mtu)) { NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with potential packet access beyond HW packet split boundary"); return -EOPNOTSUPP; } -- cgit v1.2.3 From c4a146c7cf5e8ad76231523b174d161bf152c6e7 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Thu, 28 Oct 2021 15:13:45 +0800 Subject: net/smc: Fix smc_link->llc_testlink_time overflow The value of llc_testlink_time is set to the value stored in net->ipv4.sysctl_tcp_keepalive_time when linkgroup init. The value of sysctl_tcp_keepalive_time is already jiffies, so we don't need to multiply by HZ, which would cause smc_link->llc_testlink_time overflow, and test_link send flood. Signed-off-by: Tony Lu Reviewed-by: Xuan Zhuo Reviewed-by: Wen Gu Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 72f4b72eb175..f1d323439a2a 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1822,7 +1822,7 @@ void smc_llc_link_active(struct smc_link *link) link->smcibdev->ibdev->name, link->ibport); link->state = SMC_LNK_ACTIVE; if (link->lgr->llc_testlink_time) { - link->llc_testlink_time = link->lgr->llc_testlink_time * HZ; + link->llc_testlink_time = link->lgr->llc_testlink_time; schedule_delayed_work(&link->llc_testlink_wrk, link->llc_testlink_time); } -- cgit v1.2.3 From f3a3a0fe0b644582fa5d83dd94b398f99fc57914 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 28 Oct 2021 15:13:47 +0800 Subject: net/smc: Correct spelling mistake to TCPF_SYN_RECV There should use TCPF_SYN_RECV instead of TCP_SYN_RECV. Signed-off-by: Wen Gu Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- net/smc/af_smc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c038efc23ce3..78b663dbfa1f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1057,7 +1057,7 @@ static void smc_connect_work(struct work_struct *work) if (smc->clcsock->sk->sk_err) { smc->sk.sk_err = smc->clcsock->sk->sk_err; } else if ((1 << smc->clcsock->sk->sk_state) & - (TCPF_SYN_SENT | TCP_SYN_RECV)) { + (TCPF_SYN_SENT | TCPF_SYN_RECV)) { rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo); if ((rc == -EPIPE) && ((1 << smc->clcsock->sk->sk_state) & -- cgit v1.2.3 From da353fac65fede6b8b4cfe207f0d9408e3121105 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Wed, 27 Oct 2021 17:59:20 -0400 Subject: net/tls: Fix flipped sign in tls_err_abort() calls sk->sk_err appears to expect a positive value, a convention that ktls doesn't always follow and that leads to memory corruption in other code. For instance, [kworker] tls_encrypt_done(..., err=) tls_err_abort(.., err) sk->sk_err = err; [task] splice_from_pipe_feed ... tls_sw_do_sendpage if (sk->sk_err) { ret = -sk->sk_err; // ret is positive splice_from_pipe_feed (continued) ret = actor(...) // ret is still positive and interpreted as bytes // written, resulting in underflow of buf->len and // sd->len, leading to huge buf->offset and bogus // addresses computed in later calls to actor() Fix all tls_err_abort() callers to pass a negative error code consistently and centralize the error-prone sign flip there, throwing in a warning to catch future misuse and uninlining the function so it really does only warn once. Cc: stable@vger.kernel.org Fixes: c46234ebb4d1e ("tls: RX path for ktls") Reported-by: syzbot+b187b77c8474f9648fae@syzkaller.appspotmail.com Signed-off-by: Daniel Jordan Signed-off-by: David S. Miller --- include/net/tls.h | 9 ++------- net/tls/tls_sw.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 01d2e3744393..1fffb206f09f 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -358,6 +358,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); int tls_sk_attach(struct sock *sk, int optname, char __user *optval, unsigned int optlen); +void tls_err_abort(struct sock *sk, int err); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); @@ -466,12 +467,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) #endif } -static inline void tls_err_abort(struct sock *sk, int err) -{ - sk->sk_err = err; - sk_error_report(sk); -} - static inline bool tls_bigint_increment(unsigned char *seq, int len) { int i; @@ -512,7 +507,7 @@ static inline void tls_advance_record_sn(struct sock *sk, struct cipher_context *ctx) { if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (prot->version != TLS_1_3_VERSION && prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d5d09bd817b7..1644f8baea19 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -35,6 +35,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -43,6 +44,14 @@ #include #include +noinline void tls_err_abort(struct sock *sk, int err) +{ + WARN_ON_ONCE(err >= 0); + /* sk->sk_err should contain a positive error code. */ + sk->sk_err = -err; + sk_error_report(sk); +} + static int __skb_nsg(struct sk_buff *skb, int offset, int len, unsigned int recursion_level) { @@ -419,7 +428,7 @@ int tls_tx_records(struct sock *sk, int flags) tx_err: if (rc < 0 && rc != -EAGAIN) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); return rc; } @@ -763,7 +772,7 @@ static int tls_push_record(struct sock *sk, int flags, msg_pl->sg.size + prot->tail_size, i); if (rc < 0) { if (rc != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (split) { tls_ctx->pending_open_record_frags = true; tls_merge_open_record(sk, rec, tmp, orig_end); @@ -1827,7 +1836,7 @@ int tls_sw_recvmsg(struct sock *sk, err = decrypt_skb_update(sk, skb, &msg->msg_iter, &chunk, &zc, async_capable); if (err < 0 && err != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); goto recv_end; } @@ -2007,7 +2016,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, } if (err < 0) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); goto splice_read_end; } ctx->decrypted = 1; -- cgit v1.2.3 From 1d9d6fd21ad4a28b16ed9ee5432ae738b9dc58aa Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Wed, 27 Oct 2021 17:59:21 -0400 Subject: net/tls: Fix flipped sign in async_wait.err assignment sk->sk_err contains a positive number, yet async_wait.err wants the opposite. Fix the missed sign flip, which Jakub caught by inspection. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Suggested-by: Jakub Kicinski Signed-off-by: Daniel Jordan Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1644f8baea19..1b08b877a890 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -459,7 +459,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) /* If err is already set on socket, return the same code */ if (sk->sk_err) { - ctx->async_wait.err = sk->sk_err; + ctx->async_wait.err = -sk->sk_err; } else { ctx->async_wait.err = err; tls_err_abort(sk, err); -- cgit v1.2.3 From e8684db191e4164f3f5f3ad7dec04a6734c25f1c Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Wed, 27 Oct 2021 14:23:02 -0400 Subject: net: ethernet: microchip: lan743x: Fix skb allocation failure The driver allocates skb during ndo_open with GFP_ATOMIC which has high chance of failure when there are multiple instances. GFP_KERNEL is enough while open and use GFP_ATOMIC only from interrupt context. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index c4f32c7e0db1..4d5a5d6595b3 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1944,7 +1944,8 @@ static void lan743x_rx_update_tail(struct lan743x_rx *rx, int index) index); } -static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index) +static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index, + gfp_t gfp) { struct net_device *netdev = rx->adapter->netdev; struct device *dev = &rx->adapter->pdev->dev; @@ -1958,7 +1959,7 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index) descriptor = &rx->ring_cpu_ptr[index]; buffer_info = &rx->buffer_info[index]; - skb = __netdev_alloc_skb(netdev, buffer_length, GFP_ATOMIC | GFP_DMA); + skb = __netdev_alloc_skb(netdev, buffer_length, gfp); if (!skb) return -ENOMEM; dma_ptr = dma_map_single(dev, skb->data, buffer_length, DMA_FROM_DEVICE); @@ -2120,7 +2121,8 @@ static int lan743x_rx_process_buffer(struct lan743x_rx *rx) /* save existing skb, allocate new skb and map to dma */ skb = buffer_info->skb; - if (lan743x_rx_init_ring_element(rx, rx->last_head)) { + if (lan743x_rx_init_ring_element(rx, rx->last_head, + GFP_ATOMIC | GFP_DMA)) { /* failed to allocate next skb. * Memory is very low. * Drop this packet and reuse buffer. @@ -2335,13 +2337,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) rx->last_head = 0; for (index = 0; index < rx->ring_size; index++) { - ret = lan743x_rx_init_ring_element(rx, index); + ret = lan743x_rx_init_ring_element(rx, index, GFP_KERNEL); if (ret) goto cleanup; } return 0; cleanup: + netif_warn(rx->adapter, ifup, rx->adapter->netdev, + "Error allocating memory for LAN743x\n"); + lan743x_rx_ring_cleanup(rx); return ret; } -- cgit v1.2.3 From cc45b96e2de7ada26520f101dada0abafa4ba997 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 27 Oct 2021 23:02:32 +0530 Subject: octeontx2-af: Check whether ipolicers exists While displaying ingress policers information in debugfs check whether ingress policers exist in the hardware or not because some platforms(CN9XXX) do not have this feature. Fixes: e7d8971763f3 ("octeontx2-af: cn10k: Debugfs support for bandwidth") Signed-off-by: Subbaraya Sundeep Signed-off-by: Rakesh Babu Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 9338765da048..6c589ca9b577 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1719,6 +1719,10 @@ static int rvu_dbg_nix_band_prof_ctx_display(struct seq_file *m, void *unused) u16 pcifunc; char *str; + /* Ingress policers do not exist on all platforms */ + if (!nix_hw->ipolicer) + return 0; + for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) { if (layer == BAND_PROF_INVAL_LAYER) continue; @@ -1768,6 +1772,10 @@ static int rvu_dbg_nix_band_prof_rsrc_display(struct seq_file *m, void *unused) int layer; char *str; + /* Ingress policers do not exist on all platforms */ + if (!nix_hw->ipolicer) + return 0; + seq_puts(m, "\nBandwidth profile resource free count\n"); seq_puts(m, "=====================================\n"); for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) { -- cgit v1.2.3 From e77bcdd1f639809950c45234b08647ac6d3ffe7b Mon Sep 17 00:00:00 2001 From: Rakesh Babu Date: Wed, 27 Oct 2021 23:02:33 +0530 Subject: octeontx2-af: Display all enabled PF VF rsrc_alloc entries. Currently, we are using a fixed buffer size of length 2048 to display rsrc_alloc output. As a result a maximum of 2048 characters of rsrc_alloc output is displayed, which may lead sometimes to display only partial output. This patch fixes this dependency on max limit of buffer size and displays all PF VF entries. Each column of the debugfs entry "rsrc_alloc" uses a fixed width of 12 characters to print the list of LFs of each block for a PF/VF. If the length of list of LFs of a block exceeds this fixed width then the list gets truncated and displays only a part of the list. This patch fixes this by using the maximum possible length of list of LFs among all blocks of all PFs and VFs entries as the width size. Fixes: f7884097141b ("octeontx2-af: Formatting debugfs entry rsrc_alloc.") Fixes: 23205e6d06d4 ("octeontx2-af: Dump current resource provisioning status") Signed-off-by: Rakesh Babu Signed-off-by: Nithin Dabilpuram Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/af/rvu_debugfs.c | 138 ++++++++++++++++----- 1 file changed, 106 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 6c589ca9b577..c7e12464c243 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -226,18 +226,85 @@ static const struct file_operations rvu_dbg_##name##_fops = { \ static void print_nix_qsize(struct seq_file *filp, struct rvu_pfvf *pfvf); +static void get_lf_str_list(struct rvu_block block, int pcifunc, + char *lfs) +{ + int lf = 0, seq = 0, len = 0, prev_lf = block.lf.max; + + for_each_set_bit(lf, block.lf.bmap, block.lf.max) { + if (lf >= block.lf.max) + break; + + if (block.fn_map[lf] != pcifunc) + continue; + + if (lf == prev_lf + 1) { + prev_lf = lf; + seq = 1; + continue; + } + + if (seq) + len += sprintf(lfs + len, "-%d,%d", prev_lf, lf); + else + len += (len ? sprintf(lfs + len, ",%d", lf) : + sprintf(lfs + len, "%d", lf)); + + prev_lf = lf; + seq = 0; + } + + if (seq) + len += sprintf(lfs + len, "-%d", prev_lf); + + lfs[len] = '\0'; +} + +static int get_max_column_width(struct rvu *rvu) +{ + int index, pf, vf, lf_str_size = 12, buf_size = 256; + struct rvu_block block; + u16 pcifunc; + char *buf; + + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (pf = 0; pf < rvu->hw->total_pfs; pf++) { + for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { + pcifunc = pf << 10 | vf; + if (!pcifunc) + continue; + + for (index = 0; index < BLK_COUNT; index++) { + block = rvu->hw->block[index]; + if (!strlen(block.name)) + continue; + + get_lf_str_list(block, pcifunc, buf); + if (lf_str_size <= strlen(buf)) + lf_str_size = strlen(buf) + 1; + } + } + } + + kfree(buf); + return lf_str_size; +} + /* Dumps current provisioning status of all RVU block LFs */ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { - int index, off = 0, flag = 0, go_back = 0, len = 0; + int index, off = 0, flag = 0, len = 0, i = 0; struct rvu *rvu = filp->private_data; - int lf, pf, vf, pcifunc; + int bytes_not_copied = 0; struct rvu_block block; - int bytes_not_copied; - int lf_str_size = 12; + int pf, vf, pcifunc; int buf_size = 2048; + int lf_str_size; char *lfs; char *buf; @@ -249,6 +316,9 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, if (!buf) return -ENOSPC; + /* Get the maximum width of a column */ + lf_str_size = get_max_column_width(rvu); + lfs = kzalloc(lf_str_size, GFP_KERNEL); if (!lfs) { kfree(buf); @@ -262,65 +332,69 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, "%-*s", lf_str_size, rvu->hw->block[index].name); } + off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); + bytes_not_copied = copy_to_user(buffer + (i * off), buf, off); + if (bytes_not_copied) + goto out; + + i++; + *ppos += off; for (pf = 0; pf < rvu->hw->total_pfs; pf++) { for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { + off = 0; + flag = 0; pcifunc = pf << 10 | vf; if (!pcifunc) continue; if (vf) { sprintf(lfs, "PF%d:VF%d", pf, vf - 1); - go_back = scnprintf(&buf[off], - buf_size - 1 - off, - "%-*s", lf_str_size, lfs); + off = scnprintf(&buf[off], + buf_size - 1 - off, + "%-*s", lf_str_size, lfs); } else { sprintf(lfs, "PF%d", pf); - go_back = scnprintf(&buf[off], - buf_size - 1 - off, - "%-*s", lf_str_size, lfs); + off = scnprintf(&buf[off], + buf_size - 1 - off, + "%-*s", lf_str_size, lfs); } - off += go_back; - for (index = 0; index < BLKTYPE_MAX; index++) { + for (index = 0; index < BLK_COUNT; index++) { block = rvu->hw->block[index]; if (!strlen(block.name)) continue; len = 0; lfs[len] = '\0'; - for (lf = 0; lf < block.lf.max; lf++) { - if (block.fn_map[lf] != pcifunc) - continue; + get_lf_str_list(block, pcifunc, lfs); + if (strlen(lfs)) flag = 1; - len += sprintf(&lfs[len], "%d,", lf); - } - if (flag) - len--; - lfs[len] = '\0'; off += scnprintf(&buf[off], buf_size - 1 - off, "%-*s", lf_str_size, lfs); - if (!strlen(lfs)) - go_back += lf_str_size; } - if (!flag) - off -= go_back; - else - flag = 0; - off--; - off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); + if (flag) { + off += scnprintf(&buf[off], + buf_size - 1 - off, "\n"); + bytes_not_copied = copy_to_user(buffer + + (i * off), + buf, off); + if (bytes_not_copied) + goto out; + + i++; + *ppos += off; + } } } - bytes_not_copied = copy_to_user(buffer, buf, off); +out: kfree(lfs); kfree(buf); - if (bytes_not_copied) return -EFAULT; - *ppos = off; - return off; + return *ppos; } RVU_DEBUG_FOPS(rsrc_status, rsrc_attach_status, NULL); -- cgit v1.2.3 From c2d4c543f74c90f883e8ec62a31973ae8807d354 Mon Sep 17 00:00:00 2001 From: Rakesh Babu Saladi Date: Wed, 27 Oct 2021 23:02:34 +0530 Subject: octeontx2-af: Fix possible null pointer dereference. This patch fixes possible null pointer dereference in files "rvu_debugfs.c" and "rvu_nix.c" Fixes: 8756828a8148 ("octeontx2-af: Add NPA aura and pool contexts to debugfs") Fixes: 9a946def264d ("octeontx2-af: Modify nix_vtag_cfg mailbox to support TX VTAG entries") Signed-off-by: Rakesh Babu Saladi Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 2 +- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index c7e12464c243..49d822a98ada 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -578,7 +578,7 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, if (cmd_buf) ret = -EINVAL; - if (!strncmp(subtoken, "help", 4) || ret < 0) { + if (ret < 0 || !strncmp(subtoken, "help", 4)) { dev_info(rvu->dev, "Use echo <%s-lf > qsize\n", blk_string); goto qsize_write_done; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 9ef4e942e31e..6970540dc470 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2507,6 +2507,9 @@ static void nix_free_tx_vtag_entries(struct rvu *rvu, u16 pcifunc) return; nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + vlan = &nix_hw->txvlan; mutex_lock(&vlan->rsrc_lock); -- cgit v1.2.3 From f3d1436d4bf8ced1c9a62a045d193a65567e1fcc Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 26 Oct 2021 04:12:38 +0100 Subject: KVM: x86: Take srcu lock in post_kvm_run_save() The Xen interrupt injection for event channels relies on accessing the guest's vcpu_info structure in __kvm_xen_has_interrupt(), through a gfn_to_hva_cache. This requires the srcu lock to be held, which is mostly the case except for this code path: [ 11.822877] WARNING: suspicious RCU usage [ 11.822965] ----------------------------- [ 11.823013] include/linux/kvm_host.h:664 suspicious rcu_dereference_check() usage! [ 11.823131] [ 11.823131] other info that might help us debug this: [ 11.823131] [ 11.823196] [ 11.823196] rcu_scheduler_active = 2, debug_locks = 1 [ 11.823253] 1 lock held by dom:0/90: [ 11.823292] #0: ffff998956ec8118 (&vcpu->mutex){+.+.}, at: kvm_vcpu_ioctl+0x85/0x680 [ 11.823379] [ 11.823379] stack backtrace: [ 11.823428] CPU: 2 PID: 90 Comm: dom:0 Kdump: loaded Not tainted 5.4.34+ #5 [ 11.823496] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 11.823612] Call Trace: [ 11.823645] dump_stack+0x7a/0xa5 [ 11.823681] lockdep_rcu_suspicious+0xc5/0x100 [ 11.823726] __kvm_xen_has_interrupt+0x179/0x190 [ 11.823773] kvm_cpu_has_extint+0x6d/0x90 [ 11.823813] kvm_cpu_accept_dm_intr+0xd/0x40 [ 11.823853] kvm_vcpu_ready_for_interrupt_injection+0x20/0x30 < post_kvm_run_save() inlined here > [ 11.823906] kvm_arch_vcpu_ioctl_run+0x135/0x6a0 [ 11.823947] kvm_vcpu_ioctl+0x263/0x680 Fixes: 40da8ccd724f ("KVM: x86/xen: Add event channel interrupt vector upcall") Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org Message-Id: <606aaaf29fca3850a63aa4499826104e77a72346.camel@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5c82eff19e4a..ffd9dd7d5d14 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8783,9 +8783,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); + + /* + * The call to kvm_ready_for_interrupt_injection() may end up in + * kvm_xen_has_interrupt() which may require the srcu lock to be + * held, to protect against changes in the vcpu_info address. + */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); kvm_run->ready_for_interrupt_injection = pic_in_kernel(vcpu->kvm) || kvm_vcpu_ready_for_interrupt_injection(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (is_smm(vcpu)) kvm_run->flags |= KVM_RUN_X86_SMM; -- cgit v1.2.3 From 27de809a3d83a6199664479ebb19712533d6fd9b Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Thu, 28 Oct 2021 14:51:15 +0200 Subject: riscv, bpf: Fix potential NULL dereference The bpf_jit_binary_free() function requires a non-NULL argument. When the RISC-V BPF JIT fails to converge in NR_JIT_ITERATIONS steps, jit_data->header will be NULL, which triggers a NULL dereference. Avoid this by checking the argument, prior calling the function. Fixes: ca6cb5447cec ("riscv, bpf: Factor common RISC-V JIT code") Signed-off-by: Björn Töpel Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20211028125115.514587-1-bjorn@kernel.org Signed-off-by: Jakub Kicinski --- arch/riscv/net/bpf_jit_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 0fee2cbaaf53..753d85bdfad0 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -125,7 +125,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (i == NR_JIT_ITERATIONS) { pr_err("bpf-jit: image did not converge in <%d passes!\n", i); - bpf_jit_binary_free(jit_data->header); + if (jit_data->header) + bpf_jit_binary_free(jit_data->header); prog = orig_prog; goto out_offset; } -- cgit v1.2.3 From f7cc8890f30d3ddc785e2b2ddc647da5b4b3c3ec Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 27 Oct 2021 13:38:55 -0700 Subject: mptcp: fix corrupt receiver key in MPC + data + checksum using packetdrill it's possible to observe that the receiver key contains random values when clients transmit MP_CAPABLE with data and checksum (as specified in RFC8684 §3.1). Fix the layout of mptcp_out_options, to avoid using the skb extension copy when writing the MP_CAPABLE sub-option. Fixes: d7b269083786 ("mptcp: shrink mptcp_out_options struct") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/233 Reported-by: Poorva Sonparote Signed-off-by: Davide Caratti Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20211027203855.264600-1-mathew.j.martineau@linux.intel.com Signed-off-by: Jakub Kicinski --- include/net/mptcp.h | 4 ++++ net/mptcp/options.c | 39 ++++++++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 6026bbefbffd..3214848402ec 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -69,6 +69,10 @@ struct mptcp_out_options { struct { u64 sndr_key; u64 rcvr_key; + u64 data_seq; + u32 subflow_seq; + u16 data_len; + __sum16 csum; }; struct { struct mptcp_addr_info addr; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index c41273cefc51..f0f22eb4fd5f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -485,11 +485,11 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, mpext = mptcp_get_ext(skb); data_len = mpext ? mpext->data_len : 0; - /* we will check ext_copy.data_len in mptcp_write_options() to + /* we will check ops->data_len in mptcp_write_options() to * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and * TCPOLEN_MPTCP_MPC_ACK */ - opts->ext_copy.data_len = data_len; + opts->data_len = data_len; opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; @@ -505,9 +505,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) { /* we need to propagate more info to csum the pseudo hdr */ - opts->ext_copy.data_seq = mpext->data_seq; - opts->ext_copy.subflow_seq = mpext->subflow_seq; - opts->ext_copy.csum = mpext->csum; + opts->data_seq = mpext->data_seq; + opts->subflow_seq = mpext->subflow_seq; + opts->csum = mpext->csum; len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *size = ALIGN(len, 4); @@ -1227,7 +1227,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } -static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum) { struct csum_pseudo_header header; __wsum csum; @@ -1237,15 +1237,21 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext) * always the 64-bit value, irrespective of what length is used in the * DSS option itself. */ - header.data_seq = cpu_to_be64(mpext->data_seq); - header.subflow_seq = htonl(mpext->subflow_seq); - header.data_len = htons(mpext->data_len); + header.data_seq = cpu_to_be64(data_seq); + header.subflow_seq = htonl(subflow_seq); + header.data_len = htons(data_len); header.csum = 0; - csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum)); + csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum)); return (__force u16)csum_fold(csum); } +static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +{ + return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len, + mpext->csum); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { @@ -1337,7 +1343,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, len = TCPOLEN_MPTCP_MPC_SYN; } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; - } else if (opts->ext_copy.data_len) { + } else if (opts->data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) len += TCPOLEN_MPTCP_DSS_CHECKSUM; @@ -1366,14 +1372,17 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, put_unaligned_be64(opts->rcvr_key, ptr); ptr += 2; - if (!opts->ext_copy.data_len) + if (!opts->data_len) goto mp_capable_done; if (opts->csum_reqd) { - put_unaligned_be32(opts->ext_copy.data_len << 16 | - mptcp_make_csum(&opts->ext_copy), ptr); + put_unaligned_be32(opts->data_len << 16 | + __mptcp_make_csum(opts->data_seq, + opts->subflow_seq, + opts->data_len, + opts->csum), ptr); } else { - put_unaligned_be32(opts->ext_copy.data_len << 16 | + put_unaligned_be32(opts->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } ptr += 1; -- cgit v1.2.3 From 35392da51b1ab7ba0c63de0a553e2a93c2314266 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Thu, 28 Oct 2021 22:06:24 +0800 Subject: Revert "net: hns3: fix pause config problem after autoneg disabled" This reverts commit 3bda2e5df476417b6d08967e2d84234a59d57b1c. According to discussion with Andrew as follow: https://lore.kernel.org/netdev/09eda9fe-196b-006b-6f01-f54e75715961@huawei.com/ HNS3 driver needs to separate pause autoneg from general autoneg, so revert this incorrect patch. Signed-off-by: Guangbin Huang Link: https://lore.kernel.org/r/20211028140624.53149-1-huangguangbin2@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 - drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 33 ++++++---------------- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 30 -------------------- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 1 - 5 files changed, 10 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index da3a593f6a56..d701451596c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -568,7 +568,6 @@ struct hnae3_ae_ops { u32 *auto_neg, u32 *rx_en, u32 *tx_en); int (*set_pauseparam)(struct hnae3_handle *handle, u32 auto_neg, u32 rx_en, u32 tx_en); - int (*restore_pauseparam)(struct hnae3_handle *handle); int (*set_autoneg)(struct hnae3_handle *handle, bool enable); int (*get_autoneg)(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 7d92dd273ed7..5ebd96f6833d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -824,26 +824,6 @@ static int hns3_check_ksettings_param(const struct net_device *netdev, return 0; } -static int hns3_set_phy_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) -{ - struct hnae3_handle *handle = hns3_get_handle(netdev); - const struct hnae3_ae_ops *ops = handle->ae_algo->ops; - int ret; - - if (cmd->base.speed == SPEED_1000 && - cmd->base.autoneg == AUTONEG_DISABLE) - return -EINVAL; - - if (cmd->base.autoneg == AUTONEG_DISABLE && ops->restore_pauseparam) { - ret = ops->restore_pauseparam(handle); - if (ret) - return ret; - } - - return phy_ethtool_ksettings_set(netdev->phydev, cmd); -} - static int hns3_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { @@ -862,11 +842,16 @@ static int hns3_set_link_ksettings(struct net_device *netdev, cmd->base.autoneg, cmd->base.speed, cmd->base.duplex); /* Only support ksettings_set for netdev with phy attached for now */ - if (netdev->phydev) - return hns3_set_phy_link_ksettings(netdev, cmd); - else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && - ops->set_phy_link_ksettings) + if (netdev->phydev) { + if (cmd->base.speed == SPEED_1000 && + cmd->base.autoneg == AUTONEG_DISABLE) + return -EINVAL; + + return phy_ethtool_ksettings_set(netdev->phydev, cmd); + } else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && + ops->set_phy_link_ksettings) { return ops->set_phy_link_ksettings(handle, cmd); + } if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 269e579762b2..d891390d492f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10998,35 +10998,6 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, return -EOPNOTSUPP; } -static int hclge_restore_pauseparam(struct hnae3_handle *handle) -{ - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; - u32 auto_neg, rx_pause, tx_pause; - int ret; - - hclge_get_pauseparam(handle, &auto_neg, &rx_pause, &tx_pause); - /* when autoneg is disabled, the pause setting of phy has no effect - * unless the link goes down. - */ - ret = phy_suspend(hdev->hw.mac.phydev); - if (ret) - return ret; - - phy_set_asym_pause(hdev->hw.mac.phydev, rx_pause, tx_pause); - - ret = phy_resume(hdev->hw.mac.phydev); - if (ret) - return ret; - - ret = hclge_mac_pause_setup_hw(hdev); - if (ret) - dev_err(&hdev->pdev->dev, - "restore pauseparam error, ret = %d.\n", ret); - - return ret; -} - static void hclge_get_ksettings_an_result(struct hnae3_handle *handle, u8 *auto_neg, u32 *speed, u8 *duplex) { @@ -12990,7 +12961,6 @@ static const struct hnae3_ae_ops hclge_ops = { .halt_autoneg = hclge_halt_autoneg, .get_pauseparam = hclge_get_pauseparam, .set_pauseparam = hclge_set_pauseparam, - .restore_pauseparam = hclge_restore_pauseparam, .set_mtu = hclge_set_mtu, .reset_queue = hclge_reset_tqp, .get_stats = hclge_get_stats, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 124791e4bfee..95074e91a846 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1435,7 +1435,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) return 0; } -int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) +static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) { bool tx_en, rx_en; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 4b2c3a788980..2ee9b795f71d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -244,7 +244,6 @@ int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight); int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id, enum hclge_opcode_type cmd, struct hclge_tm_shaper_para *para); -int hclge_mac_pause_setup_hw(struct hclge_dev *hdev); int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id); int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id); int hclge_tm_get_pg_to_pri_map(struct hclge_dev *hdev, u8 pg_id, -- cgit v1.2.3 From b112166a894db446f47a8c31781b037f28ac1721 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 28 Oct 2021 19:08:57 +0200 Subject: MAINTAINERS: dri-devel is for all of drivers/gpu Somehow we only have a list of subdirectories, which apparently made it harder for folks to find the gpu maintainers. Fix that. References: https://lore.kernel.org/dri-devel/YXrAAZlxxStNFG%2FK@phenom.ffwll.local/ Signed-off-by: Daniel Vetter Cc: David Airlie Cc: Daniel Vetter Cc: Steven Rostedt Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20211028170857.4029606-1-daniel.vetter@ffwll.ch --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f26920f0fa65..c2fbfc53c749 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6147,8 +6147,7 @@ T: git git://anongit.freedesktop.org/drm/drm F: Documentation/devicetree/bindings/display/ F: Documentation/devicetree/bindings/gpu/ F: Documentation/gpu/ -F: drivers/gpu/drm/ -F: drivers/gpu/vga/ +F: drivers/gpu/ F: include/drm/ F: include/linux/vga* F: include/uapi/drm/ -- cgit v1.2.3 From 90935eb303e0d12f3d3d0383262e65290321f5f6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 28 Oct 2021 21:51:49 +0200 Subject: mmc: tmio: reenable card irqs after the reset callback The reset callback may clear the internal card detect interrupts, so make sure to reenable them if needed. Fixes: b4d86f37eacb ("mmc: renesas_sdhi: do hard reset if possible") Reported-by: Biju Das Signed-off-by: Wolfram Sang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211028195149.8003-1-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 7dfc26f48c18..e2affa52ef46 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -195,6 +195,10 @@ static void tmio_mmc_reset(struct tmio_mmc_host *host) sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, host->sdcard_irq_mask_all); host->sdcard_irq_mask = host->sdcard_irq_mask_all; + if (host->native_hotplug) + tmio_mmc_enable_mmc_irqs(host, + TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT); + tmio_mmc_set_bus_width(host, host->mmc->ios.bus_width); if (host->pdata->flags & TMIO_MMC_SDIO_IRQ) { @@ -956,8 +960,15 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) case MMC_POWER_OFF: tmio_mmc_power_off(host); /* For R-Car Gen2+, we need to reset SDHI specific SCC */ - if (host->pdata->flags & TMIO_MMC_MIN_RCAR2) + if (host->pdata->flags & TMIO_MMC_MIN_RCAR2) { host->reset(host); + + if (host->native_hotplug) + tmio_mmc_enable_mmc_irqs(host, + TMIO_STAT_CARD_REMOVE | + TMIO_STAT_CARD_INSERT); + } + host->set_clock(host, 0); break; case MMC_POWER_UP: @@ -1185,10 +1196,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host) _host->set_clock(_host, 0); tmio_mmc_reset(_host); - if (_host->native_hotplug) - tmio_mmc_enable_mmc_irqs(_host, - TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT); - spin_lock_init(&_host->lock); mutex_init(&_host->ios_lock); -- cgit v1.2.3 From 8dcb3060d81dbfa8d954a2ec64ef7ca330f5bb4d Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 28 Oct 2021 14:36:04 -0700 Subject: memcg: page_alloc: skip bulk allocator for __GFP_ACCOUNT Commit 5c1f4e690eec ("mm/vmalloc: switch to bulk allocator in __vmalloc_area_node()") switched to bulk page allocator for order 0 allocation backing vmalloc. However bulk page allocator does not support __GFP_ACCOUNT allocations and there are several users of kvmalloc(__GFP_ACCOUNT). For now make __GFP_ACCOUNT allocations bypass bulk page allocator. In future if there is workload that can be significantly improved with the bulk page allocator with __GFP_ACCCOUNT support, we can revisit the decision. Link: https://lkml.kernel.org/r/20211014151607.2171970-1-shakeelb@google.com Fixes: 5c1f4e690eec ("mm/vmalloc: switch to bulk allocator in __vmalloc_area_node()") Signed-off-by: Shakeel Butt Reported-by: Vasily Averin Tested-by: Vasily Averin Acked-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Roman Gushchin Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b37435c274cf..3ec39552d00f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5223,6 +5223,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (unlikely(page_array && nr_pages - nr_populated == 0)) goto out; + /* Bulk allocator does not support memcg accounting. */ + if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT)) + goto failed; + /* Use the single page allocator for one page. */ if (nr_pages - nr_populated == 1) goto failed; -- cgit v1.2.3 From c7cb42e94473aafe553c0f2a3d8ca904599399ed Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 28 Oct 2021 14:36:07 -0700 Subject: mm: hwpoison: remove the unnecessary THP check When handling THP hwpoison checked if the THP is in allocation or free stage since hwpoison may mistreat it as hugetlb page. After commit 415c64c1453a ("mm/memory-failure: split thp earlier in memory error handling") the problem has been fixed, so this check is no longer needed. Remove it. The side effect of the removal is hwpoison may report unsplit THP instead of unknown error for shmem THP. It seems not like a big deal. The following patch "mm: filemap: check if THP has hwpoisoned subpage for PMD page fault" depends on this, which fixes shmem THP with hwpoisoned subpage(s) are mapped PMD wrongly. So this patch needs to be backported to -stable as well. Link: https://lkml.kernel.org/r/20211020210755.23964-2-shy828301@gmail.com Signed-off-by: Yang Shi Suggested-by: Naoya Horiguchi Acked-by: Naoya Horiguchi Cc: Hugh Dickins Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Oscar Salvador Cc: Peter Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3e6449f2102a..73f68699e7ab 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1147,20 +1147,6 @@ static int __get_hwpoison_page(struct page *page) if (!HWPoisonHandlable(head)) return -EBUSY; - if (PageTransHuge(head)) { - /* - * Non anonymous thp exists only in allocation/free time. We - * can't handle such a case correctly, so let's give it up. - * This should be better than triggering BUG_ON when kernel - * tries to touch the "partially handled" page. - */ - if (!PageAnon(head)) { - pr_err("Memory failure: %#lx: non anonymous thp\n", - page_to_pfn(page)); - return 0; - } - } - if (get_page_unless_zero(head)) { if (head == compound_head(page)) return 1; -- cgit v1.2.3 From eac96c3efdb593df1a57bb5b95dbe037bfa9a522 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 28 Oct 2021 14:36:11 -0700 Subject: mm: filemap: check if THP has hwpoisoned subpage for PMD page fault When handling shmem page fault the THP with corrupted subpage could be PMD mapped if certain conditions are satisfied. But kernel is supposed to send SIGBUS when trying to map hwpoisoned page. There are two paths which may do PMD map: fault around and regular fault. Before commit f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") the thing was even worse in fault around path. The THP could be PMD mapped as long as the VMA fits regardless what subpage is accessed and corrupted. After this commit as long as head page is not corrupted the THP could be PMD mapped. In the regular fault path the THP could be PMD mapped as long as the corrupted page is not accessed and the VMA fits. This loophole could be fixed by iterating every subpage to check if any of them is hwpoisoned or not, but it is somewhat costly in page fault path. So introduce a new page flag called HasHWPoisoned on the first tail page. It indicates the THP has hwpoisoned subpage(s). It is set if any subpage of THP is found hwpoisoned by memory failure and after the refcount is bumped successfully, then cleared when the THP is freed or split. The soft offline path doesn't need this since soft offline handler just marks a subpage hwpoisoned when the subpage is migrated successfully. But shmem THP didn't get split then migrated at all. Link: https://lkml.kernel.org/r/20211020210755.23964-3-shy828301@gmail.com Fixes: 800d8c63b2e9 ("shmem: add huge pages support") Signed-off-by: Yang Shi Reviewed-by: Naoya Horiguchi Suggested-by: Kirill A. Shutemov Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Oscar Salvador Cc: Peter Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 23 +++++++++++++++++++++++ mm/huge_memory.c | 2 ++ mm/memory-failure.c | 14 ++++++++++++++ mm/memory.c | 9 +++++++++ mm/page_alloc.c | 4 +++- 5 files changed, 51 insertions(+), 1 deletion(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a558d67ee86f..fbfd3fad48f2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -171,6 +171,15 @@ enum pageflags { /* Compound pages. Stored in first tail page's flags */ PG_double_map = PG_workingset, +#ifdef CONFIG_MEMORY_FAILURE + /* + * Compound pages. Stored in first tail page's flags. + * Indicates that at least one subpage is hwpoisoned in the + * THP. + */ + PG_has_hwpoisoned = PG_mappedtodisk, +#endif + /* non-lru isolated movable page */ PG_isolated = PG_reclaim, @@ -668,6 +677,20 @@ PAGEFLAG_FALSE(DoubleMap) TESTSCFLAG_FALSE(DoubleMap) #endif +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +/* + * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the + * compound page. + * + * This flag is set by hwpoison handler. Cleared by THP split or free page. + */ +PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) + TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) +#else +PAGEFLAG_FALSE(HasHWPoisoned) + TESTSCFLAG_FALSE(HasHWPoisoned) +#endif + /* * Check if a page is currently marked HWPoisoned. Note that this check is * best effort only and inherently racy: there is no way to synchronize with diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 92192cb086c7..c5142d237e48 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2426,6 +2426,8 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ lruvec = lock_page_lruvec(head); + ClearPageHasHWPoisoned(head); + for (i = nr - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond EOF: drop them from page cache */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 73f68699e7ab..bdbbb32211a5 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1694,6 +1694,20 @@ try_again: } if (PageTransHuge(hpage)) { + /* + * The flag must be set after the refcount is bumped + * otherwise it may race with THP split. + * And the flag can't be set in get_hwpoison_page() since + * it is called by soft offline too and it is just called + * for !MF_COUNT_INCREASE. So here seems to be the best + * place. + * + * Don't need care about the above error handling paths for + * get_hwpoison_page() since they handle either free page + * or unhandlable page. The refcount is bumped iff the + * page is a valid handlable page. + */ + SetPageHasHWPoisoned(hpage); if (try_to_split_thp_page(p, "Memory Failure") < 0) { action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); res = -EBUSY; diff --git a/mm/memory.c b/mm/memory.c index adf9b9ef8277..c52be6d6b605 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3906,6 +3906,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) if (compound_order(page) != HPAGE_PMD_ORDER) return ret; + /* + * Just backoff if any subpage of a THP is corrupted otherwise + * the corrupted page may mapped by PMD silently to escape the + * check. This kind of THP just can be PTE mapped. Access to + * the corrupted subpage should trigger SIGBUS as expected. + */ + if (unlikely(PageHasHWPoisoned(page))) + return ret; + /* * Archs like ppc64 need additional space to store information * related to pte entry. Use the preallocated table for that. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3ec39552d00f..23d3339ac4e8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1312,8 +1312,10 @@ static __always_inline bool free_pages_prepare(struct page *page, VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); - if (compound) + if (compound) { ClearPageDoubleMap(page); + ClearPageHasHWPoisoned(page); + } for (i = 1; i < (1 << order); i++) { if (compound) bad += free_tail_pages_check(page, page + i); -- cgit v1.2.3 From 337546e83fc7e50917f44846beee936abb9c9f1f Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 28 Oct 2021 14:36:14 -0700 Subject: mm/oom_kill.c: prevent a race between process_mrelease and exit_mmap Race between process_mrelease and exit_mmap, where free_pgtables is called while __oom_reap_task_mm is in progress, leads to kernel crash during pte_offset_map_lock call. oom-reaper avoids this race by setting MMF_OOM_VICTIM flag and causing exit_mmap to take and release mmap_write_lock, blocking it until oom-reaper releases mmap_read_lock. Reusing MMF_OOM_VICTIM for process_mrelease would be the simplest way to fix this race, however that would be considered a hack. Fix this race by elevating mm->mm_users and preventing exit_mmap from executing until process_mrelease is finished. Patch slightly refactors the code to adapt for a possible mmget_not_zero failure. This fix has considerable negative impact on process_mrelease performance and will likely need later optimization. Link: https://lkml.kernel.org/r/20211022014658.263508-1-surenb@google.com Fixes: 884a7e5964e0 ("mm: introduce process_mrelease system call") Signed-off-by: Suren Baghdasaryan Acked-by: Michal Hocko Cc: David Rientjes Cc: Matthew Wilcox (Oracle) Cc: Johannes Weiner Cc: Roman Gushchin Cc: Rik van Riel Cc: Minchan Kim Cc: Christian Brauner Cc: Christoph Hellwig Cc: Oleg Nesterov Cc: David Hildenbrand Cc: Jann Horn Cc: Shakeel Butt Cc: Andy Lutomirski Cc: Christian Brauner Cc: Florian Weimer Cc: Jan Engelhardt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 831340e7ad8b..989f35a2bbb1 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1150,7 +1150,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) struct task_struct *task; struct task_struct *p; unsigned int f_flags; - bool reap = true; + bool reap = false; struct pid *pid; long ret = 0; @@ -1177,15 +1177,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) goto put_task; } - mm = p->mm; - mmgrab(mm); - - /* If the work has been done already, just exit with success */ - if (test_bit(MMF_OOM_SKIP, &mm->flags)) - reap = false; - else if (!task_will_free_mem(p)) { - reap = false; - ret = -EINVAL; + if (mmget_not_zero(p->mm)) { + mm = p->mm; + if (task_will_free_mem(p)) + reap = true; + else { + /* Error only if the work has not been done already */ + if (!test_bit(MMF_OOM_SKIP, &mm->flags)) + ret = -EINVAL; + } } task_unlock(p); @@ -1201,7 +1201,8 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) mmap_read_unlock(mm); drop_mm: - mmdrop(mm); + if (mm) + mmput(mm); put_task: put_task_struct(task); put_pid: -- cgit v1.2.3 From 6f1b228529ae49b0f85ab89bcdb6c365df401558 Mon Sep 17 00:00:00 2001 From: Gautham Ananthakrishna Date: Thu, 28 Oct 2021 14:36:17 -0700 Subject: ocfs2: fix race between searching chunks and release journal_head from buffer_head Encountered a race between ocfs2_test_bg_bit_allocatable() and jbd2_journal_put_journal_head() resulting in the below vmcore. PID: 106879 TASK: ffff880244ba9c00 CPU: 2 COMMAND: "loop3" Call trace: panic oops_end no_context __bad_area_nosemaphore bad_area_nosemaphore __do_page_fault do_page_fault page_fault [exception RIP: ocfs2_block_group_find_clear_bits+316] ocfs2_block_group_find_clear_bits [ocfs2] ocfs2_cluster_group_search [ocfs2] ocfs2_search_chain [ocfs2] ocfs2_claim_suballoc_bits [ocfs2] __ocfs2_claim_clusters [ocfs2] ocfs2_claim_clusters [ocfs2] ocfs2_local_alloc_slide_window [ocfs2] ocfs2_reserve_local_alloc_bits [ocfs2] ocfs2_reserve_clusters_with_limit [ocfs2] ocfs2_reserve_clusters [ocfs2] ocfs2_lock_refcount_allocators [ocfs2] ocfs2_make_clusters_writable [ocfs2] ocfs2_replace_cow [ocfs2] ocfs2_refcount_cow [ocfs2] ocfs2_file_write_iter [ocfs2] lo_rw_aio loop_queue_work kthread_worker_fn kthread ret_from_fork When ocfs2_test_bg_bit_allocatable() called bh2jh(bg_bh), the bg_bh->b_private NULL as jbd2_journal_put_journal_head() raced and released the jounal head from the buffer head. Needed to take bit lock for the bit 'BH_JournalHead' to fix this race. Link: https://lkml.kernel.org/r/1634820718-6043-1-git-send-email-gautham.ananthakrishna@oracle.com Signed-off-by: Gautham Ananthakrishna Reviewed-by: Joseph Qi Cc: Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/suballoc.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8521942f5af2..481017e1dac5 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1251,7 +1251,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, { struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct journal_head *jh; - int ret; + int ret = 1; if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) return 0; @@ -1259,14 +1259,18 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, if (!buffer_jbd(bg_bh)) return 1; - jh = bh2jh(bg_bh); - spin_lock(&jh->b_state_lock); - bg = (struct ocfs2_group_desc *) jh->b_committed_data; - if (bg) - ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); - else - ret = 1; - spin_unlock(&jh->b_state_lock); + jbd_lock_bh_journal_head(bg_bh); + if (buffer_jbd(bg_bh)) { + jh = bh2jh(bg_bh); + spin_lock(&jh->b_state_lock); + bg = (struct ocfs2_group_desc *) jh->b_committed_data; + if (bg) + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + else + ret = 1; + spin_unlock(&jh->b_state_lock); + } + jbd_unlock_bh_journal_head(bg_bh); return ret; } -- cgit v1.2.3 From 855d44434fa24d5344c1cb0edb38723f891cd415 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 28 Oct 2021 14:36:21 -0700 Subject: mm/secretmem: avoid letting secretmem_users drop to zero Quoting Dmitry: "refcount_inc() needs to be done before fd_install(). After fd_install() finishes, the fd can be used by userspace and we can have secret data in memory before the refcount_inc(). A straightforward misuse where a user will predict the returned fd in another thread before the syscall returns and will use it to store secret data is somewhat dubious because such a user just shoots themself in the foot. But a more interesting misuse would be to close the predicted fd and decrement the refcount before the corresponding refcount_inc, this way one can briefly drop the refcount to zero while there are other users of secretmem." Move fd_install() after refcount_inc(). Link: https://lkml.kernel.org/r/20211021154046.880251-1-keescook@chromium.org Link: https://lore.kernel.org/lkml/CACT4Y+b1sW6-Hkn8HQYw_SsT7X3tp-CJNh2ci0wG3ZnQz9jjig@mail.gmail.com Fixes: 9a436f8ff631 ("PM: hibernate: disable when there are active secretmem users") Signed-off-by: Kees Cook Reported-by: Dmitry Vyukov Reviewed-by: Dmitry Vyukov Reviewed-by: David Hildenbrand Reviewed-by: Jordy Zomer Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/secretmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/secretmem.c b/mm/secretmem.c index c2dda408bb36..22b310adb53d 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -218,8 +218,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) file->f_flags |= O_LARGEFILE; - fd_install(fd, file); atomic_inc(&secretmem_users); + fd_install(fd, file); return fd; err_put_fd: -- cgit v1.2.3 From ffb29b1c255ab48cb0062a3d11c101501e3e9b3f Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Thu, 28 Oct 2021 14:36:24 -0700 Subject: mm/vmalloc: fix numa spreading for large hash tables Eric Dumazet reported a strange numa spreading info in [1], and found commit 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings") introduced this issue [2]. Dig into the difference before and after this patch, page allocation has some difference: before: alloc_large_system_hash __vmalloc __vmalloc_node(..., NUMA_NO_NODE, ...) __vmalloc_node_range __vmalloc_area_node alloc_page /* because NUMA_NO_NODE, so choose alloc_page branch */ alloc_pages_current alloc_page_interleave /* can be proved by print policy mode */ after: alloc_large_system_hash __vmalloc __vmalloc_node(..., NUMA_NO_NODE, ...) __vmalloc_node_range __vmalloc_area_node alloc_pages_node /* choose nid by nuam_mem_id() */ __alloc_pages_node(nid, ....) So after commit 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings"), it will allocate memory in current node instead of interleaving allocate memory. Link: https://lore.kernel.org/linux-mm/CANn89iL6AAyWhfxdHO+jaT075iOa3XcYn9k6JJc7JR2XYn6k_Q@mail.gmail.com/ [1] Link: https://lore.kernel.org/linux-mm/CANn89iLofTR=AK-QOZY87RdUZENCZUT4O6a0hvhu3_EwRMerOg@mail.gmail.com/ [2] Link: https://lkml.kernel.org/r/20211021080744.874701-2-chenwandun@huawei.com Fixes: 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings") Signed-off-by: Chen Wandun Reported-by: Eric Dumazet Cc: Shakeel Butt Cc: Nicholas Piggin Cc: Kefeng Wang Cc: Hanjun Guo Cc: Uladzislau Rezki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d77830ff604c..e8a807c78110 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2816,6 +2816,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, unsigned int order, unsigned int nr_pages, struct page **pages) { unsigned int nr_allocated = 0; + struct page *page; + int i; /* * For order-0 pages we make use of bulk allocator, if @@ -2823,7 +2825,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, * to fails, fallback to a single page allocator that is * more permissive. */ - if (!order) { + if (!order && nid != NUMA_NO_NODE) { while (nr_allocated < nr_pages) { unsigned int nr, nr_pages_request; @@ -2848,7 +2850,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, if (nr != nr_pages_request) break; } - } else + } else if (order) /* * Compound pages required for remap_vmalloc_page if * high-order pages. @@ -2856,11 +2858,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid, gfp |= __GFP_COMP; /* High-order pages or fallback path if "bulk" fails. */ - while (nr_allocated < nr_pages) { - struct page *page; - int i; - page = alloc_pages_node(nid, gfp, order); + while (nr_allocated < nr_pages) { + if (nid == NUMA_NO_NODE) + page = alloc_pages(gfp, order); + else + page = alloc_pages_node(nid, gfp, order); if (unlikely(!page)) break; -- cgit v1.2.3 From 74c42e1baacf206338b1dd6b6199ac964512b5bb Mon Sep 17 00:00:00 2001 From: Rongwei Wang Date: Thu, 28 Oct 2021 14:36:27 -0700 Subject: mm, thp: bail out early in collapse_file for writeback page Currently collapse_file does not explicitly check PG_writeback, instead, page_has_private and try_to_release_page are used to filter writeback pages. This does not work for xfs with blocksize equal to or larger than pagesize, because in such case xfs has no page->private. This makes collapse_file bail out early for writeback page. Otherwise, xfs end_page_writeback will panic as follows. page:fffffe00201bcc80 refcount:0 mapcount:0 mapping:ffff0003f88c86a8 index:0x0 pfn:0x84ef32 aops:xfs_address_space_operations [xfs] ino:30000b7 dentry name:"libtest.so" flags: 0x57fffe0000008027(locked|referenced|uptodate|active|writeback) raw: 57fffe0000008027 ffff80001b48bc28 ffff80001b48bc28 ffff0003f88c86a8 raw: 0000000000000000 0000000000000000 00000000ffffffff ffff0000c3e9a000 page dumped because: VM_BUG_ON_PAGE(((unsigned int) page_ref_count(page) + 127u <= 127u)) page->mem_cgroup:ffff0000c3e9a000 ------------[ cut here ]------------ kernel BUG at include/linux/mm.h:1212! Internal error: Oops - BUG: 0 [#1] SMP Modules linked in: BUG: Bad page state in process khugepaged pfn:84ef32 xfs(E) page:fffffe00201bcc80 refcount:0 mapcount:0 mapping:0 index:0x0 pfn:0x84ef32 libcrc32c(E) rfkill(E) aes_ce_blk(E) crypto_simd(E) ... CPU: 25 PID: 0 Comm: swapper/25 Kdump: loaded Tainted: ... pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--) Call trace: end_page_writeback+0x1c0/0x214 iomap_finish_page_writeback+0x13c/0x204 iomap_finish_ioend+0xe8/0x19c iomap_writepage_end_bio+0x38/0x50 bio_endio+0x168/0x1ec blk_update_request+0x278/0x3f0 blk_mq_end_request+0x34/0x15c virtblk_request_done+0x38/0x74 [virtio_blk] blk_done_softirq+0xc4/0x110 __do_softirq+0x128/0x38c __irq_exit_rcu+0x118/0x150 irq_exit+0x1c/0x30 __handle_domain_irq+0x8c/0xf0 gic_handle_irq+0x84/0x108 el1_irq+0xcc/0x180 arch_cpu_idle+0x18/0x40 default_idle_call+0x4c/0x1a0 cpuidle_idle_call+0x168/0x1e0 do_idle+0xb4/0x104 cpu_startup_entry+0x30/0x9c secondary_start_kernel+0x104/0x180 Code: d4210000 b0006161 910c8021 94013f4d (d4210000) ---[ end trace 4a88c6a074082f8c ]--- Kernel panic - not syncing: Oops - BUG: Fatal exception in interrupt Link: https://lkml.kernel.org/r/20211022023052.33114-1-rongwei.wang@linux.alibaba.com Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Signed-off-by: Rongwei Wang Signed-off-by: Xu Yu Suggested-by: Yang Shi Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Yang Shi Acked-by: Kirill A. Shutemov Cc: Song Liu Cc: William Kucharski Cc: Hugh Dickins Cc: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 045cc579f724..48de4e1b0783 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1763,6 +1763,10 @@ static void collapse_file(struct mm_struct *mm, filemap_flush(mapping); result = SCAN_FAIL; goto xa_unlocked; + } else if (PageWriteback(page)) { + xas_unlock_irq(&xas); + result = SCAN_FAIL; + goto xa_unlocked; } else if (trylock_page(page)) { get_page(page); xas_unlock_irq(&xas); @@ -1798,7 +1802,8 @@ static void collapse_file(struct mm_struct *mm, goto out_unlock; } - if (!is_shmem && PageDirty(page)) { + if (!is_shmem && (PageDirty(page) || + PageWriteback(page))) { /* * khugepaged only works on read-only fd, so this * page is dirty because it hasn't been flushed -- cgit v1.2.3 From a4aeaa06d45e90f9b279f0b09de84bd00006e733 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 28 Oct 2021 14:36:30 -0700 Subject: mm: khugepaged: skip huge page collapse for special files The read-only THP for filesystems will collapse THP for files opened readonly and mapped with VM_EXEC. The intended usecase is to avoid TLB misses for large text segments. But it doesn't restrict the file types so a THP could be collapsed for a non-regular file, for example, block device, if it is opened readonly and mapped with EXEC permission. This may cause bugs, like [1] and [2]. This is definitely not the intended usecase, so just collapse THP for regular files in order to close the attack surface. [shy828301@gmail.com: fix vm_file check [3]] Link: https://lore.kernel.org/lkml/CACkBjsYwLYLRmX8GpsDpMthagWOjWWrNxqY6ZLNQVr6yx+f5vA@mail.gmail.com/ [1] Link: https://lore.kernel.org/linux-mm/000000000000c6a82505ce284e4c@google.com/ [2] Link: https://lkml.kernel.org/r/CAHbLzkqTW9U3VvTu1Ki5v_cLRC9gHW+znBukg_ycergE0JWj-A@mail.gmail.com [3] Link: https://lkml.kernel.org/r/20211027195221.3825-1-shy828301@gmail.com Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Signed-off-by: Hugh Dickins Signed-off-by: Yang Shi Reported-by: Hao Sun Reported-by: syzbot+aae069be1de40fb11825@syzkaller.appspotmail.com Cc: Matthew Wilcox Cc: Kirill A. Shutemov Cc: Song Liu Cc: Andrea Righi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 48de4e1b0783..8a8b3aa92937 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false; + if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - + vma->vm_pgoff, HPAGE_PMD_NR)) + return false; + /* Enabled via shmem mount options or sysfs settings. */ - if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) { - return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, - HPAGE_PMD_NR); - } + if (shmem_file(vma->vm_file)) + return shmem_huge_enabled(vma); /* THP settings require madvise. */ if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) return false; - /* Read-only file mappings need to be aligned for THP to work. */ + /* Only regular file is valid */ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file && - !inode_is_open_for_write(vma->vm_file->f_inode) && (vm_flags & VM_EXEC)) { - return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, - HPAGE_PMD_NR); + struct inode *inode = vma->vm_file->f_inode; + + return !inode_is_open_for_write(inode) && + S_ISREG(inode->i_mode); } if (!vma->anon_vma || vma->vm_ops) -- cgit v1.2.3 From 2e014660b3e4b7bd0e75f845cdcf745c0f632889 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 28 Oct 2021 14:36:33 -0700 Subject: mm/damon/core-test: fix wrong expectations for 'damon_split_regions_of()' Kunit test cases for 'damon_split_regions_of()' expects the number of regions after calling the function will be same to their request ('nr_sub'). However, the requested number is just an upper-limit, because the function randomly decides the size of each sub-region. This fixes the wrong expectation. Link: https://lkml.kernel.org/r/20211028090628.14948-1-sj@kernel.org Fixes: 17ccae8bb5c9 ("mm/damon: add kunit tests") Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/core-test.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h index c938a9c34e6c..7008c3735e99 100644 --- a/mm/damon/core-test.h +++ b/mm/damon/core-test.h @@ -219,14 +219,14 @@ static void damon_test_split_regions_of(struct kunit *test) r = damon_new_region(0, 22); damon_add_region(r, t); damon_split_regions_of(c, t, 2); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2u); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u); damon_free_target(t); t = damon_new_target(42); r = damon_new_region(0, 220); damon_add_region(r, t); damon_split_regions_of(c, t, 4); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 4u); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u); damon_free_target(t); damon_destroy_ctx(c); } -- cgit v1.2.3 From 9c7516d669e68e94e17203469a873ff5d1d3a41a Mon Sep 17 00:00:00 2001 From: David Yang Date: Thu, 28 Oct 2021 14:36:36 -0700 Subject: tools/testing/selftests/vm/split_huge_page_test.c: fix application of sizeof to pointer The coccinelle check report: ./tools/testing/selftests/vm/split_huge_page_test.c:344:36-42: ERROR: application of sizeof to pointer Use "strlen" to fix it. Link: https://lkml.kernel.org/r/20211012030116.184027-1-davidcomponentone@gmail.com Signed-off-by: David Yang Reported-by: Zeal Robot Cc: Zi Yan Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/split_huge_page_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 1af16d2c2a0a..52497b7b9f1d 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -341,7 +341,7 @@ void split_file_backed_thp(void) } /* write something to the file, so a file-backed THP can be allocated */ - num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc)); + num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); close(fd); if (num_written < 1) { -- cgit v1.2.3 From 56ee254d23c59fd48fb6f192d79858f60cfe2bf6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Oct 2021 10:42:27 +0200 Subject: Revert "btrfs: compression: drop kmap/kunmap from zstd" This reverts commit bbaf9715f3f5b5ff0de71da91fcc34ee9c198ed8. The kmaps in compression code are still needed and cause crashes on 32bit machines (ARM, x86). Reproducible eg. by running fstest btrfs/004 with enabled LZO or ZSTD compression. Example stacktrace with ZSTD on a 32bit ARM machine: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c4159ed3 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 210 Comm: kworker/u2:3 Not tainted 5.14.0-rc79+ #12 Hardware name: Allwinner sun4i/sun5i Families Workqueue: btrfs-delalloc btrfs_work_helper PC is at mmiocpy+0x48/0x330 LR is at ZSTD_compressStream_generic+0x15c/0x28c (mmiocpy) from [] (ZSTD_compressStream_generic+0x15c/0x28c) (ZSTD_compressStream_generic) from [] (ZSTD_compressStream+0x64/0xa0) (ZSTD_compressStream) from [] (zstd_compress_pages+0x170/0x488) (zstd_compress_pages) from [] (btrfs_compress_pages+0x124/0x12c) (btrfs_compress_pages) from [] (compress_file_range+0x3c0/0x834) (compress_file_range) from [] (async_cow_start+0x10/0x28) (async_cow_start) from [] (btrfs_work_helper+0x100/0x230) (btrfs_work_helper) from [] (process_one_work+0x1b4/0x418) (process_one_work) from [] (worker_thread+0x44/0x524) (worker_thread) from [] (kthread+0x180/0x1b0) (kthread) from [] Link: https://lore.kernel.org/all/CAJCQCtT+OuemovPO7GZk8Y8=qtOObr0XTDp8jh4OHD6y84AFxw@mail.gmail.com/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=214839 Signed-off-by: David Sterba --- fs/btrfs/zstd.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 56dce9f00988..f06b68040352 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -399,7 +399,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, /* map in the first page of input data */ in_page = find_get_page(mapping, start >> PAGE_SHIFT); - workspace->in_buf.src = page_address(in_page); + workspace->in_buf.src = kmap(in_page); workspace->in_buf.pos = 0; workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE); @@ -411,7 +411,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, goto out; } pages[nr_pages++] = out_page; - workspace->out_buf.dst = page_address(out_page); + workspace->out_buf.dst = kmap(out_page); workspace->out_buf.pos = 0; workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE); @@ -446,6 +446,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, if (workspace->out_buf.pos == workspace->out_buf.size) { tot_out += PAGE_SIZE; max_out -= PAGE_SIZE; + kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -E2BIG; @@ -457,7 +458,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, goto out; } pages[nr_pages++] = out_page; - workspace->out_buf.dst = page_address(out_page); + workspace->out_buf.dst = kmap(out_page); workspace->out_buf.pos = 0; workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE); @@ -472,12 +473,13 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, /* Check if we need more input */ if (workspace->in_buf.pos == workspace->in_buf.size) { tot_in += PAGE_SIZE; + kunmap(in_page); put_page(in_page); start += PAGE_SIZE; len -= PAGE_SIZE; in_page = find_get_page(mapping, start >> PAGE_SHIFT); - workspace->in_buf.src = page_address(in_page); + workspace->in_buf.src = kmap(in_page); workspace->in_buf.pos = 0; workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE); } @@ -504,6 +506,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, tot_out += PAGE_SIZE; max_out -= PAGE_SIZE; + kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -E2BIG; @@ -515,7 +518,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, goto out; } pages[nr_pages++] = out_page; - workspace->out_buf.dst = page_address(out_page); + workspace->out_buf.dst = kmap(out_page); workspace->out_buf.pos = 0; workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE); } @@ -531,8 +534,12 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, out: *out_pages = nr_pages; /* Cleanup */ - if (in_page) + if (in_page) { + kunmap(in_page); put_page(in_page); + } + if (out_page) + kunmap(out_page); return ret; } @@ -556,7 +563,7 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) goto done; } - workspace->in_buf.src = page_address(pages_in[page_in_index]); + workspace->in_buf.src = kmap(pages_in[page_in_index]); workspace->in_buf.pos = 0; workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE); @@ -592,14 +599,14 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) break; if (workspace->in_buf.pos == workspace->in_buf.size) { - page_in_index++; + kunmap(pages_in[page_in_index++]); if (page_in_index >= total_pages_in) { workspace->in_buf.src = NULL; ret = -EIO; goto done; } srclen -= PAGE_SIZE; - workspace->in_buf.src = page_address(pages_in[page_in_index]); + workspace->in_buf.src = kmap(pages_in[page_in_index]); workspace->in_buf.pos = 0; workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE); } @@ -607,6 +614,8 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) ret = 0; zero_fill_bio(cb->orig_bio); done: + if (workspace->in_buf.src) + kunmap(pages_in[page_in_index]); return ret; } -- cgit v1.2.3 From 55276e14df4324ade34583adef110e11d249fb7b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Oct 2021 10:42:43 +0200 Subject: Revert "btrfs: compression: drop kmap/kunmap from zlib" This reverts commit 696ab562e6df9fbafd6052d8ce4aafcb2ed16069. The kmaps in compression code are still needed and cause crashes on 32bit machines (ARM, x86). Reproducible eg. by running fstest btrfs/004 with enabled LZO or ZSTD compression. Link: https://lore.kernel.org/all/CAJCQCtT+OuemovPO7GZk8Y8=qtOObr0XTDp8jh4OHD6y84AFxw@mail.gmail.com/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=214839 Signed-off-by: David Sterba --- fs/btrfs/zlib.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 8afa90074891..767a0c6c9694 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -126,7 +126,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, ret = -ENOMEM; goto out; } - cpage_out = page_address(out_page); + cpage_out = kmap(out_page); pages[0] = out_page; nr_pages = 1; @@ -148,22 +148,26 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, int i; for (i = 0; i < in_buf_pages; i++) { - if (in_page) + if (in_page) { + kunmap(in_page); put_page(in_page); + } in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = page_address(in_page); + data_in = kmap(in_page); memcpy(workspace->buf + i * PAGE_SIZE, data_in, PAGE_SIZE); start += PAGE_SIZE; } workspace->strm.next_in = workspace->buf; } else { - if (in_page) + if (in_page) { + kunmap(in_page); put_page(in_page); + } in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = page_address(in_page); + data_in = kmap(in_page); start += PAGE_SIZE; workspace->strm.next_in = data_in; } @@ -192,6 +196,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, * the stream end if required */ if (workspace->strm.avail_out == 0) { + kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -E2BIG; @@ -202,7 +207,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, ret = -ENOMEM; goto out; } - cpage_out = page_address(out_page); + cpage_out = kmap(out_page); pages[nr_pages] = out_page; nr_pages++; workspace->strm.avail_out = PAGE_SIZE; @@ -229,6 +234,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, goto out; } else if (workspace->strm.avail_out == 0) { /* get another page for the stream end */ + kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -E2BIG; @@ -239,7 +245,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, ret = -ENOMEM; goto out; } - cpage_out = page_address(out_page); + cpage_out = kmap(out_page); pages[nr_pages] = out_page; nr_pages++; workspace->strm.avail_out = PAGE_SIZE; @@ -258,8 +264,13 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, *total_in = workspace->strm.total_in; out: *out_pages = nr_pages; - if (in_page) + if (out_page) + kunmap(out_page); + + if (in_page) { + kunmap(in_page); put_page(in_page); + } return ret; } @@ -276,7 +287,7 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) unsigned long buf_start; struct page **pages_in = cb->compressed_pages; - data_in = page_address(pages_in[page_in_index]); + data_in = kmap(pages_in[page_in_index]); workspace->strm.next_in = data_in; workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE); workspace->strm.total_in = 0; @@ -298,6 +309,7 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { pr_warn("BTRFS: inflateInit failed\n"); + kunmap(pages_in[page_in_index]); return -EIO; } while (workspace->strm.total_in < srclen) { @@ -324,13 +336,13 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) if (workspace->strm.avail_in == 0) { unsigned long tmp; - + kunmap(pages_in[page_in_index]); page_in_index++; if (page_in_index >= total_pages_in) { data_in = NULL; break; } - data_in = page_address(pages_in[page_in_index]); + data_in = kmap(pages_in[page_in_index]); workspace->strm.next_in = data_in; tmp = srclen - workspace->strm.total_in; workspace->strm.avail_in = min(tmp, PAGE_SIZE); @@ -342,6 +354,8 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) ret = 0; done: zlib_inflateEnd(&workspace->strm); + if (data_in) + kunmap(pages_in[page_in_index]); if (!ret) zero_fill_bio(cb->orig_bio); return ret; -- cgit v1.2.3 From ccaa66c8dd277ac02f96914168bb7177f7ea8117 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Oct 2021 10:44:21 +0200 Subject: Revert "btrfs: compression: drop kmap/kunmap from lzo" This reverts commit 8c945d32e60427cbc0859cf7045bbe6196bb03d8. The kmaps in compression code are still needed and cause crashes on 32bit machines (ARM, x86). Reproducible eg. by running fstest btrfs/004 with enabled LZO or ZSTD compression. The revert does not apply cleanly due to changes in a6e66e6f8c1b ("btrfs: rework lzo_decompress_bio() to make it subpage compatible") that reworked the page iteration so the revert is done to be equivalent to the original code. Link: https://lore.kernel.org/all/CAJCQCtT+OuemovPO7GZk8Y8=qtOObr0XTDp8jh4OHD6y84AFxw@mail.gmail.com/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=214839 Tested-by: Qu Wenruo Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/lzo.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index c25dfd1a8a54..3dbe6eb5fda7 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -141,7 +141,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, *total_in = 0; in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = page_address(in_page); + data_in = kmap(in_page); /* * store the size of all chunks of compressed data in @@ -152,7 +152,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, ret = -ENOMEM; goto out; } - cpage_out = page_address(out_page); + cpage_out = kmap(out_page); out_offset = LZO_LEN; tot_out = LZO_LEN; pages[0] = out_page; @@ -210,6 +210,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, if (out_len == 0 && tot_in >= len) break; + kunmap(out_page); if (nr_pages == nr_dest_pages) { out_page = NULL; ret = -E2BIG; @@ -221,7 +222,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, ret = -ENOMEM; goto out; } - cpage_out = page_address(out_page); + cpage_out = kmap(out_page); pages[nr_pages++] = out_page; pg_bytes_left = PAGE_SIZE; @@ -243,11 +244,12 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, break; bytes_left = len - tot_in; + kunmap(in_page); put_page(in_page); start += PAGE_SIZE; in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = page_address(in_page); + data_in = kmap(in_page); in_len = min(bytes_left, PAGE_SIZE); } @@ -257,17 +259,22 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, } /* store the size of all chunks of compressed data */ - sizes_ptr = page_address(pages[0]); + sizes_ptr = kmap_local_page(pages[0]); write_compress_length(sizes_ptr, tot_out); + kunmap_local(sizes_ptr); ret = 0; *total_out = tot_out; *total_in = tot_in; out: *out_pages = nr_pages; + if (out_page) + kunmap(out_page); - if (in_page) + if (in_page) { + kunmap(in_page); put_page(in_page); + } return ret; } @@ -283,6 +290,7 @@ static void copy_compressed_segment(struct compressed_bio *cb, u32 orig_in = *cur_in; while (*cur_in < orig_in + len) { + char *kaddr; struct page *cur_page; u32 copy_len = min_t(u32, PAGE_SIZE - offset_in_page(*cur_in), orig_in + len - *cur_in); @@ -290,9 +298,11 @@ static void copy_compressed_segment(struct compressed_bio *cb, ASSERT(copy_len); cur_page = cb->compressed_pages[*cur_in / PAGE_SIZE]; + kaddr = kmap(cur_page); memcpy(dest + *cur_in - orig_in, - page_address(cur_page) + offset_in_page(*cur_in), + kaddr + offset_in_page(*cur_in), copy_len); + kunmap(cur_page); *cur_in += copy_len; } @@ -303,6 +313,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) struct workspace *workspace = list_entry(ws, struct workspace, list); const struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); const u32 sectorsize = fs_info->sectorsize; + char *kaddr; int ret; /* Compressed data length, can be unaligned */ u32 len_in; @@ -311,7 +322,9 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) /* Bytes decompressed so far */ u32 cur_out = 0; - len_in = read_compress_length(page_address(cb->compressed_pages[0])); + kaddr = kmap(cb->compressed_pages[0]); + len_in = read_compress_length(kaddr); + kunmap(cb->compressed_pages[0]); cur_in += LZO_LEN; /* @@ -344,9 +357,9 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) ASSERT(cur_in / sectorsize == (cur_in + LZO_LEN - 1) / sectorsize); cur_page = cb->compressed_pages[cur_in / PAGE_SIZE]; + kaddr = kmap(cur_page); ASSERT(cur_page); - seg_len = read_compress_length(page_address(cur_page) + - offset_in_page(cur_in)); + seg_len = read_compress_length(kaddr + offset_in_page(cur_in)); cur_in += LZO_LEN; /* Copy the compressed segment payload into workspace */ @@ -431,7 +444,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in, destlen = min_t(unsigned long, destlen, PAGE_SIZE); bytes = min_t(unsigned long, destlen, out_len - start_byte); - kaddr = page_address(dest_page); + kaddr = kmap_local_page(dest_page); memcpy(kaddr, workspace->buf + start_byte, bytes); /* @@ -441,6 +454,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in, */ if (bytes < destlen) memset(kaddr+bytes, 0, destlen-bytes); + kunmap_local(kaddr); out: return ret; } -- cgit v1.2.3 From 6130722f1114cac4f3ec858763ad8995d5a2e586 Mon Sep 17 00:00:00 2001 From: Steven Rostedt (VMware) Date: Fri, 29 Oct 2021 09:52:23 -0400 Subject: ftrace: Fix kernel-doc formatting issues Some functions had kernel-doc that used a comma instead of a hash to separate the function name from the one line description. Also, the "ftrace_is_dead()" had an incomplete description. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 635fbdc9d589..feebf57c6458 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2208,7 +2208,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) } /** - * ftrace_update_record, set a record that now is tracing or not + * ftrace_update_record - set a record that now is tracing or not * @rec: the record to update * @enable: set to true if the record is tracing, false to force disable * @@ -2221,7 +2221,7 @@ int ftrace_update_record(struct dyn_ftrace *rec, bool enable) } /** - * ftrace_test_record, check if the record has been enabled or not + * ftrace_test_record - check if the record has been enabled or not * @rec: the record to test * @enable: set to true to check if enabled, false if it is disabled * @@ -2574,7 +2574,7 @@ struct ftrace_rec_iter { }; /** - * ftrace_rec_iter_start, start up iterating over traced functions + * ftrace_rec_iter_start - start up iterating over traced functions * * Returns an iterator handle that is used to iterate over all * the records that represent address locations where functions @@ -2605,7 +2605,7 @@ struct ftrace_rec_iter *ftrace_rec_iter_start(void) } /** - * ftrace_rec_iter_next, get the next record to process. + * ftrace_rec_iter_next - get the next record to process. * @iter: The handle to the iterator. * * Returns the next iterator after the given iterator @iter. @@ -2630,7 +2630,7 @@ struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter) } /** - * ftrace_rec_iter_record, get the record at the iterator location + * ftrace_rec_iter_record - get the record at the iterator location * @iter: The current iterator location * * Returns the record that the current @iter is at. @@ -2733,7 +2733,7 @@ static int __ftrace_modify_code(void *data) } /** - * ftrace_run_stop_machine, go back to the stop machine method + * ftrace_run_stop_machine - go back to the stop machine method * @command: The command to tell ftrace what to do * * If an arch needs to fall back to the stop machine method, the @@ -2745,7 +2745,7 @@ void ftrace_run_stop_machine(int command) } /** - * arch_ftrace_update_code, modify the code to trace or not trace + * arch_ftrace_update_code - modify the code to trace or not trace * @command: The command that needs to be done * * Archs can override this function if it does not need to @@ -7525,7 +7525,9 @@ void ftrace_kill(void) } /** - * Test if ftrace is dead or not. + * ftrace_is_dead - Test if ftrace is dead or not. + * + * Returns 1 if ftrace is "dead", zero otherwise. */ int ftrace_is_dead(void) { -- cgit v1.2.3 From ddcf906fe5ed842034b2d995064c1de1dcc7e812 Mon Sep 17 00:00:00 2001 From: Steven Rostedt (VMware) Date: Fri, 29 Oct 2021 09:54:14 -0400 Subject: tracing: Fix misspelling of "missing" My snake instinct was on and I wrote "misssing" instead of "missing". Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_eprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 5c5f208c15d3..928867f527e7 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -904,7 +904,7 @@ static int __trace_eprobe_create(int argc, const char *argv[]) if (IS_ERR(ep)) { ret = PTR_ERR(ep); - /* This must return -ENOMEM or misssing event, else there is a bug */ + /* This must return -ENOMEM or missing event, else there is a bug */ WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV); ep = NULL; goto error; -- cgit v1.2.3 From cf11d01135ea1ff7fddb612033e3cb5cde279ff2 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 29 Oct 2021 06:59:26 +0200 Subject: riscv: Do not re-populate shadow memory with kasan_populate_early_shadow When calling this function, all the shadow memory is already populated with kasan_early_shadow_pte which has PAGE_KERNEL protection. kasan_populate_early_shadow write-protects the mapping of the range of addresses passed in argument in zero_pte_populate, which actually write-protects all the shadow memory mapping since kasan_early_shadow_pte is used for all the shadow memory at this point. And then when using memblock API to populate the shadow memory, the first write access to the kernel stack triggers a trap. This becomes visible with the next commit that contains a fix for asan-stack. We already manually populate all the shadow memory in kasan_early_init and we write-protect kasan_early_shadow_pte at the end of kasan_init which makes the calls to kasan_populate_early_shadow superfluous so we can remove them. Signed-off-by: Alexandre Ghiti Fixes: e178d670f251 ("riscv/kasan: add KASAN_VMALLOC support") Fixes: 8ad8b72721d0 ("riscv: Add KASAN support") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index d7189c8714a9..89a8376ce44e 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -172,21 +172,10 @@ void __init kasan_init(void) phys_addr_t p_start, p_end; u64 i; - /* - * Populate all kernel virtual address space with kasan_early_shadow_page - * except for the linear mapping and the modules/kernel/BPF mapping. - */ - kasan_populate_early_shadow((void *)KASAN_SHADOW_START, - (void *)kasan_mem_to_shadow((void *) - VMEMMAP_END)); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) kasan_shallow_populate( (void *)kasan_mem_to_shadow((void *)VMALLOC_START), (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); - else - kasan_populate_early_shadow( - (void *)kasan_mem_to_shadow((void *)VMALLOC_START), - (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); /* Populate the linear mapping */ for_each_mem_range(i, &p_start, &p_end) { -- cgit v1.2.3 From 54c5639d8f507ebefa814f574cb6f763033a72a5 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 29 Oct 2021 06:59:27 +0200 Subject: riscv: Fix asan-stack clang build Nathan reported that because KASAN_SHADOW_OFFSET was not defined in Kconfig, it prevents asan-stack from getting disabled with clang even when CONFIG_KASAN_STACK is disabled: fix this by defining the corresponding config. Reported-by: Nathan Chancellor Signed-off-by: Alexandre Ghiti Fixes: 8ad8b72721d0 ("riscv: Add KASAN support") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 ++++++ arch/riscv/include/asm/kasan.h | 3 +-- arch/riscv/mm/kasan_init.c | 3 +++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c3f3fd583e04..6d5b63bd4bd9 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -163,6 +163,12 @@ config PAGE_OFFSET default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB +config KASAN_SHADOW_OFFSET + hex + depends on KASAN_GENERIC + default 0xdfffffc800000000 if 64BIT + default 0xffffffff if 32BIT + config ARCH_FLATMEM_ENABLE def_bool !NUMA diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h index a2b3d9cdbc86..b00f503ec124 100644 --- a/arch/riscv/include/asm/kasan.h +++ b/arch/riscv/include/asm/kasan.h @@ -30,8 +30,7 @@ #define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT)) #define KASAN_SHADOW_START KERN_VIRT_START #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) -#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ - (64 - KASAN_SHADOW_SCALE_SHIFT))) +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) void kasan_init(void); asmlinkage void kasan_early_init(void); diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 89a8376ce44e..54294f83513d 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -17,6 +17,9 @@ asmlinkage void __init kasan_early_init(void) uintptr_t i; pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START); + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != + KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); + for (i = 0; i < PTRS_PER_PTE; ++i) set_pte(kasan_early_shadow_pte + i, mk_pte(virt_to_page(kasan_early_shadow_page), -- cgit v1.2.3 From 61a9f252c1c026f84129a7bfa476e880b75e80eb Mon Sep 17 00:00:00 2001 From: Martin K. Petersen Date: Wed, 27 Oct 2021 23:42:02 -0400 Subject: scsi: mpt3sas: Fix reference tag handling for WRITE_INSERT Testing revealed a problem with how the reference tag was handled for a WRITE_INSERT operation. The SCSI_PROT_REF_CHECK flag is not set when the controller is asked to generate the protection information (i.e. not DIX). And as a result the initial reference tag would not be set in the WRITE_INSERT case. Separate handling of the REF_CHECK and REF_INCREMENT flags to align with both the DIX spec and the MPI implementation. Link: https://lore.kernel.org/r/20211028034202.24225-1-martin.petersen@oracle.com Fixes: b3e2c72af1d5 ("scsi: mpt3sas: Use the proper SCSI midlayer interfaces for PI") Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index d383d4a03436..ad1b6c2b37a7 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -5065,9 +5065,12 @@ _scsih_setup_eedp(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, if (scmd->prot_flags & SCSI_PROT_GUARD_CHECK) eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD; - if (scmd->prot_flags & SCSI_PROT_REF_CHECK) { - eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG | - MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG; + if (scmd->prot_flags & SCSI_PROT_REF_CHECK) + eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG; + + if (scmd->prot_flags & SCSI_PROT_REF_INCREMENT) { + eedp_flags |= MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG; + mpi_request->CDB.EEDP32.PrimaryReferenceTag = cpu_to_be32(scsi_prot_ref_tag(scmd)); } -- cgit v1.2.3 From 09d9e4d041876684d33f21d02bcdaea6586734f1 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sat, 30 Oct 2021 09:23:01 +0300 Subject: scsi: ufs: ufshpb: Remove HPB2.0 flows The Host Performance Buffer feature allows UFS read commands to carry the physical media addresses along with the LBAs, thus allowing less internal L2P-table switches in the device. HPB1.0 allowed a single LBA, while HPB2.0 increases this capacity up to 255 blocks. Carrying more than a single record, the read operation is no longer purely of type "read" but a "hybrid" command: Writing the physical address to the device in one operation and reading back the required payload in another. The JEDEC HPB spec defines two commands for this operation: HPB-WRITE-BUFFER (0x2) to write the physical addresses to device, and HPB-READ to read the payload. With the current HPB design the UFS driver has no alternative but to divide the READ request into 2 separate commands: HPB-WRITE-BUFFER and HPB-READ. This causes a great deal of aggravation to the block layer guys who demanded that we completely revert the entire HPB driver regardless of the huge amount of corporate effort already invested in it. As a compromise, remove only the pieces that implement the 2.0 specification. This is done as a matter of urgency for the final 5.15 release. Link: https://lore.kernel.org/r/20211030062301.248-1-avri.altman@wdc.com Tested-by: Avri Altman Tested-by: Bean Huo Reviewed-by: Bart Van Assche Reviewed-by: Bean Huo Co-developed-by: James Bottomley Signed-off-by: James Bottomley Signed-off-by: Avri Altman Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 7 +- drivers/scsi/ufs/ufshpb.c | 283 +--------------------------------------------- drivers/scsi/ufs/ufshpb.h | 2 - 3 files changed, 4 insertions(+), 288 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 95be7ecdfe10..41f2ff35f82b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2737,12 +2737,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) lrbp->req_abort_skip = false; - err = ufshpb_prep(hba, lrbp); - if (err == -EAGAIN) { - lrbp->cmd = NULL; - ufshcd_release(hba); - goto out; - } + ufshpb_prep(hba, lrbp); ufshcd_comp_scsi_upiu(hba, lrbp); diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index 589af5f6b940..026a133149dc 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -84,16 +84,6 @@ static bool ufshpb_is_supported_chunk(struct ufshpb_lu *hpb, int transfer_len) return transfer_len <= hpb->pre_req_max_tr_len; } -/* - * In this driver, WRITE_BUFFER CMD support 36KB (len=9) ~ 1MB (len=256) as - * default. It is possible to change range of transfer_len through sysfs. - */ -static inline bool ufshpb_is_required_wb(struct ufshpb_lu *hpb, int len) -{ - return len > hpb->pre_req_min_tr_len && - len <= hpb->pre_req_max_tr_len; -} - static bool ufshpb_is_general_lun(int lun) { return lun < UFS_UPIU_MAX_UNIT_NUM_ID; @@ -334,7 +324,7 @@ ufshpb_get_pos_from_lpn(struct ufshpb_lu *hpb, unsigned long lpn, int *rgn_idx, static void ufshpb_set_hpb_read_to_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, - __be64 ppn, u8 transfer_len, int read_id) + __be64 ppn, u8 transfer_len) { unsigned char *cdb = lrbp->cmd->cmnd; __be64 ppn_tmp = ppn; @@ -346,256 +336,11 @@ ufshpb_set_hpb_read_to_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, /* ppn value is stored as big-endian in the host memory */ memcpy(&cdb[6], &ppn_tmp, sizeof(__be64)); cdb[14] = transfer_len; - cdb[15] = read_id; + cdb[15] = 0; lrbp->cmd->cmd_len = UFS_CDB_SIZE; } -static inline void ufshpb_set_write_buf_cmd(unsigned char *cdb, - unsigned long lpn, unsigned int len, - int read_id) -{ - cdb[0] = UFSHPB_WRITE_BUFFER; - cdb[1] = UFSHPB_WRITE_BUFFER_PREFETCH_ID; - - put_unaligned_be32(lpn, &cdb[2]); - cdb[6] = read_id; - put_unaligned_be16(len * HPB_ENTRY_SIZE, &cdb[7]); - - cdb[9] = 0x00; /* Control = 0x00 */ -} - -static struct ufshpb_req *ufshpb_get_pre_req(struct ufshpb_lu *hpb) -{ - struct ufshpb_req *pre_req; - - if (hpb->num_inflight_pre_req >= hpb->throttle_pre_req) { - dev_info(&hpb->sdev_ufs_lu->sdev_dev, - "pre_req throttle. inflight %d throttle %d", - hpb->num_inflight_pre_req, hpb->throttle_pre_req); - return NULL; - } - - pre_req = list_first_entry_or_null(&hpb->lh_pre_req_free, - struct ufshpb_req, list_req); - if (!pre_req) { - dev_info(&hpb->sdev_ufs_lu->sdev_dev, "There is no pre_req"); - return NULL; - } - - list_del_init(&pre_req->list_req); - hpb->num_inflight_pre_req++; - - return pre_req; -} - -static inline void ufshpb_put_pre_req(struct ufshpb_lu *hpb, - struct ufshpb_req *pre_req) -{ - pre_req->req = NULL; - bio_reset(pre_req->bio); - list_add_tail(&pre_req->list_req, &hpb->lh_pre_req_free); - hpb->num_inflight_pre_req--; -} - -static void ufshpb_pre_req_compl_fn(struct request *req, blk_status_t error) -{ - struct ufshpb_req *pre_req = (struct ufshpb_req *)req->end_io_data; - struct ufshpb_lu *hpb = pre_req->hpb; - unsigned long flags; - - if (error) { - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); - struct scsi_sense_hdr sshdr; - - dev_err(&hpb->sdev_ufs_lu->sdev_dev, "block status %d", error); - scsi_command_normalize_sense(cmd, &sshdr); - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "code %x sense_key %x asc %x ascq %x", - sshdr.response_code, - sshdr.sense_key, sshdr.asc, sshdr.ascq); - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "byte4 %x byte5 %x byte6 %x additional_len %x", - sshdr.byte4, sshdr.byte5, - sshdr.byte6, sshdr.additional_length); - } - - blk_mq_free_request(req); - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - ufshpb_put_pre_req(pre_req->hpb, pre_req); - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); -} - -static int ufshpb_prep_entry(struct ufshpb_req *pre_req, struct page *page) -{ - struct ufshpb_lu *hpb = pre_req->hpb; - struct ufshpb_region *rgn; - struct ufshpb_subregion *srgn; - __be64 *addr; - int offset = 0; - int copied; - unsigned long lpn = pre_req->wb.lpn; - int rgn_idx, srgn_idx, srgn_offset; - unsigned long flags; - - addr = page_address(page); - ufshpb_get_pos_from_lpn(hpb, lpn, &rgn_idx, &srgn_idx, &srgn_offset); - - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - -next_offset: - rgn = hpb->rgn_tbl + rgn_idx; - srgn = rgn->srgn_tbl + srgn_idx; - - if (!ufshpb_is_valid_srgn(rgn, srgn)) - goto mctx_error; - - if (!srgn->mctx) - goto mctx_error; - - copied = ufshpb_fill_ppn_from_page(hpb, srgn->mctx, srgn_offset, - pre_req->wb.len - offset, - &addr[offset]); - - if (copied < 0) - goto mctx_error; - - offset += copied; - srgn_offset += copied; - - if (srgn_offset == hpb->entries_per_srgn) { - srgn_offset = 0; - - if (++srgn_idx == hpb->srgns_per_rgn) { - srgn_idx = 0; - rgn_idx++; - } - } - - if (offset < pre_req->wb.len) - goto next_offset; - - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - return 0; -mctx_error: - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - return -ENOMEM; -} - -static int ufshpb_pre_req_add_bio_page(struct ufshpb_lu *hpb, - struct request_queue *q, - struct ufshpb_req *pre_req) -{ - struct page *page = pre_req->wb.m_page; - struct bio *bio = pre_req->bio; - int entries_bytes, ret; - - if (!page) - return -ENOMEM; - - if (ufshpb_prep_entry(pre_req, page)) - return -ENOMEM; - - entries_bytes = pre_req->wb.len * sizeof(__be64); - - ret = bio_add_pc_page(q, bio, page, entries_bytes, 0); - if (ret != entries_bytes) { - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "bio_add_pc_page fail: %d", ret); - return -ENOMEM; - } - return 0; -} - -static inline int ufshpb_get_read_id(struct ufshpb_lu *hpb) -{ - if (++hpb->cur_read_id >= MAX_HPB_READ_ID) - hpb->cur_read_id = 1; - return hpb->cur_read_id; -} - -static int ufshpb_execute_pre_req(struct ufshpb_lu *hpb, struct scsi_cmnd *cmd, - struct ufshpb_req *pre_req, int read_id) -{ - struct scsi_device *sdev = cmd->device; - struct request_queue *q = sdev->request_queue; - struct request *req; - struct scsi_request *rq; - struct bio *bio = pre_req->bio; - - pre_req->hpb = hpb; - pre_req->wb.lpn = sectors_to_logical(cmd->device, - blk_rq_pos(scsi_cmd_to_rq(cmd))); - pre_req->wb.len = sectors_to_logical(cmd->device, - blk_rq_sectors(scsi_cmd_to_rq(cmd))); - if (ufshpb_pre_req_add_bio_page(hpb, q, pre_req)) - return -ENOMEM; - - req = pre_req->req; - - /* 1. request setup */ - blk_rq_append_bio(req, bio); - req->rq_disk = NULL; - req->end_io_data = (void *)pre_req; - req->end_io = ufshpb_pre_req_compl_fn; - - /* 2. scsi_request setup */ - rq = scsi_req(req); - rq->retries = 1; - - ufshpb_set_write_buf_cmd(rq->cmd, pre_req->wb.lpn, pre_req->wb.len, - read_id); - rq->cmd_len = scsi_command_size(rq->cmd); - - if (blk_insert_cloned_request(q, req) != BLK_STS_OK) - return -EAGAIN; - - hpb->stats.pre_req_cnt++; - - return 0; -} - -static int ufshpb_issue_pre_req(struct ufshpb_lu *hpb, struct scsi_cmnd *cmd, - int *read_id) -{ - struct ufshpb_req *pre_req; - struct request *req = NULL; - unsigned long flags; - int _read_id; - int ret = 0; - - req = blk_get_request(cmd->device->request_queue, - REQ_OP_DRV_OUT | REQ_SYNC, BLK_MQ_REQ_NOWAIT); - if (IS_ERR(req)) - return -EAGAIN; - - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - pre_req = ufshpb_get_pre_req(hpb); - if (!pre_req) { - ret = -EAGAIN; - goto unlock_out; - } - _read_id = ufshpb_get_read_id(hpb); - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - - pre_req->req = req; - - ret = ufshpb_execute_pre_req(hpb, cmd, pre_req, _read_id); - if (ret) - goto free_pre_req; - - *read_id = _read_id; - - return ret; -free_pre_req: - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - ufshpb_put_pre_req(hpb, pre_req); -unlock_out: - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - blk_put_request(req); - return ret; -} - /* * This function will set up HPB read command using host-side L2P map data. */ @@ -609,7 +354,6 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) __be64 ppn; unsigned long flags; int transfer_len, rgn_idx, srgn_idx, srgn_offset; - int read_id = 0; int err = 0; hpb = ufshpb_get_hpb_data(cmd->device); @@ -685,24 +429,8 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) dev_err(hba->dev, "get ppn failed. err %d\n", err); return err; } - if (!ufshpb_is_legacy(hba) && - ufshpb_is_required_wb(hpb, transfer_len)) { - err = ufshpb_issue_pre_req(hpb, cmd, &read_id); - if (err) { - unsigned long timeout; - - timeout = cmd->jiffies_at_alloc + msecs_to_jiffies( - hpb->params.requeue_timeout_ms); - - if (time_before(jiffies, timeout)) - return -EAGAIN; - - hpb->stats.miss_cnt++; - return 0; - } - } - ufshpb_set_hpb_read_to_upiu(hba, lrbp, ppn, transfer_len, read_id); + ufshpb_set_hpb_read_to_upiu(hba, lrbp, ppn, transfer_len); hpb->stats.hit_cnt++; return 0; @@ -1841,16 +1569,11 @@ static void ufshpb_lu_parameter_init(struct ufs_hba *hba, u32 entries_per_rgn; u64 rgn_mem_size, tmp; - /* for pre_req */ - hpb->pre_req_min_tr_len = hpb_dev_info->max_hpb_single_cmd + 1; - if (ufshpb_is_legacy(hba)) hpb->pre_req_max_tr_len = HPB_LEGACY_CHUNK_HIGH; else hpb->pre_req_max_tr_len = HPB_MULTI_CHUNK_HIGH; - hpb->cur_read_id = 0; - hpb->lu_pinned_start = hpb_lu_info->pinned_start; hpb->lu_pinned_end = hpb_lu_info->num_pinned ? (hpb_lu_info->pinned_start + hpb_lu_info->num_pinned - 1) diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h index a79e07398970..f15d8fdbce2e 100644 --- a/drivers/scsi/ufs/ufshpb.h +++ b/drivers/scsi/ufs/ufshpb.h @@ -241,8 +241,6 @@ struct ufshpb_lu { spinlock_t param_lock; struct list_head lh_pre_req_free; - int cur_read_id; - int pre_req_min_tr_len; int pre_req_max_tr_len; /* cached L2P map management worker */ -- cgit v1.2.3 From 095729484efc4aa4604c474792b059bd940addce Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Sep 2021 09:28:49 +0300 Subject: perf build: Suppress 'rm dlfilter' build message The following build message: rm dlfilters/dlfilter-test-api-v0.o is unwanted. The object file is being treated as an intermediate file and being automatically removed. Mark the object file as .SECONDARY to prevent removal and hence the message. Requested-by: Arnaldo Carvalho de Melo Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20210930062849.110416-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5cd702062a04..b856afa6eb52 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -787,6 +787,8 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c include/perf/perf_dlfilter.h $(Q)$(MKDIR) -p $(OUTPUT)dlfilters $(QUIET_CC)$(CC) -c -Iinclude $(EXTRA_CFLAGS) -o $@ -fpic $< +.SECONDARY: $(DLFILTERS:.so=.o) + $(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o $(QUIET_LINK)$(CC) $(EXTRA_CFLAGS) -shared -o $@ $< -- cgit v1.2.3 From 29c77550eef31b0d72a45b49eeab03b8963264e8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sun, 3 Oct 2021 22:32:38 -0700 Subject: perf script: Check session->header.env.arch before using it When perf.data is not written cleanly, we would like to process existing data as much as possible (please see f_header.data.size == 0 condition in perf_session__read_header). However, perf.data with partial data may crash perf. Specifically, we see crash in 'perf script' for NULL session->header.env.arch. Fix this by checking session->header.env.arch before using it to determine native_arch. Also split the if condition so it is easier to read. Committer notes: If it is a pipe, we already assume is a native arch, so no need to check session->header.env.arch. Signed-off-by: Song Liu Cc: Peter Zijlstra Cc: kernel-team@fb.com Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20211004053238.514936-1-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6211d0b84b7a..42eb6bdf3c58 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -4039,11 +4039,15 @@ script_found: goto out_delete; uname(&uts); - if (data.is_pipe || /* assume pipe_mode indicates native_arch */ - !strcmp(uts.machine, session->header.env.arch) || - (!strcmp(uts.machine, "x86_64") && - !strcmp(session->header.env.arch, "i386"))) + if (data.is_pipe) { /* Assume pipe_mode indicates native_arch */ native_arch = true; + } else if (session->header.env.arch) { + if (!strcmp(uts.machine, session->header.env.arch)) + native_arch = true; + else if (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386")) + native_arch = true; + } script.session = session; script__setup_sample_type(&script); -- cgit v1.2.3 From 89ac61ff05a5c79545aa93f7b1da7e4dbc60fc9a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 28 Sep 2021 21:52:53 +0200 Subject: perf callchain: Fix compilation on powerpc with gcc11+ Got following build fail on powerpc: CC arch/powerpc/util/skip-callchain-idx.o In function ‘check_return_reg’, inlined from ‘check_return_addr’ at arch/powerpc/util/skip-callchain-idx.c:213:7, inlined from ‘arch_skip_callchain_idx’ at arch/powerpc/util/skip-callchain-idx.c:265:7: arch/powerpc/util/skip-callchain-idx.c:54:18: error: ‘dwarf_frame_register’ accessing 96 bytes \ in a region of size 64 [-Werror=stringop-overflow=] 54 | result = dwarf_frame_register(frame, ra_regno, ops_mem, &ops, &nops); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/powerpc/util/skip-callchain-idx.c: In function ‘arch_skip_callchain_idx’: arch/powerpc/util/skip-callchain-idx.c:54:18: note: referencing argument 3 of type ‘Dwarf_Op *’ In file included from /usr/include/elfutils/libdwfl.h:32, from arch/powerpc/util/skip-callchain-idx.c:10: /usr/include/elfutils/libdw.h:1069:12: note: in a call to function ‘dwarf_frame_register’ 1069 | extern int dwarf_frame_register (Dwarf_Frame *frame, int regno, | ^~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The dwarf_frame_register args changed with [1], Updating ops_mem accordingly. [1] https://sourceware.org/git/?p=elfutils.git;a=commit;h=5621fe5443da23112170235dd5cac161e5c75e65 Reviewed-by: Kajol Jain Signed-off-by: Jiri Olsa Acked-by: Mark Wieelard Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Link: https://lore.kernel.org/r/20210928195253.1267023-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 3018a054526a..20cd6244863b 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -45,7 +45,7 @@ static const Dwfl_Callbacks offline_callbacks = { */ static int check_return_reg(int ra_regno, Dwarf_Frame *frame) { - Dwarf_Op ops_mem[2]; + Dwarf_Op ops_mem[3]; Dwarf_Op dummy; Dwarf_Op *ops = &dummy; size_t nops; -- cgit v1.2.3 From 27730c8cd60d1574d8337276e7a9d7d2ca92e0d1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Sep 2021 08:38:13 -0700 Subject: perf script: Fix PERF_SAMPLE_WEIGHT_STRUCT support -F weight in perf script is broken. # ./perf mem record # ./perf script -F weight Samples for 'dummy:HG' event do not have WEIGHT attribute set. Cannot print 'weight' field. The sample type, PERF_SAMPLE_WEIGHT_STRUCT, is an alternative of the PERF_SAMPLE_WEIGHT sample type. They share the same space, weight. The lower 32 bits are exactly the same for both sample type. The higher 32 bits may be different for different architecture. For a new kernel on x86, the PERF_SAMPLE_WEIGHT_STRUCT is used. For an old kernel or other ARCHs, the PERF_SAMPLE_WEIGHT is used. With -F weight, current perf script will only check the input string "weight" with the PERF_SAMPLE_WEIGHT sample type. Because the commit ea8d0ed6eae3 ("perf tools: Support PERF_SAMPLE_WEIGHT_STRUCT") didn't update the PERF_SAMPLE_WEIGHT_STRUCT sample type for perf script. For a new kernel on x86, the check fails. Use PERF_SAMPLE_WEIGHT_TYPE, which supports both sample types, to replace PERF_SAMPLE_WEIGHT Fixes: ea8d0ed6eae37b01 ("perf tools: Support PERF_SAMPLE_WEIGHT_STRUCT") Reported-by: Joe Mario Reviewed-by: Kajol Jain Signed-off-by: Kan Liang Tested-by: Jiri Olsa Tested-by: Joe Mario Acked-by: Jiri Olsa Acked-by: Joe Mario Cc: Andi Kleen Link: https://lore.kernel.org/r/1632929894-102778-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 42eb6bdf3c58..c32c2eb16d7d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -459,7 +459,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(WEIGHT) && - evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", PERF_OUTPUT_WEIGHT)) + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT)) return -EINVAL; if (PRINT_FIELD(SYM) && -- cgit v1.2.3 From 8bb7eca972ad531c9b149c0a51ab43a417385813 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 31 Oct 2021 13:53:10 -0700 Subject: Linux 5.15 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 30c7c81d0437..ed6e7ec60eff 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,8 @@ VERSION = 5 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc7 -NAME = Opossums on Parade +EXTRAVERSION = +NAME = Trick or Treat # *DOCUMENTATION* # To see a list of typical targets execute "make help" -- cgit v1.2.3