diff options
449 files changed, 7643 insertions, 5622 deletions
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt index dab6da3382d9..b19fc34efdb1 100644 --- a/Documentation/atomic_ops.txt +++ b/Documentation/atomic_ops.txt @@ -266,7 +266,9 @@ with the given old and new values. Like all atomic_xxx operations, atomic_cmpxchg will only satisfy its atomicity semantics as long as all other accesses of *v are performed through atomic_xxx operations. -atomic_cmpxchg must provide explicit memory barriers around the operation. +atomic_cmpxchg must provide explicit memory barriers around the operation, +although if the comparison fails then no memory ordering guarantees are +required. The semantics for atomic_cmpxchg are the same as those defined for 'cas' below. diff --git a/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt index d3058768b23d..c53e0b08032f 100644 --- a/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt +++ b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt @@ -35,7 +35,7 @@ Example: device_type = "dma"; reg = <0x0 0x1f270000 0x0 0x10000>, <0x0 0x1f200000 0x0 0x10000>, - <0x0 0x1b008000 0x0 0x2000>, + <0x0 0x1b000000 0x0 0x400000>, <0x0 0x1054a000 0x0 0x100>; interrupts = <0x0 0x82 0x4>, <0x0 0xb8 0x4>, diff --git a/Documentation/devicetree/bindings/sound/mt8173-max98090.txt b/Documentation/devicetree/bindings/sound/mt8173-max98090.txt index 829bd26d17f8..519e97c8f1b8 100644 --- a/Documentation/devicetree/bindings/sound/mt8173-max98090.txt +++ b/Documentation/devicetree/bindings/sound/mt8173-max98090.txt @@ -3,11 +3,13 @@ MT8173 with MAX98090 CODEC Required properties: - compatible : "mediatek,mt8173-max98090" - mediatek,audio-codec: the phandle of the MAX98090 audio codec +- mediatek,platform: the phandle of MT8173 ASoC platform Example: sound { compatible = "mediatek,mt8173-max98090"; mediatek,audio-codec = <&max98090>; + mediatek,platform = <&afe>; }; diff --git a/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5676.txt b/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5676.txt index 61e98c976bd4..f205ce9e31dd 100644 --- a/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5676.txt +++ b/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5676.txt @@ -3,11 +3,13 @@ MT8173 with RT5650 RT5676 CODECS Required properties: - compatible : "mediatek,mt8173-rt5650-rt5676" - mediatek,audio-codec: the phandles of rt5650 and rt5676 codecs +- mediatek,platform: the phandle of MT8173 ASoC platform Example: sound { compatible = "mediatek,mt8173-rt5650-rt5676"; mediatek,audio-codec = <&rt5650 &rt5676>; + mediatek,platform = <&afe>; }; diff --git a/Documentation/devicetree/bindings/spi/spi-ath79.txt b/Documentation/devicetree/bindings/spi/spi-ath79.txt index f1ad9c367532..9c696fa66f81 100644 --- a/Documentation/devicetree/bindings/spi/spi-ath79.txt +++ b/Documentation/devicetree/bindings/spi/spi-ath79.txt @@ -3,7 +3,7 @@ Binding for Qualcomm Atheros AR7xxx/AR9xxx SPI controller Required properties: - compatible: has to be "qca,<soc-type>-spi", "qca,ar7100-spi" as fallback. - reg: Base address and size of the controllers memory area -- clocks: phandle to the AHB clock. +- clocks: phandle of the AHB clock. - clock-names: has to be "ahb". - #address-cells: <1>, as required by generic SPI binding. - #size-cells: <0>, also as required by generic SPI binding. @@ -12,9 +12,9 @@ Child nodes as per the generic SPI binding. Example: - spi@1F000000 { + spi@1f000000 { compatible = "qca,ar9132-spi", "qca,ar7100-spi"; - reg = <0x1F000000 0x10>; + reg = <0x1f000000 0x10>; clocks = <&pll 2>; clock-names = "ahb"; diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt index 4cf1a2a6bd72..415484f3d59a 100644 --- a/Documentation/fault-injection/fault-injection.txt +++ b/Documentation/fault-injection/fault-injection.txt @@ -15,6 +15,10 @@ o fail_page_alloc injects page allocation failures. (alloc_pages(), get_free_pages(), ...) +o fail_futex + + injects futex deadlock and uaddr fault errors. + o fail_make_request injects disk IO errors on devices permitted by setting @@ -113,6 +117,12 @@ configuration of fault-injection capabilities. specifies the minimum page allocation order to be injected failures. +- /sys/kernel/debug/fail_futex/ignore-private: + + Format: { 'Y' | 'N' } + default is 'N', setting it to 'Y' will disable failure injections + when dealing with private (address space) futexes. + o Boot option In order to inject faults while debugfs is not available (early boot time), @@ -121,6 +131,7 @@ use the boot option: failslab= fail_page_alloc= fail_make_request= + fail_futex= mmc_core.fail_request=<interval>,<probability>,<space>,<times> How to add new fault injection capability diff --git a/Documentation/hwmon/nct7904 b/Documentation/hwmon/nct7904 index 014f112e2a14..57fffe33ebfc 100644 --- a/Documentation/hwmon/nct7904 +++ b/Documentation/hwmon/nct7904 @@ -35,11 +35,11 @@ temp1_input Local temperature (1/1000 degree, temp[2-9]_input CPU temperatures (1/1000 degree, 0.125 degree resolution) -fan[1-4]_mode R/W, 0/1 for manual or SmartFan mode +pwm[1-4]_enable R/W, 1/2 for manual or SmartFan mode Setting SmartFan mode is supported only if it has been previously configured by BIOS (or configuration EEPROM) -fan[1-4]_pwm R/O in SmartFan mode, R/W in manual control mode +pwm[1-4] R/O in SmartFan mode, R/W in manual control mode The driver checks sensor control registers and does not export the sensors that are not enabled. Anyway, a sensor that is enabled may actually be not diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 13feb697271f..18fc860df1be 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -2383,9 +2383,7 @@ about the state (old or new) implies an SMP-conditional general memory barrier explicit lock operations, described later). These include: xchg(); - cmpxchg(); atomic_xchg(); atomic_long_xchg(); - atomic_cmpxchg(); atomic_long_cmpxchg(); atomic_inc_return(); atomic_long_inc_return(); atomic_dec_return(); atomic_long_dec_return(); atomic_add_return(); atomic_long_add_return(); @@ -2398,7 +2396,9 @@ explicit lock operations, described later). These include: test_and_clear_bit(); test_and_change_bit(); - /* when succeeds (returns 1) */ + /* when succeeds */ + cmpxchg(); + atomic_cmpxchg(); atomic_long_cmpxchg(); atomic_add_unless(); atomic_long_add_unless(); These are used for such things as implementing ACQUIRE-class and RELEASE-class diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index c4407a41b0fc..f4cb0b2d5cd7 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -1,7 +1,22 @@ Static Keys ----------- -By: Jason Baron <jbaron@redhat.com> +DEPRECATED API: + +The use of 'struct static_key' directly, is now DEPRECATED. In addition +static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following: + +struct static_key false = STATIC_KEY_INIT_FALSE; +struct static_key true = STATIC_KEY_INIT_TRUE; +static_key_true() +static_key_false() + +The updated API replacements are: + +DEFINE_STATIC_KEY_TRUE(key); +DEFINE_STATIC_KEY_FALSE(key); +static_key_likely() +statick_key_unlikely() 0) Abstract @@ -9,22 +24,22 @@ Static keys allows the inclusion of seldom used features in performance-sensitive fast-path kernel code, via a GCC feature and a code patching technique. A quick example: - struct static_key key = STATIC_KEY_INIT_FALSE; + DEFINE_STATIC_KEY_FALSE(key); ... - if (static_key_false(&key)) + if (static_branch_unlikely(&key)) do unlikely code else do likely code ... - static_key_slow_inc(); + static_branch_enable(&key); ... - static_key_slow_inc(); + static_branch_disable(&key); ... -The static_key_false() branch will be generated into the code with as little +The static_branch_unlikely() branch will be generated into the code with as little impact to the likely code path as possible. @@ -56,7 +71,7 @@ the branch site to change the branch direction. For example, if we have a simple branch that is disabled by default: - if (static_key_false(&key)) + if (static_branch_unlikely(&key)) printk("I am the true branch\n"); Thus, by default the 'printk' will not be emitted. And the code generated will @@ -75,68 +90,55 @@ the basis for the static keys facility. In order to make use of this optimization you must first define a key: - struct static_key key; - -Which is initialized as: - - struct static_key key = STATIC_KEY_INIT_TRUE; + DEFINE_STATIC_KEY_TRUE(key); or: - struct static_key key = STATIC_KEY_INIT_FALSE; + DEFINE_STATIC_KEY_FALSE(key); + -If the key is not initialized, it is default false. The 'struct static_key', -must be a 'global'. That is, it can't be allocated on the stack or dynamically +The key must be global, that is, it can't be allocated on the stack or dynamically allocated at run-time. The key is then used in code as: - if (static_key_false(&key)) + if (static_branch_unlikely(&key)) do unlikely code else do likely code Or: - if (static_key_true(&key)) + if (static_branch_likely(&key)) do likely code else do unlikely code -A key that is initialized via 'STATIC_KEY_INIT_FALSE', must be used in a -'static_key_false()' construct. Likewise, a key initialized via -'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A -single key can be used in many branches, but all the branches must match the -way that the key has been initialized. +Keys defined via DEFINE_STATIC_KEY_TRUE(), or DEFINE_STATIC_KEY_FALSE, may +be used in either static_branch_likely() or static_branch_unlikely() +statemnts. -The branch(es) can then be switched via: +Branch(es) can be set true via: - static_key_slow_inc(&key); - ... - static_key_slow_dec(&key); +static_branch_enable(&key); -Thus, 'static_key_slow_inc()' means 'make the branch true', and -'static_key_slow_dec()' means 'make the branch false' with appropriate -reference counting. For example, if the key is initialized true, a -static_key_slow_dec(), will switch the branch to false. And a subsequent -static_key_slow_inc(), will change the branch back to true. Likewise, if the -key is initialized false, a 'static_key_slow_inc()', will change the branch to -true. And then a 'static_key_slow_dec()', will again make the branch false. +or false via: + +static_branch_disable(&key); -An example usage in the kernel is the implementation of tracepoints: +The branch(es) can then be switched via reference counts: - static inline void trace_##name(proto) \ - { \ - if (static_key_false(&__tracepoint_##name.key)) \ - __DO_TRACE(&__tracepoint_##name, \ - TP_PROTO(data_proto), \ - TP_ARGS(data_args), \ - TP_CONDITION(cond)); \ - } + static_branch_inc(&key); + ... + static_branch_dec(&key); -Tracepoints are disabled by default, and can be placed in performance critical -pieces of the kernel. Thus, by using a static key, the tracepoints can have -absolutely minimal impact when not in use. +Thus, 'static_branch_inc()' means 'make the branch true', and +'static_branch_dec()' means 'make the branch false' with appropriate +reference counting. For example, if the key is initialized true, a +static_branch_dec(), will switch the branch to false. And a subsequent +static_branch_inc(), will change the branch back to true. Likewise, if the +key is initialized false, a 'static_branch_inc()', will change the branch to +true. And then a 'static_branch_dec()', will again make the branch false. 4) Architecture level code patching interface, 'jump labels' @@ -150,9 +152,12 @@ simply fall back to a traditional, load, test, and jump sequence. * #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h -* __always_inline bool arch_static_branch(struct static_key *key), see: +* __always_inline bool arch_static_branch(struct static_key *key, bool branch), see: arch/x86/include/asm/jump_label.h +* __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch), + see: arch/x86/include/asm/jump_label.h + * void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type), see: arch/x86/kernel/jump_label.c @@ -173,7 +178,7 @@ SYSCALL_DEFINE0(getppid) { int pid; -+ if (static_key_false(&key)) ++ if (static_branch_unlikely(&key)) + printk("I am the true branch\n"); rcu_read_lock(); diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 949de191fcdc..cda56df9b8a7 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -199,7 +199,8 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "#include <linux/string.h>\n" buf += "#include <linux/configfs.h>\n" buf += "#include <linux/ctype.h>\n" - buf += "#include <asm/unaligned.h>\n\n" + buf += "#include <asm/unaligned.h>\n" + buf += "#include <scsi/scsi_proto.h>\n\n" buf += "#include <target/target_core_base.h>\n" buf += "#include <target/target_core_fabric.h>\n" buf += "#include <target/target_core_fabric_configfs.h>\n" @@ -230,8 +231,14 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " }\n" buf += " tpg->" + fabric_mod_port + " = " + fabric_mod_port + ";\n" buf += " tpg->" + fabric_mod_port + "_tpgt = tpgt;\n\n" - buf += " ret = core_tpg_register(&" + fabric_mod_name + "_ops, wwn,\n" - buf += " &tpg->se_tpg, SCSI_PROTOCOL_SAS);\n" + + if proto_ident == "FC": + buf += " ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_FCP);\n" + elif proto_ident == "SAS": + buf += " ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_SAS);\n" + elif proto_ident == "iSCSI": + buf += " ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_ISCSI);\n" + buf += " if (ret < 0) {\n" buf += " kfree(tpg);\n" buf += " return NULL;\n" @@ -292,7 +299,7 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops = {\n" buf += " .module = THIS_MODULE,\n" - buf += " .name = " + fabric_mod_name + ",\n" + buf += " .name = \"" + fabric_mod_name + "\",\n" buf += " .get_fabric_name = " + fabric_mod_name + "_get_fabric_name,\n" buf += " .tpg_get_wwn = " + fabric_mod_name + "_get_fabric_wwn,\n" buf += " .tpg_get_tag = " + fabric_mod_name + "_get_tag,\n" @@ -322,17 +329,17 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .fabric_make_tpg = " + fabric_mod_name + "_make_tpg,\n" buf += " .fabric_drop_tpg = " + fabric_mod_name + "_drop_tpg,\n" buf += "\n" - buf += " .tfc_wwn_attrs = " + fabric_mod_name + "_wwn_attrs;\n" + buf += " .tfc_wwn_attrs = " + fabric_mod_name + "_wwn_attrs,\n" buf += "};\n\n" buf += "static int __init " + fabric_mod_name + "_init(void)\n" buf += "{\n" - buf += " return target_register_template(" + fabric_mod_name + "_ops);\n" + buf += " return target_register_template(&" + fabric_mod_name + "_ops);\n" buf += "};\n\n" buf += "static void __exit " + fabric_mod_name + "_exit(void)\n" buf += "{\n" - buf += " target_unregister_template(" + fabric_mod_name + "_ops);\n" + buf += " target_unregister_template(&" + fabric_mod_name + "_ops);\n" buf += "};\n\n" buf += "MODULE_DESCRIPTION(\"" + fabric_mod_name.upper() + " series fabric driver\");\n" diff --git a/MAINTAINERS b/MAINTAINERS index 9289ecb57b68..a9ae6c105520 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5600,6 +5600,7 @@ F: kernel/irq/ IRQCHIP DRIVERS M: Thomas Gleixner <tglx@linutronix.de> M: Jason Cooper <jason@lakedaemon.net> +M: Marc Zyngier <marc.zyngier@arm.com> L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core @@ -5608,11 +5609,14 @@ F: Documentation/devicetree/bindings/interrupt-controller/ F: drivers/irqchip/ IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY) -M: Benjamin Herrenschmidt <benh@kernel.crashing.org> +M: Jiang Liu <jiang.liu@linux.intel.com> +M: Marc Zyngier <marc.zyngier@arm.com> S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core F: Documentation/IRQ-domain.txt F: include/linux/irqdomain.h F: kernel/irq/irqdomain.c +F: kernel/irq/msi.c ISAPNP M: Jaroslav Kysela <perex@perex.cz> @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index 8a8ea7110de8..a71cdbe2a04d 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -71,6 +71,12 @@ config JUMP_LABEL ( On 32-bit x86, the necessary options added to the compiler flags may increase the size of the kernel slightly. ) +config STATIC_KEYS_SELFTEST + bool "Static key selftest" + depends on JUMP_LABEL + help + Boot time self-test of the branch patching code. + config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index d7201333e3bc..2db99433e17f 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -138,8 +138,8 @@ mipi_phy: video-phy@10020710 { compatible = "samsung,s5pv210-mipi-video-phy"; - reg = <0x10020710 8>; #phy-cells = <1>; + syscon = <&pmu_system_controller>; }; pd_cam: cam-power-domain@10023C00 { diff --git a/arch/arm/boot/dts/exynos4210-origen.dts b/arch/arm/boot/dts/exynos4210-origen.dts index e0abfc3324d1..e050d85cdacd 100644 --- a/arch/arm/boot/dts/exynos4210-origen.dts +++ b/arch/arm/boot/dts/exynos4210-origen.dts @@ -127,6 +127,10 @@ }; }; +&cpu0 { + cpu0-supply = <&buck1_reg>; +}; + &fimd { pinctrl-0 = <&lcd_en &lcd_clk &lcd_data24 &pwm0_out>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/exynos4210-trats.dts b/arch/arm/boot/dts/exynos4210-trats.dts index 98f3ce65cb9a..ba34886f8b65 100644 --- a/arch/arm/boot/dts/exynos4210-trats.dts +++ b/arch/arm/boot/dts/exynos4210-trats.dts @@ -188,6 +188,10 @@ }; }; +&cpu0 { + cpu0-supply = <&varm_breg>; +}; + &dsi_0 { vddcore-supply = <&vusb_reg>; vddio-supply = <&vmipi_reg>; diff --git a/arch/arm/boot/dts/exynos4210-universal_c210.dts b/arch/arm/boot/dts/exynos4210-universal_c210.dts index d4f2b11319dd..775892b2cc6a 100644 --- a/arch/arm/boot/dts/exynos4210-universal_c210.dts +++ b/arch/arm/boot/dts/exynos4210-universal_c210.dts @@ -548,6 +548,10 @@ }; }; +&cpu0 { + cpu0-supply = <&vdd_arm_reg>; +}; + &pinctrl_1 { hdmi_hpd: hdmi-hpd { samsung,pins = "gpx3-7"; diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi index 10d3c173396e..3e5ba665d200 100644 --- a/arch/arm/boot/dts/exynos4210.dtsi +++ b/arch/arm/boot/dts/exynos4210.dtsi @@ -40,6 +40,18 @@ device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0x900>; + clocks = <&clock CLK_ARM_CLK>; + clock-names = "cpu"; + clock-latency = <160000>; + + operating-points = < + 1200000 1250000 + 1000000 1150000 + 800000 1075000 + 500000 975000 + 400000 975000 + 200000 950000 + >; cooling-min-level = <4>; cooling-max-level = <2>; #cooling-cells = <2>; /* min followed by max */ diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index b6478e97d6a7..e6540b5cfa4c 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -286,8 +286,8 @@ can1: can@53fe4000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe4000 0x1000>; - clocks = <&clks 33>; - clock-names = "ipg"; + clocks = <&clks 33>, <&clks 33>; + clock-names = "ipg", "per"; interrupts = <43>; status = "disabled"; }; @@ -295,8 +295,8 @@ can2: can@53fe8000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe8000 0x1000>; - clocks = <&clks 34>; - clock-names = "ipg"; + clocks = <&clks 34>, <&clks 34>; + clock-names = "ipg", "per"; interrupts = <44>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/k2e-clocks.dtsi b/arch/arm/boot/dts/k2e-clocks.dtsi index 4773d6af66a0..d56d68fe7ffc 100644 --- a/arch/arm/boot/dts/k2e-clocks.dtsi +++ b/arch/arm/boot/dts/k2e-clocks.dtsi @@ -13,9 +13,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2hk-clocks.dtsi b/arch/arm/boot/dts/k2hk-clocks.dtsi index d5adee3c0067..af9b7190533a 100644 --- a/arch/arm/boot/dts/k2hk-clocks.dtsi +++ b/arch/arm/boot/dts/k2hk-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2l-clocks.dtsi b/arch/arm/boot/dts/k2l-clocks.dtsi index eb1e3e29f073..ef8464bb11ff 100644 --- a/arch/arm/boot/dts/k2l-clocks.dtsi +++ b/arch/arm/boot/dts/k2l-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/ste-nomadik-nhk15.dts b/arch/arm/boot/dts/ste-nomadik-nhk15.dts index 3d0b8755caee..3d25dba143a5 100644 --- a/arch/arm/boot/dts/ste-nomadik-nhk15.dts +++ b/arch/arm/boot/dts/ste-nomadik-nhk15.dts @@ -17,6 +17,7 @@ }; aliases { + serial1 = &uart1; stmpe-i2c0 = &stmpe0; stmpe-i2c1 = &stmpe1; }; diff --git a/arch/arm/boot/dts/ste-nomadik-s8815.dts b/arch/arm/boot/dts/ste-nomadik-s8815.dts index 85d3b95dfdba..3c140d05f796 100644 --- a/arch/arm/boot/dts/ste-nomadik-s8815.dts +++ b/arch/arm/boot/dts/ste-nomadik-s8815.dts @@ -15,6 +15,10 @@ bootargs = "root=/dev/ram0 console=ttyAMA1,115200n8 earlyprintk"; }; + aliases { + serial1 = &uart1; + }; + src@101e0000 { /* These chrystal drivers are not used on this board */ disable-sxtalo; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index 9a5f2ba139b7..ef794a33b4dc 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -757,6 +757,7 @@ clock-names = "uartclk", "apb_pclk"; pinctrl-names = "default"; pinctrl-0 = <&uart0_default_mux>; + status = "disabled"; }; uart1: uart@101fb000 { diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 6c2327e1c732..70393574e0fa 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -67,12 +67,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index 5f337dc5c108..34f7b6980d21 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -4,23 +4,32 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <asm/unified.h> #define JUMP_LABEL_NOP_SIZE 4 -#ifdef CONFIG_THUMB2_KERNEL -#define JUMP_LABEL_NOP "nop.w" -#else -#define JUMP_LABEL_NOP "nop" -#endif +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + WASM(nop) "\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { asm_volatile_goto("1:\n\t" - JUMP_LABEL_NOP "\n\t" + WASM(b) " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); return false; l_yes: diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c index e39cbf488cfe..845a5dd9c42b 100644 --- a/arch/arm/kernel/jump_label.c +++ b/arch/arm/kernel/jump_label.c @@ -12,7 +12,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry, void *addr = (void *)entry->code; unsigned int insn; - if (type == JUMP_LABEL_ENABLE) + if (type == JUMP_LABEL_JMP) insn = arm_gen_branch(entry->code, entry->target); else insn = arm_gen_nop(); diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index d78c12e7cb5e..486cc4ded190 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2373,6 +2373,9 @@ static int of_dev_hwmod_lookup(struct device_node *np, * registers. This address is needed early so the OCP registers that * are part of the device's address space can be ioremapped properly. * + * If SYSC access is not needed, the registers will not be remapped + * and non-availability of MPU access is not treated as an error. + * * Returns 0 on success, -EINVAL if an invalid hwmod is passed, and * -ENXIO on absent or invalid register target address space. */ @@ -2387,6 +2390,11 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, _save_mpu_port_index(oh); + /* if we don't need sysc access we don't need to ioremap */ + if (!oh->class->sysc) + return 0; + + /* we can't continue without MPU PORT if we need sysc access */ if (oh->_int_flags & _HWMOD_NO_MPU_PORT) return -ENXIO; @@ -2396,8 +2404,10 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, oh->name); /* Extract the IO space from device tree blob */ - if (!np) + if (!np) { + pr_err("omap_hwmod: %s: no dt node\n", oh->name); return -ENXIO; + } va_start = of_iomap(np, index + oh->mpu_rt_idx); } else { @@ -2456,13 +2466,11 @@ static int __init _init(struct omap_hwmod *oh, void *data) oh->name, np->name); } - if (oh->class->sysc) { - r = _init_mpu_rt_base(oh, NULL, index, np); - if (r < 0) { - WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", - oh->name); - return 0; - } + r = _init_mpu_rt_base(oh, NULL, index, np); + if (r < 0) { + WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", + oh->name); + return 0; } r = _init_clocks(oh, NULL); diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index 2606c6608bd8..562247bced49 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -827,8 +827,7 @@ static struct omap_hwmod_class_sysconfig dra7xx_gpmc_sysc = { .syss_offs = 0x0014, .sysc_flags = (SYSC_HAS_AUTOIDLE | SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS), - .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | - SIDLE_SMART_WKUP), + .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART), .sysc_fields = &omap_hwmod_sysc_type1, }; @@ -844,7 +843,7 @@ static struct omap_hwmod dra7xx_gpmc_hwmod = { .class = &dra7xx_gpmc_hwmod_class, .clkdm_name = "l3main1_clkdm", /* Skip reset for CONFIG_OMAP_GPMC_DEBUG for bootloader timings */ - .flags = HWMOD_SWSUP_SIDLE | DEBUG_OMAP_GPMC_HWMOD_FLAGS, + .flags = DEBUG_OMAP_GPMC_HWMOD_FLAGS, .main_clk = "l3_iclk_div", .prcm = { .omap4 = { diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index 0689c3fb56e3..58093edeea2e 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -823,7 +823,7 @@ device_type = "dma"; reg = <0x0 0x1f270000 0x0 0x10000>, <0x0 0x1f200000 0x0 0x10000>, - <0x0 0x1b008000 0x0 0x2000>, + <0x0 0x1b000000 0x0 0x400000>, <0x0 0x1054a000 0x0 0x100>; interrupts = <0x0 0x82 0x4>, <0x0 0xb8 0x4>, diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 0fa47c4275cb..ef93b20bc964 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -44,12 +44,12 @@ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index c0e5165c2f76..1b5e0e843c3a 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -26,14 +26,28 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm goto("1: nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" - : : "i"(key) : : l_yes); + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm goto("1: b %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 3\n\t" + ".quad 1b, %l[l_yes], %c0\n\t" + ".popsection\n\t" + : : "i"(&((char *)key)[branch]) : : l_yes); return false; l_yes: diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 9d4aa18f2a82..e8ca6eaedd02 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -122,12 +122,12 @@ static int __init uefi_init(void) /* Show what we know for posterity */ c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), - sizeof(vendor)); + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) vendor[i] = c16[i]; vendor[i] = '\0'; - early_memunmap(c16, sizeof(vendor)); + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); } pr_info("EFI v%u.%.02u by %s\n", diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index 4f1fec7a46db..c2dd1ad3e648 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -28,7 +28,7 @@ void arch_jump_label_transform(struct jump_entry *entry, void *addr = (void *)entry->code; u32 insn; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { insn = aarch64_insn_gen_branch_imm(entry->code, entry->target, AARCH64_INSN_BRANCH_NOLINK); diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c index 23b1a97fae7a..52c179bec0cc 100644 --- a/arch/avr32/mach-at32ap/clock.c +++ b/arch/avr32/mach-at32ap/clock.c @@ -80,6 +80,9 @@ int clk_enable(struct clk *clk) { unsigned long flags; + if (!clk) + return 0; + spin_lock_irqsave(&clk_lock, flags); __clk_enable(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -106,6 +109,9 @@ void clk_disable(struct clk *clk) { unsigned long flags; + if (IS_ERR_OR_NULL(clk)) + return; + spin_lock_irqsave(&clk_lock, flags); __clk_disable(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -117,6 +123,9 @@ unsigned long clk_get_rate(struct clk *clk) unsigned long flags; unsigned long rate; + if (!clk) + return 0; + spin_lock_irqsave(&clk_lock, flags); rate = clk->get_rate(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -129,6 +138,9 @@ long clk_round_rate(struct clk *clk, unsigned long rate) { unsigned long flags, actual_rate; + if (!clk) + return 0; + if (!clk->set_rate) return -ENOSYS; @@ -145,6 +157,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate) unsigned long flags; long ret; + if (!clk) + return 0; + if (!clk->set_rate) return -ENOSYS; @@ -161,6 +176,9 @@ int clk_set_parent(struct clk *clk, struct clk *parent) unsigned long flags; int ret; + if (!clk) + return 0; + if (!clk->set_parent) return -ENOSYS; @@ -174,7 +192,7 @@ EXPORT_SYMBOL(clk_set_parent); struct clk *clk_get_parent(struct clk *clk) { - return clk->parent; + return !clk ? NULL : clk->parent; } EXPORT_SYMBOL(clk_get_parent); diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index 843ba435e43b..df896a1c41d3 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -66,12 +66,12 @@ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index 5a696e507930..172b7e5efc53 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -90,12 +90,12 @@ static inline void fence(void) do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 7ecba84656d4..752e0b86c171 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -133,12 +133,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 608aa57799c8..e77672539e8e 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -26,14 +26,29 @@ #define NOP_INSN "nop" #endif -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:\t" NOP_INSN "\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\tj %l[l_yes]\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + WORD_INSN " 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index dda800e9e731..3e586daa3a32 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -51,7 +51,7 @@ void arch_jump_label_transform(struct jump_entry *e, /* Target must have the right alignment and ISA must be preserved. */ BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT); - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op; insn.j_format.target = e->target >> J_RANGE_SHIFT; } else { diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig index 269c23d23fcb..1a8f6f95689e 100644 --- a/arch/parisc/configs/c8000_defconfig +++ b/arch/parisc/configs/c8000_defconfig @@ -242,7 +242,6 @@ CONFIG_LOCKUP_DETECTOR=y CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_PANIC_ON_OOPS=y CONFIG_DEBUG_RT_MUTEXES=y -CONFIG_RT_MUTEX_TESTER=y CONFIG_PROVE_RCU_DELAY=y CONFIG_DEBUG_BLOCK_EXT_DEVT=y CONFIG_LATENCYTOP=y diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 33b148f825ba..0ffb08ff5125 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -295,7 +295,6 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_RT_MUTEXES=y -CONFIG_RT_MUTEX_TESTER=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y CONFIG_RCU_CPU_STALL_INFO=y diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 51ccc7232042..0eca6efc0631 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -76,12 +76,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_lwsync(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_lwsync(); \ ___p1; \ diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index efbf9a322a23..47e155f15433 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -18,14 +18,29 @@ #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" ".popsection \n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + "b %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index a1ed8a8c7cb4..6472472093d0 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -17,7 +17,7 @@ void arch_jump_label_transform(struct jump_entry *entry, { u32 *addr = (u32 *)(unsigned long)entry->code; - if (type == JUMP_LABEL_ENABLE) + if (type == JUMP_LABEL_JMP) patch_branch(addr, entry->target, 0); else patch_instruction(addr, PPC_INST_NOP); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 5cf5e6ea213b..7cf0df859d05 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1478,7 +1478,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) } /* Unmask the event */ - if (eeh_enabled()) + if (ret == EEH_NEXT_ERR_NONE && eeh_enabled()) enable_irq(eeh_event_irq); return ret; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5738d315248b..85cbc96eff6c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2220,7 +2220,7 @@ static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, unsigned levels, unsigned long limit, - unsigned long *current_offset) + unsigned long *current_offset, unsigned long *total_allocated) { struct page *tce_mem = NULL; __be64 *addr, *tmp; @@ -2236,6 +2236,7 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, } addr = page_address(tce_mem); memset(addr, 0, allocated); + *total_allocated += allocated; --levels; if (!levels) { @@ -2245,7 +2246,7 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, for (i = 0; i < entries; ++i) { tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, - levels, limit, current_offset); + levels, limit, current_offset, total_allocated); if (!tmp) break; @@ -2267,7 +2268,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, struct iommu_table *tbl) { void *addr; - unsigned long offset = 0, level_shift; + unsigned long offset = 0, level_shift, total_allocated = 0; const unsigned window_shift = ilog2(window_size); unsigned entries_shift = window_shift - page_shift; unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT); @@ -2286,7 +2287,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, - levels, tce_table_size, &offset); + levels, tce_table_size, &offset, &total_allocated); /* addr==NULL means that the first level allocation failed */ if (!addr) @@ -2308,7 +2309,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, page_shift); tbl->it_level_size = 1ULL << (level_shift - 3); tbl->it_indirect_levels = levels - 1; - tbl->it_allocated_size = offset; + tbl->it_allocated_size = total_allocated; pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", window_size, tce_table_size, bus_offset); diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index e6f8615a11eb..d48fe0162331 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -42,12 +42,12 @@ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 69972b7957ee..7f9fd5e3f1bf 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -12,14 +12,29 @@ * We use a brcl 0,2 instruction for jump labels at compile time so it * can be easily distinguished from a hotpatch generated instruction. */ -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n" ".pushsection __jump_table, \"aw\"\n" ".balign 8\n" ".quad 0b, %l[label], %0\n" ".popsection\n" - : : "X" (key) : : label); + : : "X" (&((char *)key)[branch]) : : label); + + return false; +label: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("0: brcl 15, %l[label]\n" + ".pushsection __jump_table, \"aw\"\n" + ".balign 8\n" + ".quad 0b, %l[label], %0\n" + ".popsection\n" + : : "X" (&((char *)key)[branch]) : : label); + return false; label: return true; diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index bff5e3b6d822..8ba32436effe 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -138,6 +138,8 @@ int init_cache_level(unsigned int cpu) union cache_topology ct; enum cache_type ctype; + if (!test_facility(34)) + return -EOPNOTSUPP; if (!this_cpu_ci) return -EINVAL; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index a90299600483..a83d2248fea9 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -64,7 +64,7 @@ static void __jump_label_transform(struct jump_entry *entry, { struct insn old, new; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { jump_label_make_nop(entry, &old); jump_label_make_branch(entry, &new); } else { diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 4614d415bb58..93cb1d09493d 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -15,7 +15,7 @@ #include <asm/mmu_context.h> #include <asm/facility.h> -static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE; +static DEFINE_STATIC_KEY_FALSE(have_mvcos); static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, unsigned long size) @@ -104,7 +104,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { - if (static_key_false(&have_mvcos)) + if (static_branch_likely(&have_mvcos)) return copy_from_user_mvcos(to, from, n); return copy_from_user_mvcp(to, from, n); } @@ -177,7 +177,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { - if (static_key_false(&have_mvcos)) + if (static_branch_likely(&have_mvcos)) return copy_to_user_mvcos(to, from, n); return copy_to_user_mvcs(to, from, n); } @@ -240,7 +240,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n) { - if (static_key_false(&have_mvcos)) + if (static_branch_likely(&have_mvcos)) return copy_in_user_mvcos(to, from, n); return copy_in_user_mvc(to, from, n); } @@ -312,7 +312,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size) unsigned long __clear_user(void __user *to, unsigned long size) { - if (static_key_false(&have_mvcos)) + if (static_branch_likely(&have_mvcos)) return clear_user_mvcos(to, size); return clear_user_xc(to, size); } @@ -386,7 +386,7 @@ early_param("uaccess_primary", parse_uaccess_pt); static int __init uaccess_init(void) { if (!uaccess_primary && test_facility(27)) - static_key_slow_inc(&have_mvcos); + static_branch_enable(&have_mvcos); return 0; } early_initcall(uaccess_init); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index fee782acc2ee..8d2e5165865f 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -448,13 +448,13 @@ static void bpf_jit_prologue(struct bpf_jit *jit) EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, BPF_REG_1, offsetof(struct sk_buff, data)); } - /* BPF compatibility: clear A (%b7) and X (%b8) registers */ - if (REG_SEEN(BPF_REG_7)) - /* lghi %b7,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_7, 0); - if (REG_SEEN(BPF_REG_8)) - /* lghi %b8,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_8, 0); + /* BPF compatibility: clear A (%b0) and X (%b7) registers */ + if (REG_SEEN(BPF_REG_A)) + /* lghi %ba,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_A, 0); + if (REG_SEEN(BPF_REG_X)) + /* lghi %bx,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_X, 0); } /* diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 809941e33e12..14a928601657 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -60,12 +60,12 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index cc9b04a2b11b..62d0354d1727 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,16 +7,33 @@ #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" - "nop\n\t" - "nop\n\t" - ".pushsection __jump_table, \"aw\"\n\t" - ".align 4\n\t" - ".word 1b, %l[l_yes], %c0\n\t" - ".popsection \n\t" - : : "i" (key) : : l_yes); + asm_volatile_goto("1:\n\t" + "nop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + "b %l[l_yes]\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c index 48565c11e82a..59bbeff55024 100644 --- a/arch/sparc/kernel/jump_label.c +++ b/arch/sparc/kernel/jump_label.c @@ -16,7 +16,7 @@ void arch_jump_label_transform(struct jump_entry *entry, u32 val; u32 *insn = (u32 *) (unsigned long) entry->code; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { s32 off = (s32)entry->target - (s32)entry->code; #ifdef CONFIG_SPARC64 diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 83a91f976330..35ab97e4bb9b 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -22,7 +22,8 @@ extern int kmalloc_ok; extern unsigned long alloc_stack(int order, int atomic); extern void free_stack(unsigned long stack, int order); -extern int do_signal(void); +struct pt_regs; +extern void do_signal(struct pt_regs *regs); extern void interrupt_end(void); extern void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 68b9119841cd..a6d922672b9f 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -90,12 +90,14 @@ void *__switch_to(struct task_struct *from, struct task_struct *to) void interrupt_end(void) { + struct pt_regs *regs = ¤t->thread.regs; + if (need_resched()) schedule(); if (test_thread_flag(TIF_SIGPENDING)) - do_signal(); + do_signal(regs); if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(¤t->thread.regs); + tracehook_notify_resume(regs); } void exit_thread(void) diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 4f60e4aad790..57acbd67d85d 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -64,7 +64,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) signal_setup_done(err, ksig, singlestep); } -static int kern_do_signal(struct pt_regs *regs) +void do_signal(struct pt_regs *regs) { struct ksignal ksig; int handled_sig = 0; @@ -110,10 +110,4 @@ static int kern_do_signal(struct pt_regs *regs) */ if (!handled_sig) restore_saved_sigmask(); - return handled_sig; -} - -int do_signal(void) -{ - return kern_do_signal(¤t->thread.regs); } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index f1b3eb14b855..2077248e8a72 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -291,7 +291,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, /* We are under mmap_sem, release it such that current can terminate */ up_write(¤t->mm->mmap_sem); force_sig(SIGKILL, current); - do_signal(); + do_signal(¤t->thread.regs); } } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 557232f758b6..d8a9fce6ee2e 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -173,7 +173,7 @@ static void bad_segv(struct faultinfo fi, unsigned long ip) void fatal_sigsegv(void) { force_sigsegv(SIGSEGV, current); - do_signal(); + do_signal(¤t->thread.regs); /* * This is to tell gcc that we're not returning - do_signal * can, in general, return, but in this case, it's not, since diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b3a1a5d77d92..798658048db3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -133,7 +133,7 @@ config X86 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS - select HAVE_UID16 if X86_32 + select HAVE_UID16 if X86_32 || IA32_EMULATION select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER select IRQ_FORCED_THREADING @@ -1002,19 +1002,41 @@ config X86_THERMAL_VECTOR def_bool y depends on X86_MCE_INTEL -config VM86 - bool "Enable VM86 support" if EXPERT - default y +config X86_LEGACY_VM86 + bool "Legacy VM86 support (obsolete)" + default n depends on X86_32 ---help--- - This option is required by programs like DOSEMU to run - 16-bit real mode legacy code on x86 processors. It also may - be needed by software like XFree86 to initialize some video - cards via BIOS. Disabling this option saves about 6K. + This option allows user programs to put the CPU into V8086 + mode, which is an 80286-era approximation of 16-bit real mode. + + Some very old versions of X and/or vbetool require this option + for user mode setting. Similarly, DOSEMU will use it if + available to accelerate real mode DOS programs. However, any + recent version of DOSEMU, X, or vbetool should be fully + functional even without kernel VM86 support, as they will all + fall back to (pretty well performing) software emulation. + + Anything that works on a 64-bit kernel is unlikely to need + this option, as 64-bit kernels don't, and can't, support V8086 + mode. This option is also unrelated to 16-bit protected mode + and is not needed to run most 16-bit programs under Wine. + + Enabling this option adds considerable attack surface to the + kernel and slows down system calls and exception handling. + + Unless you use very old userspace or need the last drop of + performance in your real mode DOS games and can't use KVM, + say N here. + +config VM86 + bool + default X86_LEGACY_VM86 config X86_16BIT bool "Enable support for 16-bit segments" if EXPERT default y + depends on MODIFY_LDT_SYSCALL ---help--- This option is required by programs like Wine to run 16-bit protected mode legacy code on x86 processors. Disabling @@ -1509,6 +1531,7 @@ config X86_RESERVE_LOW config MATH_EMULATION bool + depends on MODIFY_LDT_SYSCALL prompt "Math emulation" if X86_32 ---help--- Linux can emulate a math coprocessor (used for floating point @@ -2053,6 +2076,22 @@ config CMDLINE_OVERRIDE This is used to work around broken boot loaders. This should be set to 'N' under normal conditions. +config MODIFY_LDT_SYSCALL + bool "Enable the LDT (local descriptor table)" if EXPERT + default y + ---help--- + Linux can allow user programs to install a per-process x86 + Local Descriptor Table (LDT) using the modify_ldt(2) system + call. This is required to run 16-bit or segmented code such as + DOSEMU or some Wine programs. It is also used by some very old + threading libraries. + + Enabling this feature adds a small amount of overhead to + context switches and increases the low-level kernel attack + surface. Disabling it removes the modify_ldt(2) system call. + + Saying 'N' here may make sense for embedded or server kernels. + source "kernel/livepatch/Kconfig" endmenu @@ -2522,7 +2561,7 @@ config IA32_EMULATION depends on X86_64 select BINFMT_ELF select COMPAT_BINFMT_ELF - select HAVE_UID16 + select ARCH_WANT_OLD_COMPAT_IPC ---help--- Include code to run legacy 32-bit programs under a 64-bit kernel. You should likely turn this on, unless you're @@ -2536,7 +2575,7 @@ config IA32_AOUT config X86_X32 bool "x32 ABI for 64-bit mode" - depends on X86_64 && IA32_EMULATION + depends on X86_64 ---help--- Include code to run binaries for the x32 native 32-bit ABI for 64-bit processors. An x32 process gets access to the @@ -2550,7 +2589,6 @@ config X86_X32 config COMPAT def_bool y depends on IA32_EMULATION || X86_X32 - select ARCH_WANT_OLD_COMPAT_IPC if COMPAT config COMPAT_FOR_U64_ALIGNMENT diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 118e6debc483..054ff969fcdb 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -39,6 +39,16 @@ ifdef CONFIG_X86_NEED_RELOCS LDFLAGS_vmlinux := --emit-relocs endif +# +# Prevent GCC from generating any FP code by mistake. +# +# This must happen before we try the -mpreferred-stack-boundary, see: +# +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 +# +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow +KBUILD_CFLAGS += $(call cc-option,-mno-avx,) + ifeq ($(CONFIG_X86_32),y) BITS := 32 UTS_MACHINE := i386 @@ -167,9 +177,6 @@ KBUILD_CFLAGS += -pipe KBUILD_CFLAGS += -Wno-sign-compare # KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -# prevent gcc from generating any FP code by mistake -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index d7b1f655b3ef..6a9b96b4624d 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -82,7 +82,7 @@ static unsigned long get_random_long(void) if (has_cpuflag(X86_FEATURE_TSC)) { debug_putstr(" RDTSC"); - rdtscll(raw); + raw = rdtsc(); random ^= raw; use_i8254 = false; diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 2c82bd150d43..7d69afd8b6fa 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1193,6 +1193,10 @@ static efi_status_t setup_e820(struct boot_params *params, unsigned int e820_type = 0; unsigned long m = efi->efi_memmap; +#ifdef CONFIG_X86_64 + m |= (u64)efi->efi_memmap_hi << 32; +#endif + d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size)); switch (d->type) { case EFI_RESERVED_TYPE: diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 7a144971db79..bd55dedd7614 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -2,6 +2,7 @@ # Makefile for the x86 low level entry code # obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o +obj-y += common.o obj-y += vdso/ obj-y += vsyscall/ diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index f4e6308c4200..3c71dd947c7b 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -135,9 +135,6 @@ For 32-bit we have the following conventions - kernel is built with movq %rbp, 4*8+\offset(%rsp) movq %rbx, 5*8+\offset(%rsp) .endm - .macro SAVE_EXTRA_REGS_RBP offset=0 - movq %rbp, 4*8+\offset(%rsp) - .endm .macro RESTORE_EXTRA_REGS offset=0 movq 0*8+\offset(%rsp), %r15 @@ -193,12 +190,6 @@ For 32-bit we have the following conventions - kernel is built with .macro RESTORE_C_REGS_EXCEPT_RCX_R11 RESTORE_C_REGS_HELPER 1,0,0,1,1 .endm - .macro RESTORE_RSI_RDI - RESTORE_C_REGS_HELPER 0,0,0,0,0 - .endm - .macro RESTORE_RSI_RDI_RDX - RESTORE_C_REGS_HELPER 0,0,0,0,1 - .endm .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 subq $-(15*8+\addskip), %rsp diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c new file mode 100644 index 000000000000..a3e9c7fa15d9 --- /dev/null +++ b/arch/x86/entry/common.c @@ -0,0 +1,375 @@ +/* + * common.c - C code for kernel entry and exit + * Copyright (c) 2015 Andrew Lutomirski + * GPL v2 + * + * Based on asm and ptrace code by many authors. The code here originated + * in ptrace.c and signal.c. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/tracehook.h> +#include <linux/audit.h> +#include <linux/seccomp.h> +#include <linux/signal.h> +#include <linux/export.h> +#include <linux/context_tracking.h> +#include <linux/user-return-notifier.h> +#include <linux/uprobes.h> + +#include <asm/desc.h> +#include <asm/traps.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +#ifdef CONFIG_CONTEXT_TRACKING +/* Called on entry from user mode with IRQs off. */ +__visible void enter_from_user_mode(void) +{ + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit(); +} +#endif + +static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) +{ +#ifdef CONFIG_X86_64 + if (arch == AUDIT_ARCH_X86_64) { + audit_syscall_entry(regs->orig_ax, regs->di, + regs->si, regs->dx, regs->r10); + } else +#endif + { + audit_syscall_entry(regs->orig_ax, regs->bx, + regs->cx, regs->dx, regs->si); + } +} + +/* + * We can return 0 to resume the syscall or anything else to go to phase + * 2. If we resume the syscall, we need to put something appropriate in + * regs->orig_ax. + * + * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax + * are fully functional. + * + * For phase 2's benefit, our return value is: + * 0: resume the syscall + * 1: go to phase 2; no seccomp phase 2 needed + * anything else: go to phase 2; pass return value to seccomp + */ +unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) +{ + unsigned long ret = 0; + u32 work; + + BUG_ON(regs != task_pt_regs(current)); + + work = ACCESS_ONCE(current_thread_info()->flags) & + _TIF_WORK_SYSCALL_ENTRY; + +#ifdef CONFIG_CONTEXT_TRACKING + /* + * If TIF_NOHZ is set, we are required to call user_exit() before + * doing anything that could touch RCU. + */ + if (work & _TIF_NOHZ) { + enter_from_user_mode(); + work &= ~_TIF_NOHZ; + } +#endif + +#ifdef CONFIG_SECCOMP + /* + * Do seccomp first -- it should minimize exposure of other + * code, and keeping seccomp fast is probably more valuable + * than the rest of this. + */ + if (work & _TIF_SECCOMP) { + struct seccomp_data sd; + + sd.arch = arch; + sd.nr = regs->orig_ax; + sd.instruction_pointer = regs->ip; +#ifdef CONFIG_X86_64 + if (arch == AUDIT_ARCH_X86_64) { + sd.args[0] = regs->di; + sd.args[1] = regs->si; + sd.args[2] = regs->dx; + sd.args[3] = regs->r10; + sd.args[4] = regs->r8; + sd.args[5] = regs->r9; + } else +#endif + { + sd.args[0] = regs->bx; + sd.args[1] = regs->cx; + sd.args[2] = regs->dx; + sd.args[3] = regs->si; + sd.args[4] = regs->di; + sd.args[5] = regs->bp; + } + + BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); + BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); + + ret = seccomp_phase1(&sd); + if (ret == SECCOMP_PHASE1_SKIP) { + regs->orig_ax = -1; + ret = 0; + } else if (ret != SECCOMP_PHASE1_OK) { + return ret; /* Go directly to phase 2 */ + } + + work &= ~_TIF_SECCOMP; + } +#endif + + /* Do our best to finish without phase 2. */ + if (work == 0) + return ret; /* seccomp and/or nohz only (ret == 0 here) */ + +#ifdef CONFIG_AUDITSYSCALL + if (work == _TIF_SYSCALL_AUDIT) { + /* + * If there is no more work to be done except auditing, + * then audit in phase 1. Phase 2 always audits, so, if + * we audit here, then we can't go on to phase 2. + */ + do_audit_syscall_entry(regs, arch); + return 0; + } +#endif + + return 1; /* Something is enabled that we can't handle in phase 1 */ +} + +/* Returns the syscall nr to run (which should match regs->orig_ax). */ +long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, + unsigned long phase1_result) +{ + long ret = 0; + u32 work = ACCESS_ONCE(current_thread_info()->flags) & + _TIF_WORK_SYSCALL_ENTRY; + + BUG_ON(regs != task_pt_regs(current)); + + /* + * If we stepped into a sysenter/syscall insn, it trapped in + * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. + * If user-mode had set TF itself, then it's still clear from + * do_debug() and we need to set it again to restore the user + * state. If we entered on the slow path, TF was already set. + */ + if (work & _TIF_SINGLESTEP) + regs->flags |= X86_EFLAGS_TF; + +#ifdef CONFIG_SECCOMP + /* + * Call seccomp_phase2 before running the other hooks so that + * they can see any changes made by a seccomp tracer. + */ + if (phase1_result > 1 && seccomp_phase2(phase1_result)) { + /* seccomp failures shouldn't expose any additional code. */ + return -1; + } +#endif + + if (unlikely(work & _TIF_SYSCALL_EMU)) + ret = -1L; + + if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && + tracehook_report_syscall_entry(regs)) + ret = -1L; + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->orig_ax); + + do_audit_syscall_entry(regs, arch); + + return ret ?: regs->orig_ax; +} + +long syscall_trace_enter(struct pt_regs *regs) +{ + u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); + + if (phase1_result == 0) + return regs->orig_ax; + else + return syscall_trace_enter_phase2(regs, arch, phase1_result); +} + +/* Deprecated. */ +void syscall_trace_leave(struct pt_regs *regs) +{ + bool step; + + /* + * We may come here right after calling schedule_user() + * or do_notify_resume(), in which case we can be in RCU + * user mode. + */ + user_exit(); + + audit_syscall_exit(regs); + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->ax); + + /* + * If TIF_SYSCALL_EMU is set, we only get here because of + * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). + * We already reported this syscall instruction in + * syscall_trace_enter(). + */ + step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && + !test_thread_flag(TIF_SYSCALL_EMU); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); + + user_enter(); +} + +static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) +{ + unsigned long top_of_stack = + (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING; + return (struct thread_info *)(top_of_stack - THREAD_SIZE); +} + +/* Called with IRQs disabled. */ +__visible void prepare_exit_to_usermode(struct pt_regs *regs) +{ + if (WARN_ON(!irqs_disabled())) + local_irq_disable(); + + /* + * In order to return to user mode, we need to have IRQs off with + * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY, + * _TIF_UPROBE, or _TIF_NEED_RESCHED set. Several of these flags + * can be set at any time on preemptable kernels if we have IRQs on, + * so we need to loop. Disabling preemption wouldn't help: doing the + * work to clear some of the flags can sleep. + */ + while (true) { + u32 cached_flags = + READ_ONCE(pt_regs_to_thread_info(regs)->flags); + + if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | + _TIF_UPROBE | _TIF_NEED_RESCHED | + _TIF_USER_RETURN_NOTIFY))) + break; + + /* We have work to do. */ + local_irq_enable(); + + if (cached_flags & _TIF_NEED_RESCHED) + schedule(); + + if (cached_flags & _TIF_UPROBE) + uprobe_notify_resume(regs); + + /* deal with pending signal delivery */ + if (cached_flags & _TIF_SIGPENDING) + do_signal(regs); + + if (cached_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + + if (cached_flags & _TIF_USER_RETURN_NOTIFY) + fire_user_return_notifiers(); + + /* Disable IRQs and retry */ + local_irq_disable(); + } + + user_enter(); +} + +/* + * Called with IRQs on and fully valid regs. Returns with IRQs off in a + * state such that we can immediately switch to user mode. + */ +__visible void syscall_return_slowpath(struct pt_regs *regs) +{ + struct thread_info *ti = pt_regs_to_thread_info(regs); + u32 cached_flags = READ_ONCE(ti->flags); + bool step; + + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); + + if (WARN(irqs_disabled(), "syscall %ld left IRQs disabled", + regs->orig_ax)) + local_irq_enable(); + + /* + * First do one-time work. If these work items are enabled, we + * want to run them exactly once per syscall exit with IRQs on. + */ + if (cached_flags & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | + _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)) { + audit_syscall_exit(regs); + + if (cached_flags & _TIF_SYSCALL_TRACEPOINT) + trace_sys_exit(regs, regs->ax); + + /* + * If TIF_SYSCALL_EMU is set, we only get here because of + * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). + * We already reported this syscall instruction in + * syscall_trace_enter(). + */ + step = unlikely( + (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)) + == _TIF_SINGLESTEP); + if (step || cached_flags & _TIF_SYSCALL_TRACE) + tracehook_report_syscall_exit(regs, step); + } + +#ifdef CONFIG_COMPAT + /* + * Compat syscalls set TS_COMPAT. Make sure we clear it before + * returning to user mode. + */ + ti->status &= ~TS_COMPAT; +#endif + + local_irq_disable(); + prepare_exit_to_usermode(regs); +} + +/* + * Deprecated notification of userspace execution resumption + * - triggered by the TIF_WORK_MASK flags + */ +__visible void +do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) +{ + user_exit(); + + if (thread_info_flags & _TIF_UPROBE) + uprobe_notify_resume(regs); + + /* deal with pending signal delivery */ + if (thread_info_flags & _TIF_SIGPENDING) + do_signal(regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) + fire_user_return_notifiers(); + + user_enter(); +} diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 21dc60a60b5f..f940e24acaf0 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -525,34 +525,12 @@ work_resched: work_notifysig: # deal with pending signals and # notify-resume requests -#ifdef CONFIG_VM86 - testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) - movl %esp, %eax - jnz work_notifysig_v86 # returning to kernel-space or - # vm86-space -1: -#else - movl %esp, %eax -#endif TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - movb PT_CS(%esp), %bl - andb $SEGMENT_RPL_MASK, %bl - cmpb $USER_RPL, %bl - jb resume_kernel + movl %esp, %eax xorl %edx, %edx call do_notify_resume jmp resume_userspace - -#ifdef CONFIG_VM86 - ALIGN -work_notifysig_v86: - pushl %ecx # save ti_flags for do_notify_resume - call save_v86_state # %eax contains pt_regs pointer - popl %ecx - movl %eax, %esp - jmp 1b -#endif END(work_pending) # perform syscall exit tracing diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8cb3e438f21e..d3033183ed70 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -33,7 +33,6 @@ #include <asm/paravirt.h> #include <asm/percpu.h> #include <asm/asm.h> -#include <asm/context_tracking.h> #include <asm/smap.h> #include <asm/pgtable_types.h> #include <linux/err.h> @@ -229,6 +228,11 @@ entry_SYSCALL_64_fastpath: */ USERGS_SYSRET64 +GLOBAL(int_ret_from_sys_call_irqs_off) + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) + jmp int_ret_from_sys_call + /* Do syscall entry tracing */ tracesys: movq %rsp, %rdi @@ -272,69 +276,11 @@ tracesys_phase2: * Has correct iret frame. */ GLOBAL(int_ret_from_sys_call) - DISABLE_INTERRUPTS(CLBR_NONE) -int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */ - TRACE_IRQS_OFF - movl $_TIF_ALLWORK_MASK, %edi - /* edi: mask to check */ -GLOBAL(int_with_check) - LOCKDEP_SYS_EXIT_IRQ - GET_THREAD_INFO(%rcx) - movl TI_flags(%rcx), %edx - andl %edi, %edx - jnz int_careful - andl $~TS_COMPAT, TI_status(%rcx) - jmp syscall_return - - /* - * Either reschedule or signal or syscall exit tracking needed. - * First do a reschedule test. - * edx: work, edi: workmask - */ -int_careful: - bt $TIF_NEED_RESCHED, %edx - jnc int_very_careful - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - pushq %rdi - SCHEDULE_USER - popq %rdi - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp int_with_check - - /* handle signals and tracing -- both require a full pt_regs */ -int_very_careful: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) SAVE_EXTRA_REGS - /* Check for syscall exit trace */ - testl $_TIF_WORK_SYSCALL_EXIT, %edx - jz int_signal - pushq %rdi - leaq 8(%rsp), %rdi /* &ptregs -> arg1 */ - call syscall_trace_leave - popq %rdi - andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU), %edi - jmp int_restore_rest - -int_signal: - testl $_TIF_DO_NOTIFY_MASK, %edx - jz 1f - movq %rsp, %rdi /* &ptregs -> arg1 */ - xorl %esi, %esi /* oldset -> arg2 */ - call do_notify_resume -1: movl $_TIF_WORK_MASK, %edi -int_restore_rest: + movq %rsp, %rdi + call syscall_return_slowpath /* returns with IRQs disabled */ RESTORE_EXTRA_REGS - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp int_with_check - -syscall_return: - /* The IRETQ could re-enable interrupts: */ - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_IRETQ + TRACE_IRQS_IRETQ /* we're about to change IF */ /* * Try to use SYSRET instead of IRET if we're returning to @@ -555,23 +501,22 @@ END(irq_entries_start) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func cld - /* - * Since nothing in interrupt handling code touches r12...r15 members - * of "struct pt_regs", and since interrupts can nest, we can save - * four stack slots and simultaneously provide - * an unwind-friendly stack layout by saving "truncated" pt_regs - * exactly up to rbp slot, without these members. - */ - ALLOC_PT_GPREGS_ON_STACK -RBP - SAVE_C_REGS -RBP - /* this goes to 0(%rsp) for unwinder, not for saving the value: */ - SAVE_EXTRA_REGS_RBP -RBP - - leaq -RBP(%rsp), %rdi /* arg1 for \func (pointer to pt_regs) */ + ALLOC_PT_GPREGS_ON_STACK + SAVE_C_REGS + SAVE_EXTRA_REGS - testb $3, CS-RBP(%rsp) + testb $3, CS(%rsp) jz 1f + + /* + * IRQ from user mode. Switch to kernel gsbase and inform context + * tracking that we're in kernel mode. + */ SWAPGS +#ifdef CONFIG_CONTEXT_TRACKING + call enter_from_user_mode +#endif + 1: /* * Save previous stack pointer, optionally switch to interrupt stack. @@ -580,14 +525,14 @@ END(irq_entries_start) * a little cheaper to use a separate counter in the PDA (short of * moving irq_enter into assembly, which would be too much work) */ - movq %rsp, %rsi + movq %rsp, %rdi incl PER_CPU_VAR(irq_count) cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp - pushq %rsi + pushq %rdi /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF - call \func + call \func /* rdi points to pt_regs */ .endm /* @@ -606,34 +551,19 @@ ret_from_intr: decl PER_CPU_VAR(irq_count) /* Restore saved previous stack */ - popq %rsi - /* return code expects complete pt_regs - adjust rsp accordingly: */ - leaq -RBP(%rsi), %rsp + popq %rsp testb $3, CS(%rsp) jz retint_kernel - /* Interrupt came from user space */ -retint_user: - GET_THREAD_INFO(%rcx) - /* %rcx: thread info. Interrupts are off. */ -retint_with_reschedule: - movl $_TIF_WORK_MASK, %edi -retint_check: + /* Interrupt came from user space */ LOCKDEP_SYS_EXIT_IRQ - movl TI_flags(%rcx), %edx - andl %edi, %edx - jnz retint_careful - -retint_swapgs: /* return to user-space */ - /* - * The iretq could re-enable interrupts: - */ - DISABLE_INTERRUPTS(CLBR_ANY) +GLOBAL(retint_user) + mov %rsp,%rdi + call prepare_exit_to_usermode TRACE_IRQS_IRETQ - SWAPGS - jmp restore_c_regs_and_iret + jmp restore_regs_and_iret /* Returning to kernel space */ retint_kernel: @@ -657,6 +587,8 @@ retint_kernel: * At this label, code paths which return to kernel and to user, * which come from interrupts/exception and from syscalls, merge. */ +restore_regs_and_iret: + RESTORE_EXTRA_REGS restore_c_regs_and_iret: RESTORE_C_REGS REMOVE_PT_GPREGS_FROM_STACK 8 @@ -707,37 +639,6 @@ native_irq_return_ldt: popq %rax jmp native_irq_return_iret #endif - - /* edi: workmask, edx: work */ -retint_careful: - bt $TIF_NEED_RESCHED, %edx - jnc retint_signal - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - pushq %rdi - SCHEDULE_USER - popq %rdi - GET_THREAD_INFO(%rcx) - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp retint_check - -retint_signal: - testl $_TIF_DO_NOTIFY_MASK, %edx - jz retint_swapgs - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_EXTRA_REGS - movq $-1, ORIG_RAX(%rsp) - xorl %esi, %esi /* oldset */ - movq %rsp, %rdi /* &pt_regs */ - call do_notify_resume - RESTORE_EXTRA_REGS - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - GET_THREAD_INFO(%rcx) - jmp retint_with_reschedule - END(common_interrupt) /* @@ -1143,12 +1044,22 @@ ENTRY(error_entry) SAVE_EXTRA_REGS 8 xorl %ebx, %ebx testb $3, CS+8(%rsp) - jz error_kernelspace + jz .Lerror_kernelspace - /* We entered from user mode */ +.Lerror_entry_from_usermode_swapgs: + /* + * We entered from user mode or we're pretending to have entered + * from user mode due to an IRET fault. + */ SWAPGS -error_entry_done: +.Lerror_entry_from_usermode_after_swapgs: +#ifdef CONFIG_CONTEXT_TRACKING + call enter_from_user_mode +#endif + +.Lerror_entry_done: + TRACE_IRQS_OFF ret @@ -1158,31 +1069,30 @@ error_entry_done: * truncated RIP for IRET exceptions returning to compat mode. Check * for these here too. */ -error_kernelspace: +.Lerror_kernelspace: incl %ebx leaq native_irq_return_iret(%rip), %rcx cmpq %rcx, RIP+8(%rsp) - je error_bad_iret + je .Lerror_bad_iret movl %ecx, %eax /* zero extend */ cmpq %rax, RIP+8(%rsp) - je bstep_iret + je .Lbstep_iret cmpq $gs_change, RIP+8(%rsp) - jne error_entry_done + jne .Lerror_entry_done /* * hack: gs_change can fail with user gsbase. If this happens, fix up * gsbase and proceed. We'll fix up the exception and land in * gs_change's error handler with kernel gsbase. */ - SWAPGS - jmp error_entry_done + jmp .Lerror_entry_from_usermode_swapgs -bstep_iret: +.Lbstep_iret: /* Fix truncated RIP */ movq %rcx, RIP+8(%rsp) /* fall through */ -error_bad_iret: +.Lerror_bad_iret: /* * We came from an IRET to user mode, so we have user gsbase. * Switch to kernel gsbase: @@ -1198,7 +1108,7 @@ error_bad_iret: call fixup_bad_iret mov %rax, %rsp decl %ebx - jmp error_entry_done + jmp .Lerror_entry_from_usermode_after_swapgs END(error_entry) @@ -1209,7 +1119,6 @@ END(error_entry) */ ENTRY(error_exit) movl %ebx, %eax - RESTORE_EXTRA_REGS DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl %eax, %eax diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 5a1844765a7a..8997383ba170 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -22,8 +22,8 @@ #define __AUDIT_ARCH_LE 0x40000000 #ifndef CONFIG_AUDITSYSCALL -# define sysexit_audit ia32_ret_from_sys_call -# define sysretl_audit ia32_ret_from_sys_call +# define sysexit_audit ia32_ret_from_sys_call_irqs_off +# define sysretl_audit ia32_ret_from_sys_call_irqs_off #endif .section .entry.text, "ax" @@ -140,7 +140,8 @@ sysexit_from_sys_call: */ andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) movl RIP(%rsp), %ecx /* User %eip */ - RESTORE_RSI_RDI + movl RSI(%rsp), %esi + movl RDI(%rsp), %edi xorl %edx, %edx /* Do not leak kernel information */ xorq %r8, %r8 xorq %r9, %r9 @@ -208,10 +209,10 @@ sysexit_from_sys_call: .endm .macro auditsys_exit exit - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz ia32_ret_from_sys_call TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jnz ia32_ret_from_sys_call movl %eax, %esi /* second arg, syscall return value */ cmpl $-MAX_ERRNO, %eax /* is it an error ? */ jbe 1f @@ -230,7 +231,7 @@ sysexit_from_sys_call: movq %rax, R10(%rsp) movq %rax, R9(%rsp) movq %rax, R8(%rsp) - jmp int_with_check + jmp int_ret_from_sys_call_irqs_off .endm sysenter_auditsys: @@ -365,7 +366,9 @@ cstar_dispatch: sysretl_from_sys_call: andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - RESTORE_RSI_RDI_RDX + movl RDX(%rsp), %edx + movl RSI(%rsp), %esi + movl RDI(%rsp), %edi movl RIP(%rsp), %ecx movl EFLAGS(%rsp), %r11d xorq %r10, %r10 @@ -429,8 +432,48 @@ cstar_tracesys: END(entry_SYSCALL_compat) ia32_badarg: - ASM_CLAC - movq $-EFAULT, RAX(%rsp) + /* + * So far, we've entered kernel mode, set AC, turned on IRQs, and + * saved C regs except r8-r11. We haven't done any of the other + * standard entry work, though. We want to bail, but we shouldn't + * treat this as a syscall entry since we don't even know what the + * args are. Instead, treat this as a non-syscall entry, finish + * the entry work, and immediately exit after setting AX = -EFAULT. + * + * We're really just being polite here. Killing the task outright + * would be a reasonable action, too. Given that the only valid + * way to have gotten here is through the vDSO, and we already know + * that the stack pointer is bad, the task isn't going to survive + * for long no matter what we do. + */ + + ASM_CLAC /* undo STAC */ + movq $-EFAULT, RAX(%rsp) /* return -EFAULT if possible */ + + /* Fill in the rest of pt_regs */ + xorl %eax, %eax + movq %rax, R11(%rsp) + movq %rax, R10(%rsp) + movq %rax, R9(%rsp) + movq %rax, R8(%rsp) + SAVE_EXTRA_REGS + + /* Turn IRQs back off. */ + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + + /* Now finish entering normal kernel mode. */ +#ifdef CONFIG_CONTEXT_TRACKING + call enter_from_user_mode +#endif + + /* And exit again. */ + jmp retint_user + +ia32_ret_from_sys_call_irqs_off: + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) + ia32_ret_from_sys_call: xorl %eax, %eax /* Do not leak kernel information */ movq %rax, R11(%rsp) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ef8187f9d28d..25e3cf1cd8fd 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -365,3 +365,18 @@ 356 i386 memfd_create sys_memfd_create 357 i386 bpf sys_bpf 358 i386 execveat sys_execveat stub32_execveat +359 i386 socket sys_socket +360 i386 socketpair sys_socketpair +361 i386 bind sys_bind +362 i386 connect sys_connect +363 i386 listen sys_listen +364 i386 accept4 sys_accept4 +365 i386 getsockopt sys_getsockopt compat_sys_getsockopt +366 i386 setsockopt sys_setsockopt compat_sys_setsockopt +367 i386 getsockname sys_getsockname +368 i386 getpeername sys_getpeername +369 i386 sendto sys_sendto +370 i386 sendmsg sys_sendmsg compat_sys_sendmsg +371 i386 recvfrom sys_recvfrom compat_sys_recvfrom +372 i386 recvmsg sys_recvmsg compat_sys_recvmsg +373 i386 shutdown sys_shutdown diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index e97032069f88..96c06172c2ff 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -8,7 +8,7 @@ KASAN_SANITIZE := n VDSO64-$(CONFIG_X86_64) := y VDSOX32-$(CONFIG_X86_X32_ABI) := y VDSO32-$(CONFIG_X86_32) := y -VDSO32-$(CONFIG_COMPAT) := y +VDSO32-$(CONFIG_IA32_EMULATION) := y # files to link into the vdso vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o @@ -20,7 +20,7 @@ obj-y += vma.o vdso_img-$(VDSO64-y) += 64 vdso_img-$(VDSOX32-y) += x32 vdso_img-$(VDSO32-y) += 32-int80 -vdso_img-$(CONFIG_COMPAT) += 32-syscall +vdso_img-$(CONFIG_IA32_EMULATION) += 32-syscall vdso_img-$(VDSO32-y) += 32-sysenter obj-$(VDSO32-y) += vdso32-setup.o @@ -126,7 +126,7 @@ $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE # Build multiple 32-bit vDSO images to choose from at boot time. # vdso32.so-$(VDSO32-y) += int80 -vdso32.so-$(CONFIG_COMPAT) += syscall +vdso32.so-$(CONFIG_IA32_EMULATION) += syscall vdso32.so-$(VDSO32-y) += sysenter vdso32-images = $(vdso32.so-y:%=vdso32-%.so) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 9793322751e0..ca94fa649251 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -175,20 +175,8 @@ static notrace cycle_t vread_pvclock(int *mode) notrace static cycle_t vread_tsc(void) { - cycle_t ret; - u64 last; - - /* - * Empirically, a fence (of type that depends on the CPU) - * before rdtsc is enough to ensure that rdtsc is ordered - * with respect to loads. The various CPU manuals are unclear - * as to whether rdtsc can be reordered with later loads, - * but no one has ever seen it happen. - */ - rdtsc_barrier(); - ret = (cycle_t)__native_read_tsc(); - - last = gtod->cycle_last; + cycle_t ret = (cycle_t)rdtsc_ordered(); + u64 last = gtod->cycle_last; if (likely(ret >= last)) return ret; diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 1c9f750c3859..434543145d78 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -177,7 +177,7 @@ up_fail: return ret; } -#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static int load_vdso32(void) { int ret; @@ -219,8 +219,11 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm, return map_vdso(&vdso_image_x32, true); } #endif - +#ifdef CONFIG_IA32_EMULATION return load_vdso32(); +#else + return 0; +#endif } #endif #else diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 2dcc6ff6fdcc..26a46f44e298 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -290,7 +290,7 @@ static struct vm_area_struct gate_vma = { struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { -#ifdef CONFIG_IA32_EMULATION +#ifdef CONFIG_COMPAT if (!mm || mm->context.ia32_compat) return NULL; #endif diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index ae3a29ae875b..a0a19b7ba22d 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -34,99 +34,6 @@ #include <asm/sys_ia32.h> #include <asm/smap.h> -int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) -{ - int err = 0; - bool ia32 = test_thread_flag(TIF_IA32); - - if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) - return -EFAULT; - - put_user_try { - /* If you change siginfo_t structure, please make sure that - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. */ - put_user_ex(from->si_signo, &to->si_signo); - put_user_ex(from->si_errno, &to->si_errno); - put_user_ex((short)from->si_code, &to->si_code); - - if (from->si_code < 0) { - put_user_ex(from->si_pid, &to->si_pid); - put_user_ex(from->si_uid, &to->si_uid); - put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); - } else { - /* - * First 32bits of unions are always present: - * si_pid === si_band === si_tid === si_addr(LS half) - */ - put_user_ex(from->_sifields._pad[0], - &to->_sifields._pad[0]); - switch (from->si_code >> 16) { - case __SI_FAULT >> 16: - break; - case __SI_SYS >> 16: - put_user_ex(from->si_syscall, &to->si_syscall); - put_user_ex(from->si_arch, &to->si_arch); - break; - case __SI_CHLD >> 16: - if (ia32) { - put_user_ex(from->si_utime, &to->si_utime); - put_user_ex(from->si_stime, &to->si_stime); - } else { - put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); - put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); - } - put_user_ex(from->si_status, &to->si_status); - /* FALL THROUGH */ - default: - case __SI_KILL >> 16: - put_user_ex(from->si_uid, &to->si_uid); - break; - case __SI_POLL >> 16: - put_user_ex(from->si_fd, &to->si_fd); - break; - case __SI_TIMER >> 16: - put_user_ex(from->si_overrun, &to->si_overrun); - put_user_ex(ptr_to_compat(from->si_ptr), - &to->si_ptr); - break; - /* This is not generated by the kernel as of now. */ - case __SI_RT >> 16: - case __SI_MESGQ >> 16: - put_user_ex(from->si_uid, &to->si_uid); - put_user_ex(from->si_int, &to->si_int); - break; - } - } - } put_user_catch(err); - - return err; -} - -int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) -{ - int err = 0; - u32 ptr32; - - if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) - return -EFAULT; - - get_user_try { - get_user_ex(to->si_signo, &from->si_signo); - get_user_ex(to->si_errno, &from->si_errno); - get_user_ex(to->si_code, &from->si_code); - - get_user_ex(to->si_pid, &from->si_pid); - get_user_ex(to->si_uid, &from->si_uid); - get_user_ex(ptr32, &from->si_ptr); - to->si_ptr = compat_ptr(ptr32); - } get_user_catch(err); - - return err; -} - /* * Do a signal return; undo the signal stack. */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index e51a8f803f55..0681d2532527 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -57,12 +57,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ @@ -74,12 +74,12 @@ do { \ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ @@ -91,15 +91,4 @@ do { \ #define smp_mb__before_atomic() barrier() #define smp_mb__after_atomic() barrier() -/* - * Stop RDTSC speculation. This is needed when you need to use RDTSC - * (or get_cycles or vread that possibly accesses the TSC) in a defined - * code region. - */ -static __always_inline void rdtsc_barrier(void) -{ - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, - "lfence", X86_FEATURE_LFENCE_RDTSC); -} - #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/context_tracking.h b/arch/x86/include/asm/context_tracking.h deleted file mode 100644 index 1fe49704b146..000000000000 --- a/arch/x86/include/asm/context_tracking.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _ASM_X86_CONTEXT_TRACKING_H -#define _ASM_X86_CONTEXT_TRACKING_H - -#ifdef CONFIG_CONTEXT_TRACKING -# define SCHEDULE_USER call schedule_user -#else -# define SCHEDULE_USER call schedule -#endif - -#endif diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index a0bf89fd2647..4e10d73cf018 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -280,21 +280,6 @@ static inline void clear_LDT(void) set_ldt(NULL, 0); } -/* - * load one particular LDT into the current CPU - */ -static inline void load_LDT_nolock(mm_context_t *pc) -{ - set_ldt(pc->ldt, pc->size); -} - -static inline void load_LDT(mm_context_t *pc) -{ - preempt_disable(); - load_LDT_nolock(pc); - preempt_enable(); -} - static inline unsigned long get_desc_base(const struct desc_struct *desc) { return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index f161c189c27b..141c561f4664 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -78,7 +78,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #ifdef CONFIG_X86_64 extern unsigned int vdso64_enabled; #endif -#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) extern unsigned int vdso32_enabled; #endif @@ -187,8 +187,8 @@ static inline void elf_common_init(struct thread_struct *t, #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ elf_common_init(¤t->thread, regs, __USER_DS) -void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); -#define compat_start_thread start_thread_ia32 +void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp); +#define compat_start_thread compat_start_thread void set_personality_ia32(bool); #define COMPAT_SET_PERSONALITY(ex) \ @@ -344,14 +344,9 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, */ static inline int mmap_is_ia32(void) { -#ifdef CONFIG_X86_32 - return 1; -#endif -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_ADDR32)) - return 1; -#endif - return 0; + return config_enabled(CONFIG_X86_32) || + (config_enabled(CONFIG_COMPAT) && + test_thread_flag(TIF_ADDR32)); } /* Do not change the values. See get_align_mask() */ diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index d0e8e0141041..28019765442e 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -22,15 +22,6 @@ struct ucontext_ia32 { compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; -struct ucontext_x32 { - unsigned int uc_flags; - unsigned int uc_link; - compat_stack_t uc_stack; - unsigned int uc__pad0; /* needed for alignment */ - struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ - compat_sigset_t uc_sigmask; /* mask last for extensibility */ -}; - /* This matches struct stat64 in glibc2.2, hence the absolutely * insane amounts of padding around dev_t's. */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4c2d2eb2060a..6ca9fd6234e1 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -117,16 +117,6 @@ #define FPU_IRQ 13 -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 - -#ifndef __ASSEMBLY__ -static inline int invalid_vm86_irq(int irq) -{ - return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; -} -#endif - /* * Size the maximum number of interrupts. * diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a4c1cf7e93f8..28d7a857f9d1 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -16,7 +16,7 @@ # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC #endif -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" @@ -24,7 +24,24 @@ static __always_inline bool arch_static_branch(struct static_key *key) _ASM_ALIGN "\n\t" _ASM_PTR "1b, %l[l_yes], %c0 \n\t" ".popsection \n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:" + ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" + "2:\n\t" + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_ALIGN "\n\t" + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 031f6266f425..0d9b14f60d2c 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h @@ -2,7 +2,6 @@ #define _ASM_X86_MATH_EMU_H #include <asm/ptrace.h> -#include <asm/vm86.h> /* This structure matches the layout of the data saved to the stack following a device-not-present interrupt, part of it saved @@ -10,9 +9,6 @@ */ struct math_emu_info { long ___orig_eip; - union { - struct pt_regs *regs; - struct kernel_vm86_regs *vm86; - }; + struct pt_regs *regs; }; #endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 09b9620a73b4..55234d5e7160 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -9,8 +9,9 @@ * we put the segment information here. */ typedef struct { - void *ldt; - int size; +#ifdef CONFIG_MODIFY_LDT_SYSCALL + struct ldt_struct *ldt; +#endif #ifdef CONFIG_X86_64 /* True if mm supports a task running in 32 bit compatibility mode. */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 804a3a6030ca..379cd3658799 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -33,12 +33,68 @@ static inline void load_mm_cr4(struct mm_struct *mm) static inline void load_mm_cr4(struct mm_struct *mm) {} #endif +#ifdef CONFIG_MODIFY_LDT_SYSCALL +/* + * ldt_structs can be allocated, used, and freed, but they are never + * modified while live. + */ +struct ldt_struct { + /* + * Xen requires page-aligned LDTs with special permissions. This is + * needed to prevent us from installing evil descriptors such as + * call gates. On native, we could merge the ldt_struct and LDT + * allocations, but it's not worth trying to optimize. + */ + struct desc_struct *entries; + int size; +}; + /* * Used for LDT copy/destruction. */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); +#else /* CONFIG_MODIFY_LDT_SYSCALL */ +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + return 0; +} +static inline void destroy_context(struct mm_struct *mm) {} +#endif +static inline void load_mm_ldt(struct mm_struct *mm) +{ +#ifdef CONFIG_MODIFY_LDT_SYSCALL + struct ldt_struct *ldt; + + /* lockless_dereference synchronizes with smp_store_release */ + ldt = lockless_dereference(mm->context.ldt); + + /* + * Any change to mm->context.ldt is followed by an IPI to all + * CPUs with the mm active. The LDT will not be freed until + * after the IPI is handled by all such CPUs. This means that, + * if the ldt_struct changes before we return, the values we see + * will be safe, and the new values will be loaded before we run + * any user code. + * + * NB: don't try to convert this to use RCU without extreme care. + * We would still need IRQs off, because we don't want to change + * the local LDT after an IPI loaded a newer value than the one + * that we can see. + */ + + if (unlikely(ldt)) + set_ldt(ldt->entries, ldt->size); + else + clear_LDT(); +#else + clear_LDT(); +#endif + + DEBUG_LOCKS_WARN_ON(preemptible()); +} static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { @@ -70,6 +126,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* Load per-mm CR4 state */ load_mm_cr4(next); +#ifdef CONFIG_MODIFY_LDT_SYSCALL /* * Load the LDT, if the LDT is different. * @@ -78,12 +135,13 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * was called and then modify_ldt changed * prev->context.ldt but suppressed an IPI to this CPU. * In this case, prev->context.ldt != NULL, because we - * never free an LDT while the mm still exists. That - * means that next->context.ldt != prev->context.ldt, - * because mms never share an LDT. + * never set context.ldt to NULL while the mm still + * exists. That means that next->context.ldt != + * prev->context.ldt, because mms never share an LDT. */ if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context); + load_mm_ldt(next); +#endif } #ifdef CONFIG_SMP else { @@ -106,7 +164,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_mm_cr4(next); - load_LDT_nolock(&next->context); + load_mm_ldt(next); } } #endif diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index e6a707eb5081..131eec2ca137 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -47,14 +47,13 @@ static inline unsigned long long native_read_tscp(unsigned int *aux) * it means rax *or* rdx. */ #ifdef CONFIG_X86_64 -#define DECLARE_ARGS(val, low, high) unsigned low, high -#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32)) -#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high) +/* Using 64-bit values saves one instruction clearing the high half of low */ +#define DECLARE_ARGS(val, low, high) unsigned long low, high +#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) #define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) #else #define DECLARE_ARGS(val, low, high) unsigned long long val #define EAX_EDX_VAL(val, low, high) (val) -#define EAX_EDX_ARGS(val, low, high) "A" (val) #define EAX_EDX_RET(val, low, high) "=A" (val) #endif @@ -106,12 +105,19 @@ notrace static inline int native_write_msr_safe(unsigned int msr, return err; } -extern unsigned long long native_read_tsc(void); - extern int rdmsr_safe_regs(u32 regs[8]); extern int wrmsr_safe_regs(u32 regs[8]); -static __always_inline unsigned long long __native_read_tsc(void) +/** + * rdtsc() - returns the current TSC without ordering constraints + * + * rdtsc() returns the result of RDTSC as a 64-bit integer. The + * only ordering constraint it supplies is the ordering implied by + * "asm volatile": it will put the RDTSC in the place you expect. The + * CPU can and will speculatively execute that RDTSC, though, so the + * results can be non-monotonic if compared on different CPUs. + */ +static __always_inline unsigned long long rdtsc(void) { DECLARE_ARGS(val, low, high); @@ -120,6 +126,32 @@ static __always_inline unsigned long long __native_read_tsc(void) return EAX_EDX_VAL(val, low, high); } +/** + * rdtsc_ordered() - read the current TSC in program order + * + * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. + * It is ordered like a load to a global in-memory counter. It should + * be impossible to observe non-monotonic rdtsc_unordered() behavior + * across multiple CPUs as long as the TSC is synced. + */ +static __always_inline unsigned long long rdtsc_ordered(void) +{ + /* + * The RDTSC instruction is not ordered relative to memory + * access. The Intel SDM and the AMD APM are both vague on this + * point, but empirically an RDTSC instruction can be + * speculatively executed before prior loads. An RDTSC + * immediately after an appropriate barrier appears to be + * ordered as a normal load, that is, it provides the same + * ordering guarantees as reading from a global memory location + * that some other imaginary CPU is updating continuously with a + * time stamp. + */ + alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, + "lfence", X86_FEATURE_LFENCE_RDTSC); + return rdtsc(); +} + static inline unsigned long long native_read_pmc(int counter) { DECLARE_ARGS(val, low, high); @@ -180,12 +212,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) return err; } -#define rdtscl(low) \ - ((low) = (u32)__native_read_tsc()) - -#define rdtscll(val) \ - ((val) = __native_read_tsc()) - #define rdpmc(counter, low, high) \ do { \ u64 _l = native_read_pmc((counter)); \ @@ -195,15 +221,6 @@ do { \ #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) -#define rdtscp(low, high, aux) \ -do { \ - unsigned long long _val = native_read_tscp(&(aux)); \ - (low) = (u32)_val; \ - (high) = (u32)(_val >> 32); \ -} while (0) - -#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux)) - #endif /* !CONFIG_PARAVIRT */ /* diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index d143bfad45d7..c2be0375bcad 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -174,19 +174,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) return err; } -static inline u64 paravirt_read_tsc(void) -{ - return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); -} - -#define rdtscl(low) \ -do { \ - u64 _l = paravirt_read_tsc(); \ - low = (int)_l; \ -} while (0) - -#define rdtscll(val) (val = paravirt_read_tsc()) - static inline unsigned long long paravirt_sched_clock(void) { return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); @@ -215,27 +202,6 @@ do { \ #define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) -static inline unsigned long long paravirt_rdtscp(unsigned int *aux) -{ - return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); -} - -#define rdtscp(low, high, aux) \ -do { \ - int __aux; \ - unsigned long __val = paravirt_rdtscp(&__aux); \ - (low) = (u32)__val; \ - (high) = (u32)(__val >> 32); \ - (aux) = __aux; \ -} while (0) - -#define rdtscpll(val, aux) \ -do { \ - unsigned long __aux; \ - val = paravirt_rdtscp(&__aux); \ - (aux) = __aux; \ -} while (0) - static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index a6b8f9fadb06..ce029e4fa7c6 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -156,9 +156,7 @@ struct pv_cpu_ops { u64 (*read_msr)(unsigned int msr, int *err); int (*write_msr)(unsigned int msr, unsigned low, unsigned high); - u64 (*read_tsc)(void); u64 (*read_pmc)(int counter); - unsigned long long (*read_tscp)(unsigned int *aux); #ifdef CONFIG_X86_32 /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 944f1785ed0d..9615a4e2645e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -6,8 +6,8 @@ /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; +struct vm86; -#include <asm/vm86.h> #include <asm/math_emu.h> #include <asm/segment.h> #include <asm/types.h> @@ -400,15 +400,9 @@ struct thread_struct { unsigned long cr2; unsigned long trap_nr; unsigned long error_code; -#ifdef CONFIG_X86_32 +#ifdef CONFIG_VM86 /* Virtual 86 mode info */ - struct vm86_struct __user *vm86_info; - unsigned long screen_bitmap; - unsigned long v86flags; - unsigned long v86mask; - unsigned long saved_sp0; - unsigned int saved_fs; - unsigned int saved_gs; + struct vm86 *vm86; #endif /* IO permissions: */ unsigned long *io_bitmap_ptr; @@ -720,7 +714,6 @@ static inline void spin_lock_prefetch(const void *x) #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ - .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ } diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 628954ceede1..7a6bed5c08bc 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -62,7 +62,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) static __always_inline u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) { - u64 delta = __native_read_tsc() - src->tsc_timestamp; + u64 delta = rdtsc_ordered() - src->tsc_timestamp; return pvclock_scale_delta(delta, src->tsc_to_system_mul, src->tsc_shift); } @@ -76,13 +76,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, u8 ret_flags; version = src->version; - /* Note: emulated platforms which do not advertise SSE2 support - * result in kvmclock not using the necessary RDTSC barriers. - * Without barriers, it is possible that RDTSC instruction reads from - * the time stamp counter outside rdtsc_barrier protected section - * below, resulting in violation of monotonicity. - */ - rdtsc_barrier(); + offset = pvclock_get_nsec_offset(src); ret = src->system_time + offset; ret_flags = src->flags; diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h index ae0e241e228b..a8810bf135ab 100644 --- a/arch/x86/include/asm/qrwlock.h +++ b/arch/x86/include/asm/qrwlock.h @@ -4,8 +4,8 @@ #include <asm-generic/qrwlock_types.h> #ifndef CONFIG_X86_PPRO_FENCE -#define queue_write_unlock queue_write_unlock -static inline void queue_write_unlock(struct qrwlock *lock) +#define queued_write_unlock queued_write_unlock +static inline void queued_write_unlock(struct qrwlock *lock) { barrier(); ACCESS_ONCE(*(u8 *)&lock->cnts) = 0; diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 7c7c27c97daa..1f3175bb994e 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -4,6 +4,7 @@ #include <asm/sigcontext.h> #include <asm/siginfo.h> #include <asm/ucontext.h> +#include <linux/compat.h> #ifdef CONFIG_X86_32 #define sigframe_ia32 sigframe @@ -69,6 +70,15 @@ struct rt_sigframe { #ifdef CONFIG_X86_X32_ABI +struct ucontext_x32 { + unsigned int uc_flags; + unsigned int uc_link; + compat_stack_t uc_stack; + unsigned int uc__pad0; /* needed for alignment */ + struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + struct rt_sigframe_x32 { u64 pretcode; struct ucontext_x32 uc; diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 31eab867e6d3..b42408bcf6b5 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -30,6 +30,7 @@ typedef sigset_t compat_sigset_t; #endif /* __ASSEMBLY__ */ #include <uapi/asm/signal.h> #ifndef __ASSEMBLY__ +extern void do_signal(struct pt_regs *regs); extern void do_notify_resume(struct pt_regs *, void *, __u32); #define __ARCH_HAS_SA_RESTORER diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index c2e00bb2a136..58505f01962f 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -72,7 +72,7 @@ static __always_inline void boot_init_stack_canary(void) * on during the bootup the random pool has true entropy too. */ get_random_bytes(&canary, sizeof(canary)); - tsc = __native_read_tsc(); + tsc = rdtsc(); canary += tsc + (tsc << 32UL); current->stack_canary = canary; diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 592a6a672e07..91dfcafe27a6 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -37,6 +37,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *); asmlinkage unsigned long sys_sigreturn(void); /* kernel/vm86_32.c */ +struct vm86_struct; asmlinkage long sys_vm86old(struct vm86_struct __user *); asmlinkage long sys_vm86(unsigned long, unsigned long); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 225ee545e1a0..fdad5c244350 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -27,14 +27,17 @@ * Without this offset, that can result in a page fault. (We are * careful that, in this case, the value we read doesn't matter.) * - * In vm86 mode, the hardware frame is much longer still, but we neither - * access the extra members from NMI context, nor do we write such a - * frame at sp0 at all. + * In vm86 mode, the hardware frame is much longer still, so add 16 + * bytes to make room for the real-mode segments. * * x86_64 has a fixed-length stack frame. */ #ifdef CONFIG_X86_32 -# define TOP_OF_KERNEL_STACK_PADDING 8 +# ifdef CONFIG_VM86 +# define TOP_OF_KERNEL_STACK_PADDING 16 +# else +# define TOP_OF_KERNEL_STACK_PADDING 8 +# endif #else # define TOP_OF_KERNEL_STACK_PADDING 0 #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index c5380bea2a36..c3496619740a 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -112,8 +112,8 @@ asmlinkage void smp_threshold_interrupt(void); asmlinkage void smp_deferred_error_interrupt(void); #endif -extern enum ctx_state ist_enter(struct pt_regs *regs); -extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state); +extern void ist_enter(struct pt_regs *regs); +extern void ist_exit(struct pt_regs *regs); extern void ist_begin_non_atomic(struct pt_regs *regs); extern void ist_end_non_atomic(void); diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 94605c0e9cee..3df7675debcf 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -21,28 +21,12 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { - unsigned long long ret = 0; - #ifndef CONFIG_X86_TSC if (!cpu_has_tsc) return 0; #endif - rdtscll(ret); - - return ret; -} -static __always_inline cycles_t vget_cycles(void) -{ - /* - * We only do VDSOs on TSC capable CPUs, so this shouldn't - * access boot_cpu_data (which is not VDSO-safe): - */ -#ifndef CONFIG_X86_TSC - if (!cpu_has_tsc) - return 0; -#endif - return (cycles_t)__native_read_tsc(); + return rdtsc(); } extern void tsc_init(void); diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 1d8de3f3feca..1e491f3af317 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_VM86_H #define _ASM_X86_VM86_H - #include <asm/ptrace.h> #include <uapi/asm/vm86.h> @@ -28,43 +27,49 @@ struct kernel_vm86_regs { unsigned short gs, __gsh; }; -struct kernel_vm86_struct { - struct kernel_vm86_regs regs; -/* - * the below part remains on the kernel stack while we are in VM86 mode. - * 'tss.esp0' then contains the address of VM86_TSS_ESP0 below, and when we - * get forced back from VM86, the CPU and "SAVE_ALL" will restore the above - * 'struct kernel_vm86_regs' with the then actual values. - * Therefore, pt_regs in fact points to a complete 'kernel_vm86_struct' - * in kernelspace, hence we need not reget the data from userspace. - */ -#define VM86_TSS_ESP0 flags +struct vm86 { + struct vm86plus_struct __user *user_vm86; + struct pt_regs regs32; + unsigned long veflags; + unsigned long veflags_mask; + unsigned long saved_sp0; + unsigned long flags; unsigned long screen_bitmap; unsigned long cpu_type; struct revectored_struct int_revectored; struct revectored_struct int21_revectored; struct vm86plus_info_struct vm86plus; - struct pt_regs *regs32; /* here we save the pointer to the old regs */ -/* - * The below is not part of the structure, but the stack layout continues - * this way. In front of 'return-eip' may be some data, depending on - * compilation, so we don't rely on this and save the pointer to 'oldregs' - * in 'regs32' above. - * However, with GCC-2.7.2 and the current CFLAGS you see exactly this: - - long return-eip; from call to vm86() - struct pt_regs oldregs; user space registers as saved by syscall - */ }; #ifdef CONFIG_VM86 void handle_vm86_fault(struct kernel_vm86_regs *, long); int handle_vm86_trap(struct kernel_vm86_regs *, long, int); -struct pt_regs *save_v86_state(struct kernel_vm86_regs *); +void save_v86_state(struct kernel_vm86_regs *, int); struct task_struct; + +#define free_vm86(t) do { \ + struct thread_struct *__t = (t); \ + if (__t->vm86 != NULL) { \ + kfree(__t->vm86); \ + __t->vm86 = NULL; \ + } \ +} while (0) + +/* + * Support for VM86 programs to request interrupts for + * real mode hardware drivers: + */ +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 + +static inline int invalid_vm86_irq(int irq) +{ + return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; +} + void release_vm86_irqs(struct task_struct *); #else @@ -77,6 +82,10 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) return 0; } +static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { } + +#define free_vm86(t) do { } while(0) + #endif /* CONFIG_VM86 */ #endif /* _ASM_X86_VM86_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0f15af41bd80..514064897d55 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,8 +23,10 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n CFLAGS_irq.o := -I$(src)/../include/asm/trace obj-y := process_$(BITS).o signal.o +obj-$(CONFIG_COMPAT) += signal_compat.o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o +obj-y += time.o ioport.o dumpstack.o nmi.o +obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index ede92c3364d3..222a57076039 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -263,7 +263,7 @@ static int apbt_clocksource_register(void) /* Verify whether apbt counter works */ t1 = dw_apb_clocksource_read(clocksource_apbt); - rdtscll(start); + start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for @@ -273,7 +273,7 @@ static int apbt_clocksource_register(void) */ do { rep_nop(); - rdtscll(now); + now = rdtsc(); } while ((now - start) < 200000UL); /* APBT is the only always on clocksource, it has to work! */ @@ -390,13 +390,13 @@ unsigned long apbt_quick_calibrate(void) old = dw_apb_clocksource_read(clocksource_apbt); old += loop; - t1 = __native_read_tsc(); + t1 = rdtsc(); do { new = dw_apb_clocksource_read(clocksource_apbt); } while (new < old); - t2 = __native_read_tsc(); + t2 = rdtsc(); shift = 5; if (unlikely(loop >> shift == 0)) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dcb52850a28f..0d71cd9b4a50 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -457,7 +457,7 @@ static int lapic_next_deadline(unsigned long delta, { u64 tsc; - rdtscll(tsc); + tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); return 0; } @@ -592,7 +592,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) unsigned long pm = acpi_pm_read_early(); if (cpu_has_tsc) - rdtscll(tsc); + tsc = rdtsc(); switch (lapic_cal_loops++) { case 0: @@ -1209,7 +1209,7 @@ void setup_local_APIC(void) long long max_loops = cpu_khz ? cpu_khz : 1000000; if (cpu_has_tsc) - rdtscll(tsc); + tsc = rdtsc(); if (disable_apic) { disable_ioapic_support(); @@ -1293,7 +1293,7 @@ void setup_local_APIC(void) } if (queued) { if (cpu_has_tsc && cpu_khz) { - rdtscll(ntsc); + ntsc = rdtsc(); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else max_loops--; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 845dc0df2002..206052e55517 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -943,7 +943,7 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info) */ if (irq < nr_legacy_irqs() && data->count == 1) { if (info->ioapic_trigger != data->trigger) - mp_register_handler(irq, data->trigger); + mp_register_handler(irq, info->ioapic_trigger); data->entry.trigger = data->trigger = info->ioapic_trigger; data->entry.polarity = data->polarity = info->ioapic_polarity; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dd3a4baffe50..51ad2af84a72 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -114,7 +114,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) const int K6_BUG_LOOP = 1000000; int n; void (*f_vide)(void); - unsigned long d, d2; + u64 d, d2; printk(KERN_INFO "AMD K6 stepping B detected - "); @@ -125,10 +125,10 @@ static void init_amd_k6(struct cpuinfo_x86 *c) n = K6_BUG_LOOP; f_vide = vide; - rdtscl(d); + d = rdtsc(); while (n--) f_vide(); - rdtscl(d2); + d2 = rdtsc(); d = d2-d; if (d > 20*K6_BUG_LOOP) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 922c5e0cea4c..cb9e5df42dd2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1410,7 +1410,7 @@ void cpu_init(void) load_sp0(t, ¤t->thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); clear_all_debug_regs(); dbg_restore_debug_regs(); @@ -1459,7 +1459,7 @@ void cpu_init(void) load_sp0(t, thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index df919ff103c3..99940d151e7d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -125,7 +125,7 @@ void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); - rdtscll(m->tsc); + m->tsc = rdtsc(); /* We hope get_seconds stays lockless */ m->time = get_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; @@ -1029,7 +1029,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) { struct mca_config *cfg = &mca_cfg; struct mce m, *final; - enum ctx_state prev_state; int i; int worst = 0; int severity; @@ -1055,7 +1054,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) int flags = MF_ACTION_REQUIRED; int lmce = 0; - prev_state = ist_enter(regs); + ist_enter(regs); this_cpu_inc(mce_exception_count); @@ -1227,7 +1226,7 @@ out: local_irq_disable(); ist_end_non_atomic(); done: - ist_exit(regs, prev_state); + ist_exit(regs); } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1784,7 +1783,7 @@ static void collect_tscs(void *data) { unsigned long *cpu_tsc = (unsigned long *)data; - rdtscll(cpu_tsc[smp_processor_id()]); + cpu_tsc[smp_processor_id()] = rdtsc(); } static int mce_apei_read_done; diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 737b0ad4e61a..12402e10aeff 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -19,10 +19,9 @@ int mce_p5_enabled __read_mostly; /* Machine check handler for Pentium class Intel CPUs: */ static void pentium_machine_check(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; u32 loaddr, hi, lotype; - prev_state = ist_enter(regs); + ist_enter(regs); rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); @@ -39,7 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - ist_exit(regs, prev_state); + ist_exit(regs); } /* Set up machine check reporting for processors with Intel style MCE: */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 44f138296fbe..01dd8702880b 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -15,12 +15,12 @@ /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state = ist_enter(regs); + ist_enter(regs); printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - ist_exit(regs, prev_state); + ist_exit(regs); } /* Set up machine check reporting on the Winchip C6 series */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3658de47900f..cc25c8f3512d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2179,24 +2179,32 @@ static unsigned long get_segment_base(unsigned int segment) int idx = segment >> 3; if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { +#ifdef CONFIG_MODIFY_LDT_SYSCALL + struct ldt_struct *ldt; + if (idx > LDT_ENTRIES) return 0; - if (idx > current->active_mm->context.size) + /* IRQs are off, so this synchronizes with smp_store_release */ + ldt = lockless_dereference(current->active_mm->context.ldt); + if (!ldt || idx > ldt->size) return 0; - desc = current->active_mm->context.ldt; + desc = &ldt->entries[idx]; +#else + return 0; +#endif } else { if (idx > GDT_ENTRIES) return 0; - desc = raw_cpu_ptr(gdt_page.gdt); + desc = raw_cpu_ptr(gdt_page.gdt) + idx; } - return get_desc_base(desc + idx); + return get_desc_base(desc); } -#ifdef CONFIG_COMPAT +#ifdef CONFIG_IA32_EMULATION #include <asm/compat.h> diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index ce95676abd60..4d38416e2a7f 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -110,7 +110,7 @@ static void init_espfix_random(void) */ if (!arch_get_random_long(&rand)) { /* The constant is an arbitrary large prime */ - rdtscll(rand); + rand = rdtsc(); rand *= 0xc345c6b72fd16123UL; } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 10757d0a3fcf..f75c5908c7a6 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -735,7 +735,7 @@ static int hpet_clocksource_register(void) /* Verify whether hpet counter works */ t1 = hpet_readl(HPET_COUNTER); - rdtscll(start); + start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for @@ -745,7 +745,7 @@ static int hpet_clocksource_register(void) */ do { rep_nop(); - rdtscll(now); + now = rdtsc(); } while ((now - start) < 200000UL); if (t1 == hpet_readl(HPET_COUNTER)) { diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c7dfe1be784e..d596eba9586e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -216,8 +216,23 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) unsigned vector = ~regs->orig_ax; unsigned irq; + /* + * NB: Unlike exception entries, IRQ entries do not reliably + * handle context tracking in the low-level entry code. This is + * because syscall entries execute briefly with IRQs on before + * updating context tracking state, so we can take an IRQ from + * kernel mode with CONTEXT_USER. The low-level entry code only + * updates the context if we came from user mode, so we won't + * switch to CONTEXT_KERNEL. We'll fix that once the syscall + * code is cleaned up enough that we can cleanly defer enabling + * IRQs. + */ + entering_irq(); + /* entering_irq() tells RCU that we're not quiescent. Check it. */ + rcu_lockdep_assert(rcu_is_watching(), "IRQ failed to wake up RCU"); + irq = __this_cpu_read(vector_irq[vector]); if (!handle_irq(irq, regs)) { diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 26d5a55a2736..e565e0e4d216 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -45,7 +45,7 @@ static void __jump_label_transform(struct jump_entry *entry, const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { if (init) { /* * Jump label is enabled for the first time. diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index c37886d759cc..2bcc0525f1c1 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/smp.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> @@ -20,82 +21,82 @@ #include <asm/mmu_context.h> #include <asm/syscalls.h> -#ifdef CONFIG_SMP +/* context.lock is held for us, so we don't need any locking. */ static void flush_ldt(void *current_mm) { - if (current->active_mm == current_mm) - load_LDT(¤t->active_mm->context); + mm_context_t *pc; + + if (current->active_mm != current_mm) + return; + + pc = ¤t->active_mm->context; + set_ldt(pc->ldt->entries, pc->ldt->size); } -#endif -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ +static struct ldt_struct *alloc_ldt_struct(int size) { - void *oldldt, *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & - (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); - if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + struct ldt_struct *new_ldt; + int alloc_size; + + if (size > LDT_ENTRIES) + return NULL; + + new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); + if (!new_ldt) + return NULL; + + BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); + alloc_size = size * LDT_ENTRY_SIZE; + + /* + * Xen is very picky: it requires a page-aligned LDT that has no + * trailing nonzero bytes in any page that contains LDT descriptors. + * Keep it simple: zero the whole allocation and never allocate less + * than PAGE_SIZE. + */ + if (alloc_size > PAGE_SIZE) + new_ldt->entries = vzalloc(alloc_size); else - newldt = (void *)__get_free_page(GFP_KERNEL); - - if (!newldt) - return -ENOMEM; + new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (oldsize) - memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, - (mincount - oldsize) * LDT_ENTRY_SIZE); + if (!new_ldt->entries) { + kfree(new_ldt); + return NULL; + } - paravirt_alloc_ldt(newldt, mincount); + new_ldt->size = size; + return new_ldt; +} -#ifdef CONFIG_X86_64 - /* CHECKME: Do we really need this ? */ - wmb(); -#endif - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - preempt_disable(); - load_LDT(pc); - if (!cpumask_equal(mm_cpumask(current->mm), - cpumask_of(smp_processor_id()))) - smp_call_function(flush_ldt, current->mm, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - paravirt_free_ldt(oldldt, oldsize); - if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - put_page(virt_to_page(oldldt)); - } - return 0; +/* After calling this, the LDT is immutable. */ +static void finalize_ldt_struct(struct ldt_struct *ldt) +{ + paravirt_alloc_ldt(ldt->entries, ldt->size); } -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +/* context.lock is held */ +static void install_ldt(struct mm_struct *current_mm, + struct ldt_struct *ldt) { - int err = alloc_ldt(new, old->size, 0); - int i; + /* Synchronizes with lockless_dereference in load_mm_ldt. */ + smp_store_release(¤t_mm->context.ldt, ldt); + + /* Activate the LDT for all CPUs using current_mm. */ + on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); +} - if (err < 0) - return err; +static void free_ldt_struct(struct ldt_struct *ldt) +{ + if (likely(!ldt)) + return; - for (i = 0; i < old->size; i++) - write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); - return 0; + paravirt_free_ldt(ldt->entries, ldt->size); + if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(ldt->entries); + else + kfree(ldt->entries); + kfree(ldt); } /* @@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + struct ldt_struct *new_ldt; struct mm_struct *old_mm; int retval = 0; mutex_init(&mm->context.lock); - mm->context.size = 0; old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); + if (!old_mm) { + mm->context.ldt = NULL; + return 0; } + + mutex_lock(&old_mm->context.lock); + if (!old_mm->context.ldt) { + mm->context.ldt = NULL; + goto out_unlock; + } + + new_ldt = alloc_ldt_struct(old_mm->context.ldt->size); + if (!new_ldt) { + retval = -ENOMEM; + goto out_unlock; + } + + memcpy(new_ldt->entries, old_mm->context.ldt->entries, + new_ldt->size * LDT_ENTRY_SIZE); + finalize_ldt_struct(new_ldt); + + mm->context.ldt = new_ldt; + +out_unlock: + mutex_unlock(&old_mm->context.lock); return retval; } @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { -#ifdef CONFIG_X86_32 - /* CHECKME: Can this ever happen ? */ - if (mm == current->active_mm) - clear_LDT(); -#endif - paravirt_free_ldt(mm->context.ldt, mm->context.size); - if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - put_page(virt_to_page(mm->context.ldt)); - mm->context.size = 0; - } + free_ldt_struct(mm->context.ldt); + mm->context.ldt = NULL; } static int read_ldt(void __user *ptr, unsigned long bytecount) { - int err; + int retval; unsigned long size; struct mm_struct *mm = current->mm; - if (!mm->context.size) - return 0; + mutex_lock(&mm->context.lock); + + if (!mm->context.ldt) { + retval = 0; + goto out_unlock; + } + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; - mutex_lock(&mm->context.lock); - size = mm->context.size * LDT_ENTRY_SIZE; + size = mm->context.ldt->size * LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; + if (copy_to_user(ptr, mm->context.ldt->entries, size)) { + retval = -EFAULT; + goto out_unlock; + } + if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr + size, bytecount - size) != 0) { - err = -EFAULT; - goto error_return; + /* Zero-fill the rest and pretend we read bytecount bytes. */ + if (clear_user(ptr + size, bytecount - size)) { + retval = -EFAULT; + goto out_unlock; } } - return bytecount; -error_return: - return err; + retval = bytecount; + +out_unlock: + mutex_unlock(&mm->context.lock); + return retval; } static int read_default_ldt(void __user *ptr, unsigned long bytecount) @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) struct desc_struct ldt; int error; struct user_desc ldt_info; + int oldsize, newsize; + struct ldt_struct *new_ldt, *old_ldt; error = -EINVAL; if (bytecount != sizeof(ldt_info)) @@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) goto out; } - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, - ldt_info.entry_number + 1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - memset(&ldt, 0, sizeof(ldt)); - goto install; + if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) || + LDT_empty(&ldt_info)) { + /* The user wants to clear the entry. */ + memset(&ldt, 0, sizeof(ldt)); + } else { + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { + error = -EINVAL; + goto out; } + + fill_ldt(&ldt, &ldt_info); + if (oldmode) + ldt.avl = 0; } - if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { - error = -EINVAL; + mutex_lock(&mm->context.lock); + + old_ldt = mm->context.ldt; + oldsize = old_ldt ? old_ldt->size : 0; + newsize = max((int)(ldt_info.entry_number + 1), oldsize); + + error = -ENOMEM; + new_ldt = alloc_ldt_struct(newsize); + if (!new_ldt) goto out_unlock; - } - fill_ldt(&ldt, &ldt_info); - if (oldmode) - ldt.avl = 0; + if (old_ldt) + memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE); + new_ldt->entries[ldt_info.entry_number] = ldt; + finalize_ldt_struct(new_ldt); - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt); + install_ldt(mm, new_ldt); + free_ldt_struct(old_ldt); error = 0; out_unlock: diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index d05bd2e2ee91..697f90db0e37 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w) a->handler, whole_msecs, decimal_msecs); } -static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) +static int nmi_handle(unsigned int type, struct pt_regs *regs) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *a; @@ -213,7 +213,7 @@ static void pci_serr_error(unsigned char reason, struct pt_regs *regs) { /* check to see if anyone registered against these types of errors */ - if (nmi_handle(NMI_SERR, regs, false)) + if (nmi_handle(NMI_SERR, regs)) return; pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", @@ -247,7 +247,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) unsigned long i; /* check to see if anyone registered against these types of errors */ - if (nmi_handle(NMI_IO_CHECK, regs, false)) + if (nmi_handle(NMI_IO_CHECK, regs)) return; pr_emerg( @@ -284,7 +284,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) * as only the first one is ever run (unless it can actually determine * if it caused the NMI) */ - handled = nmi_handle(NMI_UNKNOWN, regs, false); + handled = nmi_handle(NMI_UNKNOWN, regs); if (handled) { __this_cpu_add(nmi_stats.unknown, handled); return; @@ -332,7 +332,7 @@ static void default_do_nmi(struct pt_regs *regs) __this_cpu_write(last_nmi_rip, regs->ip); - handled = nmi_handle(NMI_LOCAL, regs, b2b); + handled = nmi_handle(NMI_LOCAL, regs); __this_cpu_add(nmi_stats.normal, handled); if (handled) { /* diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 58bcfb67c01f..f68e48f5f6c2 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -351,9 +351,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, .write_msr = native_write_msr_safe, - .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, - .read_tscp = native_read_tscp, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, .load_gdt = native_load_gdt, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index e1b013696dde..c89f50a76e97 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); -DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); @@ -52,7 +51,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_cpu_ops, clts); - PATCH_SITE(pv_cpu_ops, read_tsc); #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): if (pv_is_native_spin_unlock()) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 397688beed4b..2199d9b774c8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -29,6 +29,7 @@ #include <asm/debugreg.h> #include <asm/nmi.h> #include <asm/tlbflush.h> +#include <asm/vm86.h> /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -110,6 +111,8 @@ void exit_thread(void) kfree(bp); } + free_vm86(t); + fpu__drop(fpu); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f73c962fe636..c13df2c735f8 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -53,6 +53,7 @@ #include <asm/syscalls.h> #include <asm/debugreg.h> #include <asm/switch_to.h> +#include <asm/vm86.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 71d7849a07f7..3c1bbcf12924 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -121,13 +121,15 @@ void __show_regs(struct pt_regs *regs, int all) void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { - if (dead_task->mm->context.size) { +#ifdef CONFIG_MODIFY_LDT_SYSCALL + if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, dead_task->mm->context.ldt, - dead_task->mm->context.size); + dead_task->mm->context.ldt->size); BUG(); } +#endif } } @@ -248,8 +250,8 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) __USER_CS, __USER_DS, 0); } -#ifdef CONFIG_IA32_EMULATION -void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) +#ifdef CONFIG_COMPAT +void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp) { start_thread_common(regs, new_ip, new_sp, test_thread_flag(TIF_X32) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 9be72bc3613f..558f50edebca 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -37,12 +37,10 @@ #include <asm/proto.h> #include <asm/hw_breakpoint.h> #include <asm/traps.h> +#include <asm/syscall.h> #include "tls.h" -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - enum x86_regset { REGSET_GENERAL, REGSET_FP, @@ -1123,6 +1121,73 @@ static int genregs32_set(struct task_struct *target, return ret; } +static long ia32_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + void __user *datap = compat_ptr(data); + int ret; + __u32 val; + + switch (request) { + case PTRACE_PEEKUSR: + ret = getreg32(child, addr, &val); + if (ret == 0) + ret = put_user(val, (__u32 __user *)datap); + break; + + case PTRACE_POKEUSR: + ret = putreg32(child, addr, data); + break; + + case PTRACE_GETREGS: /* Get all gp regs from the child. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct32), + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, &user_x86_32_view, + REGSET_GENERAL, 0, + sizeof(struct user_regs_struct32), + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_FP, 0, + sizeof(struct user_i387_ia32_struct), + datap); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user( + child, &user_x86_32_view, REGSET_FP, + 0, sizeof(struct user_i387_ia32_struct), datap); + + case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_XFP, 0, + sizeof(struct user32_fxsr_struct), + datap); + + case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ + return copy_regset_from_user(child, &user_x86_32_view, + REGSET_XFP, 0, + sizeof(struct user32_fxsr_struct), + datap); + + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: + return arch_ptrace(child, request, addr, data); + + default: + return compat_ptrace_request(child, request, addr, data); + } + + return ret; +} +#endif /* CONFIG_IA32_EMULATION */ + #ifdef CONFIG_X86_X32_ABI static long x32_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, @@ -1211,78 +1276,21 @@ static long x32_arch_ptrace(struct task_struct *child, } #endif +#ifdef CONFIG_COMPAT long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { - unsigned long addr = caddr; - unsigned long data = cdata; - void __user *datap = compat_ptr(data); - int ret; - __u32 val; - #ifdef CONFIG_X86_X32_ABI if (!is_ia32_task()) return x32_arch_ptrace(child, request, caddr, cdata); #endif - - switch (request) { - case PTRACE_PEEKUSR: - ret = getreg32(child, addr, &val); - if (ret == 0) - ret = put_user(val, (__u32 __user *)datap); - break; - - case PTRACE_POKEUSR: - ret = putreg32(child, addr, data); - break; - - case PTRACE_GETREGS: /* Get all gp regs from the child. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_GENERAL, - 0, sizeof(struct user_regs_struct32), - datap); - - case PTRACE_SETREGS: /* Set all gp regs in the child. */ - return copy_regset_from_user(child, &user_x86_32_view, - REGSET_GENERAL, 0, - sizeof(struct user_regs_struct32), - datap); - - case PTRACE_GETFPREGS: /* Get the child FPU state. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_FP, 0, - sizeof(struct user_i387_ia32_struct), - datap); - - case PTRACE_SETFPREGS: /* Set the child FPU state. */ - return copy_regset_from_user( - child, &user_x86_32_view, REGSET_FP, - 0, sizeof(struct user_i387_ia32_struct), datap); - - case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_XFP, 0, - sizeof(struct user32_fxsr_struct), - datap); - - case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ - return copy_regset_from_user(child, &user_x86_32_view, - REGSET_XFP, 0, - sizeof(struct user32_fxsr_struct), - datap); - - case PTRACE_GET_THREAD_AREA: - case PTRACE_SET_THREAD_AREA: - return arch_ptrace(child, request, addr, data); - - default: - return compat_ptrace_request(child, request, addr, data); - } - - return ret; +#ifdef CONFIG_IA32_EMULATION + return ia32_arch_ptrace(child, request, caddr, cdata); +#else + return 0; +#endif } - -#endif /* CONFIG_IA32_EMULATION */ +#endif /* CONFIG_COMPAT */ #ifdef CONFIG_X86_64 @@ -1434,201 +1442,3 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, /* Send us the fake SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); } - -static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) -{ -#ifdef CONFIG_X86_64 - if (arch == AUDIT_ARCH_X86_64) { - audit_syscall_entry(regs->orig_ax, regs->di, - regs->si, regs->dx, regs->r10); - } else -#endif - { - audit_syscall_entry(regs->orig_ax, regs->bx, - regs->cx, regs->dx, regs->si); - } -} - -/* - * We can return 0 to resume the syscall or anything else to go to phase - * 2. If we resume the syscall, we need to put something appropriate in - * regs->orig_ax. - * - * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax - * are fully functional. - * - * For phase 2's benefit, our return value is: - * 0: resume the syscall - * 1: go to phase 2; no seccomp phase 2 needed - * anything else: go to phase 2; pass return value to seccomp - */ -unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) -{ - unsigned long ret = 0; - u32 work; - - BUG_ON(regs != task_pt_regs(current)); - - work = ACCESS_ONCE(current_thread_info()->flags) & - _TIF_WORK_SYSCALL_ENTRY; - - /* - * If TIF_NOHZ is set, we are required to call user_exit() before - * doing anything that could touch RCU. - */ - if (work & _TIF_NOHZ) { - user_exit(); - work &= ~_TIF_NOHZ; - } - -#ifdef CONFIG_SECCOMP - /* - * Do seccomp first -- it should minimize exposure of other - * code, and keeping seccomp fast is probably more valuable - * than the rest of this. - */ - if (work & _TIF_SECCOMP) { - struct seccomp_data sd; - - sd.arch = arch; - sd.nr = regs->orig_ax; - sd.instruction_pointer = regs->ip; -#ifdef CONFIG_X86_64 - if (arch == AUDIT_ARCH_X86_64) { - sd.args[0] = regs->di; - sd.args[1] = regs->si; - sd.args[2] = regs->dx; - sd.args[3] = regs->r10; - sd.args[4] = regs->r8; - sd.args[5] = regs->r9; - } else -#endif - { - sd.args[0] = regs->bx; - sd.args[1] = regs->cx; - sd.args[2] = regs->dx; - sd.args[3] = regs->si; - sd.args[4] = regs->di; - sd.args[5] = regs->bp; - } - - BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); - BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); - - ret = seccomp_phase1(&sd); - if (ret == SECCOMP_PHASE1_SKIP) { - regs->orig_ax = -1; - ret = 0; - } else if (ret != SECCOMP_PHASE1_OK) { - return ret; /* Go directly to phase 2 */ - } - - work &= ~_TIF_SECCOMP; - } -#endif - - /* Do our best to finish without phase 2. */ - if (work == 0) - return ret; /* seccomp and/or nohz only (ret == 0 here) */ - -#ifdef CONFIG_AUDITSYSCALL - if (work == _TIF_SYSCALL_AUDIT) { - /* - * If there is no more work to be done except auditing, - * then audit in phase 1. Phase 2 always audits, so, if - * we audit here, then we can't go on to phase 2. - */ - do_audit_syscall_entry(regs, arch); - return 0; - } -#endif - - return 1; /* Something is enabled that we can't handle in phase 1 */ -} - -/* Returns the syscall nr to run (which should match regs->orig_ax). */ -long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, - unsigned long phase1_result) -{ - long ret = 0; - u32 work = ACCESS_ONCE(current_thread_info()->flags) & - _TIF_WORK_SYSCALL_ENTRY; - - BUG_ON(regs != task_pt_regs(current)); - - /* - * If we stepped into a sysenter/syscall insn, it trapped in - * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. - * If user-mode had set TF itself, then it's still clear from - * do_debug() and we need to set it again to restore the user - * state. If we entered on the slow path, TF was already set. - */ - if (work & _TIF_SINGLESTEP) - regs->flags |= X86_EFLAGS_TF; - -#ifdef CONFIG_SECCOMP - /* - * Call seccomp_phase2 before running the other hooks so that - * they can see any changes made by a seccomp tracer. - */ - if (phase1_result > 1 && seccomp_phase2(phase1_result)) { - /* seccomp failures shouldn't expose any additional code. */ - return -1; - } -#endif - - if (unlikely(work & _TIF_SYSCALL_EMU)) - ret = -1L; - - if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && - tracehook_report_syscall_entry(regs)) - ret = -1L; - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->orig_ax); - - do_audit_syscall_entry(regs, arch); - - return ret ?: regs->orig_ax; -} - -long syscall_trace_enter(struct pt_regs *regs) -{ - u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; - unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); - - if (phase1_result == 0) - return regs->orig_ax; - else - return syscall_trace_enter_phase2(regs, arch, phase1_result); -} - -void syscall_trace_leave(struct pt_regs *regs) -{ - bool step; - - /* - * We may come here right after calling schedule_user() - * or do_notify_resume(), in which case we can be in RCU - * user mode. - */ - user_exit(); - - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->ax); - - /* - * If TIF_SYSCALL_EMU is set, we only get here because of - * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). - * We already reported this syscall instruction in - * syscall_trace_enter(). - */ - step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && - !test_thread_flag(TIF_SYSCALL_EMU); - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); - - user_enter(); -} diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 206996c1669d..07eb84407036 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -31,11 +31,11 @@ #include <asm/vdso.h> #include <asm/mce.h> #include <asm/sighandling.h> +#include <asm/vm86.h> #ifdef CONFIG_X86_64 #include <asm/proto.h> #include <asm/ia32_unistd.h> -#include <asm/sys_ia32.h> #endif /* CONFIG_X86_64 */ #include <asm/syscall.h> @@ -636,6 +636,9 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) bool stepping, failed; struct fpu *fpu = ¤t->thread.fpu; + if (v8086_mode(regs)) + save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL); + /* Are we from a system call? */ if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting.. */ @@ -701,7 +704,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -static void do_signal(struct pt_regs *regs) +void do_signal(struct pt_regs *regs) { struct ksignal ksig; @@ -736,32 +739,6 @@ static void do_signal(struct pt_regs *regs) restore_saved_sigmask(); } -/* - * notification of userspace execution resumption - * - triggered by the TIF_WORK_MASK flags - */ -__visible void -do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) -{ - user_exit(); - - if (thread_info_flags & _TIF_UPROBE) - uprobe_notify_resume(regs); - - /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs); - - if (thread_info_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } - if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) - fire_user_return_notifiers(); - - user_enter(); -} - void signal_fault(struct pt_regs *regs, void __user *frame, char *where) { struct task_struct *me = current; diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c new file mode 100644 index 000000000000..dc3c0b1c816f --- /dev/null +++ b/arch/x86/kernel/signal_compat.c @@ -0,0 +1,95 @@ +#include <linux/compat.h> +#include <linux/uaccess.h> + +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) +{ + int err = 0; + bool ia32 = test_thread_flag(TIF_IA32); + + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + return -EFAULT; + + put_user_try { + /* If you change siginfo_t structure, please make sure that + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + put_user_ex(from->si_signo, &to->si_signo); + put_user_ex(from->si_errno, &to->si_errno); + put_user_ex((short)from->si_code, &to->si_code); + + if (from->si_code < 0) { + put_user_ex(from->si_pid, &to->si_pid); + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); + } else { + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + put_user_ex(from->_sifields._pad[0], + &to->_sifields._pad[0]); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_SYS >> 16: + put_user_ex(from->si_syscall, &to->si_syscall); + put_user_ex(from->si_arch, &to->si_arch); + break; + case __SI_CHLD >> 16: + if (ia32) { + put_user_ex(from->si_utime, &to->si_utime); + put_user_ex(from->si_stime, &to->si_stime); + } else { + put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); + put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); + } + put_user_ex(from->si_status, &to->si_status); + /* FALL THROUGH */ + default: + case __SI_KILL >> 16: + put_user_ex(from->si_uid, &to->si_uid); + break; + case __SI_POLL >> 16: + put_user_ex(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + put_user_ex(from->si_overrun, &to->si_overrun); + put_user_ex(ptr_to_compat(from->si_ptr), + &to->si_ptr); + break; + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: + case __SI_MESGQ >> 16: + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(from->si_int, &to->si_int); + break; + } + } + } put_user_catch(err); + + return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ + int err = 0; + u32 ptr32; + + if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) + return -EFAULT; + + get_user_try { + get_user_ex(to->si_signo, &from->si_signo); + get_user_ex(to->si_errno, &from->si_errno); + get_user_ex(to->si_code, &from->si_code); + + get_user_ex(to->si_pid, &from->si_pid); + get_user_ex(to->si_uid, &from->si_uid); + get_user_ex(ptr32, &from->si_ptr); + to->si_ptr = compat_ptr(ptr32); + } get_user_catch(err); + + return err; +} diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 9b4d51d0c0d0..fd88e152d584 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <linux/ptrace.h> #include <asm/desc.h> +#include <asm/mmu_context.h> unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) { @@ -17,6 +18,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re return addr; } +#ifdef CONFIG_MODIFY_LDT_SYSCALL /* * We'll assume that the code segments in the GDT * are all zero-based. That is largely true: the @@ -30,10 +32,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re seg &= ~7UL; mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) + if (unlikely(!child->mm->context.ldt || + (seg >> 3) >= child->mm->context.ldt->size)) addr = -1L; /* bogus selector, access would fault */ else { - desc = child->mm->context.ldt + seg; + desc = &child->mm->context.ldt->entries[seg]; base = get_desc_base(desc); /* 16-bit code segment? */ @@ -43,6 +46,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re } mutex_unlock(&child->mm->context.lock); } +#endif return addr; } diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c index 25b993729f9b..80bb24d9b880 100644 --- a/arch/x86/kernel/trace_clock.c +++ b/arch/x86/kernel/trace_clock.c @@ -12,10 +12,5 @@ */ u64 notrace trace_clock_x86_tsc(void) { - u64 ret; - - rdtsc_barrier(); - rdtscll(ret); - - return ret; + return rdtsc_ordered(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f5791927aa64..86a82eafb96f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -62,6 +62,7 @@ #include <asm/fpu/xstate.h> #include <asm/trace/mpx.h> #include <asm/mpx.h> +#include <asm/vm86.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -108,13 +109,10 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_count_dec(); } -enum ctx_state ist_enter(struct pt_regs *regs) +void ist_enter(struct pt_regs *regs) { - enum ctx_state prev_state; - if (user_mode(regs)) { - /* Other than that, we're just an exception. */ - prev_state = exception_enter(); + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); } else { /* * We might have interrupted pretty much anything. In @@ -123,32 +121,25 @@ enum ctx_state ist_enter(struct pt_regs *regs) * but we need to notify RCU. */ rcu_nmi_enter(); - prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */ } /* - * We are atomic because we're on the IST stack (or we're on x86_32, - * in which case we still shouldn't schedule). - * - * This must be after exception_enter(), because exception_enter() - * won't do anything if in_interrupt() returns true. + * We are atomic because we're on the IST stack; or we're on + * x86_32, in which case we still shouldn't schedule; or we're + * on x86_64 and entered from user mode, in which case we're + * still atomic unless ist_begin_non_atomic is called. */ preempt_count_add(HARDIRQ_OFFSET); /* This code is a bit fragile. Test it. */ rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work"); - - return prev_state; } -void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) +void ist_exit(struct pt_regs *regs) { - /* Must be before exception_exit. */ preempt_count_sub(HARDIRQ_OFFSET); - if (user_mode(regs)) - return exception_exit(prev_state); - else + if (!user_mode(regs)) rcu_nmi_exit(); } @@ -162,7 +153,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) * a double fault, it can be safe to schedule. ist_begin_non_atomic() * begins a non-atomic section within an ist_enter()/ist_exit() region. * Callers are responsible for enabling interrupts themselves inside - * the non-atomic section, and callers must call is_end_non_atomic() + * the non-atomic section, and callers must call ist_end_non_atomic() * before ist_exit(). */ void ist_begin_non_atomic(struct pt_regs *regs) @@ -289,17 +280,16 @@ NOKPROBE_SYMBOL(do_trap); static void do_error_trap(struct pt_regs *regs, long error_code, char *str, unsigned long trapnr, int signr) { - enum ctx_state prev_state = exception_enter(); siginfo_t info; + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { conditional_sti(regs); do_trap(trapnr, signr, str, regs, error_code, fill_trap_info(regs, signr, trapnr, &info)); } - - exception_exit(prev_state); } #define DO_ERROR(trapnr, signr, str, name) \ @@ -351,7 +341,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) } #endif - ist_enter(regs); /* Discard prev_state because we won't return. */ + ist_enter(regs); notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); tsk->thread.error_code = error_code; @@ -371,14 +361,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; const struct bndcsr *bndcsr; siginfo_t *info; - prev_state = exception_enter(); + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); if (notify_die(DIE_TRAP, "bounds", regs, error_code, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) - goto exit; + return; conditional_sti(regs); if (!user_mode(regs)) @@ -435,9 +424,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) die("bounds", regs, error_code); } -exit: - exception_exit(prev_state); return; + exit_trap: /* * This path out is for all the cases where we could not @@ -447,35 +435,33 @@ exit_trap: * time.. */ do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); - exception_exit(prev_state); } dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; - enum ctx_state prev_state; - prev_state = exception_enter(); + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); conditional_sti(regs); if (v8086_mode(regs)) { local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - goto exit; + return; } tsk = current; if (!user_mode(regs)) { if (fixup_exception(regs)) - goto exit; + return; tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; if (notify_die(DIE_GPF, "general protection fault", regs, error_code, X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) die("general protection fault", regs, error_code); - goto exit; + return; } tsk->thread.error_code = error_code; @@ -491,16 +477,12 @@ do_general_protection(struct pt_regs *regs, long error_code) } force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); -exit: - exception_exit(prev_state); } NOKPROBE_SYMBOL(do_general_protection); /* May run on IST stack. */ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - #ifdef CONFIG_DYNAMIC_FTRACE /* * ftrace must be first, everything else may cause a recursive crash. @@ -513,7 +495,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; - prev_state = ist_enter(regs); + ist_enter(regs); + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) @@ -539,7 +522,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); debug_stack_usage_dec(); exit: - ist_exit(regs, prev_state); + ist_exit(regs); } NOKPROBE_SYMBOL(do_int3); @@ -615,12 +598,11 @@ NOKPROBE_SYMBOL(fixup_bad_iret); dotraplinkage void do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - enum ctx_state prev_state; int user_icebp = 0; unsigned long dr6; int si_code; - prev_state = ist_enter(regs); + ist_enter(regs); get_debugreg(dr6, 6); @@ -695,7 +677,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: - ist_exit(regs, prev_state); + ist_exit(regs); } NOKPROBE_SYMBOL(do_debug); @@ -747,21 +729,15 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - - prev_state = exception_enter(); + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); math_error(regs, error_code, X86_TRAP_MF); - exception_exit(prev_state); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - - prev_state = exception_enter(); + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); math_error(regs, error_code, X86_TRAP_XF); - exception_exit(prev_state); } dotraplinkage void @@ -773,9 +749,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - - prev_state = exception_enter(); + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); BUG_ON(use_eager_fpu()); #ifdef CONFIG_MATH_EMULATION @@ -786,7 +760,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); - exception_exit(prev_state); return; } #endif @@ -794,7 +767,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 conditional_sti(regs); #endif - exception_exit(prev_state); } NOKPROBE_SYMBOL(do_device_not_available); @@ -802,9 +774,8 @@ NOKPROBE_SYMBOL(do_device_not_available); dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; - enum ctx_state prev_state; - prev_state = exception_enter(); + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); local_irq_enable(); info.si_signo = SIGILL; @@ -816,7 +787,6 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, &info); } - exception_exit(prev_state); } #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7437b41f6a47..b9cfd462f7e7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -38,7 +38,7 @@ static int __read_mostly tsc_unstable; erroneous rdtsc usage on !cpu_has_tsc processors */ static int __read_mostly tsc_disabled = -1; -static struct static_key __use_tsc = STATIC_KEY_INIT; +static DEFINE_STATIC_KEY_FALSE(__use_tsc); int tsc_clocksource_reliable; @@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) data = cyc2ns_write_begin(cpu); - rdtscll(tsc_now); + tsc_now = rdtsc(); ns_now = cycles_2_ns(tsc_now); /* @@ -274,7 +274,12 @@ done: */ u64 native_sched_clock(void) { - u64 tsc_now; + if (static_branch_likely(&__use_tsc)) { + u64 tsc_now = rdtsc(); + + /* return the value in ns */ + return cycles_2_ns(tsc_now); + } /* * Fall back to jiffies if there's no TSC available: @@ -284,16 +289,9 @@ u64 native_sched_clock(void) * very important for it to be as fast as the platform * can achieve it. ) */ - if (!static_key_false(&__use_tsc)) { - /* No locking but a rare wrong value is not a big deal: */ - return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); - } - - /* read the Time Stamp Counter: */ - rdtscll(tsc_now); - /* return the value in ns */ - return cycles_2_ns(tsc_now); + /* No locking but a rare wrong value is not a big deal: */ + return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); } /* We need to define a real function for sched_clock, to override the @@ -308,12 +306,6 @@ unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); #endif -unsigned long long native_read_tsc(void) -{ - return __native_read_tsc(); -} -EXPORT_SYMBOL(native_read_tsc); - int check_tsc_unstable(void) { return tsc_unstable; @@ -976,7 +968,7 @@ static struct clocksource clocksource_tsc; */ static cycle_t read_tsc(struct clocksource *cs) { - return (cycle_t)get_cycles(); + return (cycle_t)rdtsc_ordered(); } /* @@ -1210,7 +1202,7 @@ void __init tsc_init(void) /* now allow native_sched_clock() to use rdtsc */ tsc_disabled = 0; - static_key_slow_inc(&__use_tsc); + static_branch_enable(&__use_tsc); if (!no_sched_irq_time) enable_sched_clock_irqtime(); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index dd8d0791dfb5..78083bf23ed1 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -39,16 +39,15 @@ static cycles_t max_warp; static int nr_warps; /* - * TSC-warp measurement loop running on both CPUs: + * TSC-warp measurement loop running on both CPUs. This is not called + * if there is no TSC. */ static void check_tsc_warp(unsigned int timeout) { cycles_t start, now, prev, end; int i; - rdtsc_barrier(); - start = get_cycles(); - rdtsc_barrier(); + start = rdtsc_ordered(); /* * The measurement runs for 'timeout' msecs: */ @@ -63,9 +62,7 @@ static void check_tsc_warp(unsigned int timeout) */ arch_spin_lock(&sync_lock); prev = last_tsc; - rdtsc_barrier(); - now = get_cycles(); - rdtsc_barrier(); + now = rdtsc_ordered(); last_tsc = now; arch_spin_unlock(&sync_lock); @@ -126,7 +123,7 @@ void check_tsc_sync_source(int cpu) /* * No need to check if we already know that the TSC is not - * synchronized: + * synchronized or if we have no TSC. */ if (unsynchronized_tsc()) return; @@ -190,6 +187,7 @@ void check_tsc_sync_target(void) { int cpus = 2; + /* Also aborts if there is no TSC. */ if (unsynchronized_tsc() || tsc_clocksource_reliable) return; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index fc9db6ef2a95..abd8b856bd2b 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -44,11 +44,14 @@ #include <linux/ptrace.h> #include <linux/audit.h> #include <linux/stddef.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/tlbflush.h> #include <asm/irq.h> +#include <asm/traps.h> +#include <asm/vm86.h> /* * Known problems: @@ -66,10 +69,6 @@ */ -#define KVM86 ((struct kernel_vm86_struct *)regs) -#define VMPI KVM86->vm86plus - - /* * 8- and 16-bit register defines.. */ @@ -81,8 +80,8 @@ /* * virtual flags (16 and 32-bit versions) */ -#define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) -#define VEFLAGS (current->thread.v86flags) +#define VFLAGS (*(unsigned short *)&(current->thread.vm86->veflags)) +#define VEFLAGS (current->thread.vm86->veflags) #define set_flags(X, new, mask) \ ((X) = ((X) & ~(mask)) | ((new) & (mask))) @@ -90,46 +89,13 @@ #define SAFE_MASK (0xDD5) #define RETURN_MASK (0xDFF) -/* convert kernel_vm86_regs to vm86_regs */ -static int copy_vm86_regs_to_user(struct vm86_regs __user *user, - const struct kernel_vm86_regs *regs) -{ - int ret = 0; - - /* - * kernel_vm86_regs is missing gs, so copy everything up to - * (but not including) orig_eax, and then rest including orig_eax. - */ - ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax)); - ret += copy_to_user(&user->orig_eax, ®s->pt.orig_ax, - sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.orig_ax)); - - return ret; -} - -/* convert vm86_regs to kernel_vm86_regs */ -static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, - const struct vm86_regs __user *user, - unsigned extra) -{ - int ret = 0; - - /* copy ax-fs inclusive */ - ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax)); - /* copy orig_ax-__gsh+extra */ - ret += copy_from_user(®s->pt.orig_ax, &user->orig_eax, - sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.orig_ax) + - extra); - return ret; -} - -struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) +void save_v86_state(struct kernel_vm86_regs *regs, int retval) { struct tss_struct *tss; - struct pt_regs *ret; - unsigned long tmp; + struct task_struct *tsk = current; + struct vm86plus_struct __user *user; + struct vm86 *vm86 = current->thread.vm86; + long err = 0; /* * This gets called from entry.S with interrupts disabled, but @@ -138,31 +104,57 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) */ local_irq_enable(); - if (!current->thread.vm86_info) { - pr_alert("no vm86_info: BAD\n"); + if (!vm86 || !vm86->user_vm86) { + pr_alert("no user_vm86: BAD\n"); do_exit(SIGSEGV); } - set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); - tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); - tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); - if (tmp) { - pr_alert("could not access userspace vm86_info\n"); + set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask); + user = vm86->user_vm86; + + if (!access_ok(VERIFY_WRITE, user, vm86->vm86plus.is_vm86pus ? + sizeof(struct vm86plus_struct) : + sizeof(struct vm86_struct))) { + pr_alert("could not access userspace vm86 info\n"); + do_exit(SIGSEGV); + } + + put_user_try { + put_user_ex(regs->pt.bx, &user->regs.ebx); + put_user_ex(regs->pt.cx, &user->regs.ecx); + put_user_ex(regs->pt.dx, &user->regs.edx); + put_user_ex(regs->pt.si, &user->regs.esi); + put_user_ex(regs->pt.di, &user->regs.edi); + put_user_ex(regs->pt.bp, &user->regs.ebp); + put_user_ex(regs->pt.ax, &user->regs.eax); + put_user_ex(regs->pt.ip, &user->regs.eip); + put_user_ex(regs->pt.cs, &user->regs.cs); + put_user_ex(regs->pt.flags, &user->regs.eflags); + put_user_ex(regs->pt.sp, &user->regs.esp); + put_user_ex(regs->pt.ss, &user->regs.ss); + put_user_ex(regs->es, &user->regs.es); + put_user_ex(regs->ds, &user->regs.ds); + put_user_ex(regs->fs, &user->regs.fs); + put_user_ex(regs->gs, &user->regs.gs); + + put_user_ex(vm86->screen_bitmap, &user->screen_bitmap); + } put_user_catch(err); + if (err) { + pr_alert("could not access userspace vm86 info\n"); do_exit(SIGSEGV); } tss = &per_cpu(cpu_tss, get_cpu()); - current->thread.sp0 = current->thread.saved_sp0; - current->thread.sysenter_cs = __KERNEL_CS; - load_sp0(tss, ¤t->thread); - current->thread.saved_sp0 = 0; + tsk->thread.sp0 = vm86->saved_sp0; + tsk->thread.sysenter_cs = __KERNEL_CS; + load_sp0(tss, &tsk->thread); + vm86->saved_sp0 = 0; put_cpu(); - ret = KVM86->regs32; + memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); - ret->fs = current->thread.saved_fs; - set_user_gs(ret, current->thread.saved_gs); + lazy_load_gs(vm86->regs32.gs); - return ret; + regs->pt.ax = retval; } static void mark_screen_rdonly(struct mm_struct *mm) @@ -200,45 +192,16 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); -static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); +static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus); -SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, v86) +SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86) { - struct kernel_vm86_struct info; /* declare this _on top_, - * this avoids wasting of stack space. - * This remains on the stack until we - * return to 32 bit user space. - */ - struct task_struct *tsk = current; - int tmp; - - if (tsk->thread.saved_sp0) - return -EPERM; - tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, - offsetof(struct kernel_vm86_struct, vm86plus) - - sizeof(info.regs)); - if (tmp) - return -EFAULT; - memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); - info.regs32 = current_pt_regs(); - tsk->thread.vm86_info = v86; - do_sys_vm86(&info, tsk); - return 0; /* we never return here */ + return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false); } SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) { - struct kernel_vm86_struct info; /* declare this _on top_, - * this avoids wasting of stack space. - * This remains on the stack until we - * return to 32 bit user space. - */ - struct task_struct *tsk; - int tmp; - struct vm86plus_struct __user *v86; - - tsk = current; switch (cmd) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: @@ -256,114 +219,133 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) } /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ - if (tsk->thread.saved_sp0) - return -EPERM; - v86 = (struct vm86plus_struct __user *)arg; - tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, - offsetof(struct kernel_vm86_struct, regs32) - - sizeof(info.regs)); - if (tmp) - return -EFAULT; - info.regs32 = current_pt_regs(); - info.vm86plus.is_vm86pus = 1; - tsk->thread.vm86_info = (struct vm86_struct __user *)v86; - do_sys_vm86(&info, tsk); - return 0; /* we never return here */ + return do_sys_vm86((struct vm86plus_struct __user *) arg, true); } -static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) +static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) { struct tss_struct *tss; -/* - * make sure the vm86() system call doesn't try to do anything silly - */ - info->regs.pt.ds = 0; - info->regs.pt.es = 0; - info->regs.pt.fs = 0; -#ifndef CONFIG_X86_32_LAZY_GS - info->regs.pt.gs = 0; -#endif + struct task_struct *tsk = current; + struct vm86 *vm86 = tsk->thread.vm86; + struct kernel_vm86_regs vm86regs; + struct pt_regs *regs = current_pt_regs(); + unsigned long err = 0; + + if (!vm86) { + if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) + return -ENOMEM; + tsk->thread.vm86 = vm86; + } + if (vm86->saved_sp0) + return -EPERM; + + if (!access_ok(VERIFY_READ, user_vm86, plus ? + sizeof(struct vm86_struct) : + sizeof(struct vm86plus_struct))) + return -EFAULT; + + memset(&vm86regs, 0, sizeof(vm86regs)); + get_user_try { + unsigned short seg; + get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx); + get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx); + get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx); + get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi); + get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi); + get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp); + get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax); + get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip); + get_user_ex(seg, &user_vm86->regs.cs); + vm86regs.pt.cs = seg; + get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags); + get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp); + get_user_ex(seg, &user_vm86->regs.ss); + vm86regs.pt.ss = seg; + get_user_ex(vm86regs.es, &user_vm86->regs.es); + get_user_ex(vm86regs.ds, &user_vm86->regs.ds); + get_user_ex(vm86regs.fs, &user_vm86->regs.fs); + get_user_ex(vm86regs.gs, &user_vm86->regs.gs); + + get_user_ex(vm86->flags, &user_vm86->flags); + get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap); + get_user_ex(vm86->cpu_type, &user_vm86->cpu_type); + } get_user_catch(err); + if (err) + return err; + + if (copy_from_user(&vm86->int_revectored, + &user_vm86->int_revectored, + sizeof(struct revectored_struct))) + return -EFAULT; + if (copy_from_user(&vm86->int21_revectored, + &user_vm86->int21_revectored, + sizeof(struct revectored_struct))) + return -EFAULT; + if (plus) { + if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus, + sizeof(struct vm86plus_info_struct))) + return -EFAULT; + vm86->vm86plus.is_vm86pus = 1; + } else + memset(&vm86->vm86plus, 0, + sizeof(struct vm86plus_info_struct)); + + memcpy(&vm86->regs32, regs, sizeof(struct pt_regs)); + vm86->user_vm86 = user_vm86; /* * The flags register is also special: we cannot trust that the user * has set it up safely, so this makes sure interrupt etc flags are * inherited from protected mode. */ - VEFLAGS = info->regs.pt.flags; - info->regs.pt.flags &= SAFE_MASK; - info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK; - info->regs.pt.flags |= X86_VM_MASK; + VEFLAGS = vm86regs.pt.flags; + vm86regs.pt.flags &= SAFE_MASK; + vm86regs.pt.flags |= regs->flags & ~SAFE_MASK; + vm86regs.pt.flags |= X86_VM_MASK; + + vm86regs.pt.orig_ax = regs->orig_ax; - switch (info->cpu_type) { + switch (vm86->cpu_type) { case CPU_286: - tsk->thread.v86mask = 0; + vm86->veflags_mask = 0; break; case CPU_386: - tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL; + vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; case CPU_486: - tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; + vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; default: - tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; + vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; } /* - * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL) + * Save old state */ - info->regs32->ax = VM86_SIGNAL; - tsk->thread.saved_sp0 = tsk->thread.sp0; - tsk->thread.saved_fs = info->regs32->fs; - tsk->thread.saved_gs = get_user_gs(info->regs32); + vm86->saved_sp0 = tsk->thread.sp0; + lazy_save_gs(vm86->regs32.gs); tss = &per_cpu(cpu_tss, get_cpu()); - tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; + /* make room for real-mode segments */ + tsk->thread.sp0 += 16; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; load_sp0(tss, &tsk->thread); put_cpu(); - tsk->thread.screen_bitmap = info->screen_bitmap; - if (info->flags & VM86_SCREEN_BITMAP) + if (vm86->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); - /*call __audit_syscall_exit since we do not exit via the normal paths */ -#ifdef CONFIG_AUDITSYSCALL - if (unlikely(current->audit_context)) - __audit_syscall_exit(1, 0); -#endif - - __asm__ __volatile__( - "movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" -#ifdef CONFIG_X86_32_LAZY_GS - "mov %2, %%gs\n\t" -#endif - "jmp resume_userspace" - : /* no outputs */ - :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); - /* we never return here */ -} - -static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval) -{ - struct pt_regs *regs32; - - regs32 = save_v86_state(regs16); - regs32->ax = retval; - __asm__ __volatile__("movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" - "jmp resume_userspace" - : : "r" (regs32), "r" (current_thread_info())); + memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs)); + force_iret(); + return regs->ax; } static inline void set_IF(struct kernel_vm86_regs *regs) { VEFLAGS |= X86_EFLAGS_VIF; - if (VEFLAGS & X86_EFLAGS_VIP) - return_to_32bit(regs, VM86_STI); } static inline void clear_IF(struct kernel_vm86_regs *regs) @@ -395,7 +377,7 @@ static inline void clear_AC(struct kernel_vm86_regs *regs) static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs) { - set_flags(VEFLAGS, flags, current->thread.v86mask); + set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask); set_flags(regs->pt.flags, flags, SAFE_MASK); if (flags & X86_EFLAGS_IF) set_IF(regs); @@ -405,7 +387,7 @@ static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs) { - set_flags(VFLAGS, flags, current->thread.v86mask); + set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask); set_flags(regs->pt.flags, flags, SAFE_MASK); if (flags & X86_EFLAGS_IF) set_IF(regs); @@ -420,7 +402,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) if (VEFLAGS & X86_EFLAGS_VIF) flags |= X86_EFLAGS_IF; flags |= X86_EFLAGS_IOPL; - return flags | (VEFLAGS & current->thread.v86mask); + return flags | (VEFLAGS & current->thread.vm86->veflags_mask); } static inline int is_revectored(int nr, struct revectored_struct *bitmap) @@ -518,12 +500,13 @@ static void do_int(struct kernel_vm86_regs *regs, int i, { unsigned long __user *intr_ptr; unsigned long segoffs; + struct vm86 *vm86 = current->thread.vm86; if (regs->pt.cs == BIOSSEG) goto cannot_handle; - if (is_revectored(i, &KVM86->int_revectored)) + if (is_revectored(i, &vm86->int_revectored)) goto cannot_handle; - if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored)) + if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored)) goto cannot_handle; intr_ptr = (unsigned long __user *) (i << 2); if (get_user(segoffs, intr_ptr)) @@ -542,18 +525,16 @@ static void do_int(struct kernel_vm86_regs *regs, int i, return; cannot_handle: - return_to_32bit(regs, VM86_INTx + (i << 8)); + save_v86_state(regs, VM86_INTx + (i << 8)); } int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) { - if (VMPI.is_vm86pus) { + struct vm86 *vm86 = current->thread.vm86; + + if (vm86->vm86plus.is_vm86pus) { if ((trapno == 3) || (trapno == 1)) { - KVM86->regs32->ax = VM86_TRAP + (trapno << 8); - /* setting this flag forces the code in entry_32.S to - the path where we call save_v86_state() and change - the stack pointer to KVM86->regs32 */ - set_thread_flag(TIF_NOTIFY_RESUME); + save_v86_state(regs, VM86_TRAP + (trapno << 8)); return 0; } do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); @@ -574,16 +555,11 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) unsigned char __user *ssp; unsigned short ip, sp, orig_flags; int data32, pref_done; + struct vm86plus_info_struct *vmpi = ¤t->thread.vm86->vm86plus; #define CHECK_IF_IN_TRAP \ - if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ + if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \ newflags |= X86_EFLAGS_TF -#define VM86_FAULT_RETURN do { \ - if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \ - return_to_32bit(regs, VM86_PICRETURN); \ - if (orig_flags & X86_EFLAGS_TF) \ - handle_vm86_trap(regs, 0, 1); \ - return; } while (0) orig_flags = *(unsigned short *)®s->pt.flags; @@ -622,7 +598,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) SP(regs) -= 2; } IP(regs) = ip; - VM86_FAULT_RETURN; + goto vm86_fault_return; /* popf */ case 0x9d: @@ -642,16 +618,18 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) else set_vflags_short(newflags, regs); - VM86_FAULT_RETURN; + goto check_vip; } /* int xx */ case 0xcd: { int intno = popb(csp, ip, simulate_sigsegv); IP(regs) = ip; - if (VMPI.vm86dbg_active) { - if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3]) - return_to_32bit(regs, VM86_INTx + (intno << 8)); + if (vmpi->vm86dbg_active) { + if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) { + save_v86_state(regs, VM86_INTx + (intno << 8)); + return; + } } do_int(regs, intno, ssp, sp); return; @@ -682,14 +660,14 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) } else { set_vflags_short(newflags, regs); } - VM86_FAULT_RETURN; + goto check_vip; } /* cli */ case 0xfa: IP(regs) = ip; clear_IF(regs); - VM86_FAULT_RETURN; + goto vm86_fault_return; /* sti */ /* @@ -701,14 +679,29 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) case 0xfb: IP(regs) = ip; set_IF(regs); - VM86_FAULT_RETURN; + goto check_vip; default: - return_to_32bit(regs, VM86_UNKNOWN); + save_v86_state(regs, VM86_UNKNOWN); } return; +check_vip: + if (VEFLAGS & X86_EFLAGS_VIP) { + save_v86_state(regs, VM86_STI); + return; + } + +vm86_fault_return: + if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) { + save_v86_state(regs, VM86_PICRETURN); + return; + } + if (orig_flags & X86_EFLAGS_TF) + handle_vm86_trap(regs, 0, X86_TRAP_DB); + return; + simulate_sigsegv: /* FIXME: After a long discussion with Stas we finally * agreed, that this is wrong. Here we should @@ -720,7 +713,7 @@ simulate_sigsegv: * should be a mixture of the two, but how do we * get the information? [KD] */ - return_to_32bit(regs, VM86_UNKNOWN); + save_v86_state(regs, VM86_UNKNOWN); } /* ---------------- vm86 special IRQ passing stuff ----------------- */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2a5ca97c263b..946f52c43b8f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1172,7 +1172,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; - guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); + guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ @@ -1240,7 +1240,7 @@ static void start_apic_timer(struct kvm_lapic *apic) local_irq_save(flags); now = apic->lapic_timer.timer.base->get_time(); - guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); + guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); if (likely(tscdeadline > guest_tsc)) { ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8e0c0844c6b9..a4b8c8d57b96 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1139,7 +1139,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) { u64 tsc; - tsc = svm_scale_tsc(vcpu, native_read_tsc()); + tsc = svm_scale_tsc(vcpu, rdtsc()); return target_tsc - tsc; } @@ -3172,7 +3172,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr_info->index) { case MSR_IA32_TSC: { msr_info->data = svm->vmcb->control.tsc_offset + - svm_scale_tsc(vcpu, native_read_tsc()); + svm_scale_tsc(vcpu, rdtsc()); break; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 83b7b5cd75d5..af30c265412c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2236,7 +2236,7 @@ static u64 guest_read_tsc(void) { u64 host_tsc, tsc_offset; - rdtscll(host_tsc); + host_tsc = rdtsc(); tsc_offset = vmcs_read64(TSC_OFFSET); return host_tsc + tsc_offset; } @@ -2317,7 +2317,7 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) { - return target_tsc - native_read_tsc(); + return target_tsc - rdtsc(); } static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5ef2560075bf..a51f94d3193d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1444,20 +1444,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); static cycle_t read_tsc(void) { - cycle_t ret; - u64 last; - - /* - * Empirically, a fence (of type that depends on the CPU) - * before rdtsc is enough to ensure that rdtsc is ordered - * with respect to loads. The various CPU manuals are unclear - * as to whether rdtsc can be reordered with later loads, - * but no one has ever seen it happen. - */ - rdtsc_barrier(); - ret = (cycle_t)vget_cycles(); - - last = pvclock_gtod_data.clock.cycle_last; + cycle_t ret = (cycle_t)rdtsc_ordered(); + u64 last = pvclock_gtod_data.clock.cycle_last; if (likely(ret >= last)) return ret; @@ -1646,7 +1634,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) return 1; } if (!use_master_clock) { - host_tsc = native_read_tsc(); + host_tsc = rdtsc(); kernel_ns = get_kernel_ns(); } @@ -2810,7 +2798,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : - native_read_tsc() - vcpu->arch.last_host_tsc; + rdtsc() - vcpu->arch.last_host_tsc; if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { @@ -2838,7 +2826,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu->arch.last_host_tsc = native_read_tsc(); + vcpu->arch.last_host_tsc = rdtsc(); } static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, @@ -6622,7 +6610,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) hw_breakpoint_restore(); vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, - native_read_tsc()); + rdtsc()); vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); @@ -7431,7 +7419,7 @@ int kvm_arch_hardware_enable(void) if (ret != 0) return ret; - local_tsc = native_read_tsc(); + local_tsc = rdtsc(); stable = !check_tsc_unstable(); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 39d6a3db0b96..4453d52a143d 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -49,16 +49,14 @@ static void delay_loop(unsigned long loops) /* TSC based delay: */ static void delay_tsc(unsigned long __loops) { - u32 bclock, now, loops = __loops; + u64 bclock, now, loops = __loops; int cpu; preempt_disable(); cpu = smp_processor_id(); - rdtsc_barrier(); - rdtscl(bclock); + bclock = rdtsc_ordered(); for (;;) { - rdtsc_barrier(); - rdtscl(now); + now = rdtsc_ordered(); if ((now - bclock) >= loops) break; @@ -79,8 +77,7 @@ static void delay_tsc(unsigned long __loops) if (unlikely(cpu != smp_processor_id())) { loops -= (now - bclock); cpu = smp_processor_id(); - rdtsc_barrier(); - rdtscl(bclock); + bclock = rdtsc_ordered(); } } preempt_enable(); @@ -100,7 +97,7 @@ void use_tsc_delay(void) int read_current_timer(unsigned long *timer_val) { if (delay_fn == delay_tsc) { - rdtscll(*timer_val); + *timer_val = rdtsc(); return 0; } return -1; diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 6ef5e99380f9..a2eefb121a5f 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -21,6 +21,7 @@ #include <asm/uaccess.h> #include <asm/desc.h> +#include <asm/vm86.h> #include "fpu_system.h" #include "exception.h" diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9dc909841739..eef44d9a3f77 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -20,6 +20,7 @@ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ #include <asm/fixmap.h> /* VSYSCALL_ADDR */ #include <asm/vsyscall.h> /* emulate_vsyscall */ +#include <asm/vm86.h> /* struct vm86 */ #define CREATE_TRACE_POINTS #include <asm/trace/exceptions.h> @@ -301,14 +302,16 @@ static inline void check_v8086_mode(struct pt_regs *regs, unsigned long address, struct task_struct *tsk) { +#ifdef CONFIG_VM86 unsigned long bit; - if (!v8086_mode(regs)) + if (!v8086_mode(regs) || !tsk->thread.vm86) return; bit = (address - 0xA0000) >> PAGE_SHIFT; if (bit < 32) - tsk->thread.screen_bitmap |= 1 << bit; + tsk->thread.vm86->screen_bitmap |= 1 << bit; +#endif } static bool low_pfn(unsigned long pfn) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 579a8fd74be0..be2e7a2b10d7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -269,7 +269,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ offsetof(struct bpf_array, map.max_entries)); EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ -#define OFFSET1 44 /* number of bytes to jump */ +#define OFFSET1 47 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -278,15 +278,15 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 33 +#define OFFSET2 36 EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ /* prog = array->prog[index]; */ - EMIT4(0x48, 0x8D, 0x44, 0xD6); /* lea rax, [rsi + rdx * 8 + 0x50] */ - EMIT1(offsetof(struct bpf_array, prog)); + EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ + offsetof(struct bpf_array, prog)); EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index cfba30f27392..e4308fe6afe8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -972,6 +972,11 @@ u64 efi_mem_attributes(unsigned long phys_addr) static int __init arch_parse_efi_cmdline(char *str) { + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + if (parse_option_str(str, "old_map")) set_bit(EFI_OLD_MEMMAP, &efi.flags); if (parse_option_str(str, "debug")) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 0d7dd1f5ac36..9ab52791fed5 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -22,6 +22,7 @@ #include <asm/fpu/internal.h> #include <asm/debugreg.h> #include <asm/cpu.h> +#include <asm/mmu_context.h> #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -153,7 +154,7 @@ static void fix_processor_context(void) syscall_init(); /* This sets MSR_*STAR and related */ #endif load_TR_desc(); /* This does ltr */ - load_LDT(¤t->active_mm->context); /* This does lldt */ + load_mm_ldt(current->active_mm); /* This does lldt */ fpu__resume_cpu(); } diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index b9531d343134..755481f14d90 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -45,17 +45,4 @@ #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) -/* - * Stop RDTSC speculation. This is needed when you need to use RDTSC - * (or get_cycles or vread that possibly accesses the TSC) in a defined - * code region. - * - * (Could use an alternative three way for this if there was one.) - */ -static inline void rdtsc_barrier(void) -{ - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, - "lfence", X86_FEATURE_LFENCE_RDTSC); -} - #endif diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0b95c9b8283f..d9cfa452da9d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -483,6 +483,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte_t pte; unsigned long pfn; struct page *page; + unsigned char dummy; ptep = lookup_address((unsigned long)v, &level); BUG_ON(ptep == NULL); @@ -492,6 +493,32 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte = pfn_pte(pfn, prot); + /* + * Careful: update_va_mapping() will fail if the virtual address + * we're poking isn't populated in the page tables. We don't + * need to worry about the direct map (that's always in the page + * tables), but we need to be careful about vmap space. In + * particular, the top level page table can lazily propagate + * entries between processes, so if we've switched mms since we + * vmapped the target in the first place, we might not have the + * top-level page table entry populated. + * + * We disable preemption because we want the same mm active when + * we probe the target and when we issue the hypercall. We'll + * have the same nominal mm, but if we're a kernel thread, lazy + * mm dropping could change our pgd. + * + * Out of an abundance of caution, this uses __get_user() to fault + * in the target address just in case there's some obscure case + * in which the target address isn't readable. + */ + + preempt_disable(); + + pagefault_disable(); /* Avoid warnings due to being atomic. */ + __get_user(dummy, (unsigned char __user __force *)v); + pagefault_enable(); + if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); @@ -503,6 +530,8 @@ static void set_aliased_prot(void *v, pgprot_t prot) BUG(); } else kmap_flush_unused(); + + preempt_enable(); } static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) @@ -510,6 +539,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; int i; + /* + * We need to mark the all aliases of the LDT pages RO. We + * don't need to call vm_flush_aliases(), though, since that's + * only responsible for flushing aliases out the TLBs, not the + * page tables, and Xen will flush the TLB for us if needed. + * + * To avoid confusing future readers: none of this is necessary + * to load the LDT. The hypervisor only checks this when the + * LDT is faulted in due to subsequent descriptor access. + */ + for(i = 0; i < entries; i += entries_per_page) set_aliased_prot(ldt + i, PAGE_KERNEL_RO); } @@ -1175,11 +1215,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_msr = xen_read_msr_safe, .write_msr = xen_write_msr_safe, - .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, - .read_tscp = native_read_tscp, - .iret = xen_iret, #ifdef CONFIG_X86_64 .usergs_sysret32 = xen_sysret32, diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index e4d193e7a300..f3dfe0d921c2 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -616,7 +616,6 @@ CONFIG_SCHED_DEBUG=y # CONFIG_SLUB_DEBUG_ON is not set # CONFIG_SLUB_STATS is not set # CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_MUTEXES is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 717afcdb5f4a..88dbbb115285 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -231,7 +231,7 @@ int acpi_device_set_power(struct acpi_device *device, int state) dev_warn(&device->dev, "Failed to change power state to %s\n", acpi_power_state_string(state)); } else { - device->power.state = state; + device->power.state = target_state; ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] transitioned to %s\n", device->pnp.bus_id, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 26063afb3eba..7a3c30c4336f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1002,7 +1002,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) int ret = 0; /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) { + for_each_cpu(j, policy->real_cpus) { if (j == policy->kobj_cpu) continue; @@ -1019,7 +1019,7 @@ static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) unsigned int j; /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) { + for_each_cpu(j, policy->real_cpus) { if (j == policy->kobj_cpu) continue; @@ -1163,11 +1163,14 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) goto err_free_cpumask; + if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) + goto err_free_rcpumask; + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &dev->kobj, "cpufreq"); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); - goto err_free_rcpumask; + goto err_free_real_cpus; } INIT_LIST_HEAD(&policy->policy_list); @@ -1184,6 +1187,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) return policy; +err_free_real_cpus: + free_cpumask_var(policy->real_cpus); err_free_rcpumask: free_cpumask_var(policy->related_cpus); err_free_cpumask: @@ -1234,6 +1239,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) write_unlock_irqrestore(&cpufreq_driver_lock, flags); cpufreq_policy_put_kobj(policy, notify); + free_cpumask_var(policy->real_cpus); free_cpumask_var(policy->related_cpus); free_cpumask_var(policy->cpus); kfree(policy); @@ -1258,14 +1264,17 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) pr_debug("adding CPU %u\n", cpu); - /* - * Only possible if 'cpu' wasn't physically present earlier and we are - * here from subsys_interface add callback. A hotplug notifier will - * follow and we will handle it like logical CPU hotplug then. For now, - * just create the sysfs link. - */ - if (cpu_is_offline(cpu)) - return add_cpu_dev_symlink(per_cpu(cpufreq_cpu_data, cpu), cpu); + if (cpu_is_offline(cpu)) { + /* + * Only possible if we are here from the subsys_interface add + * callback. A hotplug notifier will follow and we will handle + * it as CPU online then. For now, just create the sysfs link, + * unless there is no policy or the link is already present. + */ + policy = per_cpu(cpufreq_cpu_data, cpu); + return policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) + ? add_cpu_dev_symlink(policy, cpu) : 0; + } if (!down_read_trylock(&cpufreq_rwsem)) return 0; @@ -1307,6 +1316,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* related cpus should atleast have policy->cpus */ cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + /* Remember which CPUs have been present at the policy creation time. */ + if (!recover_policy) + cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); + /* * affected cpus must always be the one, which are online. We aren't * managing offline cpus here. @@ -1420,8 +1433,7 @@ nomem_out: return ret; } -static int __cpufreq_remove_dev_prepare(struct device *dev, - struct subsys_interface *sif) +static int __cpufreq_remove_dev_prepare(struct device *dev) { unsigned int cpu = dev->id; int ret = 0; @@ -1437,10 +1449,8 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); - if (ret) { + if (ret) pr_err("%s: Failed to stop governor\n", __func__); - return ret; - } } down_write(&policy->rwsem); @@ -1473,8 +1483,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, return ret; } -static int __cpufreq_remove_dev_finish(struct device *dev, - struct subsys_interface *sif) +static int __cpufreq_remove_dev_finish(struct device *dev) { unsigned int cpu = dev->id; int ret; @@ -1492,10 +1501,8 @@ static int __cpufreq_remove_dev_finish(struct device *dev, /* If cpu is last user of policy, free policy */ if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - if (ret) { + if (ret) pr_err("%s: Failed to exit governor\n", __func__); - return ret; - } } /* @@ -1506,10 +1513,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev, if (cpufreq_driver->exit) cpufreq_driver->exit(policy); - /* Free the policy only if the driver is getting removed. */ - if (sif) - cpufreq_policy_free(policy, true); - return 0; } @@ -1521,42 +1524,41 @@ static int __cpufreq_remove_dev_finish(struct device *dev, static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { unsigned int cpu = dev->id; - int ret; - - /* - * Only possible if 'cpu' is getting physically removed now. A hotplug - * notifier should have already been called and we just need to remove - * link or free policy here. - */ - if (cpu_is_offline(cpu)) { - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - struct cpumask mask; + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - if (!policy) - return 0; + if (!policy) + return 0; - cpumask_copy(&mask, policy->related_cpus); - cpumask_clear_cpu(cpu, &mask); + if (cpu_online(cpu)) { + __cpufreq_remove_dev_prepare(dev); + __cpufreq_remove_dev_finish(dev); + } - /* - * Free policy only if all policy->related_cpus are removed - * physically. - */ - if (cpumask_intersects(&mask, cpu_present_mask)) { - remove_cpu_dev_symlink(policy, cpu); - return 0; - } + cpumask_clear_cpu(cpu, policy->real_cpus); + if (cpumask_empty(policy->real_cpus)) { cpufreq_policy_free(policy, true); return 0; } - ret = __cpufreq_remove_dev_prepare(dev, sif); + if (cpu != policy->kobj_cpu) { + remove_cpu_dev_symlink(policy, cpu); + } else { + /* + * The CPU owning the policy object is going away. Move it to + * another suitable CPU. + */ + unsigned int new_cpu = cpumask_first(policy->real_cpus); + struct device *new_dev = get_cpu_device(new_cpu); + + dev_dbg(dev, "%s: Moving policy object to CPU%u\n", __func__, new_cpu); - if (!ret) - ret = __cpufreq_remove_dev_finish(dev, sif); + sysfs_remove_link(&new_dev->kobj, "cpufreq"); + policy->kobj_cpu = new_cpu; + WARN_ON(kobject_move(&policy->kobj, &new_dev->kobj)); + } - return ret; + return 0; } static void handle_update(struct work_struct *work) @@ -2395,11 +2397,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: - __cpufreq_remove_dev_prepare(dev, NULL); + __cpufreq_remove_dev_prepare(dev); break; case CPU_POST_DEAD: - __cpufreq_remove_dev_finish(dev, NULL); + __cpufreq_remove_dev_finish(dev); break; case CPU_DOWN_FAILED: diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 15ada47bb720..7898de054f4e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -681,6 +681,7 @@ static struct cpu_defaults knl_params = { .get_max = core_get_max_pstate, .get_min = core_get_min_pstate, .get_turbo = knl_get_turbo_pstate, + .get_scaling = core_get_scaling, .set = core_set_pstate, }, }; @@ -765,7 +766,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu) local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); - tsc = native_read_tsc(); + tsc = rdtsc(); local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 59892126d175..d3629b7482dd 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -48,6 +48,8 @@ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) +#define ATC_MAX_DSCR_TRIALS 10 + /* * Initial number of descriptors to allocate for each channel. This could * be increased during dma usage. @@ -285,28 +287,19 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, * * @current_len: the number of bytes left before reading CTRLA * @ctrla: the value of CTRLA - * @desc: the descriptor containing the transfer width */ -static inline int atc_calc_bytes_left(int current_len, u32 ctrla, - struct at_desc *desc) +static inline int atc_calc_bytes_left(int current_len, u32 ctrla) { - return current_len - ((ctrla & ATC_BTSIZE_MAX) << desc->tx_width); -} + u32 btsize = (ctrla & ATC_BTSIZE_MAX); + u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla); -/** - * atc_calc_bytes_left_from_reg - calculates the number of bytes left according - * to the current value of CTRLA. - * - * @current_len: the number of bytes left before reading CTRLA - * @atchan: the channel to read CTRLA for - * @desc: the descriptor containing the transfer width - */ -static inline int atc_calc_bytes_left_from_reg(int current_len, - struct at_dma_chan *atchan, struct at_desc *desc) -{ - u32 ctrla = channel_readl(atchan, CTRLA); - - return atc_calc_bytes_left(current_len, ctrla, desc); + /* + * According to the datasheet, when reading the Control A Register + * (ctrla), the Buffer Transfer Size (btsize) bitfield refers to the + * number of transfers completed on the Source Interface. + * So btsize is always a number of source width transfers. + */ + return current_len - (btsize << src_width); } /** @@ -320,7 +313,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) struct at_desc *desc_first = atc_first_active(atchan); struct at_desc *desc; int ret; - u32 ctrla, dscr; + u32 ctrla, dscr, trials; /* * If the cookie doesn't match to the currently running transfer then @@ -346,15 +339,82 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) * the channel's DSCR register and compare it against the value * of the hardware linked list structure of each child * descriptor. + * + * The CTRLA register provides us with the amount of data + * already read from the source for the current child + * descriptor. So we can compute a more accurate residue by also + * removing the number of bytes corresponding to this amount of + * data. + * + * However, the DSCR and CTRLA registers cannot be read both + * atomically. Hence a race condition may occur: the first read + * register may refer to one child descriptor whereas the second + * read may refer to a later child descriptor in the list + * because of the DMA transfer progression inbetween the two + * reads. + * + * One solution could have been to pause the DMA transfer, read + * the DSCR and CTRLA then resume the DMA transfer. Nonetheless, + * this approach presents some drawbacks: + * - If the DMA transfer is paused, RX overruns or TX underruns + * are more likey to occur depending on the system latency. + * Taking the USART driver as an example, it uses a cyclic DMA + * transfer to read data from the Receive Holding Register + * (RHR) to avoid RX overruns since the RHR is not protected + * by any FIFO on most Atmel SoCs. So pausing the DMA transfer + * to compute the residue would break the USART driver design. + * - The atc_pause() function masks interrupts but we'd rather + * avoid to do so for system latency purpose. + * + * Then we'd rather use another solution: the DSCR is read a + * first time, the CTRLA is read in turn, next the DSCR is read + * a second time. If the two consecutive read values of the DSCR + * are the same then we assume both refers to the very same + * child descriptor as well as the CTRLA value read inbetween + * does. For cyclic tranfers, the assumption is that a full loop + * is "not so fast". + * If the two DSCR values are different, we read again the CTRLA + * then the DSCR till two consecutive read values from DSCR are + * equal or till the maxium trials is reach. + * This algorithm is very unlikely not to find a stable value for + * DSCR. */ - ctrla = channel_readl(atchan, CTRLA); - rmb(); /* ensure CTRLA is read before DSCR */ dscr = channel_readl(atchan, DSCR); + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + for (trials = 0; trials < ATC_MAX_DSCR_TRIALS; ++trials) { + u32 new_dscr; + + rmb(); /* ensure DSCR is read after CTRLA */ + new_dscr = channel_readl(atchan, DSCR); + + /* + * If the DSCR register value has not changed inside the + * DMA controller since the previous read, we assume + * that both the dscr and ctrla values refers to the + * very same descriptor. + */ + if (likely(new_dscr == dscr)) + break; + + /* + * DSCR has changed inside the DMA controller, so the + * previouly read value of CTRLA may refer to an already + * processed descriptor hence could be outdated. + * We need to update ctrla to match the current + * descriptor. + */ + dscr = new_dscr; + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + } + if (unlikely(trials >= ATC_MAX_DSCR_TRIALS)) + return -ETIMEDOUT; /* for the first descriptor we can be more accurate */ if (desc_first->lli.dscr == dscr) - return atc_calc_bytes_left(ret, ctrla, desc_first); + return atc_calc_bytes_left(ret, ctrla); ret -= desc_first->len; list_for_each_entry(desc, &desc_first->tx_list, desc_node) { @@ -365,16 +425,14 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) } /* - * For the last descriptor in the chain we can calculate + * For the current descriptor in the chain we can calculate * the remaining bytes using the channel's register. - * Note that the transfer width of the first and last - * descriptor may differ. */ - if (!desc->lli.dscr) - ret = atc_calc_bytes_left_from_reg(ret, atchan, desc); + ret = atc_calc_bytes_left(ret, ctrla); } else { /* single transfer */ - ret = atc_calc_bytes_left_from_reg(ret, atchan, desc_first); + ctrla = channel_readl(atchan, CTRLA); + ret = atc_calc_bytes_left(ret, ctrla); } return ret; @@ -726,7 +784,6 @@ atc_prep_dma_interleaved(struct dma_chan *chan, desc->txd.cookie = -EBUSY; desc->total_len = desc->len = len; - desc->tx_width = dwidth; /* set end-of-link to the last link descriptor of list*/ set_desc_eol(desc); @@ -804,10 +861,6 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, first->txd.cookie = -EBUSY; first->total_len = len; - /* set transfer width for the calculation of the residue */ - first->tx_width = src_width; - prev->tx_width = src_width; - /* set end-of-link to the last link descriptor of list*/ set_desc_eol(desc); @@ -956,10 +1009,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, first->txd.cookie = -EBUSY; first->total_len = total_len; - /* set transfer width for the calculation of the residue */ - first->tx_width = reg_width; - prev->tx_width = reg_width; - /* first link descriptor of list is responsible of flags */ first->txd.flags = flags; /* client is in control of this ack */ @@ -1077,12 +1126,6 @@ atc_prep_dma_sg(struct dma_chan *chan, desc->txd.cookie = 0; desc->len = len; - /* - * Although we only need the transfer width for the first and - * the last descriptor, its easier to set it to all descriptors. - */ - desc->tx_width = src_width; - atc_desc_chain(&first, &prev, desc); /* update the lengths and addresses for the next loop cycle */ @@ -1256,7 +1299,6 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, /* First descriptor of the chain embedds additional information */ first->txd.cookie = -EBUSY; first->total_len = buf_len; - first->tx_width = reg_width; return &first->txd; diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index bc8d5ebedd19..7f5a08230f76 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -112,6 +112,7 @@ #define ATC_SRC_WIDTH_BYTE (0x0 << 24) #define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) #define ATC_SRC_WIDTH_WORD (0x2 << 24) +#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3) #define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ #define ATC_DST_WIDTH(x) ((x) << 28) #define ATC_DST_WIDTH_BYTE (0x0 << 28) @@ -182,7 +183,6 @@ struct at_lli { * @txd: support for the async_tx api * @desc_node: node on the channed descriptors list * @len: descriptor byte count - * @tx_width: transfer width * @total_len: total transaction byte count */ struct at_desc { @@ -194,7 +194,6 @@ struct at_desc { struct dma_async_tx_descriptor txd; struct list_head desc_node; size_t len; - u32 tx_width; size_t total_len; /* Interleaved data */ diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index cf1213de7865..40afa2a16cfc 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -359,18 +359,19 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, * descriptor view 2 since some fields of the configuration register * depend on transfer size and src/dest addresses. */ - if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_cyclic(atchan)) reg = AT_XDMAC_CNDC_NDVIEW_NDV1; - at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); - } else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) { + else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) reg = AT_XDMAC_CNDC_NDVIEW_NDV3; - } else { - /* - * No need to write AT_XDMAC_CC reg, it will be done when the - * descriptor is fecthed. - */ + else reg = AT_XDMAC_CNDC_NDVIEW_NDV2; - } + /* + * Even if the register will be updated from the configuration in the + * descriptor when using view 2 or higher, the PROT bit won't be set + * properly. This bit can be modified only by using the channel + * configuration register. + */ + at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); reg |= AT_XDMAC_CNDC_NDDUP | AT_XDMAC_CNDC_NDSUP @@ -681,15 +682,16 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lld.mbr_sa = mem; desc->lld.mbr_da = atchan->sconfig.dst_addr; } - desc->lld.mbr_cfg = atchan->cfg; - dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg); + dwidth = at_xdmac_get_dwidth(atchan->cfg); fixed_dwidth = IS_ALIGNED(len, 1 << dwidth) - ? at_xdmac_get_dwidth(desc->lld.mbr_cfg) + ? dwidth : AT_XDMAC_CC_DWIDTH_BYTE; desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */ | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */ | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */ | (len >> fixed_dwidth); /* microblock length */ + desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) | + AT_XDMAC_CC_DWIDTH(fixed_dwidth); dev_dbg(chan2dev(chan), "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index fbaf1ead2597..f1325f62563e 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -162,10 +162,11 @@ static void mv_chan_set_mode(struct mv_xor_chan *chan, config &= ~0x7; config |= op_mode; - if (IS_ENABLED(__BIG_ENDIAN)) - config |= XOR_DESCRIPTOR_SWAP; - else - config &= ~XOR_DESCRIPTOR_SWAP; +#if defined(__BIG_ENDIAN) + config |= XOR_DESCRIPTOR_SWAP; +#else + config &= ~XOR_DESCRIPTOR_SWAP; +#endif writel_relaxed(config, XOR_CONFIG(chan)); chan->current_type = type; diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index f513f77b1d85..ecab4ea059b4 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2328,7 +2328,7 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx) desc->txd.callback = last->txd.callback; desc->txd.callback_param = last->txd.callback_param; } - last->last = false; + desc->last = false; dma_cookie_assign(&desc->txd); @@ -2623,6 +2623,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, desc->rqcfg.brst_len = 1; desc->rqcfg.brst_len = get_burst_len(desc, len); + desc->bytes_requested = len; desc->txd.flags = flags; diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c index 7d2c17d8d30f..6f80432a3f0a 100644 --- a/drivers/dma/virt-dma.c +++ b/drivers/dma/virt-dma.c @@ -29,7 +29,7 @@ dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&vc->lock, flags); cookie = dma_cookie_assign(tx); - list_move_tail(&vd->node, &vc->desc_submitted); + list_add_tail(&vd->node, &vc->desc_submitted); spin_unlock_irqrestore(&vc->lock, flags); dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n", @@ -83,10 +83,8 @@ static void vchan_complete(unsigned long arg) cb_data = vd->tx.callback_param; list_del(&vd->node); - if (async_tx_test_ack(&vd->tx)) - list_add(&vd->node, &vc->desc_allocated); - else - vc->desc_free(vd); + + vc->desc_free(vd); if (cb) cb(cb_data); @@ -98,13 +96,9 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head) while (!list_empty(head)) { struct virt_dma_desc *vd = list_first_entry(head, struct virt_dma_desc, node); - if (async_tx_test_ack(&vd->tx)) { - list_move_tail(&vd->node, &vc->desc_allocated); - } else { - dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd); - list_del(&vd->node); - vc->desc_free(vd); - } + list_del(&vd->node); + dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd); + vc->desc_free(vd); } } EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list); @@ -114,7 +108,6 @@ void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev) dma_cookie_init(&vc->chan); spin_lock_init(&vc->lock); - INIT_LIST_HEAD(&vc->desc_allocated); INIT_LIST_HEAD(&vc->desc_submitted); INIT_LIST_HEAD(&vc->desc_issued); INIT_LIST_HEAD(&vc->desc_completed); diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h index 189e75dbcb15..181b95267866 100644 --- a/drivers/dma/virt-dma.h +++ b/drivers/dma/virt-dma.h @@ -29,7 +29,6 @@ struct virt_dma_chan { spinlock_t lock; /* protected by vc.lock */ - struct list_head desc_allocated; struct list_head desc_submitted; struct list_head desc_issued; struct list_head desc_completed; @@ -56,16 +55,11 @@ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan struct virt_dma_desc *vd, unsigned long tx_flags) { extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *); - unsigned long flags; dma_async_tx_descriptor_init(&vd->tx, &vc->chan); vd->tx.flags = tx_flags; vd->tx.tx_submit = vchan_tx_submit; - spin_lock_irqsave(&vc->lock, flags); - list_add_tail(&vd->node, &vc->desc_allocated); - spin_unlock_irqrestore(&vc->lock, flags); - return &vd->tx; } @@ -128,8 +122,7 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) } /** - * vchan_get_all_descriptors - obtain all allocated, submitted and issued - * descriptors + * vchan_get_all_descriptors - obtain all submitted and issued descriptors * vc: virtual channel to get descriptors from * head: list of descriptors found * @@ -141,7 +134,6 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, struct list_head *head) { - list_splice_tail_init(&vc->desc_allocated, head); list_splice_tail_init(&vc->desc_submitted, head); list_splice_tail_init(&vc->desc_issued, head); list_splice_tail_init(&vc->desc_completed, head); @@ -149,14 +141,11 @@ static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, static inline void vchan_free_chan_resources(struct virt_dma_chan *vc) { - struct virt_dma_desc *vd; unsigned long flags; LIST_HEAD(head); spin_lock_irqsave(&vc->lock, flags); vchan_get_all_descriptors(vc, &head); - list_for_each_entry(vd, &head, node) - async_tx_clear_ack(&vd->tx); spin_unlock_irqrestore(&vc->lock, flags); vchan_dma_desc_free_list(vc, &head); diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index 620fd55ec766..dff22ab01851 100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -111,6 +111,7 @@ #define XGENE_DMA_MEM_RAM_SHUTDOWN 0xD070 #define XGENE_DMA_BLK_MEM_RDY 0xD074 #define XGENE_DMA_BLK_MEM_RDY_VAL 0xFFFFFFFF +#define XGENE_DMA_RING_CMD_SM_OFFSET 0x8000 /* X-Gene SoC EFUSE csr register and bit defination */ #define XGENE_SOC_JTAG1_SHADOW 0x18 @@ -1887,6 +1888,8 @@ static int xgene_dma_get_resources(struct platform_device *pdev, return -ENOMEM; } + pdma->csr_ring_cmd += XGENE_DMA_RING_CMD_SM_OFFSET; + /* Get efuse csr region */ res = platform_get_resource(pdev, IORESOURCE_MEM, 3); if (!res) { diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 9fa8084a7c8d..d6144e3b97c5 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -58,6 +58,11 @@ bool efi_runtime_disabled(void) static int __init parse_efi_cmdline(char *str) { + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + if (parse_option_str(str, "noruntime")) disable_runtime = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e9fde72cf038..31b00f91cfcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1866,6 +1866,12 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); +struct amdgpu_ip_block_status { + bool valid; + bool sw; + bool hw; +}; + struct amdgpu_device { struct device *dev; struct drm_device *ddev; @@ -2008,7 +2014,7 @@ struct amdgpu_device { const struct amdgpu_ip_block_version *ip_blocks; int num_ip_blocks; - bool *ip_block_enabled; + struct amdgpu_ip_block_status *ip_block_status; struct mutex mn_lock; DECLARE_HASHTABLE(mn_hash, 7); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d79009b65867..99f158e1baff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1191,8 +1191,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return -EINVAL; } - adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL); - if (adev->ip_block_enabled == NULL) + adev->ip_block_status = kcalloc(adev->num_ip_blocks, + sizeof(struct amdgpu_ip_block_status), GFP_KERNEL); + if (adev->ip_block_status == NULL) return -ENOMEM; if (adev->ip_blocks == NULL) { @@ -1203,18 +1204,18 @@ static int amdgpu_early_init(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { DRM_ERROR("disabled ip block: %d\n", i); - adev->ip_block_enabled[i] = false; + adev->ip_block_status[i].valid = false; } else { if (adev->ip_blocks[i].funcs->early_init) { r = adev->ip_blocks[i].funcs->early_init((void *)adev); if (r == -ENOENT) - adev->ip_block_enabled[i] = false; + adev->ip_block_status[i].valid = false; else if (r) return r; else - adev->ip_block_enabled[i] = true; + adev->ip_block_status[i].valid = true; } else { - adev->ip_block_enabled[i] = true; + adev->ip_block_status[i].valid = true; } } } @@ -1227,11 +1228,12 @@ static int amdgpu_init(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_block_enabled[i]) + if (!adev->ip_block_status[i].valid) continue; r = adev->ip_blocks[i].funcs->sw_init((void *)adev); if (r) return r; + adev->ip_block_status[i].sw = true; /* need to do gmc hw init early so we can allocate gpu mem */ if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC) { r = amdgpu_vram_scratch_init(adev); @@ -1243,11 +1245,12 @@ static int amdgpu_init(struct amdgpu_device *adev) r = amdgpu_wb_init(adev); if (r) return r; + adev->ip_block_status[i].hw = true; } } for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_block_enabled[i]) + if (!adev->ip_block_status[i].sw) continue; /* gmc hw init is done early */ if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC) @@ -1255,6 +1258,7 @@ static int amdgpu_init(struct amdgpu_device *adev) r = adev->ip_blocks[i].funcs->hw_init((void *)adev); if (r) return r; + adev->ip_block_status[i].hw = true; } return 0; @@ -1265,7 +1269,7 @@ static int amdgpu_late_init(struct amdgpu_device *adev) int i = 0, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_block_enabled[i]) + if (!adev->ip_block_status[i].valid) continue; /* enable clockgating to save power */ r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev, @@ -1287,7 +1291,7 @@ static int amdgpu_fini(struct amdgpu_device *adev) int i, r; for (i = adev->num_ip_blocks - 1; i >= 0; i--) { - if (!adev->ip_block_enabled[i]) + if (!adev->ip_block_status[i].hw) continue; if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC) { amdgpu_wb_fini(adev); @@ -1300,14 +1304,16 @@ static int amdgpu_fini(struct amdgpu_device *adev) return r; r = adev->ip_blocks[i].funcs->hw_fini((void *)adev); /* XXX handle errors */ + adev->ip_block_status[i].hw = false; } for (i = adev->num_ip_blocks - 1; i >= 0; i--) { - if (!adev->ip_block_enabled[i]) + if (!adev->ip_block_status[i].sw) continue; r = adev->ip_blocks[i].funcs->sw_fini((void *)adev); /* XXX handle errors */ - adev->ip_block_enabled[i] = false; + adev->ip_block_status[i].sw = false; + adev->ip_block_status[i].valid = false; } return 0; @@ -1318,7 +1324,7 @@ static int amdgpu_suspend(struct amdgpu_device *adev) int i, r; for (i = adev->num_ip_blocks - 1; i >= 0; i--) { - if (!adev->ip_block_enabled[i]) + if (!adev->ip_block_status[i].valid) continue; /* ungate blocks so that suspend can properly shut them down */ r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev, @@ -1336,7 +1342,7 @@ static int amdgpu_resume(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_block_enabled[i]) + if (!adev->ip_block_status[i].valid) continue; r = adev->ip_blocks[i].funcs->resume(adev); if (r) @@ -1582,8 +1588,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); - kfree(adev->ip_block_enabled); - adev->ip_block_enabled = NULL; + kfree(adev->ip_block_status); + adev->ip_block_status = NULL; adev->accel_working = false; /* free i2c buses */ amdgpu_i2c_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index ae43b58c9733..4afc507820c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -449,7 +449,7 @@ out: * vital here, so they are not reported back to userspace. */ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, - struct amdgpu_bo_va *bo_va) + struct amdgpu_bo_va *bo_va, uint32_t operation) { struct ttm_validate_buffer tv, *entry; struct amdgpu_bo_list_entry *vm_bos; @@ -485,7 +485,9 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (r) goto error_unlock; - r = amdgpu_vm_bo_update(adev, bo_va, &bo_va->bo->tbo.mem); + + if (operation == AMDGPU_VA_OP_MAP) + r = amdgpu_vm_bo_update(adev, bo_va, &bo_va->bo->tbo.mem); error_unlock: mutex_unlock(&bo_va->vm->mutex); @@ -580,7 +582,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, } if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) - amdgpu_gem_va_update_vm(adev, bo_va); + amdgpu_gem_va_update_vm(adev, bo_va, args->operation); drm_gem_object_unreference_unlocked(gobj); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 52dff75aac6f..bc0fac618a3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -180,16 +180,16 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, if (vm) { /* do context switch */ amdgpu_vm_flush(ring, vm, ib->sync.last_vm_update); - } - if (vm && ring->funcs->emit_gds_switch) - amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id, - ib->gds_base, ib->gds_size, - ib->gws_base, ib->gws_size, - ib->oa_base, ib->oa_size); + if (ring->funcs->emit_gds_switch) + amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id, + ib->gds_base, ib->gds_size, + ib->gws_base, ib->gws_size, + ib->oa_base, ib->oa_size); - if (ring->funcs->emit_hdp_flush) - amdgpu_ring_emit_hdp_flush(ring); + if (ring->funcs->emit_hdp_flush) + amdgpu_ring_emit_hdp_flush(ring); + } old_ctx = ring->current_ctx; for (i = 0; i < num_ibs; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 31ad444c6386..9736892bcdf9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -235,7 +235,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file for (i = 0; i < adev->num_ip_blocks; i++) { if (adev->ip_blocks[i].type == type && - adev->ip_block_enabled[i]) { + adev->ip_block_status[i].valid) { ip.hw_ip_version_major = adev->ip_blocks[i].major; ip.hw_ip_version_minor = adev->ip_blocks[i].minor; ip.capabilities_flags = 0; @@ -274,7 +274,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file for (i = 0; i < adev->num_ip_blocks; i++) if (adev->ip_blocks[i].type == type && - adev->ip_block_enabled[i] && + adev->ip_block_status[i].valid && count < AMDGPU_HW_IP_INSTANCE_MAX_COUNT) count++; @@ -416,7 +416,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return n ? -EFAULT : 0; } case AMDGPU_INFO_DEV_INFO: { - struct drm_amdgpu_info_device dev_info; + struct drm_amdgpu_info_device dev_info = {}; struct amdgpu_cu_info cu_info; dev_info.device_id = dev->pdev->device; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 2c188fb9fd22..2db6ab0a543d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2561,7 +2561,7 @@ static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring, * sheduling on the ring. This function schedules the IB * on the gfx ring for execution by the GPU. */ -static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, +static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { bool need_ctx_switch = ring->current_ctx != ib->ctx; @@ -2569,15 +2569,10 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, u32 next_rptr = ring->wptr + 5; /* drop the CE preamble IB for the same context */ - if ((ring->type == AMDGPU_RING_TYPE_GFX) && - (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && - !need_ctx_switch) + if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch) return; - if (ring->type == AMDGPU_RING_TYPE_COMPUTE) - control |= INDIRECT_BUFFER_VALID; - - if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) + if (need_ctx_switch) next_rptr += 2; next_rptr += 4; @@ -2588,7 +2583,7 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) { + if (need_ctx_switch) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } @@ -2611,6 +2606,35 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, control); } +static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_ib *ib) +{ + u32 header, control = 0; + u32 next_rptr = ring->wptr + 5; + + control |= INDIRECT_BUFFER_VALID; + next_rptr += 4; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); + amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); + amdgpu_ring_write(ring, next_rptr); + + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + + control |= ib->length_dw | + (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); + + amdgpu_ring_write(ring, header); + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + (ib->gpu_addr & 0xFFFFFFFC)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); + amdgpu_ring_write(ring, control); +} + /** * gfx_v7_0_ring_test_ib - basic ring IB test * @@ -5555,7 +5579,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .get_wptr = gfx_v7_0_ring_get_wptr_gfx, .set_wptr = gfx_v7_0_ring_set_wptr_gfx, .parse_cs = NULL, - .emit_ib = gfx_v7_0_ring_emit_ib, + .emit_ib = gfx_v7_0_ring_emit_ib_gfx, .emit_fence = gfx_v7_0_ring_emit_fence_gfx, .emit_semaphore = gfx_v7_0_ring_emit_semaphore, .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, @@ -5571,7 +5595,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .get_wptr = gfx_v7_0_ring_get_wptr_compute, .set_wptr = gfx_v7_0_ring_set_wptr_compute, .parse_cs = NULL, - .emit_ib = gfx_v7_0_ring_emit_ib, + .emit_ib = gfx_v7_0_ring_emit_ib_compute, .emit_fence = gfx_v7_0_ring_emit_fence_compute, .emit_semaphore = gfx_v7_0_ring_emit_semaphore, .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1c7c992dea37..9e1d4ddbf475 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3753,7 +3753,7 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x20); /* poll interval */ } -static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, +static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { bool need_ctx_switch = ring->current_ctx != ib->ctx; @@ -3761,15 +3761,10 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, u32 next_rptr = ring->wptr + 5; /* drop the CE preamble IB for the same context */ - if ((ring->type == AMDGPU_RING_TYPE_GFX) && - (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && - !need_ctx_switch) + if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch) return; - if (ring->type == AMDGPU_RING_TYPE_COMPUTE) - control |= INDIRECT_BUFFER_VALID; - - if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) + if (need_ctx_switch) next_rptr += 2; next_rptr += 4; @@ -3780,7 +3775,7 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) { + if (need_ctx_switch) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } @@ -3803,6 +3798,36 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, control); } +static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_ib *ib) +{ + u32 header, control = 0; + u32 next_rptr = ring->wptr + 5; + + control |= INDIRECT_BUFFER_VALID; + + next_rptr += 4; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); + amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); + amdgpu_ring_write(ring, next_rptr); + + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + + control |= ib->length_dw | + (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); + + amdgpu_ring_write(ring, header); + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + (ib->gpu_addr & 0xFFFFFFFC)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); + amdgpu_ring_write(ring, control); +} + static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { @@ -4224,7 +4249,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .get_wptr = gfx_v8_0_ring_get_wptr_gfx, .set_wptr = gfx_v8_0_ring_set_wptr_gfx, .parse_cs = NULL, - .emit_ib = gfx_v8_0_ring_emit_ib, + .emit_ib = gfx_v8_0_ring_emit_ib_gfx, .emit_fence = gfx_v8_0_ring_emit_fence_gfx, .emit_semaphore = gfx_v8_0_ring_emit_semaphore, .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, @@ -4240,7 +4265,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .get_wptr = gfx_v8_0_ring_get_wptr_compute, .set_wptr = gfx_v8_0_ring_set_wptr_compute, .parse_cs = NULL, - .emit_ib = gfx_v8_0_ring_emit_ib, + .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute, .emit_semaphore = gfx_v8_0_ring_emit_semaphore, .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 5b59d5ad7d1c..aac212297b49 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -230,10 +230,12 @@ update_connector_routing(struct drm_atomic_state *state, int conn_idx) } connector_state->best_encoder = new_encoder; - idx = drm_crtc_index(connector_state->crtc); + if (connector_state->crtc) { + idx = drm_crtc_index(connector_state->crtc); - crtc_state = state->crtc_states[idx]; - crtc_state->mode_changed = true; + crtc_state = state->crtc_states[idx]; + crtc_state->mode_changed = true; + } DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d]\n", connector->base.id, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5f27290201e0..fd1de451c8c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3303,15 +3303,14 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); #define I915_READ64(reg) dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true) #define I915_READ64_2x32(lower_reg, upper_reg) ({ \ - u32 upper = I915_READ(upper_reg); \ - u32 lower = I915_READ(lower_reg); \ - u32 tmp = I915_READ(upper_reg); \ - if (upper != tmp) { \ - upper = tmp; \ - lower = I915_READ(lower_reg); \ - WARN_ON(I915_READ(upper_reg) != upper); \ - } \ - (u64)upper << 32 | lower; }) + u32 upper, lower, tmp; \ + tmp = I915_READ(upper_reg); \ + do { \ + upper = tmp; \ + lower = I915_READ(lower_reg); \ + tmp = I915_READ(upper_reg); \ + } while (upper != tmp); \ + (u64)upper << 32 | lower; }) #define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg) #define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 56b52a4767d4..31e8269e6e3d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1923,6 +1923,17 @@ static int ggtt_bind_vma(struct i915_vma *vma, vma->vm->insert_entries(vma->vm, pages, vma->node.start, cache_level, pte_flags); + + /* Note the inconsistency here is due to absence of the + * aliasing ppgtt on gen4 and earlier. Though we always + * request PIN_USER for execbuffer (translated to LOCAL_BIND), + * without the appgtt, we cannot honour that request and so + * must substitute it with a global binding. Since we do this + * behind the upper layers back, we need to explicitly set + * the bound flag ourselves. + */ + vma->bound |= GLOBAL_BIND; + } if (dev_priv->mm.aliasing_ppgtt && flags & LOCAL_BIND) { diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 633bd1fcab69..d19c9db5e18c 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -464,7 +464,10 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, } /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ - args->phys_swizzle_mode = args->swizzle_mode; + if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) + args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN; + else + args->phys_swizzle_mode = args->swizzle_mode; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c index 0d1dbb737933..247a424445f7 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c @@ -220,13 +220,15 @@ static int mdp4_plane_mode_set(struct drm_plane *plane, uint32_t op_mode = 0; uint32_t phasex_step = MDP4_VG_PHASE_STEP_DEFAULT; uint32_t phasey_step = MDP4_VG_PHASE_STEP_DEFAULT; - enum mdp4_frame_format frame_type = mdp4_get_frame_format(fb); + enum mdp4_frame_format frame_type; if (!(crtc && fb)) { DBG("%s: disabled!", mdp4_plane->name); return 0; } + frame_type = mdp4_get_frame_format(fb); + /* src values are in Q16 fixed point, convert to integer: */ src_x = src_x >> 16; src_y = src_y >> 16; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index 206f758f7d64..e253db5de5aa 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -76,7 +76,20 @@ static void mdp5_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *st static void mdp5_complete_commit(struct msm_kms *kms, struct drm_atomic_state *state) { + int i; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); + int nplanes = mdp5_kms->dev->mode_config.num_total_plane; + + for (i = 0; i < nplanes; i++) { + struct drm_plane *plane = state->planes[i]; + struct drm_plane_state *plane_state = state->plane_states[i]; + + if (!plane) + continue; + + mdp5_plane_complete_commit(plane, plane_state); + } + mdp5_disable(mdp5_kms); } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h index e0eb24587c84..e79ac09b7216 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h @@ -227,6 +227,8 @@ void mdp5_plane_install_properties(struct drm_plane *plane, struct drm_mode_object *obj); uint32_t mdp5_plane_get_flush(struct drm_plane *plane); void mdp5_plane_complete_flip(struct drm_plane *plane); +void mdp5_plane_complete_commit(struct drm_plane *plane, + struct drm_plane_state *state); enum mdp5_pipe mdp5_plane_pipe(struct drm_plane *plane); struct drm_plane *mdp5_plane_init(struct drm_device *dev, enum mdp5_pipe pipe, bool private_plane, uint32_t reg_offset); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 57b8f56ae9d0..22275568ab8b 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -31,8 +31,6 @@ struct mdp5_plane { uint32_t nformats; uint32_t formats[32]; - - bool enabled; }; #define to_mdp5_plane(x) container_of(x, struct mdp5_plane, base) @@ -56,22 +54,6 @@ static bool plane_enabled(struct drm_plane_state *state) return state->fb && state->crtc; } -static int mdp5_plane_disable(struct drm_plane *plane) -{ - struct mdp5_plane *mdp5_plane = to_mdp5_plane(plane); - struct mdp5_kms *mdp5_kms = get_kms(plane); - enum mdp5_pipe pipe = mdp5_plane->pipe; - - DBG("%s: disable", mdp5_plane->name); - - if (mdp5_kms) { - /* Release the memory we requested earlier from the SMP: */ - mdp5_smp_release(mdp5_kms->smp, pipe); - } - - return 0; -} - static void mdp5_plane_destroy(struct drm_plane *plane) { struct mdp5_plane *mdp5_plane = to_mdp5_plane(plane); @@ -224,7 +206,6 @@ static void mdp5_plane_atomic_update(struct drm_plane *plane, if (!plane_enabled(state)) { to_mdp5_plane_state(state)->pending = true; - mdp5_plane_disable(plane); } else if (to_mdp5_plane_state(state)->mode_changed) { int ret; to_mdp5_plane_state(state)->pending = true; @@ -602,6 +583,20 @@ uint32_t mdp5_plane_get_flush(struct drm_plane *plane) return mdp5_plane->flush_mask; } +/* called after vsync in thread context */ +void mdp5_plane_complete_commit(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct mdp5_kms *mdp5_kms = get_kms(plane); + struct mdp5_plane *mdp5_plane = to_mdp5_plane(plane); + enum mdp5_pipe pipe = mdp5_plane->pipe; + + if (!plane_enabled(plane->state)) { + DBG("%s: free SMP", mdp5_plane->name); + mdp5_smp_release(mdp5_kms->smp, pipe); + } +} + /* initialize plane */ struct drm_plane *mdp5_plane_init(struct drm_device *dev, enum mdp5_pipe pipe, bool private_plane, uint32_t reg_offset) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c index 16702aecf0df..64a27d86f2f5 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c @@ -34,22 +34,44 @@ * and CANNOT be re-allocated (eg: MMB0 and MMB1 both tied to RGB0). * * For each block that can be dynamically allocated, it can be either - * free, or pending/in-use by a client. The updates happen in three steps: + * free: + * The block is free. + * + * pending: + * The block is allocated to some client and not free. + * + * configured: + * The block is allocated to some client, and assigned to that + * client in MDP5_MDP_SMP_ALLOC registers. + * + * inuse: + * The block is being actively used by a client. + * + * The updates happen in the following steps: * * 1) mdp5_smp_request(): * When plane scanout is setup, calculate required number of - * blocks needed per client, and request. Blocks not inuse or - * pending by any other client are added to client's pending - * set. + * blocks needed per client, and request. Blocks neither inuse nor + * configured nor pending by any other client are added to client's + * pending set. + * For shrinking, blocks in pending but not in configured can be freed + * directly, but those already in configured will be freed later by + * mdp5_smp_commit. * * 2) mdp5_smp_configure(): * As hw is programmed, before FLUSH, MDP5_MDP_SMP_ALLOC registers * are configured for the union(pending, inuse) + * Current pending is copied to configured. + * It is assumed that mdp5_smp_request and mdp5_smp_configure not run + * concurrently for the same pipe. * * 3) mdp5_smp_commit(): - * After next vblank, copy pending -> inuse. Optionally update + * After next vblank, copy configured -> inuse. Optionally update * MDP5_SMP_ALLOC registers if there are newly unused blocks * + * 4) mdp5_smp_release(): + * Must be called after the pipe is disabled and no longer uses any SMB + * * On the next vblank after changes have been committed to hw, the * client's pending blocks become it's in-use blocks (and no-longer * in-use blocks become available to other clients). @@ -77,6 +99,9 @@ struct mdp5_smp { struct mdp5_client_smp_state client_state[MAX_CLIENTS]; }; +static void update_smp_state(struct mdp5_smp *smp, + u32 cid, mdp5_smp_state_t *assigned); + static inline struct mdp5_kms *get_kms(struct mdp5_smp *smp) { @@ -149,7 +174,12 @@ static int smp_request_block(struct mdp5_smp *smp, for (i = cur_nblks; i > nblks; i--) { int blk = find_first_bit(ps->pending, cnt); clear_bit(blk, ps->pending); - /* don't clear in global smp_state until _commit() */ + + /* clear in global smp_state if not in configured + * otherwise until _commit() + */ + if (!test_bit(blk, ps->configured)) + clear_bit(blk, smp->state); } } @@ -223,10 +253,33 @@ int mdp5_smp_request(struct mdp5_smp *smp, enum mdp5_pipe pipe, u32 fmt, u32 wid /* Release SMP blocks for all clients of the pipe */ void mdp5_smp_release(struct mdp5_smp *smp, enum mdp5_pipe pipe) { - int i, nblks; + int i; + unsigned long flags; + int cnt = smp->blk_cnt; + + for (i = 0; i < pipe2nclients(pipe); i++) { + mdp5_smp_state_t assigned; + u32 cid = pipe2client(pipe, i); + struct mdp5_client_smp_state *ps = &smp->client_state[cid]; + + spin_lock_irqsave(&smp->state_lock, flags); + + /* clear hw assignment */ + bitmap_or(assigned, ps->inuse, ps->configured, cnt); + update_smp_state(smp, CID_UNUSED, &assigned); + + /* free to global pool */ + bitmap_andnot(smp->state, smp->state, ps->pending, cnt); + bitmap_andnot(smp->state, smp->state, assigned, cnt); + + /* clear client's infor */ + bitmap_zero(ps->pending, cnt); + bitmap_zero(ps->configured, cnt); + bitmap_zero(ps->inuse, cnt); + + spin_unlock_irqrestore(&smp->state_lock, flags); + } - for (i = 0, nblks = 0; i < pipe2nclients(pipe); i++) - smp_request_block(smp, pipe2client(pipe, i), 0); set_fifo_thresholds(smp, pipe, 0); } @@ -274,12 +327,20 @@ void mdp5_smp_configure(struct mdp5_smp *smp, enum mdp5_pipe pipe) u32 cid = pipe2client(pipe, i); struct mdp5_client_smp_state *ps = &smp->client_state[cid]; - bitmap_or(assigned, ps->inuse, ps->pending, cnt); + /* + * if vblank has not happened since last smp_configure + * skip the configure for now + */ + if (!bitmap_equal(ps->inuse, ps->configured, cnt)) + continue; + + bitmap_copy(ps->configured, ps->pending, cnt); + bitmap_or(assigned, ps->inuse, ps->configured, cnt); update_smp_state(smp, cid, &assigned); } } -/* step #3: after vblank, copy pending -> inuse: */ +/* step #3: after vblank, copy configured -> inuse: */ void mdp5_smp_commit(struct mdp5_smp *smp, enum mdp5_pipe pipe) { int cnt = smp->blk_cnt; @@ -295,7 +356,7 @@ void mdp5_smp_commit(struct mdp5_smp *smp, enum mdp5_pipe pipe) * using, which can be released and made available to other * clients: */ - if (bitmap_andnot(released, ps->inuse, ps->pending, cnt)) { + if (bitmap_andnot(released, ps->inuse, ps->configured, cnt)) { unsigned long flags; spin_lock_irqsave(&smp->state_lock, flags); @@ -306,7 +367,7 @@ void mdp5_smp_commit(struct mdp5_smp *smp, enum mdp5_pipe pipe) update_smp_state(smp, CID_UNUSED, &released); } - bitmap_copy(ps->inuse, ps->pending, cnt); + bitmap_copy(ps->inuse, ps->configured, cnt); } } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.h index e47179f63585..5b6c2363f592 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.h @@ -23,6 +23,7 @@ struct mdp5_client_smp_state { mdp5_smp_state_t inuse; + mdp5_smp_state_t configured; mdp5_smp_state_t pending; }; diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 1b22d8bfe142..1ceb4f22dd89 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -283,12 +283,8 @@ int msm_atomic_commit(struct drm_device *dev, timeout = ktime_add_ms(ktime_get(), 1000); - ret = msm_wait_fence_interruptable(dev, c->fence, &timeout); - if (ret) { - WARN_ON(ret); // TODO unswap state back? or?? - commit_destroy(c); - return ret; - } + /* uninterruptible wait */ + msm_wait_fence(dev, c->fence, &timeout, false); complete_commit(c); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index b7ef56ed8d1c..d3467b115e04 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -637,8 +637,8 @@ static void msm_debugfs_cleanup(struct drm_minor *minor) * Fences: */ -int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence, - ktime_t *timeout) +int msm_wait_fence(struct drm_device *dev, uint32_t fence, + ktime_t *timeout , bool interruptible) { struct msm_drm_private *priv = dev->dev_private; int ret; @@ -667,7 +667,12 @@ int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence, remaining_jiffies = timespec_to_jiffies(&ts); } - ret = wait_event_interruptible_timeout(priv->fence_event, + if (interruptible) + ret = wait_event_interruptible_timeout(priv->fence_event, + fence_completed(dev, fence), + remaining_jiffies); + else + ret = wait_event_timeout(priv->fence_event, fence_completed(dev, fence), remaining_jiffies); @@ -853,7 +858,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, return -EINVAL; } - return msm_wait_fence_interruptable(dev, args->fence, &timeout); + return msm_wait_fence(dev, args->fence, &timeout, true); } static const struct drm_ioctl_desc msm_ioctls[] = { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index e7c5ea125d45..4ff0ec9c994b 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -164,8 +164,8 @@ int msm_atomic_commit(struct drm_device *dev, int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu); -int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence, - ktime_t *timeout); +int msm_wait_fence(struct drm_device *dev, uint32_t fence, + ktime_t *timeout, bool interruptible); int msm_queue_fence_cb(struct drm_device *dev, struct msm_fence_cb *cb, uint32_t fence); void msm_update_fence(struct drm_device *dev, uint32_t fence); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index f211b80e3a1e..c76cc853b08a 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -460,7 +460,7 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout) if (op & MSM_PREP_NOSYNC) timeout = NULL; - ret = msm_wait_fence_interruptable(dev, fence, timeout); + ret = msm_wait_fence(dev, fence, timeout, true); } /* TODO cache maintenance */ diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index dd7a7ab603e2..831461bc98a5 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -23,8 +23,12 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); - BUG_ON(!msm_obj->sgt); /* should have already pinned! */ - return msm_obj->sgt; + int npages = obj->size >> PAGE_SHIFT; + + if (WARN_ON(!msm_obj->pages)) /* should have already pinned! */ + return NULL; + + return drm_prime_pages_to_sg(msm_obj->pages, npages); } void *msm_gem_prime_vmap(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 649024d4daf1..477cbb12809b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -128,6 +128,7 @@ nouveau_cli_destroy(struct nouveau_cli *cli) nvkm_vm_ref(NULL, &nvxx_client(&cli->base)->vm, NULL); nvif_client_fini(&cli->base); usif_client_fini(cli); + kfree(cli); } static void @@ -865,8 +866,10 @@ nouveau_drm_preclose(struct drm_device *dev, struct drm_file *fpriv) pm_runtime_get_sync(dev->dev); + mutex_lock(&cli->mutex); if (cli->abi16) nouveau_abi16_fini(cli->abi16); + mutex_unlock(&cli->mutex); mutex_lock(&drm->client.mutex); list_del(&cli->head); diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c index 775277f1edb0..dcfbbfaf1739 100644 --- a/drivers/gpu/drm/nouveau/nouveau_platform.c +++ b/drivers/gpu/drm/nouveau/nouveau_platform.c @@ -92,6 +92,8 @@ static int nouveau_platform_power_down(struct nouveau_platform_gpu *gpu) return 0; } +#if IS_ENABLED(CONFIG_IOMMU_API) + static void nouveau_platform_probe_iommu(struct device *dev, struct nouveau_platform_gpu *gpu) { @@ -158,6 +160,20 @@ static void nouveau_platform_remove_iommu(struct device *dev, } } +#else + +static void nouveau_platform_probe_iommu(struct device *dev, + struct nouveau_platform_gpu *gpu) +{ +} + +static void nouveau_platform_remove_iommu(struct device *dev, + struct nouveau_platform_gpu *gpu) +{ +} + +#endif + static int nouveau_platform_probe(struct platform_device *pdev) { struct nouveau_platform_gpu *gpu; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 18f449715788..7464aef34674 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -175,15 +175,24 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, node->page_shift = 12; switch (drm->device.info.family) { + case NV_DEVICE_INFO_V0_TNT: + case NV_DEVICE_INFO_V0_CELSIUS: + case NV_DEVICE_INFO_V0_KELVIN: + case NV_DEVICE_INFO_V0_RANKINE: + case NV_DEVICE_INFO_V0_CURIE: + break; case NV_DEVICE_INFO_V0_TESLA: if (drm->device.info.chipset != 0x50) node->memtype = (nvbo->tile_flags & 0x7f00) >> 8; break; case NV_DEVICE_INFO_V0_FERMI: case NV_DEVICE_INFO_V0_KEPLER: + case NV_DEVICE_INFO_V0_MAXWELL: node->memtype = (nvbo->tile_flags & 0xff00) >> 8; break; default: + NV_WARN(drm, "%s: unhandled family type %x\n", __func__, + drm->device.info.family); break; } diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c index 4ef602c5469d..495c57644ced 100644 --- a/drivers/gpu/drm/nouveau/nv04_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c @@ -203,7 +203,7 @@ nv04_fbcon_accel_init(struct fb_info *info) if (ret) return ret; - if (RING_SPACE(chan, 49)) { + if (RING_SPACE(chan, 49 + (device->info.chipset >= 0x11 ? 4 : 0))) { nouveau_fbcon_gpu_lockup(info); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 7da7958556a3..981342d142ff 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -979,7 +979,7 @@ nv50_crtc_cursor_show_hide(struct nouveau_crtc *nv_crtc, bool show, bool update) { struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev); - if (show && nv_crtc->cursor.nvbo) + if (show && nv_crtc->cursor.nvbo && nv_crtc->base.enabled) nv50_crtc_cursor_show(nv_crtc); else nv50_crtc_cursor_hide(nv_crtc); diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c index 394c89abcc97..901130b06072 100644 --- a/drivers/gpu/drm/nouveau/nv50_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c @@ -188,7 +188,7 @@ nv50_fbcon_accel_init(struct fb_info *info) if (ret) return ret; - ret = RING_SPACE(chan, 59); + ret = RING_SPACE(chan, 58); if (ret) { nouveau_fbcon_gpu_lockup(info); return ret; @@ -252,6 +252,7 @@ nv50_fbcon_accel_init(struct fb_info *info) OUT_RING(chan, info->var.yres_virtual); OUT_RING(chan, upper_32_bits(fb->vma.offset)); OUT_RING(chan, lower_32_bits(fb->vma.offset)); + FIRE_RING(chan); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c index 61246677e8dc..fcd2e5f27bb9 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c +++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c @@ -188,7 +188,7 @@ nvc0_fbcon_accel_init(struct fb_info *info) return -EINVAL; } - ret = RING_SPACE(chan, 60); + ret = RING_SPACE(chan, 58); if (ret) { WARN_ON(1); nouveau_fbcon_gpu_lockup(info); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf110.c index 9ef6728c528d..7f2f05f78cc8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf110.c @@ -809,7 +809,7 @@ exec_lookup(struct nv50_disp_priv *priv, int head, int or, u32 ctrl, case 0x00000900: type = DCB_OUTPUT_DP; mask = 2; break; default: nv_error(priv, "unknown SOR mc 0x%08x\n", ctrl); - return 0x0000; + return NULL; } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c index e10f9644140f..52c22b026005 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c @@ -166,14 +166,30 @@ gk104_fifo_context_attach(struct nvkm_object *parent, } static int +gk104_fifo_chan_kick(struct gk104_fifo_chan *chan) +{ + struct nvkm_object *obj = (void *)chan; + struct gk104_fifo_priv *priv = (void *)obj->engine; + + nv_wr32(priv, 0x002634, chan->base.chid); + if (!nv_wait(priv, 0x002634, 0x100000, 0x000000)) { + nv_error(priv, "channel %d [%s] kick timeout\n", + chan->base.chid, nvkm_client_name(chan)); + return -EBUSY; + } + + return 0; +} + +static int gk104_fifo_context_detach(struct nvkm_object *parent, bool suspend, struct nvkm_object *object) { struct nvkm_bar *bar = nvkm_bar(parent); - struct gk104_fifo_priv *priv = (void *)parent->engine; struct gk104_fifo_base *base = (void *)parent->parent; struct gk104_fifo_chan *chan = (void *)parent; u32 addr; + int ret; switch (nv_engidx(object->engine)) { case NVDEV_ENGINE_SW : return 0; @@ -188,13 +204,9 @@ gk104_fifo_context_detach(struct nvkm_object *parent, bool suspend, return -EINVAL; } - nv_wr32(priv, 0x002634, chan->base.chid); - if (!nv_wait(priv, 0x002634, 0xffffffff, chan->base.chid)) { - nv_error(priv, "channel %d [%s] kick timeout\n", - chan->base.chid, nvkm_client_name(chan)); - if (suspend) - return -EBUSY; - } + ret = gk104_fifo_chan_kick(chan); + if (ret && suspend) + return ret; if (addr) { nv_wo32(base, addr + 0x00, 0x00000000); @@ -319,6 +331,7 @@ gk104_fifo_chan_fini(struct nvkm_object *object, bool suspend) gk104_fifo_runlist_update(priv, chan->engine); } + gk104_fifo_chan_kick(chan); nv_wr32(priv, 0x800000 + (chid * 8), 0x00000000); return nvkm_fifo_channel_fini(&chan->base, suspend); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 5606c25e5d02..ca11ddb6ed46 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -663,6 +663,37 @@ gf100_gr_zbc_init(struct gf100_gr_priv *priv) gf100_gr_zbc_clear_depth(priv, index); } +/** + * Wait until GR goes idle. GR is considered idle if it is disabled by the + * MC (0x200) register, or GR is not busy and a context switch is not in + * progress. + */ +int +gf100_gr_wait_idle(struct gf100_gr_priv *priv) +{ + unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000); + bool gr_enabled, ctxsw_active, gr_busy; + + do { + /* + * required to make sure FIFO_ENGINE_STATUS (0x2640) is + * up-to-date + */ + nv_rd32(priv, 0x400700); + + gr_enabled = nv_rd32(priv, 0x200) & 0x1000; + ctxsw_active = nv_rd32(priv, 0x2640) & 0x8000; + gr_busy = nv_rd32(priv, 0x40060c) & 0x1; + + if (!gr_enabled || (!gr_busy && !ctxsw_active)) + return 0; + } while (time_before(jiffies, end_jiffies)); + + nv_error(priv, "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n", + gr_enabled, ctxsw_active, gr_busy); + return -EAGAIN; +} + void gf100_gr_mmio(struct gf100_gr_priv *priv, const struct gf100_gr_pack *p) { @@ -699,7 +730,13 @@ gf100_gr_icmd(struct gf100_gr_priv *priv, const struct gf100_gr_pack *p) while (addr < next) { nv_wr32(priv, 0x400200, addr); - nv_wait(priv, 0x400700, 0x00000002, 0x00000000); + /** + * Wait for GR to go idle after submitting a + * GO_IDLE bundle + */ + if ((addr & 0xffff) == 0xe100) + gf100_gr_wait_idle(priv); + nv_wait(priv, 0x400700, 0x00000004, 0x00000000); addr += init->pitch; } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index 8af1a89eda84..c9533fdac4fc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -181,6 +181,7 @@ struct gf100_gr_oclass { int ppc_nr; }; +int gf100_gr_wait_idle(struct gf100_gr_priv *); void gf100_gr_mmio(struct gf100_gr_priv *, const struct gf100_gr_pack *); void gf100_gr_icmd(struct gf100_gr_priv *, const struct gf100_gr_pack *); void gf100_gr_mthd(struct gf100_gr_priv *, const struct gf100_gr_pack *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c index 2006c445938d..4cf36a3aa814 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c @@ -332,9 +332,12 @@ static void nvkm_perfctx_dtor(struct nvkm_object *object) { struct nvkm_pm *ppm = (void *)object->engine; + struct nvkm_perfctx *ctx = (void *)object; + mutex_lock(&nv_subdev(ppm)->mutex); - nvkm_engctx_destroy(&ppm->context->base); - ppm->context = NULL; + nvkm_engctx_destroy(&ctx->base); + if (ppm->context == ctx) + ppm->context = NULL; mutex_unlock(&nv_subdev(ppm)->mutex); } @@ -355,12 +358,11 @@ nvkm_perfctx_ctor(struct nvkm_object *parent, struct nvkm_object *engine, mutex_lock(&nv_subdev(ppm)->mutex); if (ppm->context == NULL) ppm->context = ctx; - mutex_unlock(&nv_subdev(ppm)->mutex); - if (ctx != ppm->context) - return -EBUSY; + ret = -EBUSY; + mutex_unlock(&nv_subdev(ppm)->mutex); - return 0; + return ret; } struct nvkm_oclass diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c index f67cdae1e90a..f4611e3f0971 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c @@ -1285,6 +1285,44 @@ init_zm_reg_sequence(struct nvbios_init *init) } /** + * INIT_PLL_INDIRECT - opcode 0x59 + * + */ +static void +init_pll_indirect(struct nvbios_init *init) +{ + struct nvkm_bios *bios = init->bios; + u32 reg = nv_ro32(bios, init->offset + 1); + u16 addr = nv_ro16(bios, init->offset + 5); + u32 freq = (u32)nv_ro16(bios, addr) * 1000; + + trace("PLL_INDIRECT\tR[0x%06x] =PLL= VBIOS[%04x] = %dkHz\n", + reg, addr, freq); + init->offset += 7; + + init_prog_pll(init, reg, freq); +} + +/** + * INIT_ZM_REG_INDIRECT - opcode 0x5a + * + */ +static void +init_zm_reg_indirect(struct nvbios_init *init) +{ + struct nvkm_bios *bios = init->bios; + u32 reg = nv_ro32(bios, init->offset + 1); + u16 addr = nv_ro16(bios, init->offset + 5); + u32 data = nv_ro32(bios, addr); + + trace("ZM_REG_INDIRECT\tR[0x%06x] = VBIOS[0x%04x] = 0x%08x\n", + reg, addr, data); + init->offset += 7; + + init_wr32(init, addr, data); +} + +/** * INIT_SUB_DIRECT - opcode 0x5b * */ @@ -2145,6 +2183,8 @@ static struct nvbios_init_opcode { [0x56] = { init_condition_time }, [0x57] = { init_ltime }, [0x58] = { init_zm_reg_sequence }, + [0x59] = { init_pll_indirect }, + [0x5a] = { init_zm_reg_indirect }, [0x5b] = { init_sub_direct }, [0x5c] = { init_jump }, [0x5e] = { init_i2c_if }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c index 822d32a28d6e..065e9f5c8db9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c @@ -180,7 +180,8 @@ gt215_clk_info(struct nvkm_clk *clock, int clk, u32 khz, struct gt215_clk_info *info) { struct gt215_clk_priv *priv = (void *)clock; - u32 oclk, sclk, sdiv, diff; + u32 oclk, sclk, sdiv; + s32 diff; info->clk = 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c index c0fdb89e74ac..24dcdfb58a8d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c @@ -38,6 +38,14 @@ gk20a_ibus_init_priv_ring(struct gk20a_ibus_priv *priv) nv_wr32(priv, 0x12004c, 0x4); nv_wr32(priv, 0x122204, 0x2); nv_rd32(priv, 0x122204); + + /* + * Bug: increase clock timeout to avoid operation failure at high + * gpcclk rate. + */ + nv_wr32(priv, 0x122354, 0x800); + nv_wr32(priv, 0x128328, 0x800); + nv_wr32(priv, 0x124320, 0x800); } static void diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c index 80614f1b2074..282143f49d72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c @@ -50,7 +50,12 @@ nv04_instobj_dtor(struct nvkm_object *object) { struct nv04_instmem_priv *priv = (void *)nvkm_instmem(object); struct nv04_instobj_priv *node = (void *)object; + struct nvkm_subdev *subdev = (void *)priv; + + mutex_lock(&subdev->mutex); nvkm_mm_free(&priv->heap, &node->mem); + mutex_unlock(&subdev->mutex); + nvkm_instobj_destroy(&node->base); } @@ -62,6 +67,7 @@ nv04_instobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine, struct nv04_instmem_priv *priv = (void *)nvkm_instmem(parent); struct nv04_instobj_priv *node; struct nvkm_instobj_args *args = data; + struct nvkm_subdev *subdev = (void *)priv; int ret; if (!args->align) @@ -72,8 +78,10 @@ nv04_instobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine, if (ret) return ret; + mutex_lock(&subdev->mutex); ret = nvkm_mm_head(&priv->heap, 0, 1, args->size, args->size, args->align, &node->mem); + mutex_unlock(&subdev->mutex); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index dd39f434b4a7..c3872598b85a 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -2299,8 +2299,7 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, encoder_mode = atombios_get_encoder_mode(encoder); if (connector && (radeon_audio != 0) && ((encoder_mode == ATOM_ENCODER_MODE_HDMI) || - (ENCODER_MODE_IS_DP(encoder_mode) && - drm_detect_monitor_audio(radeon_connector_edid(connector))))) + ENCODER_MODE_IS_DP(encoder_mode))) radeon_audio_mode_set(encoder, adjusted_mode); } diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 68fd9fc677e3..44480c1b9738 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -93,30 +93,26 @@ void dce6_afmt_select_pin(struct drm_encoder *encoder) struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 offset; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->offset; - - WREG32(AFMT_AUDIO_SRC_CONTROL + offset, - AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id)); + WREG32(AFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, + AFMT_AUDIO_SRC_SELECT(dig->pin->id)); } void dce6_afmt_write_latency_fields(struct drm_encoder *encoder, - struct drm_connector *connector, struct drm_display_mode *mode) + struct drm_connector *connector, + struct drm_display_mode *mode) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 tmp = 0, offset; + u32 tmp = 0; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) { if (connector->latency_present[1]) tmp = VIDEO_LIPSYNC(connector->video_latency[1]) | @@ -130,24 +126,24 @@ void dce6_afmt_write_latency_fields(struct drm_encoder *encoder, else tmp = VIDEO_LIPSYNC(0) | AUDIO_LIPSYNC(0); } - WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); + WREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); } void dce6_afmt_hdmi_write_speaker_allocation(struct drm_encoder *encoder, - u8 *sadb, int sad_count) + u8 *sadb, int sad_count) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 offset, tmp; + u32 tmp; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - /* program the speaker allocation */ - tmp = RREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp = RREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK); /* set HDMI mode */ tmp |= HDMI_CONNECTION; @@ -155,24 +151,24 @@ void dce6_afmt_hdmi_write_speaker_allocation(struct drm_encoder *encoder, tmp |= SPEAKER_ALLOCATION(sadb[0]); else tmp |= SPEAKER_ALLOCATION(5); /* stereo */ - WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + WREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); } void dce6_afmt_dp_write_speaker_allocation(struct drm_encoder *encoder, - u8 *sadb, int sad_count) + u8 *sadb, int sad_count) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - u32 offset, tmp; + u32 tmp; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - /* program the speaker allocation */ - tmp = RREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp = RREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); tmp &= ~(HDMI_CONNECTION | SPEAKER_ALLOCATION_MASK); /* set DP mode */ tmp |= DP_CONNECTION; @@ -180,13 +176,13 @@ void dce6_afmt_dp_write_speaker_allocation(struct drm_encoder *encoder, tmp |= SPEAKER_ALLOCATION(sadb[0]); else tmp |= SPEAKER_ALLOCATION(5); /* stereo */ - WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + WREG32_ENDPOINT(dig->pin->offset, + AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); } void dce6_afmt_write_sad_regs(struct drm_encoder *encoder, - struct cea_sad *sads, int sad_count) + struct cea_sad *sads, int sad_count) { - u32 offset; int i; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; @@ -206,11 +202,9 @@ void dce6_afmt_write_sad_regs(struct drm_encoder *encoder, { AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; - if (!dig || !dig->afmt || !dig->afmt->pin) + if (!dig || !dig->afmt || !dig->pin) return; - offset = dig->afmt->pin->offset; - for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { u32 value = 0; u8 stereo_freqs = 0; @@ -237,7 +231,7 @@ void dce6_afmt_write_sad_regs(struct drm_encoder *encoder, value |= SUPPORTED_FREQUENCIES_STEREO(stereo_freqs); - WREG32_ENDPOINT(offset, eld_reg_to_type[i][0], value); + WREG32_ENDPOINT(dig->pin->offset, eld_reg_to_type[i][0], value); } } @@ -253,7 +247,7 @@ void dce6_audio_enable(struct radeon_device *rdev, } void dce6_hdmi_audio_set_dto(struct radeon_device *rdev, - struct radeon_crtc *crtc, unsigned int clock) + struct radeon_crtc *crtc, unsigned int clock) { /* Two dtos; generally use dto0 for HDMI */ u32 value = 0; @@ -272,7 +266,7 @@ void dce6_hdmi_audio_set_dto(struct radeon_device *rdev, } void dce6_dp_audio_set_dto(struct radeon_device *rdev, - struct radeon_crtc *crtc, unsigned int clock) + struct radeon_crtc *crtc, unsigned int clock) { /* Two dtos; generally use dto1 for DP */ u32 value = 0; diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index fa719c53449b..fbc8d88d6e5d 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -245,6 +245,28 @@ static struct radeon_audio_funcs dce6_dp_funcs = { static void radeon_audio_enable(struct radeon_device *rdev, struct r600_audio_pin *pin, u8 enable_mask) { + struct drm_encoder *encoder; + struct radeon_encoder *radeon_encoder; + struct radeon_encoder_atom_dig *dig; + int pin_count = 0; + + if (!pin) + return; + + if (rdev->mode_info.mode_config_initialized) { + list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + if (radeon_encoder_is_digital(encoder)) { + radeon_encoder = to_radeon_encoder(encoder); + dig = radeon_encoder->enc_priv; + if (dig->pin == pin) + pin_count++; + } + } + + if ((pin_count > 1) && (enable_mask == 0)) + return; + } + if (rdev->audio.funcs->enable) rdev->audio.funcs->enable(rdev, pin, enable_mask); } @@ -336,24 +358,13 @@ void radeon_audio_endpoint_wreg(struct radeon_device *rdev, u32 offset, static void radeon_audio_write_sad_regs(struct drm_encoder *encoder) { - struct radeon_encoder *radeon_encoder; - struct drm_connector *connector; - struct radeon_connector *radeon_connector = NULL; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct cea_sad *sads; int sad_count; - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } - - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); + if (!connector) return; - } sad_count = drm_edid_to_sad(radeon_connector_edid(connector), &sads); if (sad_count <= 0) { @@ -362,8 +373,6 @@ static void radeon_audio_write_sad_regs(struct drm_encoder *encoder) } BUG_ON(!sads); - radeon_encoder = to_radeon_encoder(encoder); - if (radeon_encoder->audio && radeon_encoder->audio->write_sad_regs) radeon_encoder->audio->write_sad_regs(encoder, sads, sad_count); @@ -372,27 +381,16 @@ static void radeon_audio_write_sad_regs(struct drm_encoder *encoder) static void radeon_audio_write_speaker_allocation(struct drm_encoder *encoder) { + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); - struct drm_connector *connector; - struct radeon_connector *radeon_connector = NULL; u8 *sadb = NULL; int sad_count; - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } - - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); + if (!connector) return; - } - sad_count = drm_edid_to_speaker_allocation( - radeon_connector_edid(connector), &sadb); + sad_count = drm_edid_to_speaker_allocation(radeon_connector_edid(connector), + &sadb); if (sad_count < 0) { DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); @@ -406,26 +404,13 @@ static void radeon_audio_write_speaker_allocation(struct drm_encoder *encoder) } static void radeon_audio_write_latency_fields(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { - struct radeon_encoder *radeon_encoder; - struct drm_connector *connector; - struct radeon_connector *radeon_connector = 0; - - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); + if (!connector) return; - } - - radeon_encoder = to_radeon_encoder(encoder); if (radeon_encoder->audio && radeon_encoder->audio->write_latency_fields) radeon_encoder->audio->write_latency_fields(encoder, connector, mode); @@ -451,29 +436,23 @@ static void radeon_audio_select_pin(struct drm_encoder *encoder) } void radeon_audio_detect(struct drm_connector *connector, + struct drm_encoder *encoder, enum drm_connector_status status) { - struct radeon_device *rdev; - struct radeon_encoder *radeon_encoder; + struct drm_device *dev = connector->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig; - if (!connector || !connector->encoder) + if (!radeon_audio_chipset_supported(rdev)) return; - rdev = connector->encoder->dev->dev_private; - - if (!radeon_audio_chipset_supported(rdev)) + if (!radeon_encoder_is_digital(encoder)) return; - radeon_encoder = to_radeon_encoder(connector->encoder); dig = radeon_encoder->enc_priv; if (status == connector_status_connected) { - if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) { - radeon_encoder->audio = NULL; - return; - } - if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); @@ -486,11 +465,17 @@ void radeon_audio_detect(struct drm_connector *connector, radeon_encoder->audio = rdev->audio.hdmi_funcs; } - dig->afmt->pin = radeon_audio_get_pin(connector->encoder); - radeon_audio_enable(rdev, dig->afmt->pin, 0xf); + if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + if (!dig->pin) + dig->pin = radeon_audio_get_pin(encoder); + radeon_audio_enable(rdev, dig->pin, 0xf); + } else { + radeon_audio_enable(rdev, dig->pin, 0); + dig->pin = NULL; + } } else { - radeon_audio_enable(rdev, dig->afmt->pin, 0); - dig->afmt->pin = NULL; + radeon_audio_enable(rdev, dig->pin, 0); + dig->pin = NULL; } } @@ -518,29 +503,18 @@ static void radeon_audio_set_dto(struct drm_encoder *encoder, unsigned int clock } static int radeon_audio_set_avi_packet(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - struct drm_connector *connector; - struct radeon_connector *radeon_connector = NULL; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE]; struct hdmi_avi_infoframe frame; int err; - list_for_each_entry(connector, - &encoder->dev->mode_config.connector_list, head) { - if (connector->encoder == encoder) { - radeon_connector = to_radeon_connector(connector); - break; - } - } - - if (!radeon_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); - return -ENOENT; - } + if (!connector) + return -EINVAL; err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); if (err < 0) { @@ -563,8 +537,8 @@ static int radeon_audio_set_avi_packet(struct drm_encoder *encoder, return err; } - if (dig && dig->afmt && - radeon_encoder->audio && radeon_encoder->audio->set_avi_packet) + if (dig && dig->afmt && radeon_encoder->audio && + radeon_encoder->audio->set_avi_packet) radeon_encoder->audio->set_avi_packet(rdev, dig->afmt->offset, buffer, sizeof(buffer)); @@ -722,30 +696,41 @@ static void radeon_audio_hdmi_mode_set(struct drm_encoder *encoder, { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); if (!dig || !dig->afmt) return; - radeon_audio_set_mute(encoder, true); + if (!connector) + return; - radeon_audio_write_speaker_allocation(encoder); - radeon_audio_write_sad_regs(encoder); - radeon_audio_write_latency_fields(encoder, mode); - radeon_audio_set_dto(encoder, mode->clock); - radeon_audio_set_vbi_packet(encoder); - radeon_hdmi_set_color_depth(encoder); - radeon_audio_update_acr(encoder, mode->clock); - radeon_audio_set_audio_packet(encoder); - radeon_audio_select_pin(encoder); + if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + radeon_audio_set_mute(encoder, true); - if (radeon_audio_set_avi_packet(encoder, mode) < 0) - return; + radeon_audio_write_speaker_allocation(encoder); + radeon_audio_write_sad_regs(encoder); + radeon_audio_write_latency_fields(encoder, mode); + radeon_audio_set_dto(encoder, mode->clock); + radeon_audio_set_vbi_packet(encoder); + radeon_hdmi_set_color_depth(encoder); + radeon_audio_update_acr(encoder, mode->clock); + radeon_audio_set_audio_packet(encoder); + radeon_audio_select_pin(encoder); + + if (radeon_audio_set_avi_packet(encoder, mode) < 0) + return; - radeon_audio_set_mute(encoder, false); + radeon_audio_set_mute(encoder, false); + } else { + radeon_hdmi_set_color_depth(encoder); + + if (radeon_audio_set_avi_packet(encoder, mode) < 0) + return; + } } static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { struct drm_device *dev = encoder->dev; struct radeon_device *rdev = dev->dev_private; @@ -759,22 +744,27 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, if (!dig || !dig->afmt) return; - radeon_audio_write_speaker_allocation(encoder); - radeon_audio_write_sad_regs(encoder); - radeon_audio_write_latency_fields(encoder, mode); - if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev)) - radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10); - else - radeon_audio_set_dto(encoder, dig_connector->dp_clock); - radeon_audio_set_audio_packet(encoder); - radeon_audio_select_pin(encoder); - - if (radeon_audio_set_avi_packet(encoder, mode) < 0) + if (!connector) return; + + if (drm_detect_monitor_audio(radeon_connector_edid(connector))) { + radeon_audio_write_speaker_allocation(encoder); + radeon_audio_write_sad_regs(encoder); + radeon_audio_write_latency_fields(encoder, mode); + if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev)) + radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10); + else + radeon_audio_set_dto(encoder, dig_connector->dp_clock); + radeon_audio_set_audio_packet(encoder); + radeon_audio_select_pin(encoder); + + if (radeon_audio_set_avi_packet(encoder, mode) < 0) + return; + } } void radeon_audio_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h index 8438304f7139..059cc3012062 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.h +++ b/drivers/gpu/drm/radeon/radeon_audio.h @@ -68,7 +68,8 @@ struct radeon_audio_funcs int radeon_audio_init(struct radeon_device *rdev); void radeon_audio_detect(struct drm_connector *connector, - enum drm_connector_status status); + struct drm_encoder *encoder, + enum drm_connector_status status); u32 radeon_audio_endpoint_rreg(struct radeon_device *rdev, u32 offset, u32 reg); void radeon_audio_endpoint_wreg(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 3e5f6b71f3ad..c097d3a82bda 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -1255,10 +1255,15 @@ struct radeon_encoder_lvds *radeon_combios_get_lvds_info(struct radeon_encoder if ((RBIOS16(tmp) == lvds->native_mode.hdisplay) && (RBIOS16(tmp + 2) == lvds->native_mode.vdisplay)) { + u32 hss = (RBIOS16(tmp + 21) - RBIOS16(tmp + 19) - 1) * 8; + + if (hss > lvds->native_mode.hdisplay) + hss = (10 - 1) * 8; + lvds->native_mode.htotal = lvds->native_mode.hdisplay + (RBIOS16(tmp + 17) - RBIOS16(tmp + 19)) * 8; lvds->native_mode.hsync_start = lvds->native_mode.hdisplay + - (RBIOS16(tmp + 21) - RBIOS16(tmp + 19) - 1) * 8; + hss; lvds->native_mode.hsync_end = lvds->native_mode.hsync_start + (RBIOS8(tmp + 23) * 8); diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index cebb65e07e1d..94b21ae70ef7 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1379,8 +1379,16 @@ out: /* updated in get modes as well since we need to know if it's analog or digital */ radeon_connector_update_scratch_regs(connector, ret); - if (radeon_audio != 0) - radeon_audio_detect(connector, ret); + if ((radeon_audio != 0) && radeon_connector->use_digital) { + const struct drm_connector_helper_funcs *connector_funcs = + connector->helper_private; + + encoder = connector_funcs->best_encoder(connector); + if (encoder && (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)) { + radeon_connector_get_edid(connector); + radeon_audio_detect(connector, encoder, ret); + } + } exit: pm_runtime_mark_last_busy(connector->dev->dev); @@ -1717,8 +1725,10 @@ radeon_dp_detect(struct drm_connector *connector, bool force) radeon_connector_update_scratch_regs(connector, ret); - if (radeon_audio != 0) - radeon_audio_detect(connector, ret); + if ((radeon_audio != 0) && encoder) { + radeon_connector_get_edid(connector); + radeon_audio_detect(connector, encoder, ret); + } out: pm_runtime_mark_last_busy(connector->dev->dev); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 07909d817381..aecc3e3dec0c 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -237,7 +237,6 @@ struct radeon_afmt { int offset; bool last_buffer_filled_status; int id; - struct r600_audio_pin *pin; }; struct radeon_mode_info { @@ -439,6 +438,7 @@ struct radeon_encoder_atom_dig { uint8_t backlight_level; int panel_mode; struct radeon_afmt *afmt; + struct r600_audio_pin *pin; int active_mst_links; }; diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index f822fd2a1ada..884d82f9190e 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -546,6 +546,12 @@ static const struct hid_device_id apple_devices[] = { .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI), + .driver_data = APPLE_HAS_FN }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO), + .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS), + .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO), diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 157c62775053..e6fce23b121a 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1782,6 +1782,9 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) }, @@ -2463,6 +2466,9 @@ static const struct hid_device_id hid_mouse_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, { } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index b04b0820d816..b3b225b75d0a 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -142,6 +142,9 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0290 #define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0291 #define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0292 +#define USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI 0x0272 +#define USB_DEVICE_ID_APPLE_WELLSPRING9_ISO 0x0273 +#define USB_DEVICE_ID_APPLE_WELLSPRING9_JIS 0x0274 #define USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY 0x030a #define USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY 0x030b #define USB_DEVICE_ID_APPLE_IRCONTROL 0x8240 diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 28fcb2e246d5..fbfc02bb2cfa 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -195,7 +195,7 @@ abort: } static int nct7802_write_voltage(struct nct7802_data *data, int nr, int index, - unsigned int voltage) + unsigned long voltage) { int shift = 8 - REG_VOLTAGE_LIMIT_MSB_SHIFT[index - 1][nr]; int err; diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c index b77b82f24480..6153df735e82 100644 --- a/drivers/hwmon/nct7904.c +++ b/drivers/hwmon/nct7904.c @@ -412,8 +412,9 @@ static ssize_t show_pwm(struct device *dev, return sprintf(buf, "%d\n", val); } -static ssize_t store_mode(struct device *dev, struct device_attribute *devattr, - const char *buf, size_t count) +static ssize_t store_enable(struct device *dev, + struct device_attribute *devattr, + const char *buf, size_t count) { int index = to_sensor_dev_attr(devattr)->index; struct nct7904_data *data = dev_get_drvdata(dev); @@ -422,18 +423,18 @@ static ssize_t store_mode(struct device *dev, struct device_attribute *devattr, if (kstrtoul(buf, 10, &val) < 0) return -EINVAL; - if (val > 1 || (val && !data->fan_mode[index])) + if (val < 1 || val > 2 || (val == 2 && !data->fan_mode[index])) return -EINVAL; ret = nct7904_write_reg(data, BANK_3, FANCTL1_FMR_REG + index, - val ? data->fan_mode[index] : 0); + val == 2 ? data->fan_mode[index] : 0); return ret ? ret : count; } -/* Return 0 for manual mode or 1 for SmartFan mode */ -static ssize_t show_mode(struct device *dev, - struct device_attribute *devattr, char *buf) +/* Return 1 for manual mode or 2 for SmartFan mode */ +static ssize_t show_enable(struct device *dev, + struct device_attribute *devattr, char *buf) { int index = to_sensor_dev_attr(devattr)->index; struct nct7904_data *data = dev_get_drvdata(dev); @@ -443,36 +444,36 @@ static ssize_t show_mode(struct device *dev, if (val < 0) return val; - return sprintf(buf, "%d\n", val ? 1 : 0); + return sprintf(buf, "%d\n", val ? 2 : 1); } /* 2 attributes per channel: pwm and mode */ -static SENSOR_DEVICE_ATTR(fan1_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 0); -static SENSOR_DEVICE_ATTR(fan1_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 0); -static SENSOR_DEVICE_ATTR(fan2_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 0); +static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 1); -static SENSOR_DEVICE_ATTR(fan2_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 1); -static SENSOR_DEVICE_ATTR(fan3_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm2_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 1); +static SENSOR_DEVICE_ATTR(pwm3, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 2); -static SENSOR_DEVICE_ATTR(fan3_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 2); -static SENSOR_DEVICE_ATTR(fan4_pwm, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(pwm3_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 2); +static SENSOR_DEVICE_ATTR(pwm4, S_IRUGO | S_IWUSR, show_pwm, store_pwm, 3); -static SENSOR_DEVICE_ATTR(fan4_mode, S_IRUGO | S_IWUSR, - show_mode, store_mode, 3); +static SENSOR_DEVICE_ATTR(pwm4_enable, S_IRUGO | S_IWUSR, + show_enable, store_enable, 3); static struct attribute *nct7904_fanctl_attrs[] = { - &sensor_dev_attr_fan1_pwm.dev_attr.attr, - &sensor_dev_attr_fan1_mode.dev_attr.attr, - &sensor_dev_attr_fan2_pwm.dev_attr.attr, - &sensor_dev_attr_fan2_mode.dev_attr.attr, - &sensor_dev_attr_fan3_pwm.dev_attr.attr, - &sensor_dev_attr_fan3_mode.dev_attr.attr, - &sensor_dev_attr_fan4_pwm.dev_attr.attr, - &sensor_dev_attr_fan4_mode.dev_attr.attr, + &sensor_dev_attr_pwm1.dev_attr.attr, + &sensor_dev_attr_pwm1_enable.dev_attr.attr, + &sensor_dev_attr_pwm2.dev_attr.attr, + &sensor_dev_attr_pwm2_enable.dev_attr.attr, + &sensor_dev_attr_pwm3.dev_attr.attr, + &sensor_dev_attr_pwm3_enable.dev_attr.attr, + &sensor_dev_attr_pwm4.dev_attr.attr, + &sensor_dev_attr_pwm4_enable.dev_attr.attr, NULL }; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index b1b73232f217..bbbe0184e592 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -736,6 +736,10 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) /* * T3 only supports 32 bits of size. */ + if (sizeof(phys_addr_t) > 4) { + pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n"); + return ERR_PTR(-ENOTSUPP); + } bl.size = 0xffffffff; bl.addr = 0; kva = 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index b396344fae16..6a36338593cd 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_H__ #define __OCRDMA_H__ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index 1554cca5712a..430b1350fe96 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_ABI_H__ #define __OCRDMA_ABI_H__ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 29b27675dd70..44766fee1f4e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include <net/neighbour.h> #include <net/netevent.h> diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index cf366fe03cb8..04a30ae67473 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_AH_H__ #define __OCRDMA_AH_H__ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 47615ff33bc6..aab391a15db4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) CNA Adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include <linux/sched.h> #include <linux/interrupt.h> diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index e905972fceb7..7ed885c1851e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) CNA Adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_HW_H__ #define __OCRDMA_HW_H__ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index d98a707a5eb9..b119a3413a15 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include <linux/module.h> #include <linux/idr.h> @@ -46,7 +61,7 @@ MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); static LIST_HEAD(ocrdma_dev_list); static DEFINE_SPINLOCK(ocrdma_devlist_lock); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 02ad0aee99af..80006b24aa11 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_SLI_H__ #define __OCRDMA_SLI_H__ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 48d7ef51aa0c..69334e214571 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2014 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include <rdma/ib_addr.h> #include <rdma/ib_pma.h> diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h index 091edd68a8a3..c9e58d04c7b8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2014 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_STATS_H__ #define __OCRDMA_STATS_H__ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 5bb61eb58f2c..bc84cd462ecf 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #include <linux/dma-mapping.h> #include <rdma/ib_verbs.h> diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index b15c608efa7b..eaccb2d3cb9f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -1,21 +1,36 @@ -/******************************************************************* - * This file is part of the Emulex RoCE Device Driver for * - * RoCE (RDMA over Converged Ethernet) adapters. * - * Copyright (C) 2008-2012 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * +/* This file is part of the Emulex RoCE Device Driver for + * RoCE (RDMA over Converged Ethernet) adapters. + * Copyright (C) 2012-2015 Emulex. All rights reserved. + * EMULEX and SLI are trademarks of Emulex. + * www.emulex.com + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License (GPL) Version 2, available from the file COPYING in the main + * directory of this source tree, or the BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * linux-drivers@emulex.com @@ -23,7 +38,7 @@ * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 - *******************************************************************/ + */ #ifndef __OCRDMA_VERBS_H__ #define __OCRDMA_VERBS_H__ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 9e6ee82a8fd7..851c8219d501 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -177,7 +177,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) else size += ipoib_recvq_size * ipoib_max_conn_qp; } else - goto out_free_wq; + if (ret != -ENOSYS) + goto out_free_wq; cq_attr.cqe = size; priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 771700963127..d851e1828d6f 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -775,6 +775,17 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) ret = isert_rdma_post_recvl(isert_conn); if (ret) goto out_conn_dev; + /* + * Obtain the second reference now before isert_rdma_accept() to + * ensure that any initiator generated REJECT CM event that occurs + * asynchronously won't drop the last reference until the error path + * in iscsi_target_login_sess_out() does it's ->iscsit_free_conn() -> + * isert_free_conn() -> isert_put_conn() -> kref_put(). + */ + if (!kref_get_unless_zero(&isert_conn->kref)) { + isert_warn("conn %p connect_release is running\n", isert_conn); + goto out_conn_dev; + } ret = isert_rdma_accept(isert_conn); if (ret) @@ -836,11 +847,6 @@ isert_connected_handler(struct rdma_cm_id *cma_id) isert_info("conn %p\n", isert_conn); - if (!kref_get_unless_zero(&isert_conn->kref)) { - isert_warn("conn %p connect_release is running\n", isert_conn); - return; - } - mutex_lock(&isert_conn->mutex); if (isert_conn->state != ISER_CONN_FULL_FEATURE) isert_conn->state = ISER_CONN_UP; diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index e853a2134680..4a2a9e370be7 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -149,9 +149,9 @@ static int old_gameport_measure_speed(struct gameport *gameport) for(i = 0; i < 50; i++) { local_irq_save(flags); - rdtscl(t1); + t1 = rdtsc(); for (t = 0; t < 50; t++) gameport_read(gameport); - rdtscl(t2); + t2 = rdtsc(); local_irq_restore(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 4284080e481d..6f8b084e13d0 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -143,7 +143,7 @@ struct analog_port { #include <linux/i8253.h> -#define GET_TIME(x) do { if (cpu_has_tsc) rdtscl(x); else x = get_time_pit(); } while (0) +#define GET_TIME(x) do { if (cpu_has_tsc) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0) #define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0))) #define TIME_NAME (cpu_has_tsc?"TSC":"PIT") static unsigned int get_time_pit(void) @@ -160,7 +160,7 @@ static unsigned int get_time_pit(void) return count; } #elif defined(__x86_64__) -#define GET_TIME(x) rdtscl(x) +#define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "TSC" #elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE) diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index b10709f04615..30e3442518f8 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -2,6 +2,7 @@ * Apple USB BCM5974 (Macbook Air and Penryn Macbook Pro) multitouch driver * * Copyright (C) 2008 Henrik Rydberg (rydberg@euromail.se) + * Copyright (C) 2015 John Horan (knasher@gmail.com) * * The USB initialization and package decoding was made by * Scott Shawcroft as part of the touchd user-space driver project: @@ -91,6 +92,10 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0290 #define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0291 #define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0292 +/* MacbookPro12,1 (2015) */ +#define USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI 0x0272 +#define USB_DEVICE_ID_APPLE_WELLSPRING9_ISO 0x0273 +#define USB_DEVICE_ID_APPLE_WELLSPRING9_JIS 0x0274 #define BCM5974_DEVICE(prod) { \ .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ @@ -152,6 +157,10 @@ static const struct usb_device_id bcm5974_table[] = { BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_JIS), + /* MacbookPro12,1 */ + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ISO), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_JIS), /* Terminating entry */ {} }; @@ -180,21 +189,47 @@ struct bt_data { enum tp_type { TYPE1, /* plain trackpad */ TYPE2, /* button integrated in trackpad */ - TYPE3 /* additional header fields since June 2013 */ + TYPE3, /* additional header fields since June 2013 */ + TYPE4 /* additional header field for pressure data */ }; /* trackpad finger data offsets, le16-aligned */ -#define FINGER_TYPE1 (13 * sizeof(__le16)) -#define FINGER_TYPE2 (15 * sizeof(__le16)) -#define FINGER_TYPE3 (19 * sizeof(__le16)) +#define HEADER_TYPE1 (13 * sizeof(__le16)) +#define HEADER_TYPE2 (15 * sizeof(__le16)) +#define HEADER_TYPE3 (19 * sizeof(__le16)) +#define HEADER_TYPE4 (23 * sizeof(__le16)) /* trackpad button data offsets */ +#define BUTTON_TYPE1 0 #define BUTTON_TYPE2 15 #define BUTTON_TYPE3 23 +#define BUTTON_TYPE4 31 /* list of device capability bits */ #define HAS_INTEGRATED_BUTTON 1 +/* trackpad finger data block size */ +#define FSIZE_TYPE1 (14 * sizeof(__le16)) +#define FSIZE_TYPE2 (14 * sizeof(__le16)) +#define FSIZE_TYPE3 (14 * sizeof(__le16)) +#define FSIZE_TYPE4 (15 * sizeof(__le16)) + +/* offset from header to finger struct */ +#define DELTA_TYPE1 (0 * sizeof(__le16)) +#define DELTA_TYPE2 (0 * sizeof(__le16)) +#define DELTA_TYPE3 (0 * sizeof(__le16)) +#define DELTA_TYPE4 (1 * sizeof(__le16)) + +/* usb control message mode switch data */ +#define USBMSG_TYPE1 8, 0x300, 0, 0, 0x1, 0x8 +#define USBMSG_TYPE2 8, 0x300, 0, 0, 0x1, 0x8 +#define USBMSG_TYPE3 8, 0x300, 0, 0, 0x1, 0x8 +#define USBMSG_TYPE4 2, 0x302, 2, 1, 0x1, 0x0 + +/* Wellspring initialization constants */ +#define BCM5974_WELLSPRING_MODE_READ_REQUEST_ID 1 +#define BCM5974_WELLSPRING_MODE_WRITE_REQUEST_ID 9 + /* trackpad finger structure, le16-aligned */ struct tp_finger { __le16 origin; /* zero when switching track finger */ @@ -207,14 +242,13 @@ struct tp_finger { __le16 orientation; /* 16384 when point, else 15 bit angle */ __le16 touch_major; /* touch area, major axis */ __le16 touch_minor; /* touch area, minor axis */ - __le16 unused[3]; /* zeros */ + __le16 unused[2]; /* zeros */ + __le16 pressure; /* pressure on forcetouch touchpad */ __le16 multi; /* one finger: varies, more fingers: constant */ } __attribute__((packed,aligned(2))); /* trackpad finger data size, empirically at least ten fingers */ #define MAX_FINGERS 16 -#define SIZEOF_FINGER sizeof(struct tp_finger) -#define SIZEOF_ALL_FINGERS (MAX_FINGERS * SIZEOF_FINGER) #define MAX_FINGER_ORIENTATION 16384 /* device-specific parameters */ @@ -232,8 +266,17 @@ struct bcm5974_config { int bt_datalen; /* data length of the button interface */ int tp_ep; /* the endpoint of the trackpad interface */ enum tp_type tp_type; /* type of trackpad interface */ - int tp_offset; /* offset to trackpad finger data */ + int tp_header; /* bytes in header block */ int tp_datalen; /* data length of the trackpad interface */ + int tp_button; /* offset to button data */ + int tp_fsize; /* bytes in single finger block */ + int tp_delta; /* offset from header to finger struct */ + int um_size; /* usb control message length */ + int um_req_val; /* usb control message value */ + int um_req_idx; /* usb control message index */ + int um_switch_idx; /* usb control message mode switch index */ + int um_switch_on; /* usb control message mode switch on */ + int um_switch_off; /* usb control message mode switch off */ struct bcm5974_param p; /* finger pressure limits */ struct bcm5974_param w; /* finger width limits */ struct bcm5974_param x; /* horizontal limits */ @@ -259,6 +302,24 @@ struct bcm5974 { int slots[MAX_FINGERS]; /* slot assignments */ }; +/* trackpad finger block data, le16-aligned */ +static const struct tp_finger *get_tp_finger(const struct bcm5974 *dev, int i) +{ + const struct bcm5974_config *c = &dev->cfg; + u8 *f_base = dev->tp_data + c->tp_header + c->tp_delta; + + return (const struct tp_finger *)(f_base + i * c->tp_fsize); +} + +#define DATAFORMAT(type) \ + type, \ + HEADER_##type, \ + HEADER_##type + (MAX_FINGERS) * (FSIZE_##type), \ + BUTTON_##type, \ + FSIZE_##type, \ + DELTA_##type, \ + USBMSG_##type + /* logical signal quality */ #define SN_PRESSURE 45 /* pressure signal-to-noise ratio */ #define SN_WIDTH 25 /* width signal-to-noise ratio */ @@ -273,7 +334,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING_JIS, 0, 0x84, sizeof(struct bt_data), - 0x81, TYPE1, FINGER_TYPE1, FINGER_TYPE1 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE1), { SN_PRESSURE, 0, 256 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4824, 5342 }, @@ -286,7 +347,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING2_JIS, 0, 0x84, sizeof(struct bt_data), - 0x81, TYPE1, FINGER_TYPE1, FINGER_TYPE1 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE1), { SN_PRESSURE, 0, 256 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4824, 4824 }, @@ -299,7 +360,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING3_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), - 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4460, 5166 }, @@ -312,7 +373,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING4_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), - 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, @@ -325,7 +386,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), - 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4616, 5112 }, @@ -338,7 +399,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING5_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), - 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4415, 5050 }, @@ -351,7 +412,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING6_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), - 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, @@ -364,7 +425,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), - 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4750, 5280 }, @@ -377,7 +438,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), - 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, @@ -390,7 +451,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING7_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), - 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4750, 5280 }, @@ -403,7 +464,7 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), - 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4750, 5280 }, @@ -416,13 +477,26 @@ static const struct bcm5974_config bcm5974_config_table[] = { USB_DEVICE_ID_APPLE_WELLSPRING8_JIS, HAS_INTEGRATED_BUTTON, 0, sizeof(struct bt_data), - 0x83, TYPE3, FINGER_TYPE3, FINGER_TYPE3 + SIZEOF_ALL_FINGERS, + 0x83, DATAFORMAT(TYPE3), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, { SN_COORD, -150, 6600 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, + { + USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI, + USB_DEVICE_ID_APPLE_WELLSPRING9_ISO, + USB_DEVICE_ID_APPLE_WELLSPRING9_JIS, + HAS_INTEGRATED_BUTTON, + 0, sizeof(struct bt_data), + 0x83, DATAFORMAT(TYPE4), + { SN_PRESSURE, 0, 300 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4828, 5345 }, + { SN_COORD, -203, 6803 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } + }, {} }; @@ -549,19 +623,18 @@ static int report_tp_state(struct bcm5974 *dev, int size) struct input_dev *input = dev->input; int raw_n, i, n = 0; - if (size < c->tp_offset || (size - c->tp_offset) % SIZEOF_FINGER != 0) + if (size < c->tp_header || (size - c->tp_header) % c->tp_fsize != 0) return -EIO; - /* finger data, le16-aligned */ - f = (const struct tp_finger *)(dev->tp_data + c->tp_offset); - raw_n = (size - c->tp_offset) / SIZEOF_FINGER; + raw_n = (size - c->tp_header) / c->tp_fsize; for (i = 0; i < raw_n; i++) { - if (raw2int(f[i].touch_major) == 0) + f = get_tp_finger(dev, i); + if (raw2int(f->touch_major) == 0) continue; - dev->pos[n].x = raw2int(f[i].abs_x); - dev->pos[n].y = c->y.min + c->y.max - raw2int(f[i].abs_y); - dev->index[n++] = &f[i]; + dev->pos[n].x = raw2int(f->abs_x); + dev->pos[n].y = c->y.min + c->y.max - raw2int(f->abs_y); + dev->index[n++] = f; } input_mt_assign_slots(input, dev->slots, dev->pos, n, 0); @@ -572,32 +645,22 @@ static int report_tp_state(struct bcm5974 *dev, int size) input_mt_sync_frame(input); - report_synaptics_data(input, c, f, raw_n); + report_synaptics_data(input, c, get_tp_finger(dev, 0), raw_n); - /* type 2 reports button events via ibt only */ - if (c->tp_type == TYPE2) { - int ibt = raw2int(dev->tp_data[BUTTON_TYPE2]); + /* later types report button events via integrated button only */ + if (c->caps & HAS_INTEGRATED_BUTTON) { + int ibt = raw2int(dev->tp_data[c->tp_button]); input_report_key(input, BTN_LEFT, ibt); } - if (c->tp_type == TYPE3) - input_report_key(input, BTN_LEFT, dev->tp_data[BUTTON_TYPE3]); - input_sync(input); return 0; } -/* Wellspring initialization constants */ -#define BCM5974_WELLSPRING_MODE_READ_REQUEST_ID 1 -#define BCM5974_WELLSPRING_MODE_WRITE_REQUEST_ID 9 -#define BCM5974_WELLSPRING_MODE_REQUEST_VALUE 0x300 -#define BCM5974_WELLSPRING_MODE_REQUEST_INDEX 0 -#define BCM5974_WELLSPRING_MODE_VENDOR_VALUE 0x01 -#define BCM5974_WELLSPRING_MODE_NORMAL_VALUE 0x08 - static int bcm5974_wellspring_mode(struct bcm5974 *dev, bool on) { + const struct bcm5974_config *c = &dev->cfg; int retval = 0, size; char *data; @@ -605,7 +668,7 @@ static int bcm5974_wellspring_mode(struct bcm5974 *dev, bool on) if (dev->cfg.tp_type == TYPE3) return 0; - data = kmalloc(8, GFP_KERNEL); + data = kmalloc(c->um_size, GFP_KERNEL); if (!data) { dev_err(&dev->intf->dev, "out of memory\n"); retval = -ENOMEM; @@ -616,28 +679,24 @@ static int bcm5974_wellspring_mode(struct bcm5974 *dev, bool on) size = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), BCM5974_WELLSPRING_MODE_READ_REQUEST_ID, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, - BCM5974_WELLSPRING_MODE_REQUEST_VALUE, - BCM5974_WELLSPRING_MODE_REQUEST_INDEX, data, 8, 5000); + c->um_req_val, c->um_req_idx, data, c->um_size, 5000); - if (size != 8) { + if (size != c->um_size) { dev_err(&dev->intf->dev, "could not read from device\n"); retval = -EIO; goto out; } /* apply the mode switch */ - data[0] = on ? - BCM5974_WELLSPRING_MODE_VENDOR_VALUE : - BCM5974_WELLSPRING_MODE_NORMAL_VALUE; + data[c->um_switch_idx] = on ? c->um_switch_on : c->um_switch_off; /* write configuration */ size = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), BCM5974_WELLSPRING_MODE_WRITE_REQUEST_ID, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, - BCM5974_WELLSPRING_MODE_REQUEST_VALUE, - BCM5974_WELLSPRING_MODE_REQUEST_INDEX, data, 8, 5000); + c->um_req_val, c->um_req_idx, data, c->um_size, 5000); - if (size != 8) { + if (size != c->um_size) { dev_err(&dev->intf->dev, "could not write to device\n"); retval = -EIO; goto out; diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 3a32caf06bf1..6025eb430c0a 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1484,12 +1484,12 @@ static int __synaptics_init(struct psmouse *psmouse, bool absolute_mode) priv->pkt_type = SYN_MODEL_NEWABS(priv->model_id) ? SYN_NEWABS : SYN_OLDABS; psmouse_info(psmouse, - "Touchpad model: %ld, fw: %ld.%ld, id: %#lx, caps: %#lx/%#lx/%#lx, board id: %lu, fw id: %lu\n", + "Touchpad model: %ld, fw: %ld.%ld, id: %#lx, caps: %#lx/%#lx/%#lx/%#lx, board id: %lu, fw id: %lu\n", SYN_ID_MODEL(priv->identity), SYN_ID_MAJOR(priv->identity), SYN_ID_MINOR(priv->identity), priv->model_id, priv->capabilities, priv->ext_cap, priv->ext_cap_0c, - priv->board_id, priv->firmware_id); + priv->ext_cap_10, priv->board_id, priv->firmware_id); set_input_params(psmouse, priv); diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c index 80285c71786e..f58a196521a9 100644 --- a/drivers/input/touchscreen/zforce_ts.c +++ b/drivers/input/touchscreen/zforce_ts.c @@ -429,7 +429,7 @@ static int zforce_read_packet(struct zforce_ts *ts, u8 *buf) goto unlock; } - if (buf[PAYLOAD_LENGTH] == 0 || buf[PAYLOAD_LENGTH] > FRAME_MAXSIZE) { + if (buf[PAYLOAD_LENGTH] == 0) { dev_err(&client->dev, "invalid payload length: %d\n", buf[PAYLOAD_LENGTH]); ret = -EIO; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index a57e9b749895..658ee39e6569 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -76,8 +76,6 @@ LIST_HEAD(hpet_map); * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. */ -static struct protection_domain *pt_domain; - static const struct iommu_ops amd_iommu_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); @@ -96,7 +94,7 @@ struct iommu_dev_data { struct protection_domain *domain; /* Domain the device is bound to */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ - bool passthrough; /* Default for device is pt_domain */ + bool passthrough; /* Device is identity mapped */ struct { bool enabled; int qdep; @@ -116,7 +114,6 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); -static int alloc_passthrough_domain(void); static int protection_domain_init(struct protection_domain *domain); /**************************************************************************** @@ -2167,15 +2164,17 @@ static int attach_device(struct device *dev, dev_data = get_dev_data(dev); if (domain->flags & PD_IOMMUV2_MASK) { - if (!dev_data->iommu_v2 || !dev_data->passthrough) + if (!dev_data->passthrough) return -EINVAL; - if (pdev_iommuv2_enable(pdev) != 0) - return -EINVAL; + if (dev_data->iommu_v2) { + if (pdev_iommuv2_enable(pdev) != 0) + return -EINVAL; - dev_data->ats.enabled = true; - dev_data->ats.qdep = pci_ats_queue_depth(pdev); - dev_data->pri_tlp = pci_pri_tlp_required(pdev); + dev_data->ats.enabled = true; + dev_data->ats.qdep = pci_ats_queue_depth(pdev); + dev_data->pri_tlp = pci_pri_tlp_required(pdev); + } } else if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) { dev_data->ats.enabled = true; @@ -2221,15 +2220,6 @@ static void __detach_device(struct iommu_dev_data *dev_data) do_detach(head); spin_unlock_irqrestore(&domain->lock, flags); - - /* - * If we run in passthrough mode the device must be assigned to the - * passthrough domain if it is detached from any other domain. - * Make sure we can deassign from the pt_domain itself. - */ - if (dev_data->passthrough && - (dev_data->domain == NULL && domain != pt_domain)) - __attach_device(dev_data, pt_domain); } /* @@ -2249,7 +2239,7 @@ static void detach_device(struct device *dev) __detach_device(dev_data); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - if (domain->flags & PD_IOMMUV2_MASK) + if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2) pdev_iommuv2_disable(to_pci_dev(dev)); else if (dev_data->ats.enabled) pci_disable_ats(to_pci_dev(dev)); @@ -2287,17 +2277,15 @@ static int amd_iommu_add_device(struct device *dev) BUG_ON(!dev_data); - if (dev_data->iommu_v2) + if (iommu_pass_through || dev_data->iommu_v2) iommu_request_dm_for_dev(dev); /* Domains are initialized for this device - have a look what we ended up with */ domain = iommu_get_domain_for_dev(dev); - if (domain->type == IOMMU_DOMAIN_IDENTITY) { + if (domain->type == IOMMU_DOMAIN_IDENTITY) dev_data->passthrough = true; - dev->archdata.dma_ops = &nommu_dma_ops; - } else { + else dev->archdata.dma_ops = &amd_iommu_dma_ops; - } out: iommu_completion_wait(iommu); @@ -2862,8 +2850,17 @@ int __init amd_iommu_init_api(void) int __init amd_iommu_init_dma_ops(void) { + swiotlb = iommu_pass_through ? 1 : 0; iommu_detected = 1; - swiotlb = 0; + + /* + * In case we don't initialize SWIOTLB (actually the common case + * when AMD IOMMU is enabled), make sure there are global + * dma_ops set as a fall-back for devices not handled by this + * driver (for example non-PCI devices). + */ + if (!swiotlb) + dma_ops = &nommu_dma_ops; amd_iommu_stats_init(); @@ -2947,21 +2944,6 @@ out_err: return NULL; } -static int alloc_passthrough_domain(void) -{ - if (pt_domain != NULL) - return 0; - - /* allocate passthrough domain */ - pt_domain = protection_domain_alloc(); - if (!pt_domain) - return -ENOMEM; - - pt_domain->mode = PAGE_MODE_NONE; - - return 0; -} - static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *pdomain; @@ -3222,33 +3204,6 @@ static const struct iommu_ops amd_iommu_ops = { * *****************************************************************************/ -int __init amd_iommu_init_passthrough(void) -{ - struct iommu_dev_data *dev_data; - struct pci_dev *dev = NULL; - int ret; - - ret = alloc_passthrough_domain(); - if (ret) - return ret; - - for_each_pci_dev(dev) { - if (!check_device(&dev->dev)) - continue; - - dev_data = get_dev_data(&dev->dev); - dev_data->passthrough = true; - - attach_device(&dev->dev, pt_domain); - } - - amd_iommu_stats_init(); - - pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); - - return 0; -} - /* IOMMUv2 specific functions */ int amd_iommu_register_ppr_notifier(struct notifier_block *nb) { @@ -3363,7 +3318,12 @@ static int __flush_pasid(struct protection_domain *domain, int pasid, struct amd_iommu *iommu; int qdep; - BUG_ON(!dev_data->ats.enabled); + /* + There might be non-IOMMUv2 capable devices in an IOMMUv2 + * domain. + */ + if (!dev_data->ats.enabled) + continue; qdep = dev_data->ats.qdep; iommu = amd_iommu_rlookup_table[dev_data->devid]; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index dbda9ae68c5d..a24495eb4e26 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2026,14 +2026,6 @@ static bool detect_ivrs(void) return true; } -static int amd_iommu_init_dma(void) -{ - if (iommu_pass_through) - return amd_iommu_init_passthrough(); - else - return amd_iommu_init_dma_ops(); -} - /**************************************************************************** * * AMD IOMMU Initialization State Machine @@ -2073,7 +2065,7 @@ static int __init state_next(void) init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; break; case IOMMU_INTERRUPTS_EN: - ret = amd_iommu_init_dma(); + ret = amd_iommu_init_dma_ops(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS; break; case IOMMU_DMA_OPS: diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 3465faf1809e..f7b875bb70d4 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -132,11 +132,19 @@ static struct device_state *get_device_state(u16 devid) static void free_device_state(struct device_state *dev_state) { + struct iommu_group *group; + /* * First detach device from domain - No more PRI requests will arrive * from that device after it is unbound from the IOMMUv2 domain. */ - iommu_detach_device(dev_state->domain, &dev_state->pdev->dev); + group = iommu_group_get(&dev_state->pdev->dev); + if (WARN_ON(!group)) + return; + + iommu_detach_group(dev_state->domain, group); + + iommu_group_put(group); /* Everything is down now, free the IOMMUv2 domain */ iommu_domain_free(dev_state->domain); @@ -731,6 +739,7 @@ EXPORT_SYMBOL(amd_iommu_unbind_pasid); int amd_iommu_init_device(struct pci_dev *pdev, int pasids) { struct device_state *dev_state; + struct iommu_group *group; unsigned long flags; int ret, tmp; u16 devid; @@ -776,10 +785,16 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) if (ret) goto out_free_domain; - ret = iommu_attach_device(dev_state->domain, &pdev->dev); - if (ret != 0) + group = iommu_group_get(&pdev->dev); + if (!group) goto out_free_domain; + ret = iommu_attach_group(dev_state->domain, group); + if (ret != 0) + goto out_drop_group; + + iommu_group_put(group); + spin_lock_irqsave(&state_lock, flags); if (__get_device_state(devid) != NULL) { @@ -794,6 +809,9 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) return 0; +out_drop_group: + iommu_group_put(group); + out_free_domain: iommu_domain_free(dev_state->domain); diff --git a/drivers/macintosh/ans-lcd.c b/drivers/macintosh/ans-lcd.c index 1a57e88a38f7..cd35079c8c98 100644 --- a/drivers/macintosh/ans-lcd.c +++ b/drivers/macintosh/ans-lcd.c @@ -7,7 +7,7 @@ #include <linux/kernel.h> #include <linux/miscdevice.h> #include <linux/fcntl.h> -#include <linux/init.h> +#include <linux/module.h> #include <linux/delay.h> #include <linux/fs.h> diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index b59727309072..bfec3bdfe598 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -259,7 +259,7 @@ config DM_CRYPT the ciphers you're going to use in the cryptoapi configuration. For further information on dm-crypt and userspace tools see: - <http://code.google.com/p/cryptsetup/wiki/DMCrypt> + <https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt> To compile this code as a module, choose M here: the module will be called dm-crypt. diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index b6f22651dd35..48a4a826ae07 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1686,7 +1686,7 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size, if (from_cblock(cache_size)) { mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size)); - if (!mq->cache_hit_bits && mq->cache_hit_bits) { + if (!mq->cache_hit_bits) { DMERR("couldn't allocate cache hit bitset"); goto bad_cache_hit_bits; } diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index b680da5d7b93..1fe93cfea7d3 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -424,6 +424,7 @@ static void free_migration(struct dm_cache_migration *mg) wake_up(&cache->migration_wait); mempool_free(mg, cache->migration_pool); + wake_worker(cache); } static int prealloc_data_structs(struct cache *cache, struct prealloc *p) @@ -1966,6 +1967,7 @@ static void process_deferred_bios(struct cache *cache) * this bio might require one, we pause until there are some * prepared mappings to process. */ + prealloc_used = true; if (prealloc_data_structs(cache, &structs)) { spin_lock_irqsave(&cache->lock, flags); bio_list_merge(&cache->deferred_bios, &bios); @@ -1981,7 +1983,6 @@ static void process_deferred_bios(struct cache *cache) process_discard_bio(cache, &structs, bio); else process_bio(cache, &structs, bio); - prealloc_used = true; } if (prealloc_used) @@ -2010,6 +2011,7 @@ static void process_deferred_cells(struct cache *cache) * this bio might require one, we pause until there are some * prepared mappings to process. */ + prealloc_used = true; if (prealloc_data_structs(cache, &structs)) { spin_lock_irqsave(&cache->lock, flags); list_splice(&cells, &cache->deferred_cells); @@ -2018,7 +2020,6 @@ static void process_deferred_cells(struct cache *cache) } process_cell(cache, &structs, cell); - prealloc_used = true; } if (prealloc_used) @@ -2080,6 +2081,7 @@ static void writeback_some_dirty_blocks(struct cache *cache) if (policy_writeback_work(cache->policy, &oblock, &cblock, busy)) break; /* no work to do */ + prealloc_used = true; if (prealloc_data_structs(cache, &structs) || get_cell(cache, oblock, &structs, &old_ocell)) { policy_set_dirty(cache->policy, oblock); @@ -2087,7 +2089,6 @@ static void writeback_some_dirty_blocks(struct cache *cache) } writeback(cache, &structs, oblock, cblock, old_ocell); - prealloc_used = true; } if (prealloc_used) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 1c50c580215c..d2bbe8cc1e97 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -666,16 +666,21 @@ static void requeue_io(struct thin_c *tc) requeue_deferred_cells(tc); } -static void error_retry_list(struct pool *pool) +static void error_retry_list_with_code(struct pool *pool, int error) { struct thin_c *tc; rcu_read_lock(); list_for_each_entry_rcu(tc, &pool->active_thins, list) - error_thin_bio_list(tc, &tc->retry_on_resume_list, -EIO); + error_thin_bio_list(tc, &tc->retry_on_resume_list, error); rcu_read_unlock(); } +static void error_retry_list(struct pool *pool) +{ + return error_retry_list_with_code(pool, -EIO); +} + /* * This section of code contains the logic for processing a thin device's IO. * Much of the code depends on pool object resources (lists, workqueues, etc) @@ -2297,7 +2302,7 @@ static void do_no_space_timeout(struct work_struct *ws) if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) { pool->pf.error_if_no_space = true; notify_of_pool_mode_change_to_oods(pool); - error_retry_list(pool); + error_retry_list_with_code(pool, -ENOSPC); } } diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index caeb39561567..bf9eb2ecf960 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -104,6 +104,57 @@ static void *macb_rx_buffer(struct macb *bp, unsigned int index) return bp->rx_buffers + bp->rx_buffer_size * macb_rx_ring_wrap(index); } +/* I/O accessors */ +static u32 hw_readl_native(struct macb *bp, int offset) +{ + return __raw_readl(bp->regs + offset); +} + +static void hw_writel_native(struct macb *bp, int offset, u32 value) +{ + __raw_writel(value, bp->regs + offset); +} + +static u32 hw_readl(struct macb *bp, int offset) +{ + return readl_relaxed(bp->regs + offset); +} + +static void hw_writel(struct macb *bp, int offset, u32 value) +{ + writel_relaxed(value, bp->regs + offset); +} + +/* + * Find the CPU endianness by using the loopback bit of NCR register. When the + * CPU is in big endian we need to program swaped mode for management + * descriptor access. + */ +static bool hw_is_native_io(void __iomem *addr) +{ + u32 value = MACB_BIT(LLB); + + __raw_writel(value, addr + MACB_NCR); + value = __raw_readl(addr + MACB_NCR); + + /* Write 0 back to disable everything */ + __raw_writel(0, addr + MACB_NCR); + + return value == MACB_BIT(LLB); +} + +static bool hw_is_gem(void __iomem *addr, bool native_io) +{ + u32 id; + + if (native_io) + id = __raw_readl(addr + MACB_MID); + else + id = readl_relaxed(addr + MACB_MID); + + return MACB_BFEXT(IDNUM, id) >= 0x2; +} + static void macb_set_hwaddr(struct macb *bp) { u32 bottom; @@ -160,7 +211,7 @@ static void macb_get_hwaddr(struct macb *bp) } } - netdev_info(bp->dev, "invalid hw address, using random\n"); + dev_info(&bp->pdev->dev, "invalid hw address, using random\n"); eth_hw_addr_random(bp->dev); } @@ -252,7 +303,6 @@ static void macb_handle_link_change(struct net_device *dev) struct macb *bp = netdev_priv(dev); struct phy_device *phydev = bp->phy_dev; unsigned long flags; - int status_change = 0; spin_lock_irqsave(&bp->lock, flags); @@ -449,14 +499,14 @@ err_out: static void macb_update_stats(struct macb *bp) { - u32 __iomem *reg = bp->regs + MACB_PFR; u32 *p = &bp->hw_stats.macb.rx_pause_frames; u32 *end = &bp->hw_stats.macb.tx_pause_frames + 1; + int offset = MACB_PFR; WARN_ON((unsigned long)(end - p - 1) != (MACB_TPF - MACB_PFR) / 4); - for(; p < end; p++, reg++) - *p += readl_relaxed(reg); + for(; p < end; p++, offset += 4) + *p += bp->macb_reg_readl(bp, offset); } static int macb_halt_tx(struct macb *bp) @@ -1107,12 +1157,6 @@ static void macb_poll_controller(struct net_device *dev) } #endif -static inline unsigned int macb_count_tx_descriptors(struct macb *bp, - unsigned int len) -{ - return (len + bp->max_tx_length - 1) / bp->max_tx_length; -} - static unsigned int macb_tx_map(struct macb *bp, struct macb_queue *queue, struct sk_buff *skb) @@ -1263,11 +1307,11 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) * socket buffer: skb fragments of jumbo frames may need to be * splitted into many buffer descriptors. */ - count = macb_count_tx_descriptors(bp, skb_headlen(skb)); + count = DIV_ROUND_UP(skb_headlen(skb), bp->max_tx_length); nr_frags = skb_shinfo(skb)->nr_frags; for (f = 0; f < nr_frags; f++) { frag_size = skb_frag_size(&skb_shinfo(skb)->frags[f]); - count += macb_count_tx_descriptors(bp, frag_size); + count += DIV_ROUND_UP(frag_size, bp->max_tx_length); } spin_lock_irqsave(&bp->lock, flags); @@ -1603,7 +1647,6 @@ static u32 macb_dbw(struct macb *bp) static void macb_configure_dma(struct macb *bp) { u32 dmacfg; - u32 tmp, ncr; if (macb_is_gem(bp)) { dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L); @@ -1613,22 +1656,11 @@ static void macb_configure_dma(struct macb *bp) dmacfg |= GEM_BIT(TXPBMS) | GEM_BF(RXBMS, -1L); dmacfg &= ~GEM_BIT(ENDIA_PKT); - /* Find the CPU endianness by using the loopback bit of net_ctrl - * register. save it first. When the CPU is in big endian we - * need to program swaped mode for management descriptor access. - */ - ncr = macb_readl(bp, NCR); - __raw_writel(MACB_BIT(LLB), bp->regs + MACB_NCR); - tmp = __raw_readl(bp->regs + MACB_NCR); - - if (tmp == MACB_BIT(LLB)) + if (bp->native_io) dmacfg &= ~GEM_BIT(ENDIA_DESC); else dmacfg |= GEM_BIT(ENDIA_DESC); /* CPU in big endian */ - /* Restore net_ctrl */ - macb_writel(bp, NCR, ncr); - if (bp->dev->features & NETIF_F_HW_CSUM) dmacfg |= GEM_BIT(TXCOEN); else @@ -1897,19 +1929,19 @@ static int macb_change_mtu(struct net_device *dev, int new_mtu) static void gem_update_stats(struct macb *bp) { - int i; + unsigned int i; u32 *p = &bp->hw_stats.gem.tx_octets_31_0; for (i = 0; i < GEM_STATS_LEN; ++i, ++p) { u32 offset = gem_statistics[i].offset; - u64 val = readl_relaxed(bp->regs + offset); + u64 val = bp->macb_reg_readl(bp, offset); bp->ethtool_stats[i] += val; *p += val; if (offset == GEM_OCTTXL || offset == GEM_OCTRXL) { /* Add GEM_OCTTXH, GEM_OCTRXH */ - val = readl_relaxed(bp->regs + offset + 4); + val = bp->macb_reg_readl(bp, offset + 4); bp->ethtool_stats[i] += ((u64)val) << 32; *(++p) += val; } @@ -1976,7 +2008,7 @@ static int gem_get_sset_count(struct net_device *dev, int sset) static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p) { - int i; + unsigned int i; switch (sset) { case ETH_SS_STATS: @@ -2190,7 +2222,7 @@ static void macb_configure_caps(struct macb *bp, const struct macb_config *dt_co if (dt_conf) bp->caps = dt_conf->caps; - if (macb_is_gem_hw(bp->regs)) { + if (hw_is_gem(bp->regs, bp->native_io)) { bp->caps |= MACB_CAPS_MACB_IS_GEM; dcfg = gem_readl(bp, DCFG1); @@ -2201,10 +2233,11 @@ static void macb_configure_caps(struct macb *bp, const struct macb_config *dt_co bp->caps |= MACB_CAPS_FIFO_MODE; } - netdev_dbg(bp->dev, "Cadence caps 0x%08x\n", bp->caps); + dev_dbg(&bp->pdev->dev, "Cadence caps 0x%08x\n", bp->caps); } static void macb_probe_queues(void __iomem *mem, + bool native_io, unsigned int *queue_mask, unsigned int *num_queues) { @@ -2219,7 +2252,7 @@ static void macb_probe_queues(void __iomem *mem, * we are early in the probe process and don't have the * MACB_CAPS_MACB_IS_GEM flag positioned */ - if (!macb_is_gem_hw(mem)) + if (!hw_is_gem(mem, native_io)) return; /* bit 0 is never set but queue 0 always exists */ @@ -2786,6 +2819,7 @@ static int macb_probe(struct platform_device *pdev) struct clk *pclk, *hclk, *tx_clk; unsigned int queue_mask, num_queues; struct macb_platform_data *pdata; + bool native_io; struct phy_device *phydev; struct net_device *dev; struct resource *regs; @@ -2794,6 +2828,11 @@ static int macb_probe(struct platform_device *pdev) struct macb *bp; int err; + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + mem = devm_ioremap_resource(&pdev->dev, regs); + if (IS_ERR(mem)) + return PTR_ERR(mem); + if (np) { const struct of_device_id *match; @@ -2809,14 +2848,9 @@ static int macb_probe(struct platform_device *pdev) if (err) return err; - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mem = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(mem)) { - err = PTR_ERR(mem); - goto err_disable_clocks; - } + native_io = hw_is_native_io(mem); - macb_probe_queues(mem, &queue_mask, &num_queues); + macb_probe_queues(mem, native_io, &queue_mask, &num_queues); dev = alloc_etherdev_mq(sizeof(*bp), num_queues); if (!dev) { err = -ENOMEM; @@ -2831,6 +2865,14 @@ static int macb_probe(struct platform_device *pdev) bp->pdev = pdev; bp->dev = dev; bp->regs = mem; + bp->native_io = native_io; + if (native_io) { + bp->macb_reg_readl = hw_readl_native; + bp->macb_reg_writel = hw_writel_native; + } else { + bp->macb_reg_readl = hw_readl; + bp->macb_reg_writel = hw_writel; + } bp->num_queues = num_queues; bp->queue_mask = queue_mask; if (macb_config) @@ -2838,9 +2880,8 @@ static int macb_probe(struct platform_device *pdev) bp->pclk = pclk; bp->hclk = hclk; bp->tx_clk = tx_clk; - if (macb_config->jumbo_max_len) { + if (macb_config) bp->jumbo_max_len = macb_config->jumbo_max_len; - } spin_lock_init(&bp->lock); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index d74655993d4b..1895b6b2addd 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -429,18 +429,12 @@ | GEM_BF(name, value)) /* Register access macros */ -#define macb_readl(port,reg) \ - readl_relaxed((port)->regs + MACB_##reg) -#define macb_writel(port,reg,value) \ - writel_relaxed((value), (port)->regs + MACB_##reg) -#define gem_readl(port, reg) \ - readl_relaxed((port)->regs + GEM_##reg) -#define gem_writel(port, reg, value) \ - writel_relaxed((value), (port)->regs + GEM_##reg) -#define queue_readl(queue, reg) \ - readl_relaxed((queue)->bp->regs + (queue)->reg) -#define queue_writel(queue, reg, value) \ - writel_relaxed((value), (queue)->bp->regs + (queue)->reg) +#define macb_readl(port, reg) (port)->macb_reg_readl((port), MACB_##reg) +#define macb_writel(port, reg, value) (port)->macb_reg_writel((port), MACB_##reg, (value)) +#define gem_readl(port, reg) (port)->macb_reg_readl((port), GEM_##reg) +#define gem_writel(port, reg, value) (port)->macb_reg_writel((port), GEM_##reg, (value)) +#define queue_readl(queue, reg) (queue)->bp->macb_reg_readl((queue)->bp, (queue)->reg) +#define queue_writel(queue, reg, value) (queue)->bp->macb_reg_writel((queue)->bp, (queue)->reg, (value)) /* Conditional GEM/MACB macros. These perform the operation to the correct * register dependent on whether the device is a GEM or a MACB. For registers @@ -785,6 +779,11 @@ struct macb_queue { struct macb { void __iomem *regs; + bool native_io; + + /* hardware IO accessors */ + u32 (*macb_reg_readl)(struct macb *bp, int offset); + void (*macb_reg_writel)(struct macb *bp, int offset, u32 value); unsigned int rx_tail; unsigned int rx_prepared_head; @@ -817,9 +816,9 @@ struct macb { struct mii_bus *mii_bus; struct phy_device *phy_dev; - unsigned int link; - unsigned int speed; - unsigned int duplex; + int link; + int speed; + int duplex; u32 caps; unsigned int dma_burst_length; @@ -843,9 +842,4 @@ static inline bool macb_is_gem(struct macb *bp) return !!(bp->caps & MACB_CAPS_MACB_IS_GEM); } -static inline bool macb_is_gem_hw(void __iomem *addr) -{ - return !!(MACB_BFEXT(IDNUM, readl_relaxed(addr + MACB_MID)) >= 0x2); -} - #endif /* _MACB_H */ diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index dda8a02b7322..8aee250904ec 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -125,6 +125,15 @@ */ #define NICPF_CLK_PER_INT_TICK 2 +/* Time to wait before we decide that a SQ is stuck. + * + * Since both pkt rx and tx notifications are done with same CQ, + * when packets are being received at very high rate (eg: L2 forwarding) + * then freeing transmitted skbs will be delayed and watchdog + * will kick in, resetting interface. Hence keeping this value high. + */ +#define NICVF_TX_TIMEOUT (50 * HZ) + struct nicvf_cq_poll { u8 cq_idx; /* Completion queue index */ struct napi_struct napi; @@ -216,8 +225,9 @@ struct nicvf_drv_stats { /* Tx */ u64 tx_frames_ok; u64 tx_drops; - u64 tx_busy; u64 tx_tso; + u64 txq_stop; + u64 txq_wake; }; struct nicvf { diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index 16bd2d772db9..a4228e664567 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -66,9 +66,10 @@ static const struct nicvf_stat nicvf_drv_stats[] = { NICVF_DRV_STAT(rx_frames_jumbo), NICVF_DRV_STAT(rx_drops), NICVF_DRV_STAT(tx_frames_ok), - NICVF_DRV_STAT(tx_busy), NICVF_DRV_STAT(tx_tso), NICVF_DRV_STAT(tx_drops), + NICVF_DRV_STAT(txq_stop), + NICVF_DRV_STAT(txq_wake), }; static const struct nicvf_stat nicvf_queue_stats[] = { @@ -126,6 +127,7 @@ static void nicvf_set_msglevel(struct net_device *netdev, u32 lvl) static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data) { + struct nicvf *nic = netdev_priv(netdev); int stats, qidx; if (sset != ETH_SS_STATS) @@ -141,7 +143,7 @@ static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data) data += ETH_GSTRING_LEN; } - for (qidx = 0; qidx < MAX_RCV_QUEUES_PER_QS; qidx++) { + for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) { for (stats = 0; stats < nicvf_n_queue_stats; stats++) { sprintf(data, "rxq%d: %s", qidx, nicvf_queue_stats[stats].name); @@ -149,7 +151,7 @@ static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data) } } - for (qidx = 0; qidx < MAX_SND_QUEUES_PER_QS; qidx++) { + for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) { for (stats = 0; stats < nicvf_n_queue_stats; stats++) { sprintf(data, "txq%d: %s", qidx, nicvf_queue_stats[stats].name); @@ -170,12 +172,14 @@ static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data) static int nicvf_get_sset_count(struct net_device *netdev, int sset) { + struct nicvf *nic = netdev_priv(netdev); + if (sset != ETH_SS_STATS) return -EINVAL; return nicvf_n_hw_stats + nicvf_n_drv_stats + (nicvf_n_queue_stats * - (MAX_RCV_QUEUES_PER_QS + MAX_SND_QUEUES_PER_QS)) + + (nic->qs->rq_cnt + nic->qs->sq_cnt)) + BGX_RX_STATS_COUNT + BGX_TX_STATS_COUNT; } @@ -197,13 +201,13 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev, *(data++) = ((u64 *)&nic->drv_stats) [nicvf_drv_stats[stat].index]; - for (qidx = 0; qidx < MAX_RCV_QUEUES_PER_QS; qidx++) { + for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) { for (stat = 0; stat < nicvf_n_queue_stats; stat++) *(data++) = ((u64 *)&nic->qs->rq[qidx].stats) [nicvf_queue_stats[stat].index]; } - for (qidx = 0; qidx < MAX_SND_QUEUES_PER_QS; qidx++) { + for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) { for (stat = 0; stat < nicvf_n_queue_stats; stat++) *(data++) = ((u64 *)&nic->qs->sq[qidx].stats) [nicvf_queue_stats[stat].index]; @@ -543,6 +547,7 @@ static int nicvf_set_channels(struct net_device *dev, { struct nicvf *nic = netdev_priv(dev); int err = 0; + bool if_up = netif_running(dev); if (!channel->rx_count || !channel->tx_count) return -EINVAL; @@ -551,6 +556,9 @@ static int nicvf_set_channels(struct net_device *dev, if (channel->tx_count > MAX_SND_QUEUES_PER_QS) return -EINVAL; + if (if_up) + nicvf_stop(dev); + nic->qs->rq_cnt = channel->rx_count; nic->qs->sq_cnt = channel->tx_count; nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt); @@ -559,11 +567,9 @@ static int nicvf_set_channels(struct net_device *dev, if (err) return err; - if (!netif_running(dev)) - return err; + if (if_up) + nicvf_open(dev); - nicvf_stop(dev); - nicvf_open(dev); netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n", nic->qs->sq_cnt, nic->qs->rq_cnt); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 8b119a035b7e..3b90afb8c293 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -234,7 +234,7 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic) nic->duplex == DUPLEX_FULL ? "Full duplex" : "Half duplex"); netif_carrier_on(nic->netdev); - netif_tx_wake_all_queues(nic->netdev); + netif_tx_start_all_queues(nic->netdev); } else { netdev_info(nic->netdev, "%s: Link is Down\n", nic->netdev->name); @@ -425,6 +425,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, if (skb) { prefetch(skb); dev_consume_skb_any(skb); + sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; } } @@ -476,12 +477,13 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, struct napi_struct *napi, int budget) { - int processed_cqe, work_done = 0; + int processed_cqe, work_done = 0, tx_done = 0; int cqe_count, cqe_head; struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct cmp_queue *cq = &qs->cq[cq_idx]; struct cqe_rx_t *cq_desc; + struct netdev_queue *txq; spin_lock_bh(&cq->lock); loop: @@ -496,8 +498,8 @@ loop: cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; cqe_head &= 0xFFFF; - netdev_dbg(nic->netdev, "%s cqe_count %d cqe_head %d\n", - __func__, cqe_count, cqe_head); + netdev_dbg(nic->netdev, "%s CQ%d cqe_count %d cqe_head %d\n", + __func__, cq_idx, cqe_count, cqe_head); while (processed_cqe < cqe_count) { /* Get the CQ descriptor */ cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); @@ -511,8 +513,8 @@ loop: break; } - netdev_dbg(nic->netdev, "cq_desc->cqe_type %d\n", - cq_desc->cqe_type); + netdev_dbg(nic->netdev, "CQ%d cq_desc->cqe_type %d\n", + cq_idx, cq_desc->cqe_type); switch (cq_desc->cqe_type) { case CQE_TYPE_RX: nicvf_rcv_pkt_handler(netdev, napi, cq, @@ -522,6 +524,7 @@ loop: case CQE_TYPE_SEND: nicvf_snd_pkt_handler(netdev, cq, (void *)cq_desc, CQE_TYPE_SEND); + tx_done++; break; case CQE_TYPE_INVALID: case CQE_TYPE_RX_SPLIT: @@ -532,8 +535,9 @@ loop: } processed_cqe++; } - netdev_dbg(nic->netdev, "%s processed_cqe %d work_done %d budget %d\n", - __func__, processed_cqe, work_done, budget); + netdev_dbg(nic->netdev, + "%s CQ%d processed_cqe %d work_done %d budget %d\n", + __func__, cq_idx, processed_cqe, work_done, budget); /* Ring doorbell to inform H/W to reuse processed CQEs */ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, @@ -543,6 +547,19 @@ loop: goto loop; done: + /* Wakeup TXQ if its stopped earlier due to SQ full */ + if (tx_done) { + txq = netdev_get_tx_queue(netdev, cq_idx); + if (netif_tx_queue_stopped(txq)) { + netif_tx_start_queue(txq); + nic->drv_stats.txq_wake++; + if (netif_msg_tx_err(nic)) + netdev_warn(netdev, + "%s: Transmit queue wakeup SQ%d\n", + netdev->name, cq_idx); + } + } + spin_unlock_bh(&cq->lock); return work_done; } @@ -554,15 +571,10 @@ static int nicvf_poll(struct napi_struct *napi, int budget) struct net_device *netdev = napi->dev; struct nicvf *nic = netdev_priv(netdev); struct nicvf_cq_poll *cq; - struct netdev_queue *txq; cq = container_of(napi, struct nicvf_cq_poll, napi); work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget); - txq = netdev_get_tx_queue(netdev, cq->cq_idx); - if (netif_tx_queue_stopped(txq)) - netif_tx_wake_queue(txq); - if (work_done < budget) { /* Slow packet rate, exit polling */ napi_complete(napi); @@ -833,9 +845,9 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } - if (!nicvf_sq_append_skb(nic, skb) && !netif_tx_queue_stopped(txq)) { + if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) { netif_tx_stop_queue(txq); - nic->drv_stats.tx_busy++; + nic->drv_stats.txq_stop++; if (netif_msg_tx_err(nic)) netdev_warn(netdev, "%s: Transmit ring full, stopping SQ%d\n", @@ -859,7 +871,6 @@ int nicvf_stop(struct net_device *netdev) nicvf_send_msg_to_pf(nic, &mbx); netif_carrier_off(netdev); - netif_tx_disable(netdev); /* Disable RBDR & QS error interrupts */ for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) { @@ -894,6 +905,8 @@ int nicvf_stop(struct net_device *netdev) kfree(cq_poll); } + netif_tx_disable(netdev); + /* Free resources */ nicvf_config_data_transfer(nic, false); @@ -988,6 +1001,9 @@ int nicvf_open(struct net_device *netdev) for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); + nic->drv_stats.txq_stop = 0; + nic->drv_stats.txq_wake = 0; + netif_carrier_on(netdev); netif_tx_start_all_queues(netdev); @@ -1278,6 +1294,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = netdev->features; netdev->netdev_ops = &nicvf_netdev_ops; + netdev->watchdog_timeo = NICVF_TX_TIMEOUT; INIT_WORK(&nic->reset_task, nicvf_reset_task); @@ -1318,11 +1335,17 @@ static void nicvf_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static void nicvf_shutdown(struct pci_dev *pdev) +{ + nicvf_remove(pdev); +} + static struct pci_driver nicvf_driver = { .name = DRV_NAME, .id_table = nicvf_id_table, .probe = nicvf_probe, .remove = nicvf_remove, + .shutdown = nicvf_shutdown, }; static int __init nicvf_init_module(void) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index d69d228d11a0..ca4240aa6d15 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -103,9 +103,11 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, /* Allocate a new page */ if (!nic->rb_page) { - nic->rb_page = alloc_pages(gfp | __GFP_COMP, order); + nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, + order); if (!nic->rb_page) { - netdev_err(nic->netdev, "Failed to allocate new rcv buffer\n"); + netdev_err(nic->netdev, + "Failed to allocate new rcv buffer\n"); return -ENOMEM; } nic->rb_page_offset = 0; @@ -382,7 +384,8 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) return; if (sq->tso_hdrs) - dma_free_coherent(&nic->pdev->dev, sq->dmem.q_len, + dma_free_coherent(&nic->pdev->dev, + sq->dmem.q_len * TSO_HEADER_SIZE, sq->tso_hdrs, sq->tso_hdrs_phys); kfree(sq->skbuff); @@ -863,10 +866,11 @@ void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, continue; } skb = (struct sk_buff *)sq->skbuff[sq->head]; + if (skb) + dev_kfree_skb_any(skb); atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets); atomic64_add(hdr->tot_len, (atomic64_t *)&netdev->stats.tx_bytes); - dev_kfree_skb_any(skb); nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); } } @@ -992,7 +996,7 @@ static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, memset(gather, 0, SND_QUEUE_DESC_SIZE); gather->subdesc_type = SQ_DESC_TYPE_GATHER; - gather->ld_type = NIC_SEND_LD_TYPE_E_LDWB; + gather->ld_type = NIC_SEND_LD_TYPE_E_LDD; gather->size = size; gather->addr = data; } @@ -1048,7 +1052,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, } nicvf_sq_add_hdr_subdesc(sq, hdr_qentry, seg_subdescs - 1, skb, seg_len); - sq->skbuff[hdr_qentry] = 0; + sq->skbuff[hdr_qentry] = (u64)NULL; qentry = nicvf_get_nxt_sqentry(sq, qentry); desc_cnt += seg_subdescs; @@ -1062,6 +1066,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, /* Inform HW to xmit all TSO segments */ nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, skb_get_queue_mapping(skb), desc_cnt); + nic->drv_stats.tx_tso++; return 1; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 8341bdf755d1..f0937b7bfe9f 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -62,7 +62,7 @@ #define SND_QUEUE_CNT 8 #define CMP_QUEUE_CNT 8 /* Max of RCV and SND qcount */ -#define SND_QSIZE SND_QUEUE_SIZE4 +#define SND_QSIZE SND_QUEUE_SIZE2 #define SND_QUEUE_LEN (1ULL << (SND_QSIZE + 10)) #define MAX_SND_QUEUE_LEN (1ULL << (SND_QUEUE_SIZE6 + 10)) #define SND_QUEUE_THRESH 2ULL @@ -70,7 +70,10 @@ /* Since timestamp not enabled, otherwise 2 */ #define MAX_CQE_PER_PKT_XMIT 1 -#define CMP_QSIZE CMP_QUEUE_SIZE4 +/* Keep CQ and SQ sizes same, if timestamping + * is enabled this equation will change. + */ +#define CMP_QSIZE CMP_QUEUE_SIZE2 #define CMP_QUEUE_LEN (1ULL << (CMP_QSIZE + 10)) #define CMP_QUEUE_CQE_THRESH 0 #define CMP_QUEUE_TIMER_THRESH 220 /* 10usec */ @@ -87,7 +90,12 @@ #define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \ MAX_CQE_PER_PKT_XMIT) -#define RQ_CQ_DROP ((CMP_QUEUE_LEN - MAX_CQES_FOR_TX) / 256) +/* Calculate number of CQEs to reserve for all SQEs. + * Its 1/256th level of CQ size. + * '+ 1' to account for pipelining + */ +#define RQ_CQ_DROP ((256 / (CMP_QUEUE_LEN / \ + (CMP_QUEUE_LEN - MAX_CQES_FOR_TX))) + 1) /* Descriptor size in bytes */ #define SND_QUEUE_DESC_SIZE 16 diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 633ec05dfe05..b961a89dc626 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -673,7 +673,10 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg); bgx_flush_dmac_addrs(bgx, lmacid); - if (lmac->phydev) + if ((bgx->lmac_type != BGX_MODE_XFI) && + (bgx->lmac_type != BGX_MODE_XLAUI) && + (bgx->lmac_type != BGX_MODE_40G_KR) && + (bgx->lmac_type != BGX_MODE_10G_KR) && lmac->phydev) phy_disconnect(lmac->phydev); lmac->phydev = NULL; diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 1eee73cccdf5..99d33e2d35e6 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -562,6 +562,7 @@ struct fec_enet_private { }; void fec_ptp_init(struct platform_device *pdev); +void fec_ptp_stop(struct platform_device *pdev); void fec_ptp_start_cyclecounter(struct net_device *ndev); int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr); int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1f89c59b4353..32e3807c650e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -24,6 +24,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> +#include <linux/pm_runtime.h> #include <linux/ptrace.h> #include <linux/errno.h> #include <linux/ioport.h> @@ -77,6 +78,7 @@ static void fec_enet_itr_coal_init(struct net_device *ndev); #define FEC_ENET_RAEM_V 0x8 #define FEC_ENET_RAFL_V 0x8 #define FEC_ENET_OPD_V 0xFFF0 +#define FEC_MDIO_PM_TIMEOUT 100 /* ms */ static struct platform_device_id fec_devtype[] = { { @@ -1767,7 +1769,13 @@ static void fec_enet_adjust_link(struct net_device *ndev) static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; unsigned long time_left; + int ret = 0; + + ret = pm_runtime_get_sync(dev); + if (IS_ERR_VALUE(ret)) + return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1783,18 +1791,30 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO read timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto out; } - /* return value */ - return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); + ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); + +out: + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; } static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value) { struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; unsigned long time_left; + int ret = 0; + + ret = pm_runtime_get_sync(dev); + if (IS_ERR_VALUE(ret)) + return ret; fep->mii_timeout = 0; init_completion(&fep->mdio_done); @@ -1811,10 +1831,13 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, if (time_left == 0) { fep->mii_timeout = 1; netdev_err(fep->netdev, "MDIO write timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; } - return 0; + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; } static int fec_enet_clk_enable(struct net_device *ndev, bool enable) @@ -1826,9 +1849,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) ret = clk_prepare_enable(fep->clk_ahb); if (ret) return ret; - ret = clk_prepare_enable(fep->clk_ipg); - if (ret) - goto failed_clk_ipg; if (fep->clk_enet_out) { ret = clk_prepare_enable(fep->clk_enet_out); if (ret) @@ -1852,7 +1872,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) } } else { clk_disable_unprepare(fep->clk_ahb); - clk_disable_unprepare(fep->clk_ipg); if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { @@ -1874,8 +1893,6 @@ failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ipg); -failed_clk_ipg: clk_disable_unprepare(fep->clk_ahb); return ret; @@ -2847,10 +2864,14 @@ fec_enet_open(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); int ret; + ret = pm_runtime_get_sync(&fep->pdev->dev); + if (IS_ERR_VALUE(ret)) + return ret; + pinctrl_pm_select_default_state(&fep->pdev->dev); ret = fec_enet_clk_enable(ndev, true); if (ret) - return ret; + goto clk_enable; /* I should reset the ring buffers here, but I don't yet know * a simple way to do that. @@ -2881,6 +2902,9 @@ err_enet_mii_probe: fec_enet_free_buffers(ndev); err_enet_alloc: fec_enet_clk_enable(ndev, false); +clk_enable: + pm_runtime_mark_last_busy(&fep->pdev->dev); + pm_runtime_put_autosuspend(&fep->pdev->dev); pinctrl_pm_select_sleep_state(&fep->pdev->dev); return ret; } @@ -2903,6 +2927,9 @@ fec_enet_close(struct net_device *ndev) fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&fep->pdev->dev); + pm_runtime_mark_last_busy(&fep->pdev->dev); + pm_runtime_put_autosuspend(&fep->pdev->dev); + fec_enet_free_buffers(ndev); return 0; @@ -3115,8 +3142,8 @@ static int fec_enet_init(struct net_device *ndev) fep->bufdesc_size; /* Allocate memory for buffer descriptors. */ - cbd_base = dma_alloc_coherent(NULL, bd_size, &bd_dma, - GFP_KERNEL); + cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma, + GFP_KERNEL); if (!cbd_base) { return -ENOMEM; } @@ -3388,6 +3415,10 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_clk; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + fep->reg_phy = devm_regulator_get(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { ret = regulator_enable(fep->reg_phy); @@ -3400,6 +3431,11 @@ fec_probe(struct platform_device *pdev) fep->reg_phy = NULL; } + pm_runtime_set_autosuspend_delay(&pdev->dev, FEC_MDIO_PM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + fec_reset_phy(pdev); if (fep->bufdesc_ex) @@ -3447,6 +3483,10 @@ fec_probe(struct platform_device *pdev) fep->rx_copybreak = COPYBREAK_DEFAULT; INIT_WORK(&fep->tx_timeout_work, fec_enet_timeout_work); + + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + return 0; failed_register: @@ -3454,9 +3494,12 @@ failed_register: failed_mii_init: failed_irq: failed_init: + fec_ptp_stop(pdev); if (fep->reg_phy) regulator_disable(fep->reg_phy); failed_regulator: + clk_disable_unprepare(fep->clk_ipg); +failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: failed_phy: @@ -3473,14 +3516,12 @@ fec_drv_remove(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); - cancel_delayed_work_sync(&fep->time_keep); cancel_work_sync(&fep->tx_timeout_work); + fec_ptp_stop(pdev); unregister_netdev(ndev); fec_enet_mii_remove(fep); if (fep->reg_phy) regulator_disable(fep->reg_phy); - if (fep->ptp_clock) - ptp_clock_unregister(fep->ptp_clock); of_node_put(fep->phy_node); free_netdev(ndev); @@ -3568,7 +3609,28 @@ failed_clk: return ret; } -static SIMPLE_DEV_PM_OPS(fec_pm_ops, fec_suspend, fec_resume); +static int __maybe_unused fec_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + clk_disable_unprepare(fep->clk_ipg); + + return 0; +} + +static int __maybe_unused fec_runtime_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + return clk_prepare_enable(fep->clk_ipg); +} + +static const struct dev_pm_ops fec_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(fec_suspend, fec_resume) + SET_RUNTIME_PM_OPS(fec_runtime_suspend, fec_runtime_resume, NULL) +}; static struct platform_driver fec_driver = { .driver = { diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index a15663ad7f5e..f457a23d0bfb 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -604,6 +604,16 @@ void fec_ptp_init(struct platform_device *pdev) schedule_delayed_work(&fep->time_keep, HZ); } +void fec_ptp_stop(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct fec_enet_private *fep = netdev_priv(ndev); + + cancel_delayed_work_sync(&fep->time_keep); + if (fep->ptp_clock) + ptp_clock_unregister(fep->ptp_clock); +} + /** * fec_ptp_check_pps_event * @fep: the fec_enet_private structure handle diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index ff875028fdff..2b7610f341b0 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -565,22 +565,6 @@ static void gfar_ints_enable(struct gfar_private *priv) } } -static void lock_tx_qs(struct gfar_private *priv) -{ - int i; - - for (i = 0; i < priv->num_tx_queues; i++) - spin_lock(&priv->tx_queue[i]->txlock); -} - -static void unlock_tx_qs(struct gfar_private *priv) -{ - int i; - - for (i = 0; i < priv->num_tx_queues; i++) - spin_unlock(&priv->tx_queue[i]->txlock); -} - static int gfar_alloc_tx_queues(struct gfar_private *priv) { int i; @@ -1376,7 +1360,6 @@ static int gfar_probe(struct platform_device *ofdev) priv->dev = &ofdev->dev; SET_NETDEV_DEV(dev, &ofdev->dev); - spin_lock_init(&priv->bflock); INIT_WORK(&priv->reset_task, gfar_reset_task); platform_set_drvdata(ofdev, priv); @@ -1470,9 +1453,8 @@ static int gfar_probe(struct platform_device *ofdev) goto register_fail; } - device_init_wakeup(&dev->dev, - priv->device_flags & - FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); + device_set_wakeup_capable(&dev->dev, priv->device_flags & + FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); /* fill out IRQ number and name fields */ for (i = 0; i < priv->num_grps; i++) { @@ -1540,48 +1522,37 @@ static int gfar_suspend(struct device *dev) struct gfar_private *priv = dev_get_drvdata(dev); struct net_device *ndev = priv->ndev; struct gfar __iomem *regs = priv->gfargrp[0].regs; - unsigned long flags; u32 tempval; - int magic_packet = priv->wol_en && (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); + if (!netif_running(ndev)) + return 0; + + disable_napi(priv); + netif_tx_lock(ndev); netif_device_detach(ndev); + netif_tx_unlock(ndev); - if (netif_running(ndev)) { + gfar_halt(priv); - local_irq_save(flags); - lock_tx_qs(priv); + if (magic_packet) { + /* Enable interrupt on Magic Packet */ + gfar_write(®s->imask, IMASK_MAG); - gfar_halt_nodisable(priv); + /* Enable Magic Packet mode */ + tempval = gfar_read(®s->maccfg2); + tempval |= MACCFG2_MPEN; + gfar_write(®s->maccfg2, tempval); - /* Disable Tx, and Rx if wake-on-LAN is disabled. */ + /* re-enable the Rx block */ tempval = gfar_read(®s->maccfg1); - - tempval &= ~MACCFG1_TX_EN; - - if (!magic_packet) - tempval &= ~MACCFG1_RX_EN; - + tempval |= MACCFG1_RX_EN; gfar_write(®s->maccfg1, tempval); - unlock_tx_qs(priv); - local_irq_restore(flags); - - disable_napi(priv); - - if (magic_packet) { - /* Enable interrupt on Magic Packet */ - gfar_write(®s->imask, IMASK_MAG); - - /* Enable Magic Packet mode */ - tempval = gfar_read(®s->maccfg2); - tempval |= MACCFG2_MPEN; - gfar_write(®s->maccfg2, tempval); - } else { - phy_stop(priv->phydev); - } + } else { + phy_stop(priv->phydev); } return 0; @@ -1592,37 +1563,26 @@ static int gfar_resume(struct device *dev) struct gfar_private *priv = dev_get_drvdata(dev); struct net_device *ndev = priv->ndev; struct gfar __iomem *regs = priv->gfargrp[0].regs; - unsigned long flags; u32 tempval; int magic_packet = priv->wol_en && (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); - if (!netif_running(ndev)) { - netif_device_attach(ndev); + if (!netif_running(ndev)) return 0; - } - if (!magic_packet && priv->phydev) + if (magic_packet) { + /* Disable Magic Packet mode */ + tempval = gfar_read(®s->maccfg2); + tempval &= ~MACCFG2_MPEN; + gfar_write(®s->maccfg2, tempval); + } else { phy_start(priv->phydev); - - /* Disable Magic Packet mode, in case something - * else woke us up. - */ - local_irq_save(flags); - lock_tx_qs(priv); - - tempval = gfar_read(®s->maccfg2); - tempval &= ~MACCFG2_MPEN; - gfar_write(®s->maccfg2, tempval); + } gfar_start(priv); - unlock_tx_qs(priv); - local_irq_restore(flags); - netif_device_attach(ndev); - enable_napi(priv); return 0; @@ -2045,7 +2005,8 @@ static int register_grp_irqs(struct gfar_priv_grp *grp) /* Install our interrupt handlers for Error, * Transmit, and Receive */ - err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, 0, + err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, + IRQF_NO_SUSPEND, gfar_irq(grp, ER)->name, grp); if (err < 0) { netif_err(priv, intr, dev, "Can't get IRQ %d\n", @@ -2068,7 +2029,8 @@ static int register_grp_irqs(struct gfar_priv_grp *grp) goto rx_irq_fail; } } else { - err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, 0, + err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, + IRQF_NO_SUSPEND, gfar_irq(grp, TX)->name, grp); if (err < 0) { netif_err(priv, intr, dev, "Can't get IRQ %d\n", @@ -2169,8 +2131,6 @@ static int gfar_enet_open(struct net_device *dev) if (err) return err; - device_set_wakeup_enable(&dev->dev, priv->wol_en); - return err; } diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index daa1d37de642..5545e4103368 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -1145,9 +1145,6 @@ struct gfar_private { int oldduplex; int oldlink; - /* Bitfield update lock */ - spinlock_t bflock; - uint32_t msg_enable; struct work_struct reset_task; diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index fda12fb32ec7..3c0a8f825b63 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -653,7 +653,6 @@ static void gfar_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static int gfar_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct gfar_private *priv = netdev_priv(dev); - unsigned long flags; if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) && wol->wolopts != 0) @@ -664,9 +663,7 @@ static int gfar_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) device_set_wakeup_enable(&dev->dev, wol->wolopts & WAKE_MAGIC); - spin_lock_irqsave(&priv->bflock, flags); - priv->wol_en = !!device_may_wakeup(&dev->dev); - spin_unlock_irqrestore(&priv->bflock, flags); + priv->wol_en = !!device_may_wakeup(&dev->dev); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 82040137d7d9..0a3202047569 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -686,6 +686,7 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; + long ret_wait; int err = 0; down(&cmd->event_sem); @@ -711,8 +712,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, if (err) goto out_reset; - if (!wait_for_completion_timeout(&context->done, - msecs_to_jiffies(timeout))) { + if (op == MLX4_CMD_SENSE_PORT) { + ret_wait = + wait_for_completion_interruptible_timeout(&context->done, + msecs_to_jiffies(timeout)); + if (ret_wait < 0) { + context->fw_status = 0; + context->out_param = 0; + context->result = 0; + } + } else { + ret_wait = (long)wait_for_completion_timeout(&context->done, + msecs_to_jiffies(timeout)); + } + if (!ret_wait) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); if (op == MLX4_CMD_NOP) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 7a4f20bb7fcb..9c145dddd717 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -246,7 +246,6 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, static inline bool mlx4_en_is_ring_empty(struct mlx4_en_rx_ring *ring) { - BUG_ON((u32)(ring->prod - ring->cons) > ring->actual_size); return ring->prod == ring->cons; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index aae13adfb492..8e81e53c370e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -601,7 +601,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) continue; mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN to slave: %d, port:%d\n", __func__, i, port); - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { eqe->event.port_change.port = cpu_to_be32( @@ -640,7 +640,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) continue; if (i == mlx4_master_func_num(dev)) continue; - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { eqe->event.port_change.port = cpu_to_be32( diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 12fbfcb44d8a..29c2a017a450 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2273,6 +2273,11 @@ static int mlx4_allocate_default_counters(struct mlx4_dev *dev) } else if (err == -ENOENT) { err = 0; continue; + } else if (mlx4_is_slave(dev) && err == -EINVAL) { + priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev); + mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n", + MLX4_SINK_COUNTER_INDEX(dev)); + err = 0; } else { mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n", __func__, port + 1, err); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index 33669c29b341..753ea8bad953 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -1415,7 +1415,7 @@ static int qlcnic_83xx_copy_fw_file(struct qlcnic_adapter *adapter) if (fw->size & 0xF) { addr = dest + size; for (i = 0; i < (fw->size & 0xF); i++) - data[i] = temp[size + i]; + data[i] = ((u8 *)temp)[size + i]; for (; i < 16; i++) data[i] = 0; ret = qlcnic_ms_mem_write128(adapter, addr, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index f3918c7e7eeb..bcdc8955c719 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -413,3 +413,7 @@ static int stmmac_pltfr_resume(struct device *dev) SIMPLE_DEV_PM_OPS(stmmac_pltfr_pm_ops, stmmac_pltfr_suspend, stmmac_pltfr_resume); EXPORT_SYMBOL_GPL(stmmac_pltfr_pm_ops); + +MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet platform support"); +MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 0c5842aeb807..ab6051a43134 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6658,10 +6658,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, struct sk_buff *skb_new; skb_new = skb_realloc_headroom(skb, len); - if (!skb_new) { - rp->tx_errors++; + if (!skb_new) goto out_drop; - } kfree_skb(skb); skb = skb_new; } else diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h index bbacf5cccec2..a8a730641bbb 100644 --- a/drivers/net/ethernet/ti/netcp.h +++ b/drivers/net/ethernet/ti/netcp.h @@ -223,6 +223,7 @@ void *netcp_device_find_module(struct netcp_device *netcp_device, /* SGMII functions */ int netcp_sgmii_reset(void __iomem *sgmii_ofs, int port); +bool netcp_sgmii_rtreset(void __iomem *sgmii_ofs, int port, bool set); int netcp_sgmii_get_port_link(void __iomem *sgmii_ofs, int port); int netcp_sgmii_config(void __iomem *sgmii_ofs, int port, u32 interface); diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index ec8ed30196f3..9749dfd78c43 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2112,6 +2112,7 @@ probe_quit: static int netcp_remove(struct platform_device *pdev) { struct netcp_device *netcp_device = platform_get_drvdata(pdev); + struct netcp_intf *netcp_intf, *netcp_tmp; struct netcp_inst_modpriv *inst_modpriv, *tmp; struct netcp_module *module; @@ -2123,10 +2124,17 @@ static int netcp_remove(struct platform_device *pdev) list_del(&inst_modpriv->inst_list); kfree(inst_modpriv); } - WARN(!list_empty(&netcp_device->interface_head), "%s interface list not empty!\n", - pdev->name); - devm_kfree(&pdev->dev, netcp_device); + /* now that all modules are removed, clean up the interfaces */ + list_for_each_entry_safe(netcp_intf, netcp_tmp, + &netcp_device->interface_head, + interface_list) { + netcp_delete_interface(netcp_device, netcp_intf->ndev); + } + + WARN(!list_empty(&netcp_device->interface_head), + "%s interface list not empty!\n", pdev->name); + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); platform_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 9b7e0a34c98b..1974a8ae764a 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -1901,11 +1901,28 @@ static void gbe_port_config(struct gbe_priv *gbe_dev, struct gbe_slave *slave, writel(slave->mac_control, GBE_REG_ADDR(slave, emac_regs, mac_control)); } +static void gbe_sgmii_rtreset(struct gbe_priv *priv, + struct gbe_slave *slave, bool set) +{ + void __iomem *sgmii_port_regs; + + if (SLAVE_LINK_IS_XGMII(slave)) + return; + + if ((priv->ss_version == GBE_SS_VERSION_14) && (slave->slave_num >= 2)) + sgmii_port_regs = priv->sgmii_port34_regs; + else + sgmii_port_regs = priv->sgmii_port_regs; + + netcp_sgmii_rtreset(sgmii_port_regs, slave->slave_num, set); +} + static void gbe_slave_stop(struct gbe_intf *intf) { struct gbe_priv *gbe_dev = intf->gbe_dev; struct gbe_slave *slave = intf->slave; + gbe_sgmii_rtreset(gbe_dev, slave, true); gbe_port_reset(slave); /* Disable forwarding */ cpsw_ale_control_set(gbe_dev->ale, slave->port_num, @@ -1947,6 +1964,7 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) gbe_sgmii_config(priv, slave); gbe_port_reset(slave); + gbe_sgmii_rtreset(priv, slave, false); gbe_port_config(priv, slave, priv->rx_packet_max); gbe_set_slave_mac(slave, gbe_intf); /* enable forwarding */ @@ -2490,10 +2508,9 @@ static void free_secondary_ports(struct gbe_priv *gbe_dev) { struct gbe_slave *slave; - for (;;) { + while (!list_empty(&gbe_dev->secondary_slaves)) { slave = first_sec_slave(gbe_dev); - if (!slave) - break; + if (slave->phy) phy_disconnect(slave->phy); list_del(&slave->slave_list); @@ -2839,14 +2856,13 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, &gbe_dev->dma_chan_name); if (ret < 0) { dev_err(dev, "missing \"tx-channel\" parameter\n"); - ret = -ENODEV; - goto quit; + return -EINVAL; } if (!strcmp(node->name, "gbe")) { ret = get_gbe_resource_version(gbe_dev, node); if (ret) - goto quit; + return ret; dev_dbg(dev, "ss_version: 0x%08x\n", gbe_dev->ss_version); @@ -2857,22 +2873,20 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, else ret = -ENODEV; - if (ret) - goto quit; } else if (!strcmp(node->name, "xgbe")) { ret = set_xgbe_ethss10_priv(gbe_dev, node); if (ret) - goto quit; + return ret; ret = netcp_xgbe_serdes_init(gbe_dev->xgbe_serdes_regs, gbe_dev->ss_regs); - if (ret) - goto quit; } else { dev_err(dev, "unknown GBE node(%s)\n", node->name); ret = -ENODEV; - goto quit; } + if (ret) + return ret; + interfaces = of_get_child_by_name(node, "interfaces"); if (!interfaces) dev_err(dev, "could not find interfaces\n"); @@ -2880,11 +2894,11 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, ret = netcp_txpipe_init(&gbe_dev->tx_pipe, netcp_device, gbe_dev->dma_chan_name, gbe_dev->tx_queue_id); if (ret) - goto quit; + return ret; ret = netcp_txpipe_open(&gbe_dev->tx_pipe); if (ret) - goto quit; + return ret; /* Create network interfaces */ INIT_LIST_HEAD(&gbe_dev->gbe_intf_head); @@ -2899,6 +2913,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, if (gbe_dev->num_slaves >= gbe_dev->max_num_slaves) break; } + of_node_put(interfaces); if (!gbe_dev->num_slaves) dev_warn(dev, "No network interface configured\n"); @@ -2911,9 +2926,10 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, of_node_put(secondary_ports); if (!gbe_dev->num_slaves) { - dev_err(dev, "No network interface or secondary ports configured\n"); + dev_err(dev, + "No network interface or secondary ports configured\n"); ret = -ENODEV; - goto quit; + goto free_sec_ports; } memset(&ale_params, 0, sizeof(ale_params)); @@ -2927,7 +2943,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, if (!gbe_dev->ale) { dev_err(gbe_dev->dev, "error initializing ale engine\n"); ret = -ENODEV; - goto quit; + goto free_sec_ports; } else { dev_dbg(gbe_dev->dev, "Created a gbe ale engine\n"); } @@ -2943,14 +2959,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, *inst_priv = gbe_dev; return 0; -quit: - if (gbe_dev->hw_stats) - devm_kfree(dev, gbe_dev->hw_stats); - cpsw_ale_destroy(gbe_dev->ale); - if (gbe_dev->ss_regs) - devm_iounmap(dev, gbe_dev->ss_regs); - of_node_put(interfaces); - devm_kfree(dev, gbe_dev); +free_sec_ports: + free_secondary_ports(gbe_dev); return ret; } @@ -3023,12 +3033,9 @@ static int gbe_remove(struct netcp_device *netcp_device, void *inst_priv) free_secondary_ports(gbe_dev); if (!list_empty(&gbe_dev->gbe_intf_head)) - dev_alert(gbe_dev->dev, "unreleased ethss interfaces present\n"); + dev_alert(gbe_dev->dev, + "unreleased ethss interfaces present\n"); - devm_kfree(gbe_dev->dev, gbe_dev->hw_stats); - devm_iounmap(gbe_dev->dev, gbe_dev->ss_regs); - memset(gbe_dev, 0x00, sizeof(*gbe_dev)); - devm_kfree(gbe_dev->dev, gbe_dev); return 0; } diff --git a/drivers/net/ethernet/ti/netcp_sgmii.c b/drivers/net/ethernet/ti/netcp_sgmii.c index dbeb14266e2f..5d8419f658d0 100644 --- a/drivers/net/ethernet/ti/netcp_sgmii.c +++ b/drivers/net/ethernet/ti/netcp_sgmii.c @@ -18,6 +18,9 @@ #include "netcp.h" +#define SGMII_SRESET_RESET BIT(0) +#define SGMII_SRESET_RTRESET BIT(1) + #define SGMII_REG_STATUS_LOCK BIT(4) #define SGMII_REG_STATUS_LINK BIT(0) #define SGMII_REG_STATUS_AUTONEG BIT(2) @@ -51,12 +54,35 @@ static void sgmii_write_reg_bit(void __iomem *base, int reg, u32 val) int netcp_sgmii_reset(void __iomem *sgmii_ofs, int port) { /* Soft reset */ - sgmii_write_reg_bit(sgmii_ofs, SGMII_SRESET_REG(port), 0x1); - while (sgmii_read_reg(sgmii_ofs, SGMII_SRESET_REG(port)) != 0x0) + sgmii_write_reg_bit(sgmii_ofs, SGMII_SRESET_REG(port), + SGMII_SRESET_RESET); + + while ((sgmii_read_reg(sgmii_ofs, SGMII_SRESET_REG(port)) & + SGMII_SRESET_RESET) != 0x0) ; + return 0; } +/* port is 0 based */ +bool netcp_sgmii_rtreset(void __iomem *sgmii_ofs, int port, bool set) +{ + u32 reg; + bool oldval; + + /* Initiate a soft reset */ + reg = sgmii_read_reg(sgmii_ofs, SGMII_SRESET_REG(port)); + oldval = (reg & SGMII_SRESET_RTRESET) != 0x0; + if (set) + reg |= SGMII_SRESET_RTRESET; + else + reg &= ~SGMII_SRESET_RTRESET; + sgmii_write_reg(sgmii_ofs, SGMII_SRESET_REG(port), reg); + wmb(); + + return oldval; +} + int netcp_sgmii_get_port_link(void __iomem *sgmii_ofs, int port) { u32 status = 0, link = 0; diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 83c7cce0d172..72c9f1f352b4 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -638,7 +638,7 @@ static int receive(struct net_device *dev, int cnt) #define GETTICK(x) \ ({ \ if (cpu_has_tsc) \ - rdtscl(x); \ + x = (unsigned int)rdtsc(); \ }) #else /* __i386__ */ #define GETTICK(x) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 3b933bb5a8d5..edd77342773a 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -719,6 +719,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; int copylen = 0; + int depth; bool zerocopy = false; size_t linear; ssize_t n; @@ -804,6 +805,12 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, skb_probe_transport_header(skb, ETH_HLEN); + /* Move network header to the right position for VLAN tagged packets */ + if ((skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) && + __vlan_get_protocol(skb, skb->protocol, &depth) != 0) + skb_set_network_header(skb, depth); + rcu_read_lock(); vlan = rcu_dereference(q->vlan); /* copy skb_ubuf_info for callback when skb has no error */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7f6419ebb5e1..ad8cbc6c9ee7 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -27,7 +27,7 @@ #include <linux/usb/cdc.h> /* Version Information */ -#define DRIVER_VERSION "v1.08.0 (2015/01/13)" +#define DRIVER_VERSION "v1.08.1 (2015/07/28)" #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>" #define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters" #define MODULENAME "r8152" @@ -1902,11 +1902,10 @@ static void rtl_drop_queued_tx(struct r8152 *tp) static void rtl8152_tx_timeout(struct net_device *netdev) { struct r8152 *tp = netdev_priv(netdev); - int i; netif_warn(tp, tx_err, netdev, "Tx timeout\n"); - for (i = 0; i < RTL8152_MAX_TX; i++) - usb_unlink_urb(tp->tx_info[i].urb); + + usb_queue_reset_device(tp->intf); } static void rtl8152_set_rx_mode(struct net_device *netdev) @@ -2075,7 +2074,6 @@ static int rtl_start_rx(struct r8152 *tp) { int i, ret = 0; - napi_disable(&tp->napi); INIT_LIST_HEAD(&tp->rx_done); for (i = 0; i < RTL8152_MAX_RX; i++) { INIT_LIST_HEAD(&tp->rx_info[i].list); @@ -2083,7 +2081,6 @@ static int rtl_start_rx(struct r8152 *tp) if (ret) break; } - napi_enable(&tp->napi); if (ret && ++i < RTL8152_MAX_RX) { struct list_head rx_queue; @@ -2166,6 +2163,7 @@ static int rtl8153_enable(struct r8152 *tp) if (test_bit(RTL8152_UNPLUG, &tp->flags)) return -ENODEV; + usb_disable_lpm(tp->udev); set_tx_qlen(tp); rtl_set_eee_plus(tp); r8153_set_rx_early_timeout(tp); @@ -2337,11 +2335,61 @@ static void __rtl_set_wol(struct r8152 *tp, u32 wolopts) device_set_wakeup_enable(&tp->udev->dev, false); } +static void r8153_u1u2en(struct r8152 *tp, bool enable) +{ + u8 u1u2[8]; + + if (enable) + memset(u1u2, 0xff, sizeof(u1u2)); + else + memset(u1u2, 0x00, sizeof(u1u2)); + + usb_ocp_write(tp, USB_TOLERANCE, BYTE_EN_SIX_BYTES, sizeof(u1u2), u1u2); +} + +static void r8153_u2p3en(struct r8152 *tp, bool enable) +{ + u32 ocp_data; + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL); + if (enable && tp->version != RTL_VER_03 && tp->version != RTL_VER_04) + ocp_data |= U2P3_ENABLE; + else + ocp_data &= ~U2P3_ENABLE; + ocp_write_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL, ocp_data); +} + +static void r8153_power_cut_en(struct r8152 *tp, bool enable) +{ + u32 ocp_data; + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_POWER_CUT); + if (enable) + ocp_data |= PWR_EN | PHASE2_EN; + else + ocp_data &= ~(PWR_EN | PHASE2_EN); + ocp_write_word(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); + + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); + ocp_data &= ~PCUT_STATUS; + ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); +} + +static bool rtl_can_wakeup(struct r8152 *tp) +{ + struct usb_device *udev = tp->udev; + + return (udev->actconfig->desc.bmAttributes & USB_CONFIG_ATT_WAKEUP); +} + static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable) { if (enable) { u32 ocp_data; + r8153_u1u2en(tp, false); + r8153_u2p3en(tp, false); + __rtl_set_wol(tp, WAKE_ANY); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); @@ -2353,6 +2401,8 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); } else { __rtl_set_wol(tp, tp->saved_wolopts); + r8153_u2p3en(tp, true); + r8153_u1u2en(tp, true); } } @@ -2599,46 +2649,6 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) set_bit(PHY_RESET, &tp->flags); } -static void r8153_u1u2en(struct r8152 *tp, bool enable) -{ - u8 u1u2[8]; - - if (enable) - memset(u1u2, 0xff, sizeof(u1u2)); - else - memset(u1u2, 0x00, sizeof(u1u2)); - - usb_ocp_write(tp, USB_TOLERANCE, BYTE_EN_SIX_BYTES, sizeof(u1u2), u1u2); -} - -static void r8153_u2p3en(struct r8152 *tp, bool enable) -{ - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL); - if (enable) - ocp_data |= U2P3_ENABLE; - else - ocp_data &= ~U2P3_ENABLE; - ocp_write_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL, ocp_data); -} - -static void r8153_power_cut_en(struct r8152 *tp, bool enable) -{ - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_POWER_CUT); - if (enable) - ocp_data |= PWR_EN | PHASE2_EN; - else - ocp_data &= ~(PWR_EN | PHASE2_EN); - ocp_write_word(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); - ocp_data &= ~PCUT_STATUS; - ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); -} - static void r8153_first_init(struct r8152 *tp) { u32 ocp_data; @@ -2781,6 +2791,7 @@ static void rtl8153_disable(struct r8152 *tp) r8153_disable_aldps(tp); rtl_disable(tp); r8153_enable_aldps(tp); + usb_enable_lpm(tp->udev); } static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex) @@ -2901,9 +2912,13 @@ static void rtl8153_up(struct r8152 *tp) if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; + r8153_u1u2en(tp, false); r8153_disable_aldps(tp); r8153_first_init(tp); r8153_enable_aldps(tp); + r8153_u2p3en(tp, true); + r8153_u1u2en(tp, true); + usb_enable_lpm(tp->udev); } static void rtl8153_down(struct r8152 *tp) @@ -2914,6 +2929,7 @@ static void rtl8153_down(struct r8152 *tp) } r8153_u1u2en(tp, false); + r8153_u2p3en(tp, false); r8153_power_cut_en(tp, false); r8153_disable_aldps(tp); r8153_enter_oob(tp); @@ -2932,8 +2948,10 @@ static void set_carrier(struct r8152 *tp) if (!netif_carrier_ok(netdev)) { tp->rtl_ops.enable(tp); set_bit(RTL8152_SET_RX_MODE, &tp->flags); + napi_disable(&tp->napi); netif_carrier_on(netdev); rtl_start_rx(tp); + napi_enable(&tp->napi); } } else { if (netif_carrier_ok(netdev)) { @@ -3252,6 +3270,7 @@ static void r8153_init(struct r8152 *tp) msleep(20); } + usb_disable_lpm(tp->udev); r8153_u2p3en(tp, false); if (tp->version == RTL_VER_04) { @@ -3319,6 +3338,59 @@ static void r8153_init(struct r8152 *tp) r8153_enable_aldps(tp); r8152b_enable_fc(tp); rtl_tally_reset(tp); + r8153_u2p3en(tp, true); +} + +static int rtl8152_pre_reset(struct usb_interface *intf) +{ + struct r8152 *tp = usb_get_intfdata(intf); + struct net_device *netdev; + + if (!tp) + return 0; + + netdev = tp->netdev; + if (!netif_running(netdev)) + return 0; + + napi_disable(&tp->napi); + clear_bit(WORK_ENABLE, &tp->flags); + usb_kill_urb(tp->intr_urb); + cancel_delayed_work_sync(&tp->schedule); + if (netif_carrier_ok(netdev)) { + netif_stop_queue(netdev); + mutex_lock(&tp->control); + tp->rtl_ops.disable(tp); + mutex_unlock(&tp->control); + } + + return 0; +} + +static int rtl8152_post_reset(struct usb_interface *intf) +{ + struct r8152 *tp = usb_get_intfdata(intf); + struct net_device *netdev; + + if (!tp) + return 0; + + netdev = tp->netdev; + if (!netif_running(netdev)) + return 0; + + set_bit(WORK_ENABLE, &tp->flags); + if (netif_carrier_ok(netdev)) { + mutex_lock(&tp->control); + tp->rtl_ops.enable(tp); + rtl8152_set_rx_mode(netdev); + mutex_unlock(&tp->control); + netif_wake_queue(netdev); + } + + napi_enable(&tp->napi); + + return 0; } static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) @@ -3374,9 +3446,11 @@ static int rtl8152_resume(struct usb_interface *intf) if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { rtl_runtime_suspend_enable(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); + napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); if (netif_carrier_ok(tp->netdev)) rtl_start_rx(tp); + napi_enable(&tp->napi); } else { tp->rtl_ops.up(tp); rtl8152_set_speed(tp, AUTONEG_ENABLE, @@ -3403,12 +3477,15 @@ static void rtl8152_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (usb_autopm_get_interface(tp->intf) < 0) return; - mutex_lock(&tp->control); - - wol->supported = WAKE_ANY; - wol->wolopts = __rtl_get_wol(tp); - - mutex_unlock(&tp->control); + if (!rtl_can_wakeup(tp)) { + wol->supported = 0; + wol->wolopts = 0; + } else { + mutex_lock(&tp->control); + wol->supported = WAKE_ANY; + wol->wolopts = __rtl_get_wol(tp); + mutex_unlock(&tp->control); + } usb_autopm_put_interface(tp->intf); } @@ -3418,6 +3495,9 @@ static int rtl8152_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct r8152 *tp = netdev_priv(dev); int ret; + if (!rtl_can_wakeup(tp)) + return -EOPNOTSUPP; + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out_set_wol; @@ -4059,6 +4139,9 @@ static int rtl8152_probe(struct usb_interface *intf, goto out1; } + if (!rtl_can_wakeup(tp)) + __rtl_set_wol(tp, 0); + tp->saved_wolopts = __rtl_get_wol(tp); if (tp->saved_wolopts) device_set_wakeup_enable(&udev->dev, true); @@ -4132,6 +4215,8 @@ static struct usb_driver rtl8152_driver = { .suspend = rtl8152_suspend, .resume = rtl8152_resume, .reset_resume = rtl8152_resume, + .pre_reset = rtl8152_pre_reset, + .post_reset = rtl8152_post_reset, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 8df1b1777745..59bb8556e43a 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -47,7 +47,7 @@ config OF_DYNAMIC config OF_ADDRESS def_bool y - depends on !SPARC + depends on !SPARC && HAS_IOMEM select OF_ADDRESS_PCI if PCI config OF_ADDRESS_PCI diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 18016341d5a9..9f71770b6226 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -979,7 +979,6 @@ static struct platform_driver unittest_driver = { .remove = unittest_remove, .driver = { .name = "unittest", - .owner = THIS_MODULE, .of_match_table = of_match_ptr(unittest_match), }, }; @@ -1666,7 +1665,6 @@ static const struct i2c_device_id unittest_i2c_dev_id[] = { static struct i2c_driver unittest_i2c_dev_driver = { .driver = { .name = "unittest-i2c-dev", - .owner = THIS_MODULE, }, .probe = unittest_i2c_dev_probe, .remove = unittest_i2c_dev_remove, @@ -1761,7 +1759,6 @@ static const struct i2c_device_id unittest_i2c_mux_id[] = { static struct i2c_driver unittest_i2c_mux_driver = { .driver = { .name = "unittest-i2c-mux", - .owner = THIS_MODULE, }, .probe = unittest_i2c_mux_probe, .remove = unittest_i2c_mux_remove, diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c index f35ed53adaac..d4cda5e9600e 100644 --- a/drivers/scsi/dpt_i2o.c +++ b/drivers/scsi/dpt_i2o.c @@ -1924,6 +1924,9 @@ static void adpt_alpha_info(sysInfo_S* si) #endif #if defined __i386__ + +#include <uapi/asm/vm86.h> + static void adpt_i386_info(sysInfo_S* si) { // This is all the info we need for now diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 882744852aac..a9aa38903efe 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -599,9 +599,10 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, { struct ipr_trace_entry *trace_entry; struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + unsigned int trace_index; - trace_entry = &ioa_cfg->trace[atomic_add_return - (1, &ioa_cfg->trace_index)%IPR_NUM_TRACE_ENTRIES]; + trace_index = atomic_add_return(1, &ioa_cfg->trace_index) & IPR_TRACE_INDEX_MASK; + trace_entry = &ioa_cfg->trace[trace_index]; trace_entry->time = jiffies; trace_entry->op_code = ipr_cmd->ioarcb.cmd_pkt.cdb[0]; trace_entry->type = type; @@ -1051,10 +1052,15 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd, static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg) { + unsigned int hrrq; + if (ioa_cfg->hrrq_num == 1) - return 0; - else - return (atomic_add_return(1, &ioa_cfg->hrrq_index) % (ioa_cfg->hrrq_num - 1)) + 1; + hrrq = 0; + else { + hrrq = atomic_add_return(1, &ioa_cfg->hrrq_index); + hrrq = (hrrq % (ioa_cfg->hrrq_num - 1)) + 1; + } + return hrrq; } /** @@ -6263,21 +6269,23 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - unsigned long hrrq_flags; + unsigned long lock_flags; scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len)); if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) { scsi_dma_unmap(scsi_cmd); - spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + spin_lock_irqsave(ipr_cmd->hrrq->lock, lock_flags); list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, lock_flags); } else { - spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + spin_lock(&ipr_cmd->hrrq->_lock); ipr_erp_start(ioa_cfg, ipr_cmd); - spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); + spin_unlock(&ipr_cmd->hrrq->_lock); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); } } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 73790a1d0969..6b97ee45c7b4 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1486,6 +1486,7 @@ struct ipr_ioa_cfg { #define IPR_NUM_TRACE_INDEX_BITS 8 #define IPR_NUM_TRACE_ENTRIES (1 << IPR_NUM_TRACE_INDEX_BITS) +#define IPR_TRACE_INDEX_MASK (IPR_NUM_TRACE_ENTRIES - 1) #define IPR_TRACE_SIZE (sizeof(struct ipr_trace_entry) * IPR_NUM_TRACE_ENTRIES) char trace_start[8]; #define IPR_TRACE_START_LABEL "trace" diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 82b92c414a9c..437254e1c4de 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -738,7 +738,7 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj, ql_log(ql_log_info, vha, 0x706f, "Issuing MPI reset.\n"); - if (IS_QLA83XX(ha)) { + if (IS_QLA83XX(ha) || IS_QLA27XX(ha)) { uint32_t idc_control; qla83xx_idc_lock(vha, 0); diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 0e6ee3ca30e6..8b011aef12bd 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -67,10 +67,10 @@ * | | | 0xd031-0xd0ff | * | | | 0xd101-0xd1fe | * | | | 0xd214-0xd2fe | - * | Target Mode | 0xe079 | | - * | Target Mode Management | 0xf072 | 0xf002 | + * | Target Mode | 0xe080 | | + * | Target Mode Management | 0xf096 | 0xf002 | * | | | 0xf046-0xf049 | - * | Target Mode Task Management | 0x1000b | | + * | Target Mode Task Management | 0x1000d | | * ---------------------------------------------------------------------- */ diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index e86201d3b8c6..9ad819edcd67 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -274,6 +274,7 @@ #define RESPONSE_ENTRY_CNT_FX00 256 /* Number of response entries.*/ struct req_que; +struct qla_tgt_sess; /* * (sd.h is not exported, hence local inclusion) @@ -2026,6 +2027,7 @@ typedef struct fc_port { uint16_t port_id; unsigned long retry_delay_timestamp; + struct qla_tgt_sess *tgt_session; } fc_port_t; #include "qla_mr.h" @@ -3154,13 +3156,13 @@ struct qla_hw_data { /* Bit 21 of fw_attributes decides the MCTP capabilities */ #define IS_MCTP_CAPABLE(ha) (IS_QLA2031(ha) && \ ((ha)->fw_attributes_ext[0] & BIT_0)) -#define IS_PI_UNINIT_CAPABLE(ha) (IS_QLA83XX(ha)) -#define IS_PI_IPGUARD_CAPABLE(ha) (IS_QLA83XX(ha)) +#define IS_PI_UNINIT_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha)) +#define IS_PI_IPGUARD_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha)) #define IS_PI_DIFB_DIX0_CAPABLE(ha) (0) -#define IS_PI_SPLIT_DET_CAPABLE_HBA(ha) (IS_QLA83XX(ha)) +#define IS_PI_SPLIT_DET_CAPABLE_HBA(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha)) #define IS_PI_SPLIT_DET_CAPABLE(ha) (IS_PI_SPLIT_DET_CAPABLE_HBA(ha) && \ (((ha)->fw_attributes_h << 16 | (ha)->fw_attributes) & BIT_22)) -#define IS_ATIO_MSIX_CAPABLE(ha) (IS_QLA83XX(ha)) +#define IS_ATIO_MSIX_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha)) #define IS_TGT_MODE_CAPABLE(ha) (ha->tgt.atio_q_length) #define IS_SHADOW_REG_CAPABLE(ha) (IS_QLA27XX(ha)) #define IS_DPORT_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha)) @@ -3579,6 +3581,16 @@ typedef struct scsi_qla_host { uint16_t fcoe_fcf_idx; uint8_t fcoe_vn_port_mac[6]; + /* list of commands waiting on workqueue */ + struct list_head qla_cmd_list; + struct list_head qla_sess_op_cmd_list; + spinlock_t cmd_list_lock; + + /* Counter to detect races between ELS and RSCN events */ + atomic_t generation_tick; + /* Time when global fcport update has been scheduled */ + int total_fcport_update_gen; + uint32_t vp_abort_cnt; struct fc_vport *fc_vport; /* holds fc_vport * for each vport */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 664013115c9d..11f2f3279eab 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -115,6 +115,8 @@ qla2x00_async_iocb_timeout(void *data) QLA_LOGIO_LOGIN_RETRIED : 0; qla2x00_post_async_login_done_work(fcport->vha, fcport, lio->u.logio.data); + } else if (sp->type == SRB_LOGOUT_CMD) { + qlt_logo_completion_handler(fcport, QLA_FUNCTION_TIMEOUT); } } @@ -497,7 +499,10 @@ void qla2x00_async_logout_done(struct scsi_qla_host *vha, fc_port_t *fcport, uint16_t *data) { - qla2x00_mark_device_lost(vha, fcport, 1, 0); + /* Don't re-login in target mode */ + if (!fcport->tgt_session) + qla2x00_mark_device_lost(vha, fcport, 1, 0); + qlt_logo_completion_handler(fcport, data[0]); return; } @@ -1538,7 +1543,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mem_size = (ha->fw_memory_size - 0x11000 + 1) * sizeof(uint16_t); } else if (IS_FWI2_CAPABLE(ha)) { - if (IS_QLA83XX(ha)) + if (IS_QLA83XX(ha) || IS_QLA27XX(ha)) fixed_size = offsetof(struct qla83xx_fw_dump, ext_mem); else if (IS_QLA81XX(ha)) fixed_size = offsetof(struct qla81xx_fw_dump, ext_mem); @@ -1550,7 +1555,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mem_size = (ha->fw_memory_size - 0x100000 + 1) * sizeof(uint32_t); if (ha->mqenable) { - if (!IS_QLA83XX(ha)) + if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha)) mq_size = sizeof(struct qla2xxx_mq_chain); /* * Allocate maximum buffer size for all queues. @@ -2922,21 +2927,14 @@ qla2x00_rport_del(void *data) { fc_port_t *fcport = data; struct fc_rport *rport; - scsi_qla_host_t *vha = fcport->vha; unsigned long flags; spin_lock_irqsave(fcport->vha->host->host_lock, flags); rport = fcport->drport ? fcport->drport: fcport->rport; fcport->drport = NULL; spin_unlock_irqrestore(fcport->vha->host->host_lock, flags); - if (rport) { + if (rport) fc_remote_port_delete(rport); - /* - * Release the target mode FC NEXUS in qla_target.c code - * if target mod is enabled. - */ - qlt_fc_port_deleted(vha, fcport); - } } /** @@ -3303,6 +3301,7 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) * Create target mode FC NEXUS in qla_target.c if target mode is * enabled.. */ + qlt_fc_port_added(vha, fcport); spin_lock_irqsave(fcport->vha->host->host_lock, flags); @@ -3341,8 +3340,7 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) if (IS_QLAFX00(vha->hw)) { qla2x00_set_fcport_state(fcport, FCS_ONLINE); - qla2x00_reg_remote_port(vha, fcport); - return; + goto reg_port; } fcport->login_retry = 0; fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT); @@ -3350,7 +3348,16 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) qla2x00_set_fcport_state(fcport, FCS_ONLINE); qla2x00_iidma_fcport(vha, fcport); qla24xx_update_fcport_fcp_prio(vha, fcport); - qla2x00_reg_remote_port(vha, fcport); + +reg_port: + if (qla_ini_mode_enabled(vha)) + qla2x00_reg_remote_port(vha, fcport); + else { + /* + * Create target mode FC NEXUS in qla_target.c + */ + qlt_fc_port_added(vha, fcport); + } } /* @@ -3375,6 +3382,7 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) LIST_HEAD(new_fcports); struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); + int discovery_gen; /* If FL port exists, then SNS is present */ if (IS_FWI2_CAPABLE(ha)) @@ -3445,6 +3453,14 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) fcport->scan_state = QLA_FCPORT_SCAN; } + /* Mark the time right before querying FW for connected ports. + * This process is long, asynchronous and by the time it's done, + * collected information might not be accurate anymore. E.g. + * disconnected port might have re-connected and a brand new + * session has been created. In this case session's generation + * will be newer than discovery_gen. */ + qlt_do_generation_tick(vha, &discovery_gen); + rval = qla2x00_find_all_fabric_devs(vha, &new_fcports); if (rval != QLA_SUCCESS) break; @@ -3460,20 +3476,44 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) if ((fcport->flags & FCF_FABRIC_DEVICE) == 0) continue; - if (fcport->scan_state == QLA_FCPORT_SCAN && - atomic_read(&fcport->state) == FCS_ONLINE) { - qla2x00_mark_device_lost(vha, fcport, - ql2xplogiabsentdevice, 0); - if (fcport->loop_id != FC_NO_LOOP_ID && - (fcport->flags & FCF_FCP2_DEVICE) == 0 && - fcport->port_type != FCT_INITIATOR && - fcport->port_type != FCT_BROADCAST) { - ha->isp_ops->fabric_logout(vha, - fcport->loop_id, - fcport->d_id.b.domain, - fcport->d_id.b.area, - fcport->d_id.b.al_pa); - qla2x00_clear_loop_id(fcport); + if (fcport->scan_state == QLA_FCPORT_SCAN) { + if (qla_ini_mode_enabled(base_vha) && + atomic_read(&fcport->state) == FCS_ONLINE) { + qla2x00_mark_device_lost(vha, fcport, + ql2xplogiabsentdevice, 0); + if (fcport->loop_id != FC_NO_LOOP_ID && + (fcport->flags & FCF_FCP2_DEVICE) == 0 && + fcport->port_type != FCT_INITIATOR && + fcport->port_type != FCT_BROADCAST) { + ha->isp_ops->fabric_logout(vha, + fcport->loop_id, + fcport->d_id.b.domain, + fcport->d_id.b.area, + fcport->d_id.b.al_pa); + qla2x00_clear_loop_id(fcport); + } + } else if (!qla_ini_mode_enabled(base_vha)) { + /* + * In target mode, explicitly kill + * sessions and log out of devices + * that are gone, so that we don't + * end up with an initiator using the + * wrong ACL (if the fabric recycles + * an FC address and we have a stale + * session around) and so that we don't + * report initiators that are no longer + * on the fabric. + */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf077, + "port gone, logging out/killing session: " + "%8phC state 0x%x flags 0x%x fc4_type 0x%x " + "scan_state %d\n", + fcport->port_name, + atomic_read(&fcport->state), + fcport->flags, fcport->fc4_type, + fcport->scan_state); + qlt_fc_port_deleted(vha, fcport, + discovery_gen); } } } @@ -3494,6 +3534,28 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) (fcport->flags & FCF_LOGIN_NEEDED) == 0) continue; + /* + * If we're not an initiator, skip looking for devices + * and logging in. There's no reason for us to do it, + * and it seems to actively cause problems in target + * mode if we race with the initiator logging into us + * (we might get the "port ID used" status back from + * our login command and log out the initiator, which + * seems to cause havoc). + */ + if (!qla_ini_mode_enabled(base_vha)) { + if (fcport->scan_state == QLA_FCPORT_FOUND) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf078, + "port %8phC state 0x%x flags 0x%x fc4_type 0x%x " + "scan_state %d (initiator mode disabled; skipping " + "login)\n", fcport->port_name, + atomic_read(&fcport->state), + fcport->flags, fcport->fc4_type, + fcport->scan_state); + } + continue; + } + if (fcport->loop_id == FC_NO_LOOP_ID) { fcport->loop_id = next_loopid; rval = qla2x00_find_new_loop_id( @@ -3520,16 +3582,38 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) break; - /* Find a new loop ID to use. */ - fcport->loop_id = next_loopid; - rval = qla2x00_find_new_loop_id(base_vha, fcport); - if (rval != QLA_SUCCESS) { - /* Ran out of IDs to use */ - break; - } + /* + * If we're not an initiator, skip looking for devices + * and logging in. There's no reason for us to do it, + * and it seems to actively cause problems in target + * mode if we race with the initiator logging into us + * (we might get the "port ID used" status back from + * our login command and log out the initiator, which + * seems to cause havoc). + */ + if (qla_ini_mode_enabled(base_vha)) { + /* Find a new loop ID to use. */ + fcport->loop_id = next_loopid; + rval = qla2x00_find_new_loop_id(base_vha, + fcport); + if (rval != QLA_SUCCESS) { + /* Ran out of IDs to use */ + break; + } - /* Login and update database */ - qla2x00_fabric_dev_login(vha, fcport, &next_loopid); + /* Login and update database */ + qla2x00_fabric_dev_login(vha, fcport, + &next_loopid); + } else { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf079, + "new port %8phC state 0x%x flags 0x%x fc4_type " + "0x%x scan_state %d (initiator mode disabled; " + "skipping login)\n", + fcport->port_name, + atomic_read(&fcport->state), + fcport->flags, fcport->fc4_type, + fcport->scan_state); + } list_move_tail(&fcport->list, &vha->vp_fcports); } @@ -3725,11 +3809,12 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, fcport->fp_speed = new_fcport->fp_speed; /* - * If address the same and state FCS_ONLINE, nothing - * changed. + * If address the same and state FCS_ONLINE + * (or in target mode), nothing changed. */ if (fcport->d_id.b24 == new_fcport->d_id.b24 && - atomic_read(&fcport->state) == FCS_ONLINE) { + (atomic_read(&fcport->state) == FCS_ONLINE || + !qla_ini_mode_enabled(base_vha))) { break; } @@ -3749,6 +3834,22 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, * Log it out if still logged in and mark it for * relogin later. */ + if (!qla_ini_mode_enabled(base_vha)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf080, + "port changed FC ID, %8phC" + " old %x:%x:%x (loop_id 0x%04x)-> new %x:%x:%x\n", + fcport->port_name, + fcport->d_id.b.domain, + fcport->d_id.b.area, + fcport->d_id.b.al_pa, + fcport->loop_id, + new_fcport->d_id.b.domain, + new_fcport->d_id.b.area, + new_fcport->d_id.b.al_pa); + fcport->d_id.b24 = new_fcport->d_id.b24; + break; + } + fcport->d_id.b24 = new_fcport->d_id.b24; fcport->flags |= FCF_LOGIN_NEEDED; if (fcport->loop_id != FC_NO_LOOP_ID && @@ -3768,6 +3869,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, if (found) continue; /* If device was not in our fcports list, then add it. */ + new_fcport->scan_state = QLA_FCPORT_FOUND; list_add_tail(&new_fcport->list, new_fcports); /* Allocate a new replacement fcport. */ @@ -4188,6 +4290,14 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha) atomic_read(&fcport->state) != FCS_UNCONFIGURED) { spin_unlock_irqrestore(&ha->vport_slock, flags); qla2x00_rport_del(fcport); + + /* + * Release the target mode FC NEXUS in + * qla_target.c, if target mod is enabled. + */ + qlt_fc_port_deleted(vha, fcport, + base_vha->total_fcport_update_gen); + spin_lock_irqsave(&ha->vport_slock, flags); } } diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 36fbd4c7af8f..6f02b26a35cf 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -1943,6 +1943,9 @@ qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->entry_type = LOGINOUT_PORT_IOCB_TYPE; logio->control_flags = cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO); + if (!sp->fcport->tgt_session || + !sp->fcport->tgt_session->keep_nport_handle) + logio->control_flags |= cpu_to_le16(LCF_FREE_NPORT); logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); logio->port_id[0] = sp->fcport->d_id.b.al_pa; logio->port_id[1] = sp->fcport->d_id.b.area; diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 02b1c1c5355b..b2f713ad9034 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -2415,7 +2415,8 @@ qla2x00_get_resource_cnts(scsi_qla_host_t *vha, uint16_t *cur_xchg_cnt, *orig_iocb_cnt = mcp->mb[10]; if (vha->hw->flags.npiv_supported && max_npiv_vports) *max_npiv_vports = mcp->mb[11]; - if ((IS_QLA81XX(vha->hw) || IS_QLA83XX(vha->hw)) && max_fcfs) + if ((IS_QLA81XX(vha->hw) || IS_QLA83XX(vha->hw) || + IS_QLA27XX(vha->hw)) && max_fcfs) *max_fcfs = mcp->mb[12]; } @@ -3898,7 +3899,7 @@ qla25xx_init_rsp_que(struct scsi_qla_host *vha, struct rsp_que *rsp) spin_lock_irqsave(&ha->hardware_lock, flags); if (!(rsp->options & BIT_0)) { WRT_REG_DWORD(rsp->rsp_q_out, 0); - if (!IS_QLA83XX(ha)) + if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha)) WRT_REG_DWORD(rsp->rsp_q_in, 0); } @@ -5345,7 +5346,7 @@ qla83xx_restart_nic_firmware(scsi_qla_host_t *vha) mbx_cmd_t *mcp = &mc; struct qla_hw_data *ha = vha->hw; - if (!IS_QLA83XX(ha)) + if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha)) return QLA_FUNCTION_FAILED; ql_dbg(ql_dbg_mbx, vha, 0x1143, "Entered %s.\n", __func__); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index a28815b8276f..8a5cac8448c7 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2504,6 +2504,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->mbx_count = MAILBOX_REGISTER_COUNT; req_length = REQUEST_ENTRY_CNT_24XX; rsp_length = RESPONSE_ENTRY_CNT_2300; + ha->tgt.atio_q_length = ATIO_ENTRY_CNT_24XX; ha->max_loop_id = SNS_LAST_LOOP_ID_2300; ha->init_cb_size = sizeof(struct mid_init_cb_81xx); ha->gid_list_info_size = 8; @@ -3229,11 +3230,15 @@ qla2x00_schedule_rport_del(struct scsi_qla_host *vha, fc_port_t *fcport, spin_lock_irqsave(vha->host->host_lock, flags); fcport->drport = rport; spin_unlock_irqrestore(vha->host->host_lock, flags); + qlt_do_generation_tick(vha, &base_vha->total_fcport_update_gen); set_bit(FCPORT_UPDATE_NEEDED, &base_vha->dpc_flags); qla2xxx_wake_dpc(base_vha); } else { - fc_remote_port_delete(rport); - qlt_fc_port_deleted(vha, fcport); + int now; + if (rport) + fc_remote_port_delete(rport); + qlt_do_generation_tick(vha, &now); + qlt_fc_port_deleted(vha, fcport, now); } } @@ -3763,8 +3768,11 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, INIT_LIST_HEAD(&vha->vp_fcports); INIT_LIST_HEAD(&vha->work_list); INIT_LIST_HEAD(&vha->list); + INIT_LIST_HEAD(&vha->qla_cmd_list); + INIT_LIST_HEAD(&vha->qla_sess_op_cmd_list); spin_lock_init(&vha->work_lock); + spin_lock_init(&vha->cmd_list_lock); sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no); ql_dbg(ql_dbg_init, vha, 0x0041, diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 028e8c8a7de9..2feb5f38edcd 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -1697,7 +1697,7 @@ qla83xx_select_led_port(struct qla_hw_data *ha) { uint32_t led_select_value = 0; - if (!IS_QLA83XX(ha)) + if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha)) goto out; if (ha->port_no == 0) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index b749026aa592..58651ecbd88c 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -113,6 +113,11 @@ static void qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha, static void qlt_alloc_qfull_cmd(struct scsi_qla_host *vha, struct atio_from_isp *atio, uint16_t status, int qfull); static void qlt_disable_vha(struct scsi_qla_host *vha); +static void qlt_clear_tgt_db(struct qla_tgt *tgt); +static void qlt_send_notify_ack(struct scsi_qla_host *vha, + struct imm_ntfy_from_isp *ntfy, + uint32_t add_flags, uint16_t resp_code, int resp_code_valid, + uint16_t srr_flags, uint16_t srr_reject_code, uint8_t srr_explan); /* * Global Variables */ @@ -122,6 +127,16 @@ static struct workqueue_struct *qla_tgt_wq; static DEFINE_MUTEX(qla_tgt_mutex); static LIST_HEAD(qla_tgt_glist); +/* This API intentionally takes dest as a parameter, rather than returning + * int value to avoid caller forgetting to issue wmb() after the store */ +void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest) +{ + scsi_qla_host_t *base_vha = pci_get_drvdata(vha->hw->pdev); + *dest = atomic_inc_return(&base_vha->generation_tick); + /* memory barrier */ + wmb(); +} + /* ha->hardware_lock supposed to be held on entry (to protect tgt->sess_list) */ static struct qla_tgt_sess *qlt_find_sess_by_port_name( struct qla_tgt *tgt, @@ -381,14 +396,73 @@ static void qlt_free_session_done(struct work_struct *work) struct qla_tgt *tgt = sess->tgt; struct scsi_qla_host *vha = sess->vha; struct qla_hw_data *ha = vha->hw; + unsigned long flags; + bool logout_started = false; + fc_port_t fcport; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf084, + "%s: se_sess %p / sess %p from port %8phC loop_id %#04x" + " s_id %02x:%02x:%02x logout %d keep %d plogi %d\n", + __func__, sess->se_sess, sess, sess->port_name, sess->loop_id, + sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa, + sess->logout_on_delete, sess->keep_nport_handle, + sess->plogi_ack_needed); BUG_ON(!tgt); + + if (sess->logout_on_delete) { + int rc; + + memset(&fcport, 0, sizeof(fcport)); + fcport.loop_id = sess->loop_id; + fcport.d_id = sess->s_id; + memcpy(fcport.port_name, sess->port_name, WWN_SIZE); + fcport.vha = vha; + fcport.tgt_session = sess; + + rc = qla2x00_post_async_logout_work(vha, &fcport, NULL); + if (rc != QLA_SUCCESS) + ql_log(ql_log_warn, vha, 0xf085, + "Schedule logo failed sess %p rc %d\n", + sess, rc); + else + logout_started = true; + } + /* * Release the target session for FC Nexus from fabric module code. */ if (sess->se_sess != NULL) ha->tgt.tgt_ops->free_session(sess); + if (logout_started) { + bool traced = false; + + while (!ACCESS_ONCE(sess->logout_completed)) { + if (!traced) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf086, + "%s: waiting for sess %p logout\n", + __func__, sess); + traced = true; + } + msleep(100); + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf087, + "%s: sess %p logout completed\n", + __func__, sess); + } + + spin_lock_irqsave(&ha->hardware_lock, flags); + + if (sess->plogi_ack_needed) + qlt_send_notify_ack(vha, &sess->tm_iocb, + 0, 0, 0, 0, 0, 0); + + list_del(&sess->sess_list_entry); + + spin_unlock_irqrestore(&ha->hardware_lock, flags); + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001, "Unregistration of sess %p finished\n", sess); @@ -409,9 +483,9 @@ void qlt_unreg_sess(struct qla_tgt_sess *sess) vha->hw->tgt.tgt_ops->clear_nacl_from_fcport_map(sess); - list_del(&sess->sess_list_entry); - if (sess->deleted) - list_del(&sess->del_list_entry); + if (!list_empty(&sess->del_list_entry)) + list_del_init(&sess->del_list_entry); + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; INIT_WORK(&sess->free_work, qlt_free_session_done); schedule_work(&sess->free_work); @@ -431,10 +505,10 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) loop_id = le16_to_cpu(n->u.isp24.nport_handle); if (loop_id == 0xFFFF) { -#if 0 /* FIXME: Re-enable Global event handling.. */ /* Global event */ - atomic_inc(&ha->tgt.qla_tgt->tgt_global_resets_count); - qlt_clear_tgt_db(ha->tgt.qla_tgt); + atomic_inc(&vha->vha_tgt.qla_tgt->tgt_global_resets_count); + qlt_clear_tgt_db(vha->vha_tgt.qla_tgt); +#if 0 /* FIXME: do we need to choose a session here? */ if (!list_empty(&ha->tgt.qla_tgt->sess_list)) { sess = list_entry(ha->tgt.qla_tgt->sess_list.next, typeof(*sess), sess_list_entry); @@ -489,27 +563,38 @@ static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, struct qla_tgt *tgt = sess->tgt; uint32_t dev_loss_tmo = tgt->ha->port_down_retry_count + 5; - if (sess->deleted) - return; + if (sess->deleted) { + /* Upgrade to unconditional deletion in case it was temporary */ + if (immediate && sess->deleted == QLA_SESS_DELETION_PENDING) + list_del(&sess->del_list_entry); + else + return; + } ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, "Scheduling sess %p for deletion\n", sess); - list_add_tail(&sess->del_list_entry, &tgt->del_sess_list); - sess->deleted = 1; - if (immediate) + if (immediate) { dev_loss_tmo = 0; + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; + list_add(&sess->del_list_entry, &tgt->del_sess_list); + } else { + sess->deleted = QLA_SESS_DELETION_PENDING; + list_add_tail(&sess->del_list_entry, &tgt->del_sess_list); + } sess->expires = jiffies + dev_loss_tmo * HZ; ql_dbg(ql_dbg_tgt, sess->vha, 0xe048, - "qla_target(%d): session for port %8phC (loop ID %d) scheduled for " - "deletion in %u secs (expires: %lu) immed: %d\n", - sess->vha->vp_idx, sess->port_name, sess->loop_id, dev_loss_tmo, - sess->expires, immediate); + "qla_target(%d): session for port %8phC (loop ID %d s_id %02x:%02x:%02x)" + " scheduled for deletion in %u secs (expires: %lu) immed: %d, logout: %d, gen: %#x\n", + sess->vha->vp_idx, sess->port_name, sess->loop_id, + sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa, + dev_loss_tmo, sess->expires, immediate, sess->logout_on_delete, + sess->generation); if (immediate) - schedule_delayed_work(&tgt->sess_del_work, 0); + mod_delayed_work(system_wq, &tgt->sess_del_work, 0); else schedule_delayed_work(&tgt->sess_del_work, sess->expires - jiffies); @@ -578,9 +663,9 @@ out_free_id_list: /* ha->hardware_lock supposed to be held on entry */ static void qlt_undelete_sess(struct qla_tgt_sess *sess) { - BUG_ON(!sess->deleted); + BUG_ON(sess->deleted != QLA_SESS_DELETION_PENDING); - list_del(&sess->del_list_entry); + list_del_init(&sess->del_list_entry); sess->deleted = 0; } @@ -599,7 +684,9 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) del_list_entry); elapsed = jiffies; if (time_after_eq(elapsed, sess->expires)) { - qlt_undelete_sess(sess); + /* No turning back */ + list_del_init(&sess->del_list_entry); + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf004, "Timeout: sess %p about to be deleted\n", @@ -643,6 +730,13 @@ static struct qla_tgt_sess *qlt_create_sess( fcport->d_id.b.al_pa, fcport->d_id.b.area, fcport->loop_id); + /* Cannot undelete at this point */ + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + spin_unlock_irqrestore(&ha->hardware_lock, + flags); + return NULL; + } + if (sess->deleted) qlt_undelete_sess(sess); @@ -652,6 +746,9 @@ static struct qla_tgt_sess *qlt_create_sess( if (sess->local && !local) sess->local = 0; + + qlt_do_generation_tick(vha, &sess->generation); + spin_unlock_irqrestore(&ha->hardware_lock, flags); return sess; @@ -673,6 +770,14 @@ static struct qla_tgt_sess *qlt_create_sess( sess->s_id = fcport->d_id; sess->loop_id = fcport->loop_id; sess->local = local; + INIT_LIST_HEAD(&sess->del_list_entry); + + /* Under normal circumstances we want to logout from firmware when + * session eventually ends and release corresponding nport handle. + * In the exception cases (e.g. when new PLOGI is waiting) corresponding + * code will adjust these flags as necessary. */ + sess->logout_on_delete = 1; + sess->keep_nport_handle = 0; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf006, "Adding sess %p to tgt %p via ->check_initiator_node_acl()\n", @@ -705,6 +810,7 @@ static struct qla_tgt_sess *qlt_create_sess( spin_lock_irqsave(&ha->hardware_lock, flags); list_add_tail(&sess->sess_list_entry, &vha->vha_tgt.qla_tgt->sess_list); vha->vha_tgt.qla_tgt->sess_count++; + qlt_do_generation_tick(vha, &sess->generation); spin_unlock_irqrestore(&ha->hardware_lock, flags); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04b, @@ -718,7 +824,7 @@ static struct qla_tgt_sess *qlt_create_sess( } /* - * Called from drivers/scsi/qla2xxx/qla_init.c:qla2x00_reg_remote_port() + * Called from qla2x00_reg_remote_port() */ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) { @@ -750,6 +856,10 @@ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) mutex_unlock(&vha->vha_tgt.tgt_mutex); spin_lock_irqsave(&ha->hardware_lock, flags); + } else if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + /* Point of no return */ + spin_unlock_irqrestore(&ha->hardware_lock, flags); + return; } else { kref_get(&sess->se_sess->sess_kref); @@ -780,27 +890,36 @@ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) spin_unlock_irqrestore(&ha->hardware_lock, flags); } -void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport) +/* + * max_gen - specifies maximum session generation + * at which this deletion requestion is still valid + */ +void +qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport, int max_gen) { - struct qla_hw_data *ha = vha->hw; struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; struct qla_tgt_sess *sess; - unsigned long flags; if (!vha->hw->tgt.tgt_ops) return; - if (!tgt || (fcport->port_type != FCT_INITIATOR)) + if (!tgt) return; - spin_lock_irqsave(&ha->hardware_lock, flags); if (tgt->tgt_stop) { - spin_unlock_irqrestore(&ha->hardware_lock, flags); return; } sess = qlt_find_sess_by_port_name(tgt, fcport->port_name); if (!sess) { - spin_unlock_irqrestore(&ha->hardware_lock, flags); + return; + } + + if (max_gen - sess->generation < 0) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf092, + "Ignoring stale deletion request for se_sess %p / sess %p" + " for port %8phC, req_gen %d, sess_gen %d\n", + sess->se_sess, sess, sess->port_name, max_gen, + sess->generation); return; } @@ -808,7 +927,6 @@ void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport) sess->local = 1; qlt_schedule_sess_for_deletion(sess, false); - spin_unlock_irqrestore(&ha->hardware_lock, flags); } static inline int test_tgt_sess_count(struct qla_tgt *tgt) @@ -1175,6 +1293,70 @@ static void qlt_24xx_retry_term_exchange(struct scsi_qla_host *vha, FCP_TMF_CMPL, true); } +static int abort_cmd_for_tag(struct scsi_qla_host *vha, uint32_t tag) +{ + struct qla_tgt_sess_op *op; + struct qla_tgt_cmd *cmd; + + spin_lock(&vha->cmd_list_lock); + + list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) { + if (tag == op->atio.u.isp24.exchange_addr) { + op->aborted = true; + spin_unlock(&vha->cmd_list_lock); + return 1; + } + } + + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { + if (tag == cmd->atio.u.isp24.exchange_addr) { + cmd->state = QLA_TGT_STATE_ABORTED; + spin_unlock(&vha->cmd_list_lock); + return 1; + } + } + + spin_unlock(&vha->cmd_list_lock); + return 0; +} + +/* drop cmds for the given lun + * XXX only looks for cmds on the port through which lun reset was recieved + * XXX does not go through the list of other port (which may have cmds + * for the same lun) + */ +static void abort_cmds_for_lun(struct scsi_qla_host *vha, + uint32_t lun, uint8_t *s_id) +{ + struct qla_tgt_sess_op *op; + struct qla_tgt_cmd *cmd; + uint32_t key; + + key = sid_to_key(s_id); + spin_lock(&vha->cmd_list_lock); + list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) { + uint32_t op_key; + uint32_t op_lun; + + op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + op_lun = scsilun_to_int( + (struct scsi_lun *)&op->atio.u.isp24.fcp_cmnd.lun); + if (op_key == key && op_lun == lun) + op->aborted = true; + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { + uint32_t cmd_key; + uint32_t cmd_lun; + + cmd_key = sid_to_key(cmd->atio.u.isp24.fcp_hdr.s_id); + cmd_lun = scsilun_to_int( + (struct scsi_lun *)&cmd->atio.u.isp24.fcp_cmnd.lun); + if (cmd_key == key && cmd_lun == lun) + cmd->state = QLA_TGT_STATE_ABORTED; + } + spin_unlock(&vha->cmd_list_lock); +} + /* ha->hardware_lock supposed to be held on entry */ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, struct abts_recv_from_24xx *abts, struct qla_tgt_sess *sess) @@ -1199,8 +1381,19 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, } spin_unlock(&se_sess->sess_cmd_lock); - if (!found_lun) - return -ENOENT; + /* cmd not in LIO lists, look in qla list */ + if (!found_lun) { + if (abort_cmd_for_tag(vha, abts->exchange_addr_to_abort)) { + /* send TASK_ABORT response immediately */ + qlt_24xx_send_abts_resp(vha, abts, FCP_TMF_CMPL, false); + return 0; + } else { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf081, + "unable to find cmd in driver or LIO for tag 0x%x\n", + abts->exchange_addr_to_abort); + return -ENOENT; + } + } ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00f, "qla_target(%d): task abort (tag=%d)\n", @@ -1284,6 +1477,11 @@ static void qlt_24xx_handle_abts(struct scsi_qla_host *vha, return; } + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + qlt_24xx_send_abts_resp(vha, abts, FCP_TMF_REJECTED, false); + return; + } + rc = __qlt_24xx_handle_abts(vha, abts, sess); if (rc != 0) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf054, @@ -1726,20 +1924,6 @@ static int qlt_pre_xmit_response(struct qla_tgt_cmd *cmd, struct qla_hw_data *ha = vha->hw; struct se_cmd *se_cmd = &cmd->se_cmd; - if (unlikely(cmd->aborted)) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, - "qla_target(%d): terminating exchange for aborted cmd=%p (se_cmd=%p, tag=%lld)", - vha->vp_idx, cmd, se_cmd, se_cmd->tag); - - cmd->state = QLA_TGT_STATE_ABORTED; - cmd->cmd_flags |= BIT_6; - - qlt_send_term_exchange(vha, cmd, &cmd->atio, 0); - - /* !! At this point cmd could be already freed !! */ - return QLA_TGT_PRE_XMIT_RESP_CMD_ABORTED; - } - prm->cmd = cmd; prm->tgt = tgt; prm->rq_result = scsi_status; @@ -2301,6 +2485,19 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, unsigned long flags = 0; int res; + spin_lock_irqsave(&ha->hardware_lock, flags); + if (cmd->sess && cmd->sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + cmd->state = QLA_TGT_STATE_PROCESSED; + if (cmd->sess->logout_completed) + /* no need to terminate. FW already freed exchange. */ + qlt_abort_cmd_on_host_reset(cmd->vha, cmd); + else + qlt_send_term_exchange(vha, cmd, &cmd->atio, 1); + spin_unlock_irqrestore(&ha->hardware_lock, flags); + return 0; + } + spin_unlock_irqrestore(&ha->hardware_lock, flags); + memset(&prm, 0, sizeof(prm)); qlt_check_srr_debug(cmd, &xmit_type); @@ -2313,9 +2510,6 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status, &full_req_cnt); if (unlikely(res != 0)) { - if (res == QLA_TGT_PRE_XMIT_RESP_CMD_ABORTED) - return 0; - return res; } @@ -2345,9 +2539,10 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, res = qlt_build_ctio_crc2_pkt(&prm, vha); else res = qlt_24xx_build_ctio_pkt(&prm, vha); - if (unlikely(res != 0)) + if (unlikely(res != 0)) { + vha->req->cnt += full_req_cnt; goto out_unmap_unlock; - + } pkt = (struct ctio7_to_24xx *)prm.pkt; @@ -2461,7 +2656,8 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&ha->hardware_lock, flags); - if (qla2x00_reset_active(vha) || cmd->reset_count != ha->chip_reset) { + if (qla2x00_reset_active(vha) || (cmd->reset_count != ha->chip_reset) || + (cmd->sess && cmd->sess->deleted == QLA_SESS_DELETION_IN_PROGRESS)) { /* * Either a chip reset is active or this request was from * previous life, just abort the processing. @@ -2485,8 +2681,11 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) else res = qlt_24xx_build_ctio_pkt(&prm, vha); - if (unlikely(res != 0)) + if (unlikely(res != 0)) { + vha->req->cnt += prm.req_cnt; goto out_unlock_free_unmap; + } + pkt = (struct ctio7_to_24xx *)prm.pkt; pkt->u.status0.flags |= __constant_cpu_to_le16(CTIO7_FLAGS_DATA_OUT | CTIO7_FLAGS_STATUS_MODE_0); @@ -2651,6 +2850,89 @@ out: /* If hardware_lock held on entry, might drop it, then reaquire */ /* This function sends the appropriate CTIO to ISP 2xxx or 24xx */ +static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha, + struct imm_ntfy_from_isp *ntfy) +{ + struct nack_to_isp *nack; + struct qla_hw_data *ha = vha->hw; + request_t *pkt; + int ret = 0; + + ql_dbg(ql_dbg_tgt_tmr, vha, 0xe01c, + "Sending TERM ELS CTIO (ha=%p)\n", ha); + + pkt = (request_t *)qla2x00_alloc_iocbs_ready(vha, NULL); + if (pkt == NULL) { + ql_dbg(ql_dbg_tgt, vha, 0xe080, + "qla_target(%d): %s failed: unable to allocate " + "request packet\n", vha->vp_idx, __func__); + return -ENOMEM; + } + + pkt->entry_type = NOTIFY_ACK_TYPE; + pkt->entry_count = 1; + pkt->handle = QLA_TGT_SKIP_HANDLE | CTIO_COMPLETION_HANDLE_MARK; + + nack = (struct nack_to_isp *)pkt; + nack->ox_id = ntfy->ox_id; + + nack->u.isp24.nport_handle = ntfy->u.isp24.nport_handle; + if (le16_to_cpu(ntfy->u.isp24.status) == IMM_NTFY_ELS) { + nack->u.isp24.flags = ntfy->u.isp24.flags & + __constant_cpu_to_le32(NOTIFY24XX_FLAGS_PUREX_IOCB); + } + + /* terminate */ + nack->u.isp24.flags |= + __constant_cpu_to_le16(NOTIFY_ACK_FLAGS_TERMINATE); + + nack->u.isp24.srr_rx_id = ntfy->u.isp24.srr_rx_id; + nack->u.isp24.status = ntfy->u.isp24.status; + nack->u.isp24.status_subcode = ntfy->u.isp24.status_subcode; + nack->u.isp24.fw_handle = ntfy->u.isp24.fw_handle; + nack->u.isp24.exchange_address = ntfy->u.isp24.exchange_address; + nack->u.isp24.srr_rel_offs = ntfy->u.isp24.srr_rel_offs; + nack->u.isp24.srr_ui = ntfy->u.isp24.srr_ui; + nack->u.isp24.vp_index = ntfy->u.isp24.vp_index; + + qla2x00_start_iocbs(vha, vha->req); + return ret; +} + +static void qlt_send_term_imm_notif(struct scsi_qla_host *vha, + struct imm_ntfy_from_isp *imm, int ha_locked) +{ + unsigned long flags = 0; + int rc; + + if (qlt_issue_marker(vha, ha_locked) < 0) + return; + + if (ha_locked) { + rc = __qlt_send_term_imm_notif(vha, imm); + +#if 0 /* Todo */ + if (rc == -ENOMEM) + qlt_alloc_qfull_cmd(vha, imm, 0, 0); +#endif + goto done; + } + + spin_lock_irqsave(&vha->hw->hardware_lock, flags); + rc = __qlt_send_term_imm_notif(vha, imm); + +#if 0 /* Todo */ + if (rc == -ENOMEM) + qlt_alloc_qfull_cmd(vha, imm, 0, 0); +#endif + +done: + if (!ha_locked) + spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); +} + +/* If hardware_lock held on entry, might drop it, then reaquire */ +/* This function sends the appropriate CTIO to ISP 2xxx or 24xx */ static int __qlt_send_term_exchange(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd, struct atio_from_isp *atio) @@ -2715,7 +2997,7 @@ static int __qlt_send_term_exchange(struct scsi_qla_host *vha, static void qlt_send_term_exchange(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked) { - unsigned long flags; + unsigned long flags = 0; int rc; if (qlt_issue_marker(vha, ha_locked) < 0) @@ -2731,17 +3013,18 @@ static void qlt_send_term_exchange(struct scsi_qla_host *vha, rc = __qlt_send_term_exchange(vha, cmd, atio); if (rc == -ENOMEM) qlt_alloc_qfull_cmd(vha, atio, 0, 0); - spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); done: if (cmd && ((cmd->state != QLA_TGT_STATE_ABORTED) || !cmd->cmd_sent_to_fw)) { - if (!ha_locked && !in_interrupt()) - msleep(250); /* just in case */ - - qlt_unmap_sg(vha, cmd); + if (cmd->sg_mapped) + qlt_unmap_sg(vha, cmd); vha->hw->tgt.tgt_ops->free_cmd(cmd); } + + if (!ha_locked) + spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); + return; } @@ -2792,6 +3075,24 @@ static void qlt_chk_exch_leak_thresh_hold(struct scsi_qla_host *vha) } +void qlt_abort_cmd(struct qla_tgt_cmd *cmd) +{ + struct qla_tgt *tgt = cmd->tgt; + struct scsi_qla_host *vha = tgt->vha; + struct se_cmd *se_cmd = &cmd->se_cmd; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, + "qla_target(%d): terminating exchange for aborted cmd=%p " + "(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd, + se_cmd->tag); + + cmd->state = QLA_TGT_STATE_ABORTED; + cmd->cmd_flags |= BIT_6; + + qlt_send_term_exchange(vha, cmd, &cmd->atio, 0); +} +EXPORT_SYMBOL(qlt_abort_cmd); + void qlt_free_cmd(struct qla_tgt_cmd *cmd) { struct qla_tgt_sess *sess = cmd->sess; @@ -3015,7 +3316,7 @@ qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd) dump_stack(); } - cmd->cmd_flags |= BIT_12; + cmd->cmd_flags |= BIT_17; ha->tgt.tgt_ops->free_cmd(cmd); } @@ -3177,7 +3478,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, skip_term: if (cmd->state == QLA_TGT_STATE_PROCESSED) { - ; + cmd->cmd_flags |= BIT_12; } else if (cmd->state == QLA_TGT_STATE_NEED_DATA) { int rx_status = 0; @@ -3191,9 +3492,11 @@ skip_term: ha->tgt.tgt_ops->handle_data(cmd); return; } else if (cmd->state == QLA_TGT_STATE_ABORTED) { + cmd->cmd_flags |= BIT_18; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01e, "Aborted command %p (tag %lld) finished\n", cmd, se_cmd->tag); } else { + cmd->cmd_flags |= BIT_19; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05c, "qla_target(%d): A command in state (%d) should " "not return a CTIO complete\n", vha->vp_idx, cmd->state); @@ -3205,7 +3508,6 @@ skip_term: dump_stack(); } - ha->tgt.tgt_ops->free_cmd(cmd); } @@ -3263,6 +3565,13 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) if (tgt->tgt_stop) goto out_term; + if (cmd->state == QLA_TGT_STATE_ABORTED) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf082, + "cmd with tag %u is aborted\n", + cmd->atio.u.isp24.exchange_addr); + goto out_term; + } + cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; cmd->se_cmd.tag = atio->u.isp24.exchange_addr; cmd->unpacked_lun = scsilun_to_int( @@ -3316,6 +3625,12 @@ out_term: static void qlt_do_work(struct work_struct *work) { struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); + scsi_qla_host_t *vha = cmd->vha; + unsigned long flags; + + spin_lock_irqsave(&vha->cmd_list_lock, flags); + list_del(&cmd->cmd_list); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); __qlt_do_work(cmd); } @@ -3345,6 +3660,11 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha, cmd->loop_id = sess->loop_id; cmd->conf_compl_supported = sess->conf_compl_supported; + cmd->cmd_flags = 0; + cmd->jiffies_at_alloc = get_jiffies_64(); + + cmd->reset_count = vha->hw->chip_reset; + return cmd; } @@ -3362,14 +3682,25 @@ static void qlt_create_sess_from_atio(struct work_struct *work) unsigned long flags; uint8_t *s_id = op->atio.u.isp24.fcp_hdr.s_id; + spin_lock_irqsave(&vha->cmd_list_lock, flags); + list_del(&op->cmd_list); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); + + if (op->aborted) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf083, + "sess_op with tag %u is aborted\n", + op->atio.u.isp24.exchange_addr); + goto out_term; + } + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022, - "qla_target(%d): Unable to find wwn login" - " (s_id %x:%x:%x), trying to create it manually\n", - vha->vp_idx, s_id[0], s_id[1], s_id[2]); + "qla_target(%d): Unable to find wwn login" + " (s_id %x:%x:%x), trying to create it manually\n", + vha->vp_idx, s_id[0], s_id[1], s_id[2]); if (op->atio.u.raw.entry_count > 1) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023, - "Dropping multy entry atio %p\n", &op->atio); + "Dropping multy entry atio %p\n", &op->atio); goto out_term; } @@ -3434,10 +3765,25 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, memcpy(&op->atio, atio, sizeof(*atio)); op->vha = vha; + + spin_lock(&vha->cmd_list_lock); + list_add_tail(&op->cmd_list, &vha->qla_sess_op_cmd_list); + spin_unlock(&vha->cmd_list_lock); + INIT_WORK(&op->work, qlt_create_sess_from_atio); queue_work(qla_tgt_wq, &op->work); return 0; } + + /* Another WWN used to have our s_id. Our PLOGI scheduled its + * session deletion, but it's still in sess_del_work wq */ + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + ql_dbg(ql_dbg_io, vha, 0x3061, + "New command while old session %p is being deleted\n", + sess); + return -EFAULT; + } + /* * Do kref_get() before returning + dropping qla_hw_data->hardware_lock. */ @@ -3451,13 +3797,13 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, return -ENOMEM; } - cmd->cmd_flags = 0; - cmd->jiffies_at_alloc = get_jiffies_64(); - - cmd->reset_count = vha->hw->chip_reset; - cmd->cmd_in_wq = 1; cmd->cmd_flags |= BIT_0; + + spin_lock(&vha->cmd_list_lock); + list_add_tail(&cmd->cmd_list, &vha->qla_cmd_list); + spin_unlock(&vha->cmd_list_lock); + INIT_WORK(&cmd->work, qlt_do_work); queue_work(qla_tgt_wq, &cmd->work); return 0; @@ -3471,6 +3817,7 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, struct scsi_qla_host *vha = sess->vha; struct qla_hw_data *ha = vha->hw; struct qla_tgt_mgmt_cmd *mcmd; + struct atio_from_isp *a = (struct atio_from_isp *)iocb; int res; uint8_t tmr_func; @@ -3511,6 +3858,7 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, ql_dbg(ql_dbg_tgt_tmr, vha, 0x10002, "qla_target(%d): LUN_RESET received\n", sess->vha->vp_idx); tmr_func = TMR_LUN_RESET; + abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id); break; case QLA_TGT_CLEAR_TS: @@ -3599,6 +3947,9 @@ static int qlt_handle_task_mgmt(struct scsi_qla_host *vha, void *iocb) sizeof(struct atio_from_isp)); } + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) + return -EFAULT; + return qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0); } @@ -3664,22 +4015,280 @@ static int qlt_abort_task(struct scsi_qla_host *vha, return __qlt_abort_task(vha, iocb, sess); } +void qlt_logo_completion_handler(fc_port_t *fcport, int rc) +{ + if (fcport->tgt_session) { + if (rc != MBS_COMMAND_COMPLETE) { + ql_dbg(ql_dbg_tgt_mgt, fcport->vha, 0xf093, + "%s: se_sess %p / sess %p from" + " port %8phC loop_id %#04x s_id %02x:%02x:%02x" + " LOGO failed: %#x\n", + __func__, + fcport->tgt_session->se_sess, + fcport->tgt_session, + fcport->port_name, fcport->loop_id, + fcport->d_id.b.domain, fcport->d_id.b.area, + fcport->d_id.b.al_pa, rc); + } + + fcport->tgt_session->logout_completed = 1; + } +} + +static void qlt_swap_imm_ntfy_iocb(struct imm_ntfy_from_isp *a, + struct imm_ntfy_from_isp *b) +{ + struct imm_ntfy_from_isp tmp; + memcpy(&tmp, a, sizeof(struct imm_ntfy_from_isp)); + memcpy(a, b, sizeof(struct imm_ntfy_from_isp)); + memcpy(b, &tmp, sizeof(struct imm_ntfy_from_isp)); +} + +/* +* ha->hardware_lock supposed to be held on entry (to protect tgt->sess_list) +* +* Schedules sessions with matching port_id/loop_id but different wwn for +* deletion. Returns existing session with matching wwn if present. +* Null otherwise. +*/ +static struct qla_tgt_sess * +qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, + port_id_t port_id, uint16_t loop_id) +{ + struct qla_tgt_sess *sess = NULL, *other_sess; + uint64_t other_wwn; + + list_for_each_entry(other_sess, &tgt->sess_list, sess_list_entry) { + + other_wwn = wwn_to_u64(other_sess->port_name); + + if (wwn == other_wwn) { + WARN_ON(sess); + sess = other_sess; + continue; + } + + /* find other sess with nport_id collision */ + if (port_id.b24 == other_sess->s_id.b24) { + if (loop_id != other_sess->loop_id) { + ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000c, + "Invalidating sess %p loop_id %d wwn %llx.\n", + other_sess, other_sess->loop_id, other_wwn); + + /* + * logout_on_delete is set by default, but another + * session that has the same s_id/loop_id combo + * might have cleared it when requested this session + * deletion, so don't touch it + */ + qlt_schedule_sess_for_deletion(other_sess, true); + } else { + /* + * Another wwn used to have our s_id/loop_id + * combo - kill the session, but don't log out + */ + sess->logout_on_delete = 0; + qlt_schedule_sess_for_deletion(other_sess, + true); + } + continue; + } + + /* find other sess with nport handle collision */ + if (loop_id == other_sess->loop_id) { + ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000d, + "Invalidating sess %p loop_id %d wwn %llx.\n", + other_sess, other_sess->loop_id, other_wwn); + + /* Same loop_id but different s_id + * Ok to kill and logout */ + qlt_schedule_sess_for_deletion(other_sess, true); + } + } + + return sess; +} + +/* Abort any commands for this s_id waiting on qla_tgt_wq workqueue */ +static int abort_cmds_for_s_id(struct scsi_qla_host *vha, port_id_t *s_id) +{ + struct qla_tgt_sess_op *op; + struct qla_tgt_cmd *cmd; + uint32_t key; + int count = 0; + + key = (((u32)s_id->b.domain << 16) | + ((u32)s_id->b.area << 8) | + ((u32)s_id->b.al_pa)); + + spin_lock(&vha->cmd_list_lock); + list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) { + uint32_t op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + if (op_key == key) { + op->aborted = true; + count++; + } + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { + uint32_t cmd_key = sid_to_key(cmd->atio.u.isp24.fcp_hdr.s_id); + if (cmd_key == key) { + cmd->state = QLA_TGT_STATE_ABORTED; + count++; + } + } + spin_unlock(&vha->cmd_list_lock); + + return count; +} + /* * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire */ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, struct imm_ntfy_from_isp *iocb) { + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + struct qla_hw_data *ha = vha->hw; + struct qla_tgt_sess *sess = NULL; + uint64_t wwn; + port_id_t port_id; + uint16_t loop_id; + uint16_t wd3_lo; int res = 0; + wwn = wwn_to_u64(iocb->u.isp24.port_name); + + port_id.b.domain = iocb->u.isp24.port_id[2]; + port_id.b.area = iocb->u.isp24.port_id[1]; + port_id.b.al_pa = iocb->u.isp24.port_id[0]; + port_id.b.rsvd_1 = 0; + + loop_id = le16_to_cpu(iocb->u.isp24.nport_handle); + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf026, "qla_target(%d): Port ID: 0x%3phC ELS opcode: 0x%02x\n", vha->vp_idx, iocb->u.isp24.port_id, iocb->u.isp24.status_subcode); + /* res = 1 means ack at the end of thread + * res = 0 means ack async/later. + */ switch (iocb->u.isp24.status_subcode) { case ELS_PLOGI: - case ELS_FLOGI: + + /* Mark all stale commands in qla_tgt_wq for deletion */ + abort_cmds_for_s_id(vha, &port_id); + + if (wwn) + sess = qlt_find_sess_invalidate_other(tgt, wwn, + port_id, loop_id); + + if (!sess || IS_SW_RESV_ADDR(sess->s_id)) { + res = 1; + break; + } + + if (sess->plogi_ack_needed) { + /* + * Initiator sent another PLOGI before last PLOGI could + * finish. Swap plogi iocbs and terminate old one + * without acking, new one will get acked when session + * deletion completes. + */ + ql_log(ql_log_warn, sess->vha, 0xf094, + "sess %p received double plogi.\n", sess); + + qlt_swap_imm_ntfy_iocb(iocb, &sess->tm_iocb); + + qlt_send_term_imm_notif(vha, iocb, 1); + + res = 0; + break; + } + + res = 0; + + /* + * Save immediate Notif IOCB for Ack when sess is done + * and being deleted. + */ + memcpy(&sess->tm_iocb, iocb, sizeof(sess->tm_iocb)); + sess->plogi_ack_needed = 1; + + /* + * Under normal circumstances we want to release nport handle + * during LOGO process to avoid nport handle leaks inside FW. + * The exception is when LOGO is done while another PLOGI with + * the same nport handle is waiting as might be the case here. + * Note: there is always a possibily of a race where session + * deletion has already started for other reasons (e.g. ACL + * removal) and now PLOGI arrives: + * 1. if PLOGI arrived in FW after nport handle has been freed, + * FW must have assigned this PLOGI a new/same handle and we + * can proceed ACK'ing it as usual when session deletion + * completes. + * 2. if PLOGI arrived in FW before LOGO with LCF_FREE_NPORT + * bit reached it, the handle has now been released. We'll + * get an error when we ACK this PLOGI. Nothing will be sent + * back to initiator. Initiator should eventually retry + * PLOGI and situation will correct itself. + */ + sess->keep_nport_handle = ((sess->loop_id == loop_id) && + (sess->s_id.b24 == port_id.b24)); + qlt_schedule_sess_for_deletion(sess, true); + break; + case ELS_PRLI: + wd3_lo = le16_to_cpu(iocb->u.isp24.u.prli.wd3_lo); + + if (wwn) + sess = qlt_find_sess_invalidate_other(tgt, wwn, port_id, + loop_id); + + if (sess != NULL) { + if (sess->deleted) { + /* + * Impatient initiator sent PRLI before last + * PLOGI could finish. Will force him to re-try, + * while last one finishes. + */ + ql_log(ql_log_warn, sess->vha, 0xf095, + "sess %p PRLI received, before plogi ack.\n", + sess); + qlt_send_term_imm_notif(vha, iocb, 1); + res = 0; + break; + } + + /* + * This shouldn't happen under normal circumstances, + * since we have deleted the old session during PLOGI + */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf096, + "PRLI (loop_id %#04x) for existing sess %p (loop_id %#04x)\n", + sess->loop_id, sess, iocb->u.isp24.nport_handle); + + sess->local = 0; + sess->loop_id = loop_id; + sess->s_id = port_id; + + if (wd3_lo & BIT_7) + sess->conf_compl_supported = 1; + + } + res = 1; /* send notify ack */ + + /* Make session global (not used in fabric mode) */ + if (ha->current_topology != ISP_CFG_F) { + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } else { + /* todo: else - create sess here. */ + res = 1; /* send notify ack */ + } + + break; + case ELS_LOGO: case ELS_PRLO: res = qlt_reset(vha, iocb, QLA_TGT_NEXUS_LOSS_SESS); @@ -3697,6 +4306,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, break; } + case ELS_FLOGI: /* should never happen */ default: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf061, "qla_target(%d): Unsupported ELS command %x " @@ -5012,6 +5622,11 @@ static void qlt_abort_work(struct qla_tgt *tgt, if (!sess) goto out_term; } else { + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + sess = NULL; + goto out_term; + } + kref_get(&sess->se_sess->sess_kref); } @@ -5066,6 +5681,11 @@ static void qlt_tmr_work(struct qla_tgt *tgt, if (!sess) goto out_term; } else { + if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + sess = NULL; + goto out_term; + } + kref_get(&sess->se_sess->sess_kref); } @@ -5552,6 +6172,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha) /* Adjust ring index */ WRT_REG_DWORD(ISP_ATIO_Q_OUT(vha), ha->tgt.atio_ring_index); + RD_REG_DWORD_RELAXED(ISP_ATIO_Q_OUT(vha)); } void @@ -5793,7 +6414,7 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) if (!QLA_TGT_MODE_ENABLED()) return; - if (ha->mqenable || IS_QLA83XX(ha)) { + if (ha->mqenable || IS_QLA83XX(ha) || IS_QLA27XX(ha)) { ISP_ATIO_Q_IN(base_vha) = &ha->mqiobase->isp25mq.atio_q_in; ISP_ATIO_Q_OUT(base_vha) = &ha->mqiobase->isp25mq.atio_q_out; } else { diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 985d76dd706b..bca584ae45b7 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -167,7 +167,24 @@ struct imm_ntfy_from_isp { uint32_t srr_rel_offs; uint16_t srr_ui; uint16_t srr_ox_id; - uint8_t reserved_4[19]; + union { + struct { + uint8_t node_name[8]; + } plogi; /* PLOGI/ADISC/PDISC */ + struct { + /* PRLI word 3 bit 0-15 */ + uint16_t wd3_lo; + uint8_t resv0[6]; + } prli; + struct { + uint8_t port_id[3]; + uint8_t resv1; + uint16_t nport_handle; + uint16_t resv2; + } req_els; + } u; + uint8_t port_name[8]; + uint8_t resv3[3]; uint8_t vp_index; uint32_t reserved_5; uint8_t port_id[3]; @@ -234,6 +251,7 @@ struct nack_to_isp { uint8_t reserved[2]; uint16_t ox_id; } __packed; +#define NOTIFY_ACK_FLAGS_TERMINATE BIT_3 #define NOTIFY_ACK_SRR_FLAGS_ACCEPT 0 #define NOTIFY_ACK_SRR_FLAGS_REJECT 1 @@ -790,13 +808,6 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *); #define FC_TM_REJECT 4 #define FC_TM_FAILED 5 -/* - * Error code of qlt_pre_xmit_response() meaning that cmd's exchange was - * terminated, so no more actions is needed and success should be returned - * to target. - */ -#define QLA_TGT_PRE_XMIT_RESP_CMD_ABORTED 0x1717 - #if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G) #define pci_dma_lo32(a) (a & 0xffffffff) #define pci_dma_hi32(a) ((((a) >> 16)>>16) & 0xffffffff) @@ -874,6 +885,15 @@ struct qla_tgt_sess_op { struct scsi_qla_host *vha; struct atio_from_isp atio; struct work_struct work; + struct list_head cmd_list; + bool aborted; +}; + +enum qla_sess_deletion { + QLA_SESS_DELETION_NONE = 0, + QLA_SESS_DELETION_PENDING = 1, /* hopefully we can get rid of + * this one */ + QLA_SESS_DELETION_IN_PROGRESS = 2, }; /* @@ -884,8 +904,15 @@ struct qla_tgt_sess { port_id_t s_id; unsigned int conf_compl_supported:1; - unsigned int deleted:1; + unsigned int deleted:2; unsigned int local:1; + unsigned int logout_on_delete:1; + unsigned int plogi_ack_needed:1; + unsigned int keep_nport_handle:1; + + unsigned char logout_completed; + + int generation; struct se_session *se_sess; struct scsi_qla_host *vha; @@ -897,6 +924,10 @@ struct qla_tgt_sess { uint8_t port_name[WWN_SIZE]; struct work_struct free_work; + + union { + struct imm_ntfy_from_isp tm_iocb; + }; }; struct qla_tgt_cmd { @@ -912,7 +943,6 @@ struct qla_tgt_cmd { unsigned int conf_compl_supported:1; unsigned int sg_mapped:1; unsigned int free_sg:1; - unsigned int aborted:1; /* Needed in case of SRR */ unsigned int write_data_transferred:1; unsigned int ctx_dsd_alloced:1; unsigned int q_full:1; @@ -961,6 +991,9 @@ struct qla_tgt_cmd { * BIT_14 - Back end data received/sent. * BIT_15 - SRR prepare ctio * BIT_16 - complete free + * BIT_17 - flush - qlt_abort_cmd_on_host_reset + * BIT_18 - completion w/abort status + * BIT_19 - completion w/unknown status */ uint32_t cmd_flags; }; @@ -1026,6 +1059,10 @@ struct qla_tgt_srr_ctio { struct qla_tgt_cmd *cmd; }; +/* Check for Switch reserved address */ +#define IS_SW_RESV_ADDR(_s_id) \ + ((_s_id.b.domain == 0xff) && (_s_id.b.area == 0xfc)) + #define QLA_TGT_XMIT_DATA 1 #define QLA_TGT_XMIT_STATUS 2 #define QLA_TGT_XMIT_ALL (QLA_TGT_XMIT_STATUS|QLA_TGT_XMIT_DATA) @@ -1043,7 +1080,7 @@ extern int qlt_lport_register(void *, u64, u64, u64, extern void qlt_lport_deregister(struct scsi_qla_host *); extern void qlt_unreg_sess(struct qla_tgt_sess *); extern void qlt_fc_port_added(struct scsi_qla_host *, fc_port_t *); -extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *); +extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *, int); extern int __init qlt_init(void); extern void qlt_exit(void); extern void qlt_update_vp_map(struct scsi_qla_host *, int); @@ -1073,12 +1110,23 @@ static inline void qla_reverse_ini_mode(struct scsi_qla_host *ha) ha->host->active_mode |= MODE_INITIATOR; } +static inline uint32_t sid_to_key(const uint8_t *s_id) +{ + uint32_t key; + + key = (((unsigned long)s_id[0] << 16) | + ((unsigned long)s_id[1] << 8) | + (unsigned long)s_id[2]); + return key; +} + /* * Exported symbols from qla_target.c LLD logic used by qla2xxx code.. */ extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, response_t *); extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *); extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t); +extern void qlt_abort_cmd(struct qla_tgt_cmd *); extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *); extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *); extern void qlt_free_cmd(struct qla_tgt_cmd *cmd); @@ -1109,5 +1157,7 @@ extern void qlt_stop_phase2(struct qla_tgt *); extern irqreturn_t qla83xx_msix_atio_q(int, void *); extern void qlt_83xx_iospace_config(struct qla_hw_data *); extern int qlt_free_qfull_cmds(struct scsi_qla_host *); +extern void qlt_logo_completion_handler(fc_port_t *, int); +extern void qlt_do_generation_tick(struct scsi_qla_host *, int *); #endif /* __QLA_TARGET_H */ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index d9a8c6084346..9224a06646e6 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -374,7 +374,7 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - + cmd->cmd_flags |= BIT_3; cmd->bufflen = se_cmd->data_length; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); @@ -405,7 +405,7 @@ static int tcm_qla2xxx_write_pending_status(struct se_cmd *se_cmd) se_cmd->t_state == TRANSPORT_COMPLETE_QF_WP) { spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); wait_for_completion_timeout(&se_cmd->t_transport_stop_comp, - 3000); + 3 * HZ); return 0; } spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); @@ -541,12 +541,10 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) cmd->cmd_flags |= BIT_4; cmd->bufflen = se_cmd->data_length; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); - cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); cmd->sg_cnt = se_cmd->t_data_nents; cmd->sg = se_cmd->t_data_sg; cmd->offset = 0; - cmd->cmd_flags |= BIT_3; cmd->prot_sg_cnt = se_cmd->t_prot_nents; cmd->prot_sg = se_cmd->t_prot_sg; @@ -571,7 +569,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd->sg_cnt = 0; cmd->offset = 0; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); - cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); if (cmd->cmd_flags & BIT_5) { pr_crit("Bit_5 already set for cmd = %p.\n", cmd); dump_stack(); @@ -636,14 +633,7 @@ static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - struct scsi_qla_host *vha = cmd->vha; - struct qla_hw_data *ha = vha->hw; - - if (!cmd->sg_mapped) - return; - - pci_unmap_sg(ha->pdev, cmd->sg, cmd->sg_cnt, cmd->dma_data_direction); - cmd->sg_mapped = 0; + qlt_abort_cmd(cmd); } static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *, @@ -1149,9 +1139,7 @@ static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_s_id( return NULL; } - key = (((unsigned long)s_id[0] << 16) | - ((unsigned long)s_id[1] << 8) | - (unsigned long)s_id[2]); + key = sid_to_key(s_id); pr_debug("find_sess_by_s_id: 0x%06x\n", key); se_nacl = btree_lookup32(&lport->lport_fcport_map, key); @@ -1186,9 +1174,7 @@ static void tcm_qla2xxx_set_sess_by_s_id( void *slot; int rc; - key = (((unsigned long)s_id[0] << 16) | - ((unsigned long)s_id[1] << 8) | - (unsigned long)s_id[2]); + key = sid_to_key(s_id); pr_debug("set_sess_by_s_id: %06x\n", key); slot = btree_lookup32(&lport->lport_fcport_map, key); @@ -1544,6 +1530,10 @@ static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, } sess->conf_compl_supported = conf_compl_supported; + + /* Reset logout parameters to default */ + sess->logout_on_delete = 1; + sess->keep_nport_handle = 0; } /* diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 106884a5444e..cfadccef045c 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -944,7 +944,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, scmd->sdb.length); scmd->sdb.table.sgl = &ses->sense_sgl; scmd->sc_data_direction = DMA_FROM_DEVICE; - scmd->sdb.table.nents = 1; + scmd->sdb.table.nents = scmd->sdb.table.orig_nents = 1; scmd->cmnd[0] = REQUEST_SENSE; scmd->cmnd[4] = scmd->sdb.length; scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b1a263137a23..448ebdaa3d69 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -583,7 +583,7 @@ static struct scatterlist *scsi_sg_alloc(unsigned int nents, gfp_t gfp_mask) static void scsi_free_sgtable(struct scsi_data_buffer *sdb, bool mq) { - if (mq && sdb->table.nents <= SCSI_MAX_SG_SEGMENTS) + if (mq && sdb->table.orig_nents <= SCSI_MAX_SG_SEGMENTS) return; __sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, mq, scsi_sg_free); } @@ -597,8 +597,8 @@ static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents, bool mq) if (mq) { if (nents <= SCSI_MAX_SG_SEGMENTS) { - sdb->table.nents = nents; - sg_init_table(sdb->table.sgl, sdb->table.nents); + sdb->table.nents = sdb->table.orig_nents = nents; + sg_init_table(sdb->table.sgl, nents); return 0; } first_chunk = sdb->table.sgl; diff --git a/drivers/staging/media/lirc/lirc_serial.c b/drivers/staging/media/lirc/lirc_serial.c index dc7984455c3a..465796a686c4 100644 --- a/drivers/staging/media/lirc/lirc_serial.c +++ b/drivers/staging/media/lirc/lirc_serial.c @@ -327,9 +327,6 @@ static void safe_udelay(unsigned long usecs) * time */ -/* So send_pulse can quickly convert microseconds to clocks */ -static unsigned long conv_us_to_clocks; - static int init_timing_params(unsigned int new_duty_cycle, unsigned int new_freq) { @@ -344,7 +341,6 @@ static int init_timing_params(unsigned int new_duty_cycle, /* How many clocks in a microsecond?, avoiding long long divide */ work = loops_per_sec; work *= 4295; /* 4295 = 2^32 / 1e6 */ - conv_us_to_clocks = work >> 32; /* * Carrier period in clocks, approach good up to 32GHz clock, @@ -357,10 +353,9 @@ static int init_timing_params(unsigned int new_duty_cycle, pulse_width = period * duty_cycle / 100; space_width = period - pulse_width; dprintk("in init_timing_params, freq=%d, duty_cycle=%d, " - "clk/jiffy=%ld, pulse=%ld, space=%ld, " - "conv_us_to_clocks=%ld\n", + "clk/jiffy=%ld, pulse=%ld, space=%ld\n", freq, duty_cycle, __this_cpu_read(cpu_info.loops_per_jiffy), - pulse_width, space_width, conv_us_to_clocks); + pulse_width, space_width); return 0; } #else /* ! USE_RDTSC */ @@ -431,63 +426,14 @@ static long send_pulse_irdeo(unsigned long length) return ret; } -#ifdef USE_RDTSC -/* Version that uses Pentium rdtsc instruction to measure clocks */ - -/* - * This version does sub-microsecond timing using rdtsc instruction, - * and does away with the fudged LIRC_SERIAL_TRANSMITTER_LATENCY - * Implicitly i586 architecture... - Steve - */ - -static long send_pulse_homebrew_softcarrier(unsigned long length) -{ - int flag; - unsigned long target, start, now; - - /* Get going quick as we can */ - rdtscl(start); - on(); - /* Convert length from microseconds to clocks */ - length *= conv_us_to_clocks; - /* And loop till time is up - flipping at right intervals */ - now = start; - target = pulse_width; - flag = 1; - /* - * FIXME: This looks like a hard busy wait, without even an occasional, - * polite, cpu_relax() call. There's got to be a better way? - * - * The i2c code has the result of a lot of bit-banging work, I wonder if - * there's something there which could be helpful here. - */ - while ((now - start) < length) { - /* Delay till flip time */ - do { - rdtscl(now); - } while ((now - start) < target); - - /* flip */ - if (flag) { - rdtscl(now); - off(); - target += space_width; - } else { - rdtscl(now); on(); - target += pulse_width; - } - flag = !flag; - } - rdtscl(now); - return ((now - start) - length) / conv_us_to_clocks; -} -#else /* ! USE_RDTSC */ /* Version using udelay() */ /* * here we use fixed point arithmetic, with 8 * fractional bits. that gets us within 0.1% or so of the right average * frequency, albeit with some jitter in pulse length - Steve + * + * This should use ndelay instead. */ /* To match 8 fractional bits used for pulse/space length */ @@ -520,7 +466,6 @@ static long send_pulse_homebrew_softcarrier(unsigned long length) } return (actual-length) >> 8; } -#endif /* USE_RDTSC */ static long send_pulse_homebrew(unsigned long length) { diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 4e68b62193ed..cd77a064c772 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3998,7 +3998,13 @@ get_immediate: } transport_err: - iscsit_take_action_for_connection_exit(conn); + /* + * Avoid the normal connection failure code-path if this connection + * is still within LOGIN mode, and iscsi_np process context is + * responsible for cleaning up the early connection failure. + */ + if (conn->conn_state != TARG_CONN_STATE_IN_LOGIN) + iscsit_take_action_for_connection_exit(conn); out: return 0; } @@ -4082,7 +4088,7 @@ reject: int iscsi_target_rx_thread(void *arg) { - int ret; + int ret, rc; u8 buffer[ISCSI_HDR_LEN], opcode; u32 checksum = 0, digest = 0; struct iscsi_conn *conn = arg; @@ -4092,10 +4098,16 @@ int iscsi_target_rx_thread(void *arg) * connection recovery / failure event can be triggered externally. */ allow_signal(SIGINT); + /* + * Wait for iscsi_post_login_handler() to complete before allowing + * incoming iscsi/tcp socket I/O, and/or failing the connection. + */ + rc = wait_for_completion_interruptible(&conn->rx_login_comp); + if (rc < 0) + return 0; if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) { struct completion comp; - int rc; init_completion(&comp); rc = wait_for_completion_interruptible(&comp); @@ -4532,7 +4544,18 @@ static void iscsit_logout_post_handler_closesession( struct iscsi_conn *conn) { struct iscsi_session *sess = conn->sess; - int sleep = cmpxchg(&conn->tx_thread_active, true, false); + int sleep = 1; + /* + * Traditional iscsi/tcp will invoke this logic from TX thread + * context during session logout, so clear tx_thread_active and + * sleep if iscsit_close_connection() has not already occured. + * + * Since iser-target invokes this logic from it's own workqueue, + * always sleep waiting for RX/TX thread shutdown to complete + * within iscsit_close_connection(). + */ + if (conn->conn_transport->transport_type == ISCSI_TCP) + sleep = cmpxchg(&conn->tx_thread_active, true, false); atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); @@ -4546,7 +4569,10 @@ static void iscsit_logout_post_handler_closesession( static void iscsit_logout_post_handler_samecid( struct iscsi_conn *conn) { - int sleep = cmpxchg(&conn->tx_thread_active, true, false); + int sleep = 1; + + if (conn->conn_transport->transport_type == ISCSI_TCP) + sleep = cmpxchg(&conn->tx_thread_active, true, false); atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); @@ -4765,6 +4791,7 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) struct iscsi_session *sess; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; struct se_session *se_sess, *se_sess_tmp; + LIST_HEAD(free_list); int session_count = 0; spin_lock_bh(&se_tpg->session_lock); @@ -4786,14 +4813,17 @@ int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) } atomic_set(&sess->session_reinstatement, 1); spin_unlock(&sess->conn_lock); - spin_unlock_bh(&se_tpg->session_lock); - iscsit_free_session(sess); - spin_lock_bh(&se_tpg->session_lock); + list_move_tail(&se_sess->sess_list, &free_list); + } + spin_unlock_bh(&se_tpg->session_lock); + + list_for_each_entry_safe(se_sess, se_sess_tmp, &free_list, sess_list) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + iscsit_free_session(sess); session_count++; } - spin_unlock_bh(&se_tpg->session_lock); pr_debug("Released %d iSCSI Session(s) from Target Portal" " Group: %hu\n", session_count, tpg->tpgt); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 3d0fe4ff5590..7e8f65e5448f 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -82,6 +82,7 @@ static struct iscsi_login *iscsi_login_init_conn(struct iscsi_conn *conn) init_completion(&conn->conn_logout_comp); init_completion(&conn->rx_half_close_comp); init_completion(&conn->tx_half_close_comp); + init_completion(&conn->rx_login_comp); spin_lock_init(&conn->cmd_lock); spin_lock_init(&conn->conn_usage_lock); spin_lock_init(&conn->immed_queue_lock); @@ -644,7 +645,7 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn) iscsit_start_nopin_timer(conn); } -static int iscsit_start_kthreads(struct iscsi_conn *conn) +int iscsit_start_kthreads(struct iscsi_conn *conn) { int ret = 0; @@ -679,6 +680,7 @@ static int iscsit_start_kthreads(struct iscsi_conn *conn) return 0; out_tx: + send_sig(SIGINT, conn->tx_thread, 1); kthread_stop(conn->tx_thread); conn->tx_thread_active = false; out_bitmap: @@ -689,7 +691,7 @@ out_bitmap: return ret; } -int iscsi_post_login_handler( +void iscsi_post_login_handler( struct iscsi_np *np, struct iscsi_conn *conn, u8 zero_tsih) @@ -699,7 +701,6 @@ int iscsi_post_login_handler( struct se_session *se_sess = sess->se_sess; struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; - int rc; iscsit_inc_conn_usage_count(conn); @@ -739,10 +740,6 @@ int iscsi_post_login_handler( sess->sess_ops->InitiatorName); spin_unlock_bh(&sess->conn_lock); - rc = iscsit_start_kthreads(conn); - if (rc) - return rc; - iscsi_post_login_start_timers(conn); /* * Determine CPU mask to ensure connection's RX and TX kthreads @@ -751,15 +748,20 @@ int iscsi_post_login_handler( iscsit_thread_get_cpumask(conn); conn->conn_rx_reset_cpumask = 1; conn->conn_tx_reset_cpumask = 1; - + /* + * Wakeup the sleeping iscsi_target_rx_thread() now that + * iscsi_conn is in TARG_CONN_STATE_LOGGED_IN state. + */ + complete(&conn->rx_login_comp); iscsit_dec_conn_usage_count(conn); + if (stop_timer) { spin_lock_bh(&se_tpg->session_lock); iscsit_stop_time2retain_timer(sess); spin_unlock_bh(&se_tpg->session_lock); } iscsit_dec_session_usage_count(sess); - return 0; + return; } iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1); @@ -800,10 +802,6 @@ int iscsi_post_login_handler( " iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt); spin_unlock_bh(&se_tpg->session_lock); - rc = iscsit_start_kthreads(conn); - if (rc) - return rc; - iscsi_post_login_start_timers(conn); /* * Determine CPU mask to ensure connection's RX and TX kthreads @@ -812,10 +810,12 @@ int iscsi_post_login_handler( iscsit_thread_get_cpumask(conn); conn->conn_rx_reset_cpumask = 1; conn->conn_tx_reset_cpumask = 1; - + /* + * Wakeup the sleeping iscsi_target_rx_thread() now that + * iscsi_conn is in TARG_CONN_STATE_LOGGED_IN state. + */ + complete(&conn->rx_login_comp); iscsit_dec_conn_usage_count(conn); - - return 0; } static void iscsi_handle_login_thread_timeout(unsigned long data) @@ -1380,23 +1380,12 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) if (ret < 0) goto new_sess_out; - if (!conn->sess) { - pr_err("struct iscsi_conn session pointer is NULL!\n"); - goto new_sess_out; - } - iscsi_stop_login_thread_timer(np); - if (signal_pending(current)) - goto new_sess_out; - if (ret == 1) { tpg_np = conn->tpg_np; - ret = iscsi_post_login_handler(np, conn, zero_tsih); - if (ret < 0) - goto new_sess_out; - + iscsi_post_login_handler(np, conn, zero_tsih); iscsit_deaccess_np(np, tpg, tpg_np); } diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 1c7358081533..57aa0d0fd820 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -12,7 +12,8 @@ extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *); extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *); extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); extern void iscsit_free_conn(struct iscsi_np *, struct iscsi_conn *); -extern int iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); +extern int iscsit_start_kthreads(struct iscsi_conn *); +extern void iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, bool, bool); extern int iscsi_target_login_thread(void *); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 8c02fa34716f..f9cde9141836 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -17,6 +17,7 @@ ******************************************************************************/ #include <linux/ctype.h> +#include <linux/kthread.h> #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> @@ -361,10 +362,24 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log ntohl(login_rsp->statsn), login->rsp_length); padding = ((-login->rsp_length) & 3); + /* + * Before sending the last login response containing the transition + * bit for full-feature-phase, go ahead and start up TX/RX threads + * now to avoid potential resource allocation failures after the + * final login response has been sent. + */ + if (login->login_complete) { + int rc = iscsit_start_kthreads(conn); + if (rc) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + } if (conn->conn_transport->iscsit_put_login_tx(conn, login, login->rsp_length + padding) < 0) - return -1; + goto err; login->rsp_length = 0; mutex_lock(&sess->cmdsn_mutex); @@ -373,6 +388,23 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log mutex_unlock(&sess->cmdsn_mutex); return 0; + +err: + if (login->login_complete) { + if (conn->rx_thread && conn->rx_thread_active) { + send_sig(SIGINT, conn->rx_thread, 1); + kthread_stop(conn->rx_thread); + } + if (conn->tx_thread && conn->tx_thread_active) { + send_sig(SIGINT, conn->tx_thread, 1); + kthread_stop(conn->tx_thread); + } + spin_lock(&iscsit_global->ts_bitmap_lock); + bitmap_release_region(iscsit_global->ts_bitmap, conn->bitmap_id, + get_order(1)); + spin_unlock(&iscsit_global->ts_bitmap_lock); + } + return -1; } static void iscsi_target_sk_data_ready(struct sock *sk) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 0b0de3647478..c2e9fea90b4a 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -747,7 +747,7 @@ static ssize_t store_pi_prot_type(struct se_dev_attrib *da, if (!dev->transport->init_prot || !dev->transport->free_prot) { /* 0 is only allowed value for non-supporting backends */ if (flag == 0) - return 0; + return count; pr_err("DIF protection not supported by backend: %s\n", dev->transport->name); @@ -1590,9 +1590,9 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( u8 type = 0; if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) - return 0; + return count; if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) - return 0; + return count; if (dev->export_count) { pr_debug("Unable to process APTPL metadata while" @@ -1658,22 +1658,32 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( * PR APTPL Metadata for Reservation */ case Opt_res_holder: - match_int(args, &arg); + ret = match_int(args, &arg); + if (ret) + goto out; res_holder = arg; break; case Opt_res_type: - match_int(args, &arg); + ret = match_int(args, &arg); + if (ret) + goto out; type = (u8)arg; break; case Opt_res_scope: - match_int(args, &arg); + ret = match_int(args, &arg); + if (ret) + goto out; break; case Opt_res_all_tg_pt: - match_int(args, &arg); + ret = match_int(args, &arg); + if (ret) + goto out; all_tg_pt = (int)arg; break; case Opt_mapped_lun: - match_int(args, &arg); + ret = match_int(args, &arg); + if (ret) + goto out; mapped_lun = (u64)arg; break; /* @@ -1701,14 +1711,20 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( } break; case Opt_tpgt: - match_int(args, &arg); + ret = match_int(args, &arg); + if (ret) + goto out; tpgt = (u16)arg; break; case Opt_port_rtpi: - match_int(args, &arg); + ret = match_int(args, &arg); + if (ret) + goto out; break; case Opt_target_lun: - match_int(args, &arg); + ret = match_int(args, &arg); + if (ret) + goto out; target_lun = (u64)arg; break; default: @@ -1985,7 +2001,7 @@ static ssize_t target_core_store_alua_lu_gp( lu_gp_mem = dev->dev_alua_lu_gp_mem; if (!lu_gp_mem) - return 0; + return count; if (count > LU_GROUP_NAME_BUF) { pr_err("ALUA LU Group Alias too large!\n"); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 0fdbe43b7dad..5ab7100de17e 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1474,7 +1474,7 @@ core_scsi3_decode_spec_i_port( LIST_HEAD(tid_dest_list); struct pr_transport_id_holder *tidh_new, *tidh, *tidh_tmp; unsigned char *buf, *ptr, proto_ident; - const unsigned char *i_str; + const unsigned char *i_str = NULL; char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; sense_reason_t ret; u32 tpdl, tid_len = 0; diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 4703f403f31c..384cf8894411 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -333,6 +333,7 @@ static int rd_configure_device(struct se_device *dev) dev->dev_attrib.hw_block_size = RD_BLOCKSIZE; dev->dev_attrib.hw_max_sectors = UINT_MAX; dev->dev_attrib.hw_queue_depth = RD_MAX_DEVICE_QUEUE_DEPTH; + dev->dev_attrib.is_nonrot = 1; rd_dev->rd_dev_id = rd_host->rd_host_dev_id_count++; diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index b0744433315a..b5ba1ec3c354 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -454,10 +454,17 @@ spc_emulate_evpd_86(struct se_cmd *cmd, unsigned char *buf) cmd->se_sess->sess_prot_type == TARGET_DIF_TYPE1_PROT) buf[4] = 0x5; else if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE3_PROT || - cmd->se_sess->sess_prot_type == TARGET_DIF_TYPE3_PROT) + cmd->se_sess->sess_prot_type == TARGET_DIF_TYPE3_PROT) buf[4] = 0x4; } + /* logical unit supports type 1 and type 3 protection */ + if ((dev->transport->get_device_type(dev) == TYPE_DISK) && + (sess->sup_prot_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS)) && + (dev->dev_attrib.pi_prot_type || cmd->se_sess->sess_prot_type)) { + buf[4] |= (0x3 << 3); + } + /* Set HEADSUP, ORDSUP, SIMPSUP */ buf[5] = 0x07; diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 5820e8513927..2ac0c704bcb8 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -340,7 +340,7 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio, /* check result for the last window */ msr_now = pkg_state_counter(); - rdtscll(tsc_now); + tsc_now = rdtsc(); /* calculate pkg cstate vs tsc ratio */ if (!msr_last || !tsc_last) @@ -482,7 +482,7 @@ static void poll_pkg_cstate(struct work_struct *dummy) u64 val64; msr_now = pkg_state_counter(); - rdtscll(tsc_now); + tsc_now = rdtsc(); jiffies_now = jiffies; /* calculate pkg cstate vs tsc ratio */ diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 2fb29dfeffbd..563c510f285c 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -689,6 +689,23 @@ struct vfio_device *vfio_device_get_from_dev(struct device *dev) } EXPORT_SYMBOL_GPL(vfio_device_get_from_dev); +static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, + char *buf) +{ + struct vfio_device *device; + + mutex_lock(&group->device_lock); + list_for_each_entry(device, &group->device_list, group_next) { + if (!strcmp(dev_name(device->dev), buf)) { + vfio_device_get(device); + break; + } + } + mutex_unlock(&group->device_lock); + + return device; +} + /* * Caller must hold a reference to the vfio_device */ @@ -1198,53 +1215,53 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) { struct vfio_device *device; struct file *filep; - int ret = -ENODEV; + int ret; if (0 == atomic_read(&group->container_users) || !group->container->iommu_driver || !vfio_group_viable(group)) return -EINVAL; - mutex_lock(&group->device_lock); - list_for_each_entry(device, &group->device_list, group_next) { - if (strcmp(dev_name(device->dev), buf)) - continue; + device = vfio_device_get_from_name(group, buf); + if (!device) + return -ENODEV; - ret = device->ops->open(device->device_data); - if (ret) - break; - /* - * We can't use anon_inode_getfd() because we need to modify - * the f_mode flags directly to allow more than just ioctls - */ - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) { - device->ops->release(device->device_data); - break; - } + ret = device->ops->open(device->device_data); + if (ret) { + vfio_device_put(device); + return ret; + } - filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, - device, O_RDWR); - if (IS_ERR(filep)) { - put_unused_fd(ret); - ret = PTR_ERR(filep); - device->ops->release(device->device_data); - break; - } + /* + * We can't use anon_inode_getfd() because we need to modify + * the f_mode flags directly to allow more than just ioctls + */ + ret = get_unused_fd_flags(O_CLOEXEC); + if (ret < 0) { + device->ops->release(device->device_data); + vfio_device_put(device); + return ret; + } - /* - * TODO: add an anon_inode interface to do this. - * Appears to be missing by lack of need rather than - * explicitly prevented. Now there's need. - */ - filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); + filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, + device, O_RDWR); + if (IS_ERR(filep)) { + put_unused_fd(ret); + ret = PTR_ERR(filep); + device->ops->release(device->device_data); + vfio_device_put(device); + return ret; + } + + /* + * TODO: add an anon_inode interface to do this. + * Appears to be missing by lack of need rather than + * explicitly prevented. Now there's need. + */ + filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); - vfio_device_get(device); - atomic_inc(&group->container_users); + atomic_inc(&group->container_users); - fd_install(ret, filep); - break; - } - mutex_unlock(&group->device_lock); + fd_install(ret, filep); return ret; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a9fe859f43c8..eec2f11809ff 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -683,11 +683,8 @@ static void *vhost_kvzalloc(unsigned long size) { void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - if (!n) { + if (!n) n = vzalloc(size); - if (!n) - return ERR_PTR(-ENOMEM); - } return n; } @@ -995,6 +992,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) } if (eventfp != d->log_file) { filep = d->log_file; + d->log_file = eventfp; ctx = d->log_ctx; d->log_ctx = eventfp ? eventfd_ctx_fileget(eventfp) : NULL; diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 862fbc206755..564a7de17d99 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -378,7 +378,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ret = btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device); if (ret) - btrfs_error(root->fs_info, ret, "kobj add dev failed"); + btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret); printk_in_rcu(KERN_INFO "BTRFS: dev_replace from %s (devid %llu) to %s started\n", diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a9aadb2ad525..f556c3732c2c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2842,6 +2842,7 @@ int open_ctree(struct super_block *sb, !extent_buffer_uptodate(chunk_root->node)) { printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n", sb->s_id); + chunk_root->node = NULL; goto fail_tree_roots; } btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); @@ -2879,7 +2880,7 @@ retry_root_backup: !extent_buffer_uptodate(tree_root->node)) { printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", sb->s_id); - + tree_root->node = NULL; goto recovery_tree_root; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 171312d51799..07204bf601ed 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4227,6 +4227,24 @@ out: space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); mutex_unlock(&fs_info->chunk_mutex); + /* + * When we allocate a new chunk we reserve space in the chunk block + * reserve to make sure we can COW nodes/leafs in the chunk tree or + * add new nodes/leafs to it if we end up needing to do it when + * inserting the chunk item and updating device items as part of the + * second phase of chunk allocation, performed by + * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a + * large number of new block groups to create in our transaction + * handle's new_bgs list to avoid exhausting the chunk block reserve + * in extreme cases - like having a single transaction create many new + * block groups when starting to write out the free space caches of all + * the block groups that were made dirty during the lifetime of the + * transaction. + */ + if (trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) { + btrfs_create_pending_block_groups(trans, trans->root); + btrfs_trans_release_chunk_metadata(trans); + } return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 51e0f0d0053e..f5021fcb154e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2152,7 +2152,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); - if (current != root->fs_info->transaction_kthread) + if (current != root->fs_info->transaction_kthread && + current != root->fs_info->cleaner_kthread) btrfs_run_delayed_iputs(root); return ret; @@ -319,6 +319,12 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, * @vma: The virtual memory area where the fault occurred * @vmf: The description of the fault * @get_block: The filesystem method used to translate file offsets to blocks + * @complete_unwritten: The filesystem method used to convert unwritten blocks + * to written so the data written to them is exposed. This is required for + * required by write faults for filesystems that will return unwritten + * extent mappings from @get_block, but it is optional for reads as + * dax_insert_mapping() will always zero unwritten blocks. If the fs does + * not support unwritten extents, the it should pass NULL. * * When a page fault occurs, filesystems may call this helper in their * fault handler for DAX files. __dax_fault() assumes the caller has done all @@ -437,8 +443,12 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, * as for normal BH based IO completions. */ error = dax_insert_mapping(inode, &bh, vma, vmf); - if (buffer_unwritten(&bh)) - complete_unwritten(&bh, !error); + if (buffer_unwritten(&bh)) { + if (complete_unwritten) + complete_unwritten(&bh, !error); + else + WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE)); + } out: if (error == -ENOMEM) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9bedfa8dd3a5..f71e19a9dd3c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2072,8 +2072,6 @@ static int f2fs_set_data_page_dirty(struct page *page) return 1; } - mark_inode_dirty(inode); - if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); update_dirty_page(inode, page); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ada2a3dd701a..b0f38c3b37f4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1331,12 +1331,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) if (ret) return ret; - if (f2fs_is_atomic_file(inode)) + if (f2fs_is_atomic_file(inode)) { + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); commit_inmem_pages(inode, false); + } ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); mnt_drop_write_file(filp); - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); return ret; } @@ -1387,8 +1388,8 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) f2fs_balance_fs(F2FS_I_SB(inode)); if (f2fs_is_atomic_file(inode)) { - commit_inmem_pages(inode, false); clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + commit_inmem_pages(inode, false); } if (f2fs_is_volatile_file(inode)) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e1e73617d13b..22fb5ef37966 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -556,27 +556,39 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) if (!fio.encrypted_page) goto put_out; - f2fs_submit_page_bio(&fio); + err = f2fs_submit_page_bio(&fio); + if (err) + goto put_page_out; + + /* write page */ + lock_page(fio.encrypted_page); + + if (unlikely(!PageUptodate(fio.encrypted_page))) + goto put_page_out; + if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) + goto put_page_out; + + set_page_dirty(fio.encrypted_page); + f2fs_wait_on_page_writeback(fio.encrypted_page, META); + if (clear_page_dirty_for_io(fio.encrypted_page)) + dec_page_count(fio.sbi, F2FS_DIRTY_META); + + set_page_writeback(fio.encrypted_page); /* allocate block address */ f2fs_wait_on_page_writeback(dn.node_page, NODE); - allocate_data_block(fio.sbi, NULL, fio.blk_addr, &fio.blk_addr, &sum, CURSEG_COLD_DATA); - dn.data_blkaddr = fio.blk_addr; - - /* write page */ - lock_page(fio.encrypted_page); - set_page_writeback(fio.encrypted_page); fio.rw = WRITE_SYNC; f2fs_submit_page_mbio(&fio); + dn.data_blkaddr = fio.blk_addr; set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); if (page->index == 0) set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); - +put_page_out: f2fs_put_page(fio.encrypted_page, 1); put_out: f2fs_put_dnode(&dn); @@ -605,8 +617,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) .page = page, .encrypted_page = NULL, }; + set_page_dirty(page); f2fs_wait_on_page_writeback(page, DATA); - if (clear_page_dirty_for_io(page)) inode_dec_dirty_pages(inode); set_cold_data(page); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 38e75fb1e488..a13ffcc32992 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -141,6 +141,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) kunmap_atomic(dst_addr); SetPageUptodate(page); no_update: + set_page_dirty(page); + /* clear dirty state */ dirty = clear_page_dirty_for_io(page); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1eb343768781..61b97f9cb9f6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -257,6 +257,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) if (!abort) { lock_page(cur->page); if (cur->page->mapping == inode->i_mapping) { + set_page_dirty(cur->page); f2fs_wait_on_page_writeback(cur->page, DATA); if (clear_page_dirty_for_io(cur->page)) inode_dec_dirty_pages(inode); diff --git a/fs/namei.c b/fs/namei.c index ae4e4c18b2ac..fbbcf0993312 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1954,8 +1954,13 @@ OK: continue; } } - if (unlikely(!d_can_lookup(nd->path.dentry))) + if (unlikely(!d_can_lookup(nd->path.dentry))) { + if (nd->flags & LOOKUP_RCU) { + if (unlazy_walk(nd, NULL, 0)) + return -ECHILD; + } return -ENOTDIR; + } } } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ecebb406cc1a..4a90c9bb3135 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -775,7 +775,7 @@ static int nfs_init_server(struct nfs_server *server, server->options = data->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| - NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR; + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index c12951b9551e..b3289d701eea 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1852,7 +1852,7 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, struct nfs42_layoutstat_devinfo *devinfo; int i; - for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) { + for (i = 0; i < FF_LAYOUT_MIRROR_COUNT(pls); i++) { if (*dev_count >= dev_limit) break; mirror = FF_LAYOUT_COMP(pls, i); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b77b328a06d7..0adc7d245b3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -442,8 +442,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode->i_version = fattr->change_attr; - else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_PAGECACHE); if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); else @@ -1244,9 +1245,11 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->nrequests == 0) + if (cur_size != new_isize) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } + if (nfsi->nrequests != 0) + invalid &= ~NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? */ if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) @@ -1684,13 +1687,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_PAGECACHE; + | NFS_INO_INVALID_ACL; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; } - } else if (server->caps & NFS_CAP_CHANGE_ATTR) + } else nfsi->cache_validity |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { @@ -1717,7 +1719,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - invalid &= ~NFS_INO_REVAL_PAGECACHE; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7e3c4604bea8..9b372b845f6a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -296,6 +296,22 @@ extern struct rpc_procinfo nfs4_procedures[]; #ifdef CONFIG_NFS_V4_SECURITY_LABEL extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags); +static inline struct nfs4_label * +nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) +{ + if (!dst || !src) + return NULL; + + if (src->len > NFS4_MAXLABELLEN) + return NULL; + + dst->lfs = src->lfs; + dst->pi = src->pi; + dst->len = src->len; + memcpy(dst->label, src->label, src->len); + + return dst; +} static inline void nfs4_label_free(struct nfs4_label *label) { if (label) { @@ -316,6 +332,11 @@ static inline void nfs4_label_free(void *label) {} static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) { } +static inline struct nfs4_label * +nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) +{ + return NULL; +} #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ /* proc.c */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index f486b80f927a..d731bbf974aa 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -135,7 +135,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) return err; } -loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) +static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct inode *inode = file_inode(filep); struct nfs42_seek_args args = { @@ -171,6 +171,23 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } +loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) +{ + struct nfs_server *server = NFS_SERVER(file_inode(filep)); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_llseek(filep, offset, whence); + if (err == -ENOTSUPP) + return -EOPNOTSUPP; + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; +} + + static void nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8bee93469617..3acb1eb72930 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -467,7 +467,10 @@ static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp) static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { - do_renew_lease(server->nfs_client, timestamp); + struct nfs_client *clp = server->nfs_client; + + if (!nfs4_has_session(clp)) + do_renew_lease(clp, timestamp); } struct nfs4_call_sync_data { @@ -616,8 +619,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) clp = session->clp; do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ - if (res->sr_status_flags != 0) - nfs4_schedule_lease_recovery(clp); + nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); nfs41_update_target_slotid(slot->table, slot, res); break; case 1: @@ -910,6 +912,7 @@ struct nfs4_opendata { struct nfs_open_confirmres c_res; struct nfs4_string owner_name; struct nfs4_string group_name; + struct nfs4_label *a_label; struct nfs_fattr f_attr; struct nfs4_label *f_label; struct dentry *dir; @@ -1013,6 +1016,10 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, if (IS_ERR(p->f_label)) goto err_free_p; + p->a_label = nfs4_label_alloc(server, gfp_mask); + if (IS_ERR(p->a_label)) + goto err_free_f; + alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; p->o_arg.seqid = alloc_seqid(&sp->so_seqid, gfp_mask); if (IS_ERR(p->o_arg.seqid)) @@ -1041,7 +1048,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = nfs4_bitmask(server, label); p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.label = label; + p->o_arg.label = nfs4_label_copy(p->a_label, label); p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: @@ -1074,6 +1081,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, return p; err_free_label: + nfs4_label_free(p->a_label); +err_free_f: nfs4_label_free(p->f_label); err_free_p: kfree(p); @@ -1093,6 +1102,7 @@ static void nfs4_opendata_free(struct kref *kref) nfs4_put_open_state(p->state); nfs4_put_state_owner(p->owner); + nfs4_label_free(p->a_label); nfs4_label_free(p->f_label); dput(p->dir); @@ -1198,12 +1208,15 @@ static bool nfs_need_update_open_stateid(struct nfs4_state *state, static void nfs_resync_open_stateid_locked(struct nfs4_state *state) { + if (!(state->n_wronly || state->n_rdonly || state->n_rdwr)) + return; if (state->n_wronly) set_bit(NFS_O_WRONLY_STATE, &state->flags); if (state->n_rdonly) set_bit(NFS_O_RDONLY_STATE, &state->flags); if (state->n_rdwr) set_bit(NFS_O_RDWR_STATE, &state->flags); + set_bit(NFS_OPEN_STATE, &state->flags); } static void nfs_clear_open_stateid_locked(struct nfs4_state *state, @@ -7571,13 +7584,8 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) goto out; } ret = rpc_wait_for_completion_task(task); - if (!ret) { - struct nfs4_sequence_res *res = task->tk_msg.rpc_resp; - - if (task->tk_status == 0) - nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); + if (!ret) ret = task->tk_status; - } rpc_put_task(task); out: dprintk("<-- %s status=%d\n", __func__, ret); @@ -7965,16 +7973,17 @@ static void nfs4_layoutreturn_release(void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; + LIST_HEAD(freeme); dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); if (lrp->res.lrs_present) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); pnfs_clear_layoutreturn_waitbit(lo); - clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); - rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); + pnfs_free_lseg_list(&freeme); pnfs_put_layout_hdr(lrp->args.layout); nfs_iput_and_deactive(lrp->inode); kfree(calldata); @@ -8588,7 +8597,6 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK, .init_client = nfs40_init_client, .shutdown_client = nfs40_shutdown_client, @@ -8614,7 +8622,6 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1, @@ -8637,7 +8644,6 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .minor_version = 2, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 605840dc89cf..f2e2ad894461 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2191,25 +2191,35 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp) } } -static void nfs41_handle_state_revoked(struct nfs_client *clp) +static void nfs41_handle_all_state_revoked(struct nfs_client *clp) { nfs4_reset_all_state(clp); dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname); } +static void nfs41_handle_some_state_revoked(struct nfs_client *clp) +{ + nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); + nfs4_schedule_state_manager(clp); + + dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname); +} + static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp) { - /* This will need to handle layouts too */ - nfs_expire_all_delegations(clp); + /* FIXME: For now, we destroy all layouts. */ + pnfs_destroy_all_layouts(clp); + /* FIXME: For now, we test all delegations+open state+locks. */ + nfs41_handle_some_state_revoked(clp); dprintk("%s: Recallable state revoked on server %s!\n", __func__, clp->cl_hostname); } static void nfs41_handle_backchannel_fault(struct nfs_client *clp) { - nfs_expire_all_delegations(clp); - if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) - nfs4_schedule_state_manager(clp); + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + nfs4_schedule_state_manager(clp); + dprintk("%s: server %s declared a backchannel fault\n", __func__, clp->cl_hostname); } @@ -2231,10 +2241,11 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) nfs41_handle_server_reboot(clp); - if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | - SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | + if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED)) + nfs41_handle_all_state_revoked(clp); + if (flags & (SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | SEQ4_STATUS_ADMIN_STATE_REVOKED)) - nfs41_handle_state_revoked(clp); + nfs41_handle_some_state_revoked(clp); if (flags & SEQ4_STATUS_LEASE_MOVED) nfs4_schedule_lease_moved_recovery(clp); if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1da68d3b1eda..4984bbe55ff1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1100,8 +1100,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) mirror->pg_base = 0; mirror->pg_recoalesce = 0; - desc->pg_moreio = 0; - while (!list_empty(&head)) { struct nfs_page *req; @@ -1109,8 +1107,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) nfs_list_remove_request(req); if (__nfs_pageio_add_request(desc, req)) continue; - if (desc->pg_error < 0) + if (desc->pg_error < 0) { + list_splice_tail(&head, &mirror->pg_list); + mirror->pg_recoalesce = 1; return 0; + } break; } } while (mirror->pg_recoalesce); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0ba9a02c9566..70bf706b1090 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, { struct pnfs_layout_segment *s; - if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) return false; list_for_each_entry(s, &lo->plh_segs, pls_list) @@ -362,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, return true; } +static bool +pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo) +{ + if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return false; + lo->plh_return_iomode = 0; + lo->plh_block_lgets++; + pnfs_get_layout_hdr(lo); + clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); + return true; +} + static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, struct pnfs_layout_hdr *lo, struct inode *inode) { @@ -372,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, if (pnfs_layout_need_return(lo, lseg)) { nfs4_stateid stateid; enum pnfs_iomode iomode; + bool send; stateid = lo->plh_stateid; iomode = lo->plh_return_iomode; - /* decreased in pnfs_send_layoutreturn() */ - lo->plh_block_lgets++; - lo->plh_return_iomode = 0; + send = pnfs_prepare_layoutreturn(lo); spin_unlock(&inode->i_lock); - pnfs_get_layout_hdr(lo); - - /* Send an async layoutreturn so we dont deadlock */ - pnfs_send_layoutreturn(lo, stateid, iomode, false); + if (send) { + /* Send an async layoutreturn so we dont deadlock */ + pnfs_send_layoutreturn(lo, stateid, iomode, false); + } } else spin_unlock(&inode->i_lock); } @@ -411,6 +422,10 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) pnfs_layoutreturn_before_put_lseg(lseg, lo, inode); if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { + spin_unlock(&inode->i_lock); + return; + } pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); spin_unlock(&inode->i_lock); @@ -451,6 +466,8 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); if (atomic_dec_and_test(&lseg->pls_refcount)) { struct pnfs_layout_hdr *lo = lseg->pls_layout; + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) + return; pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); pnfs_free_lseg_async(lseg); @@ -924,6 +941,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); + rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); } static int @@ -978,6 +996,7 @@ _pnfs_return_layout(struct inode *ino) LIST_HEAD(tmp_list); nfs4_stateid stateid; int status = 0, empty; + bool send; dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); @@ -1007,17 +1026,18 @@ _pnfs_return_layout(struct inode *ino) /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { spin_unlock(&ino->i_lock); - pnfs_put_layout_hdr(lo); dprintk("NFS: %s no layout segments to return\n", __func__); - goto out; + goto out_put_layout_hdr; } set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); - lo->plh_block_lgets++; + send = pnfs_prepare_layoutreturn(lo); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - - status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); + if (send) + status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); +out_put_layout_hdr: + pnfs_put_layout_hdr(lo); out: dprintk("<-- %s status: %d\n", __func__, status); return status; @@ -1097,13 +1117,9 @@ bool pnfs_roc(struct inode *ino) out_noroc: if (lo) { stateid = lo->plh_stateid; - layoutreturn = - test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, - &lo->plh_flags); - if (layoutreturn) { - lo->plh_block_lgets++; - pnfs_get_layout_hdr(lo); - } + if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lo->plh_flags)) + layoutreturn = pnfs_prepare_layoutreturn(lo); } spin_unlock(&ino->i_lock); if (layoutreturn) { @@ -1146,15 +1162,18 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) struct pnfs_layout_segment *lseg; nfs4_stateid stateid; u32 current_seqid; - bool found = false, layoutreturn = false; + bool layoutreturn = false; spin_lock(&ino->i_lock); - list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) - if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - found = true; - goto out; - } + list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) { + if (!test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) + continue; + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) + continue; + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); + spin_unlock(&ino->i_lock); + return true; + } lo = nfsi->layout; current_seqid = be32_to_cpu(lo->plh_stateid.seqid); @@ -1162,23 +1181,19 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) * a barrier, we choose the worst-case barrier. */ *barrier = current_seqid + atomic_read(&lo->plh_outstanding); -out: - if (!found) { - stateid = lo->plh_stateid; - layoutreturn = - test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, - &lo->plh_flags); - if (layoutreturn) { - lo->plh_block_lgets++; - pnfs_get_layout_hdr(lo); - } - } + stateid = lo->plh_stateid; + if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lo->plh_flags)) + layoutreturn = pnfs_prepare_layoutreturn(lo); + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); + spin_unlock(&ino->i_lock); if (layoutreturn) { - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); + return true; } - return found; + return false; } /* @@ -1695,7 +1710,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, spin_lock(&inode->i_lock); /* set failure bit so that pnfs path will be retried later */ pnfs_layout_set_fail_bit(lo, iomode); - set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); if (lo->plh_return_iomode == 0) lo->plh_return_iomode = range.iomode; else if (lo->plh_return_iomode != range.iomode) @@ -2207,13 +2221,12 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) if (ld->prepare_layoutcommit) { status = ld->prepare_layoutcommit(&data->args); if (status) { + put_rpccred(data->cred); spin_lock(&inode->i_lock); set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; - spin_unlock(&inode->i_lock); - put_rpccred(data->cred); - goto clear_layoutcommitting; + goto out_unlock; } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 65869ca9c851..75a35a1afa79 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1379,24 +1379,27 @@ static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr, { struct nfs_pgio_args *argp = &hdr->args; struct nfs_pgio_res *resp = &hdr->res; + u64 size = argp->offset + resp->count; if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) + fattr->size = size; + if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) { + fattr->valid &= ~NFS_ATTR_FATTR_SIZE; return; - if (argp->offset + resp->count != fattr->size) - return; - if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) + } + if (size != fattr->size) return; /* Set attribute barrier */ nfs_fattr_set_barrier(fattr); + /* ...and update size */ + fattr->valid |= NFS_ATTR_FATTR_SIZE; } void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) { - struct nfs_fattr *fattr = hdr->res.fattr; + struct nfs_fattr *fattr = &hdr->fattr; struct inode *inode = hdr->inode; - if (fattr == NULL) - return; spin_lock(&inode->i_lock); nfs_writeback_check_extend(hdr, fattr); nfs_post_op_update_inode_force_wcc_locked(inode, fattr); diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 20de88d1bf86..dd714037c322 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + int blksize = mp->m_attr_geo->blksize; char *ptr; int len; xfs_daddr_t bno; - int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify( ASSERT(len >= blksize); while (len > 0) { + struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } - if (bip) { - struct xfs_attr3_rmt_hdr *rmt; - rmt = (struct xfs_attr3_rmt_hdr *)ptr; - rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); + /* + * Ensure we aren't writing bogus LSNs to disk. See + * xfs_attr3_rmt_hdr_set() for the explanation. + */ + if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); + return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); @@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set( rmt->rm_owner = cpu_to_be64(ino); rmt->rm_blkno = cpu_to_be64(bno); + /* + * Remote attribute blocks are written synchronously, so we don't + * have an LSN that we can stamp in them that makes any sense to log + * recovery. To ensure that log recovery handles overwrites of these + * blocks sanely (i.e. once they've been freed and reallocated as some + * other type of metadata) we need to ensure that the LSN has a value + * that tells log recovery to ignore the LSN and overwrite the buffer + * with whatever is in it's log. To do this, we use the magic + * NULLCOMMITLSN to indicate that the LSN is invalid. + */ + rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); + return sizeof(struct xfs_attr3_rmt_hdr); } @@ -434,14 +451,21 @@ xfs_attr_rmtval_set( /* * Allocate a single extent, up to the size of the value. + * + * Note that we have to consider this a data allocation as we + * write the remote attribute without logging the contents. + * Hence we must ensure that we aren't using blocks that are on + * the busy list so that we don't overwrite blocks which have + * recently been freed but their transactions are not yet + * committed to disk. If we overwrite the contents of a busy + * extent and then crash then the block may not contain the + * correct metadata after log recovery occurs. */ xfs_bmap_init(args->flist, args->firstblock); nmap = 1; error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, - blkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - args->firstblock, args->total, &map, &nmap, - args->flist); + blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, + args->total, &map, &nmap, args->flist); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f0e8249722d4..db4acc1c3e73 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1514,18 +1514,27 @@ xfs_filemap_fault( struct vm_area_struct *vma, struct vm_fault *vmf) { - struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file)); + struct inode *inode = file_inode(vma->vm_file); int ret; - trace_xfs_filemap_fault(ip); + trace_xfs_filemap_fault(XFS_I(inode)); /* DAX can shortcut the normal fault path on write faults! */ - if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip))) + if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode)) return xfs_filemap_page_mkwrite(vma, vmf); - xfs_ilock(ip, XFS_MMAPLOCK_SHARED); - ret = filemap_fault(vma, vmf); - xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + if (IS_DAX(inode)) { + /* + * we do not want to trigger unwritten extent conversion on read + * faults - that is unnecessary overhead and would also require + * changes to xfs_get_blocks_direct() to map unwritten extent + * ioend for conversion on read-only mappings. + */ + ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL); + } else + ret = filemap_fault(vma, vmf); + xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); return ret; } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 01dd228ca05e..480ebba8464f 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1886,9 +1886,14 @@ xlog_recover_get_buf_lsn( uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; break; case XFS_ATTR3_RMT_MAGIC: - lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); - uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid; - break; + /* + * Remote attr blocks are written synchronously, rather than + * being logged. That means they do not contain a valid LSN + * (i.e. transactionally ordered) in them, and hence any time we + * see a buffer to replay over the top of a remote attribute + * block we should simply do so. + */ + goto recover_immediately; case XFS_SB_MAGIC: lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); uuid = &((struct xfs_dsb *)blk)->sb_uuid; diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 55e3abc2d027..b42afada1280 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -108,12 +108,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index 6383d54bf983..deb9e8b0eb9e 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -36,33 +36,33 @@ /* * External function declarations */ -extern void queue_read_lock_slowpath(struct qrwlock *lock); -extern void queue_write_lock_slowpath(struct qrwlock *lock); +extern void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts); +extern void queued_write_lock_slowpath(struct qrwlock *lock); /** - * queue_read_can_lock- would read_trylock() succeed? + * queued_read_can_lock- would read_trylock() succeed? * @lock: Pointer to queue rwlock structure */ -static inline int queue_read_can_lock(struct qrwlock *lock) +static inline int queued_read_can_lock(struct qrwlock *lock) { return !(atomic_read(&lock->cnts) & _QW_WMASK); } /** - * queue_write_can_lock- would write_trylock() succeed? + * queued_write_can_lock- would write_trylock() succeed? * @lock: Pointer to queue rwlock structure */ -static inline int queue_write_can_lock(struct qrwlock *lock) +static inline int queued_write_can_lock(struct qrwlock *lock) { return !atomic_read(&lock->cnts); } /** - * queue_read_trylock - try to acquire read lock of a queue rwlock + * queued_read_trylock - try to acquire read lock of a queue rwlock * @lock : Pointer to queue rwlock structure * Return: 1 if lock acquired, 0 if failed */ -static inline int queue_read_trylock(struct qrwlock *lock) +static inline int queued_read_trylock(struct qrwlock *lock) { u32 cnts; @@ -77,11 +77,11 @@ static inline int queue_read_trylock(struct qrwlock *lock) } /** - * queue_write_trylock - try to acquire write lock of a queue rwlock + * queued_write_trylock - try to acquire write lock of a queue rwlock * @lock : Pointer to queue rwlock structure * Return: 1 if lock acquired, 0 if failed */ -static inline int queue_write_trylock(struct qrwlock *lock) +static inline int queued_write_trylock(struct qrwlock *lock) { u32 cnts; @@ -93,10 +93,10 @@ static inline int queue_write_trylock(struct qrwlock *lock) cnts, cnts | _QW_LOCKED) == cnts); } /** - * queue_read_lock - acquire read lock of a queue rwlock + * queued_read_lock - acquire read lock of a queue rwlock * @lock: Pointer to queue rwlock structure */ -static inline void queue_read_lock(struct qrwlock *lock) +static inline void queued_read_lock(struct qrwlock *lock) { u32 cnts; @@ -105,27 +105,27 @@ static inline void queue_read_lock(struct qrwlock *lock) return; /* The slowpath will decrement the reader count, if necessary. */ - queue_read_lock_slowpath(lock); + queued_read_lock_slowpath(lock, cnts); } /** - * queue_write_lock - acquire write lock of a queue rwlock + * queued_write_lock - acquire write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ -static inline void queue_write_lock(struct qrwlock *lock) +static inline void queued_write_lock(struct qrwlock *lock) { /* Optimize for the unfair lock case where the fair flag is 0. */ if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0) return; - queue_write_lock_slowpath(lock); + queued_write_lock_slowpath(lock); } /** - * queue_read_unlock - release read lock of a queue rwlock + * queued_read_unlock - release read lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ -static inline void queue_read_unlock(struct qrwlock *lock) +static inline void queued_read_unlock(struct qrwlock *lock) { /* * Atomically decrement the reader count @@ -134,12 +134,12 @@ static inline void queue_read_unlock(struct qrwlock *lock) atomic_sub(_QR_BIAS, &lock->cnts); } -#ifndef queue_write_unlock +#ifndef queued_write_unlock /** - * queue_write_unlock - release write lock of a queue rwlock + * queued_write_unlock - release write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ -static inline void queue_write_unlock(struct qrwlock *lock) +static inline void queued_write_unlock(struct qrwlock *lock) { /* * If the writer field is atomic, it can be cleared directly. @@ -154,13 +154,13 @@ static inline void queue_write_unlock(struct qrwlock *lock) * Remapping rwlock architecture specific functions to the corresponding * queue rwlock functions. */ -#define arch_read_can_lock(l) queue_read_can_lock(l) -#define arch_write_can_lock(l) queue_write_can_lock(l) -#define arch_read_lock(l) queue_read_lock(l) -#define arch_write_lock(l) queue_write_lock(l) -#define arch_read_trylock(l) queue_read_trylock(l) -#define arch_write_trylock(l) queue_write_trylock(l) -#define arch_read_unlock(l) queue_read_unlock(l) -#define arch_write_unlock(l) queue_write_unlock(l) +#define arch_read_can_lock(l) queued_read_can_lock(l) +#define arch_write_can_lock(l) queued_write_can_lock(l) +#define arch_read_lock(l) queued_read_lock(l) +#define arch_write_lock(l) queued_write_lock(l) +#define arch_read_trylock(l) queued_read_trylock(l) +#define arch_write_trylock(l) queued_write_trylock(l) +#define arch_read_unlock(l) queued_read_unlock(l) +#define arch_write_unlock(l) queued_write_unlock(l) #endif /* __ASM_GENERIC_QRWLOCK_H */ diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index b96bd299966f..008fc67d0d96 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -49,13 +49,28 @@ static inline void exception_exit(enum ctx_state prev_ctx) } } + +/** + * ct_state() - return the current context tracking state if known + * + * Returns the current cpu's context tracking state if context tracking + * is enabled. If context tracking is disabled, returns + * CONTEXT_DISABLED. This should be used primarily for debugging. + */ +static inline enum ctx_state ct_state(void) +{ + return context_tracking_is_enabled() ? + this_cpu_read(context_tracking.state) : CONTEXT_DISABLED; +} #else static inline void user_enter(void) { } static inline void user_exit(void) { } static inline enum ctx_state exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } +static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } #endif /* !CONFIG_CONTEXT_TRACKING */ +#define CT_WARN_ON(cond) WARN_ON(context_tracking_is_enabled() && (cond)) #ifdef CONFIG_CONTEXT_TRACKING_FORCE extern void context_tracking_init(void); diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 678ecdf90cf6..ee956c528fab 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -14,6 +14,7 @@ struct context_tracking { bool active; int recursion; enum ctx_state { + CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */ CONTEXT_KERNEL = 0, CONTEXT_USER, CONTEXT_GUEST, diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 29ad97c34fd5..bde1e567b3a9 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -62,6 +62,7 @@ struct cpufreq_policy { /* CPUs sharing clock, require sw coordination */ cpumask_var_t cpus; /* Online CPUs only */ cpumask_var_t related_cpus; /* Online + Offline CPUs */ + cpumask_var_t real_cpus; /* Related and present */ unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index f4de473f226b..7f653e8f6690 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -7,17 +7,52 @@ * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com> * + * DEPRECATED API: + * + * The use of 'struct static_key' directly, is now DEPRECATED. In addition + * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following: + * + * struct static_key false = STATIC_KEY_INIT_FALSE; + * struct static_key true = STATIC_KEY_INIT_TRUE; + * static_key_true() + * static_key_false() + * + * The updated API replacements are: + * + * DEFINE_STATIC_KEY_TRUE(key); + * DEFINE_STATIC_KEY_FALSE(key); + * static_key_likely() + * statick_key_unlikely() + * * Jump labels provide an interface to generate dynamic branches using - * self-modifying code. Assuming toolchain and architecture support, the result - * of a "if (static_key_false(&key))" statement is an unconditional branch (which - * defaults to false - and the true block is placed out of line). + * self-modifying code. Assuming toolchain and architecture support, if we + * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)", + * an "if (static_branch_unlikely(&key))" statement is an unconditional branch + * (which defaults to false - and the true block is placed out of line). + * Similarly, we can define an initially true key via + * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same + * "if (static_branch_unlikely(&key))", in which case we will generate an + * unconditional branch to the out-of-line true branch. Keys that are + * initially true or false can be using in both static_branch_unlikely() + * and static_branch_likely() statements. + * + * At runtime we can change the branch target by setting the key + * to true via a call to static_branch_enable(), or false using + * static_branch_disable(). If the direction of the branch is switched by + * these calls then we run-time modify the branch target via a + * no-op -> jump or jump -> no-op conversion. For example, for an + * initially false key that is used in an "if (static_branch_unlikely(&key))" + * statement, setting the key to true requires us to patch in a jump + * to the out-of-line of true branch. * - * However at runtime we can change the branch target using - * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key - * object, and for as long as there are references all branches referring to - * that particular key will point to the (out of line) true block. + * In addtion to static_branch_{enable,disable}, we can also reference count + * the key or branch direction via static_branch_{inc,dec}. Thus, + * static_branch_inc() can be thought of as a 'make more true' and + * static_branch_dec() as a 'make more false'. The inc()/dec() + * interface is meant to be used exclusively from the inc()/dec() for a given + * key. * - * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions + * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total @@ -29,20 +64,10 @@ * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. * - * Lacking toolchain and or architecture support, jump labels fall back to a simple - * conditional branch. - * - * struct static_key my_key = STATIC_KEY_INIT_TRUE; - * - * if (static_key_true(&my_key)) { - * } + * Lacking toolchain and or architecture support, static keys fall back to a + * simple conditional branch. * - * will result in the true case being in-line and starts the key with a single - * reference. Mixing static_key_true() and static_key_false() on the same key is not - * allowed. - * - * Not initializing the key (static data is initialized to 0s anyway) is the - * same as using STATIC_KEY_INIT_FALSE. + * Additional babbling in: Documentation/static-keys.txt */ #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) @@ -86,8 +111,8 @@ struct static_key { #ifndef __ASSEMBLY__ enum jump_label_type { - JUMP_LABEL_DISABLE = 0, - JUMP_LABEL_ENABLE, + JUMP_LABEL_NOP = 0, + JUMP_LABEL_JMP, }; struct module; @@ -101,33 +126,18 @@ static inline int static_key_count(struct static_key *key) #ifdef HAVE_JUMP_LABEL -#define JUMP_LABEL_TYPE_FALSE_BRANCH 0UL -#define JUMP_LABEL_TYPE_TRUE_BRANCH 1UL -#define JUMP_LABEL_TYPE_MASK 1UL - -static -inline struct jump_entry *jump_label_get_entries(struct static_key *key) -{ - return (struct jump_entry *)((unsigned long)key->entries - & ~JUMP_LABEL_TYPE_MASK); -} - -static inline bool jump_label_get_branch_default(struct static_key *key) -{ - if (((unsigned long)key->entries & JUMP_LABEL_TYPE_MASK) == - JUMP_LABEL_TYPE_TRUE_BRANCH) - return true; - return false; -} +#define JUMP_TYPE_FALSE 0UL +#define JUMP_TYPE_TRUE 1UL +#define JUMP_TYPE_MASK 1UL static __always_inline bool static_key_false(struct static_key *key) { - return arch_static_branch(key); + return arch_static_branch(key, false); } static __always_inline bool static_key_true(struct static_key *key) { - return !static_key_false(key); + return !arch_static_branch(key, true); } extern struct jump_entry __start___jump_table[]; @@ -145,12 +155,12 @@ extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); -#define STATIC_KEY_INIT_TRUE ((struct static_key) \ +#define STATIC_KEY_INIT_TRUE \ { .enabled = ATOMIC_INIT(1), \ - .entries = (void *)JUMP_LABEL_TYPE_TRUE_BRANCH }) -#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + .entries = (void *)JUMP_TYPE_TRUE } +#define STATIC_KEY_INIT_FALSE \ { .enabled = ATOMIC_INIT(0), \ - .entries = (void *)JUMP_LABEL_TYPE_FALSE_BRANCH }) + .entries = (void *)JUMP_TYPE_FALSE } #else /* !HAVE_JUMP_LABEL */ @@ -198,10 +208,8 @@ static inline int jump_label_apply_nops(struct module *mod) return 0; } -#define STATIC_KEY_INIT_TRUE ((struct static_key) \ - { .enabled = ATOMIC_INIT(1) }) -#define STATIC_KEY_INIT_FALSE ((struct static_key) \ - { .enabled = ATOMIC_INIT(0) }) +#define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } +#define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } #endif /* HAVE_JUMP_LABEL */ @@ -213,6 +221,157 @@ static inline bool static_key_enabled(struct static_key *key) return static_key_count(key) > 0; } +static inline void static_key_enable(struct static_key *key) +{ + int count = static_key_count(key); + + WARN_ON_ONCE(count < 0 || count > 1); + + if (!count) + static_key_slow_inc(key); +} + +static inline void static_key_disable(struct static_key *key) +{ + int count = static_key_count(key); + + WARN_ON_ONCE(count < 0 || count > 1); + + if (count) + static_key_slow_dec(key); +} + +/* -------------------------------------------------------------------------- */ + +/* + * Two type wrappers around static_key, such that we can use compile time + * type differentiation to emit the right code. + * + * All the below code is macros in order to play type games. + */ + +struct static_key_true { + struct static_key key; +}; + +struct static_key_false { + struct static_key key; +}; + +#define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, } +#define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, } + +#define DEFINE_STATIC_KEY_TRUE(name) \ + struct static_key_true name = STATIC_KEY_TRUE_INIT + +#define DEFINE_STATIC_KEY_FALSE(name) \ + struct static_key_false name = STATIC_KEY_FALSE_INIT + +#ifdef HAVE_JUMP_LABEL + +/* + * Combine the right initial value (type) with the right branch order + * to generate the desired result. + * + * + * type\branch| likely (1) | unlikely (0) + * -----------+-----------------------+------------------ + * | | + * true (1) | ... | ... + * | NOP | JMP L + * | <br-stmts> | 1: ... + * | L: ... | + * | | + * | | L: <br-stmts> + * | | jmp 1b + * | | + * -----------+-----------------------+------------------ + * | | + * false (0) | ... | ... + * | JMP L | NOP + * | <br-stmts> | 1: ... + * | L: ... | + * | | + * | | L: <br-stmts> + * | | jmp 1b + * | | + * -----------+-----------------------+------------------ + * + * The initial value is encoded in the LSB of static_key::entries, + * type: 0 = false, 1 = true. + * + * The branch type is encoded in the LSB of jump_entry::key, + * branch: 0 = unlikely, 1 = likely. + * + * This gives the following logic table: + * + * enabled type branch instuction + * -----------------------------+----------- + * 0 0 0 | NOP + * 0 0 1 | JMP + * 0 1 0 | NOP + * 0 1 1 | JMP + * + * 1 0 0 | JMP + * 1 0 1 | NOP + * 1 1 0 | JMP + * 1 1 1 | NOP + * + * Which gives the following functions: + * + * dynamic: instruction = enabled ^ branch + * static: instruction = type ^ branch + * + * See jump_label_type() / jump_label_init_type(). + */ + +extern bool ____wrong_branch_error(void); + +#define static_branch_likely(x) \ +({ \ + bool branch; \ + if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ + branch = !arch_static_branch(&(x)->key, true); \ + else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ + branch = !arch_static_branch_jump(&(x)->key, true); \ + else \ + branch = ____wrong_branch_error(); \ + branch; \ +}) + +#define static_branch_unlikely(x) \ +({ \ + bool branch; \ + if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ + branch = arch_static_branch_jump(&(x)->key, false); \ + else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ + branch = arch_static_branch(&(x)->key, false); \ + else \ + branch = ____wrong_branch_error(); \ + branch; \ +}) + +#else /* !HAVE_JUMP_LABEL */ + +#define static_branch_likely(x) likely(static_key_enabled(&(x)->key)) +#define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key)) + +#endif /* HAVE_JUMP_LABEL */ + +/* + * Advanced usage; refcount, branch is enabled when: count != 0 + */ + +#define static_branch_inc(x) static_key_slow_inc(&(x)->key) +#define static_branch_dec(x) static_key_slow_dec(&(x)->key) + +/* + * Normal usage; boolean enable/disable. + */ + +#define static_branch_enable(x) static_key_enable(&(x)->key) +#define static_branch_disable(x) static_key_disable(&(x)->key) + #endif /* _LINUX_JUMP_LABEL_H */ #endif /* __ASSEMBLY__ */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f91b5ade30c9..874b77228fb9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -292,9 +292,12 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR | + NFS_INO_REVAL_PAGECACHE | + NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL; if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a2ea1491d3df..20bc8e51b161 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -220,7 +220,7 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) -#define NFS_CAP_CHANGE_ATTR (1U << 5) +/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ #define NFS_CAP_FILEID (1U << 6) #define NFS_CAP_MODE (1U << 7) #define NFS_CAP_NLINK (1U << 8) diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 4c508549833a..cc7dd687a89d 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -59,7 +59,7 @@ void of_dma_configure(struct device *dev, struct device_node *np); #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, - struct device_driver *drv) + const struct device_driver *drv) { return 0; } diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h index 044a124bfbbc..21b15f6fee25 100644 --- a/include/linux/platform_data/macb.h +++ b/include/linux/platform_data/macb.h @@ -8,11 +8,19 @@ #ifndef __MACB_PDATA_H__ #define __MACB_PDATA_H__ +/** + * struct macb_platform_data - platform data for MACB Ethernet + * @phy_mask: phy mask passed when register the MDIO bus + * within the driver + * @phy_irq_pin: PHY IRQ + * @is_rmii: using RMII interface? + * @rev_eth_addr: reverse Ethernet address byte order + */ struct macb_platform_data { u32 phy_mask; - int phy_irq_pin; /* PHY IRQ */ - u8 is_rmii; /* using RMII interface? */ - u8 rev_eth_addr; /* reverse Ethernet address byte order */ + int phy_irq_pin; + u8 is_rmii; + u8 rev_eth_addr; }; #endif /* __MACB_PDATA_H__ */ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 0063b24b4f36..ffcd053ca89a 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -296,7 +296,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) * Map the spin_lock functions to the raw variants for PREEMPT_RT=n */ -static inline raw_spinlock_t *spinlock_check(spinlock_t *lock) +static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock) { return &lock->rlock; } @@ -307,17 +307,17 @@ do { \ raw_spin_lock_init(&(_lock)->rlock); \ } while (0) -static inline void spin_lock(spinlock_t *lock) +static __always_inline void spin_lock(spinlock_t *lock) { raw_spin_lock(&lock->rlock); } -static inline void spin_lock_bh(spinlock_t *lock) +static __always_inline void spin_lock_bh(spinlock_t *lock) { raw_spin_lock_bh(&lock->rlock); } -static inline int spin_trylock(spinlock_t *lock) +static __always_inline int spin_trylock(spinlock_t *lock) { return raw_spin_trylock(&lock->rlock); } @@ -337,7 +337,7 @@ do { \ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ } while (0) -static inline void spin_lock_irq(spinlock_t *lock) +static __always_inline void spin_lock_irq(spinlock_t *lock) { raw_spin_lock_irq(&lock->rlock); } @@ -352,32 +352,32 @@ do { \ raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \ } while (0) -static inline void spin_unlock(spinlock_t *lock) +static __always_inline void spin_unlock(spinlock_t *lock) { raw_spin_unlock(&lock->rlock); } -static inline void spin_unlock_bh(spinlock_t *lock) +static __always_inline void spin_unlock_bh(spinlock_t *lock) { raw_spin_unlock_bh(&lock->rlock); } -static inline void spin_unlock_irq(spinlock_t *lock) +static __always_inline void spin_unlock_irq(spinlock_t *lock) { raw_spin_unlock_irq(&lock->rlock); } -static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) +static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { raw_spin_unlock_irqrestore(&lock->rlock, flags); } -static inline int spin_trylock_bh(spinlock_t *lock) +static __always_inline int spin_trylock_bh(spinlock_t *lock) { return raw_spin_trylock_bh(&lock->rlock); } -static inline int spin_trylock_irq(spinlock_t *lock) +static __always_inline int spin_trylock_irq(spinlock_t *lock) { return raw_spin_trylock_irq(&lock->rlock); } @@ -387,22 +387,22 @@ static inline int spin_trylock_irq(spinlock_t *lock) raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ }) -static inline void spin_unlock_wait(spinlock_t *lock) +static __always_inline void spin_unlock_wait(spinlock_t *lock) { raw_spin_unlock_wait(&lock->rlock); } -static inline int spin_is_locked(spinlock_t *lock) +static __always_inline int spin_is_locked(spinlock_t *lock) { return raw_spin_is_locked(&lock->rlock); } -static inline int spin_is_contended(spinlock_t *lock) +static __always_inline int spin_is_contended(spinlock_t *lock) { return raw_spin_is_contended(&lock->rlock); } -static inline int spin_can_lock(spinlock_t *lock) +static __always_inline int spin_can_lock(spinlock_t *lock) { return raw_spin_can_lock(&lock->rlock); } diff --git a/include/net/act_api.h b/include/net/act_api.h index 3ee4c92afd1b..931738bc5bba 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -99,7 +99,6 @@ struct tc_action_ops { int tcf_hash_search(struct tc_action *a, u32 index); void tcf_hash_destroy(struct tc_action *a); -int tcf_hash_release(struct tc_action *a, int bind); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); int tcf_hash_check(u32 index, struct tc_action *a, int bind); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, @@ -107,6 +106,13 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); void tcf_hash_insert(struct tc_action *a); +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); + +static inline int tcf_hash_release(struct tc_action *a, bool bind) +{ + return __tcf_hash_release(a, bind, false); +} + int tcf_register_action(struct tc_action_ops *a, unsigned int mask); int tcf_unregister_action(struct tc_action_ops *a); int tcf_action_destroy(struct list_head *actions, int bind); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e1300b3dd597..53eead2da743 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -21,13 +21,11 @@ struct netns_frags { * @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction - * @INET_FRAG_EVICTED: frag queue is being evicted */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), - INET_FRAG_EVICTED = BIT(3) }; /** @@ -45,6 +43,7 @@ enum { * @flags: fragment queue flags * @max_size: maximum received fragment size * @net: namespace that this frag belongs to + * @list_evictor: list of queues to forcefully evict (e.g. due to low memory) */ struct inet_frag_queue { spinlock_t lock; @@ -59,6 +58,7 @@ struct inet_frag_queue { __u8 flags; u16 max_size; struct netns_frags *net; + struct hlist_node list_evictor; }; #define INETFRAGS_HASHSZ 1024 @@ -125,6 +125,11 @@ static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f inet_frag_destroy(q, f); } +static inline bool inet_frag_evicting(struct inet_frag_queue *q) +{ + return !hlist_unhashed(&q->list_evictor); +} + /* Memory Tracking Functions. */ /* The default percpu_counter batch size is not big enough to scale to @@ -139,14 +144,14 @@ static inline int frag_mem_limit(struct netns_frags *nf) return percpu_counter_read(&nf->mem); } -static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i) +static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch); + __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); } -static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) +static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch); + __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); } static inline void init_frag_mem_limit(struct netns_frags *nf) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 49c142bdf01e..5fa643b4e891 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -183,7 +183,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); struct fib_table { struct hlist_node tb_hlist; u32 tb_id; - int tb_default; int tb_num_default; struct rcu_head rcu; unsigned long *tb_data; @@ -290,7 +289,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); -void fib_select_default(struct fib_result *res); +void fib_select_default(const struct flowi4 *flp, struct fib_result *res); #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 095433b8a8b0..37cd3911d5c5 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -291,7 +291,7 @@ extern unsigned int nf_conntrack_max; extern unsigned int nf_conntrack_hash_rnd; void init_nf_conntrack_hash_rnd(void); -void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl); +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 29d6a94db54d..723b61c82b3f 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -68,7 +68,6 @@ struct ct_pcpu { spinlock_t lock; struct hlist_nulls_head unconfirmed; struct hlist_nulls_head dying; - struct hlist_nulls_head tmpl; }; struct netns_ct { diff --git a/include/net/sock.h b/include/net/sock.h index 05a8c1aea251..f21f0708ec59 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -902,7 +902,7 @@ void sk_stream_kill_queues(struct sock *sk); void sk_set_memalloc(struct sock *sk); void sk_clear_memalloc(struct sock *sk); -int sk_wait_data(struct sock *sk, long *timeo); +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb); struct request_sock_ops; struct timewait_sock_ops; diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 34117b8b72e4..0aedbb2c10e0 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -595,6 +595,7 @@ struct iscsi_conn { int bitmap_id; int rx_thread_active; struct task_struct *rx_thread; + struct completion rx_login_comp; int tx_thread_active; struct task_struct *tx_thread; /* list_head for session connection list */ diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d708a53b8fb1..fbdd11851725 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -32,7 +32,7 @@ #ifndef __AMDGPU_DRM_H__ #define __AMDGPU_DRM_H__ -#include <drm/drm.h> +#include "drm.h" #define DRM_AMDGPU_GEM_CREATE 0x00 #define DRM_AMDGPU_GEM_MMAP 0x01 diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 1ef76661e1a1..01aa2a8e3f8d 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -33,7 +33,7 @@ #ifndef __RADEON_DRM_H__ #define __RADEON_DRM_H__ -#include <drm/drm.h> +#include "drm.h" /* WARNING: If you change any of these defines, make sure to change the * defines in the X server file (radeon_sarea.h) diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h index 12215205ab8d..785c5ca0994b 100644 --- a/include/uapi/sound/asoc.h +++ b/include/uapi/sound/asoc.h @@ -110,7 +110,7 @@ /* * Block Header. - * This header preceeds all object and object arrays below. + * This header precedes all object and object arrays below. */ struct snd_soc_tplg_hdr { __le32 magic; /* magic number */ @@ -222,7 +222,7 @@ struct snd_soc_tplg_stream_config { /* * Manifest. List totals for each payload type. Not used in parsing, but will * be passed to the component driver before any other objects in order for any - * global componnent resource allocations. + * global component resource allocations. * * File block representation for manifest :- * +-----------------------------------+----+ diff --git a/kernel/futex.c b/kernel/futex.c index c4a182f5357e..6e443efc65f4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -64,6 +64,7 @@ #include <linux/hugetlb.h> #include <linux/freezer.h> #include <linux/bootmem.h> +#include <linux/fault-inject.h> #include <asm/futex.h> @@ -258,6 +259,66 @@ static unsigned long __read_mostly futex_hashsize; static struct futex_hash_bucket *futex_queues; +/* + * Fault injections for futexes. + */ +#ifdef CONFIG_FAIL_FUTEX + +static struct { + struct fault_attr attr; + + u32 ignore_private; +} fail_futex = { + .attr = FAULT_ATTR_INITIALIZER, + .ignore_private = 0, +}; + +static int __init setup_fail_futex(char *str) +{ + return setup_fault_attr(&fail_futex.attr, str); +} +__setup("fail_futex=", setup_fail_futex); + +static bool should_fail_futex(bool fshared) +{ + if (fail_futex.ignore_private && !fshared) + return false; + + return should_fail(&fail_futex.attr, 1); +} + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + +static int __init fail_futex_debugfs(void) +{ + umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + struct dentry *dir; + + dir = fault_create_debugfs_attr("fail_futex", NULL, + &fail_futex.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + if (!debugfs_create_bool("ignore-private", mode, dir, + &fail_futex.ignore_private)) { + debugfs_remove_recursive(dir); + return -ENOMEM; + } + + return 0; +} + +late_initcall(fail_futex_debugfs); + +#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ + +#else +static inline bool should_fail_futex(bool fshared) +{ + return false; +} +#endif /* CONFIG_FAIL_FUTEX */ + static inline void futex_get_mm(union futex_key *key) { atomic_inc(&key->private.mm->mm_count); @@ -413,6 +474,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) if (unlikely(!access_ok(rw, uaddr, sizeof(u32)))) return -EFAULT; + if (unlikely(should_fail_futex(fshared))) + return -EFAULT; + /* * PROCESS_PRIVATE futexes are fast. * As the mm cannot disappear under us and the 'key' only needs @@ -428,6 +492,10 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) } again: + /* Ignore any VERIFY_READ mapping (futex common case) */ + if (unlikely(should_fail_futex(fshared))) + return -EFAULT; + err = get_user_pages_fast(address, 1, 1, &page); /* * If write access is not required (eg. FUTEX_WAIT), try @@ -516,7 +584,7 @@ again: * A RO anonymous page will never change and thus doesn't make * sense for futex operations. */ - if (ro) { + if (unlikely(should_fail_futex(fshared)) || ro) { err = -EFAULT; goto out; } @@ -974,6 +1042,9 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) { u32 uninitialized_var(curval); + if (unlikely(should_fail_futex(true))) + return -EFAULT; + if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) return -EFAULT; @@ -1015,12 +1086,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, if (get_futex_value_locked(&uval, uaddr)) return -EFAULT; + if (unlikely(should_fail_futex(true))) + return -EFAULT; + /* * Detect deadlocks. */ if ((unlikely((uval & FUTEX_TID_MASK) == vpid))) return -EDEADLK; + if ((unlikely(should_fail_futex(true)))) + return -EDEADLK; + /* * Lookup existing state first. If it exists, try to attach to * its pi_state. @@ -1155,6 +1232,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, */ newval = FUTEX_WAITERS | task_pid_vnr(new_owner); + if (unlikely(should_fail_futex(true))) + ret = -EFAULT; + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) ret = -EFAULT; else if (curval != uval) @@ -1457,6 +1537,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, if (get_futex_value_locked(&curval, pifutex)) return -EFAULT; + if (unlikely(should_fail_futex(true))) + return -EFAULT; + /* * Find the top_waiter and determine if there are additional waiters. * If the caller intends to requeue more than 1 waiter to pifutex, @@ -2268,8 +2351,11 @@ static long futex_wait_restart(struct restart_block *restart) /* * Userspace tried a 0 -> TID atomic transition of the futex value * and failed. The kernel side here does the whole locking operation: - * if there are waiters then it will block, it does PI, etc. (Due to - * races the kernel might see a 0 value of the futex too.) + * if there are waiters then it will block as a consequence of relying + * on rt-mutexes, it does PI, etc. (Due to races the kernel might see + * a 0 value of the futex too.). + * + * Also serves as futex trylock_pi()'ing, and due semantics. */ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) @@ -2300,6 +2386,10 @@ retry_private: ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); if (unlikely(ret)) { + /* + * Atomic work succeeded and we got the lock, + * or failed. Either way, we do _not_ block. + */ switch (ret) { case 1: /* We got the lock. */ @@ -2530,7 +2620,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 * @uaddr: the futex we initially wait on (non-pi) * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be - * the same type, no requeueing from private to shared, etc. + * the same type, no requeueing from private to shared, etc. * @val: the expected value of uaddr * @abs_time: absolute timeout * @bitset: 32 bit wakeup bitset set by userspace, defaults to all @@ -3005,6 +3095,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || cmd == FUTEX_WAIT_REQUEUE_PI)) { + if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) + return -EFAULT; if (copy_from_user(&ts, utime, sizeof(ts)) != 0) return -EFAULT; if (!timespec_valid(&ts)) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 52ebaca1b9fc..f7dd15d537f9 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -54,7 +54,7 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); } -static void jump_label_update(struct static_key *key, int enable); +static void jump_label_update(struct static_key *key); void static_key_slow_inc(struct static_key *key) { @@ -63,13 +63,8 @@ void static_key_slow_inc(struct static_key *key) return; jump_label_lock(); - if (atomic_read(&key->enabled) == 0) { - if (!jump_label_get_branch_default(key)) - jump_label_update(key, JUMP_LABEL_ENABLE); - else - jump_label_update(key, JUMP_LABEL_DISABLE); - } - atomic_inc(&key->enabled); + if (atomic_inc_return(&key->enabled) == 1) + jump_label_update(key); jump_label_unlock(); } EXPORT_SYMBOL_GPL(static_key_slow_inc); @@ -87,10 +82,7 @@ static void __static_key_slow_dec(struct static_key *key, atomic_inc(&key->enabled); schedule_delayed_work(work, rate_limit); } else { - if (!jump_label_get_branch_default(key)) - jump_label_update(key, JUMP_LABEL_DISABLE); - else - jump_label_update(key, JUMP_LABEL_ENABLE); + jump_label_update(key); } jump_label_unlock(); } @@ -149,7 +141,7 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start, return 0; } -/* +/* * Update code which is definitely not currently executing. * Architectures which need heavyweight synchronization to modify * running code can override this to make the non-live update case @@ -158,37 +150,54 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start, void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { - arch_jump_label_transform(entry, type); + arch_jump_label_transform(entry, type); +} + +static inline struct jump_entry *static_key_entries(struct static_key *key) +{ + return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK); +} + +static inline bool static_key_type(struct static_key *key) +{ + return (unsigned long)key->entries & JUMP_TYPE_MASK; +} + +static inline struct static_key *jump_entry_key(struct jump_entry *entry) +{ + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static bool jump_entry_branch(struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; +} + +static enum jump_label_type jump_label_type(struct jump_entry *entry) +{ + struct static_key *key = jump_entry_key(entry); + bool enabled = static_key_enabled(key); + bool branch = jump_entry_branch(entry); + + /* See the comment in linux/jump_label.h */ + return enabled ^ branch; } static void __jump_label_update(struct static_key *key, struct jump_entry *entry, - struct jump_entry *stop, int enable) + struct jump_entry *stop) { - for (; (entry < stop) && - (entry->key == (jump_label_t)(unsigned long)key); - entry++) { + for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { /* * entry->code set to 0 invalidates module init text sections * kernel_text_address() verifies we are not in core kernel * init code, see jump_label_invalidate_module_init(). */ if (entry->code && kernel_text_address(entry->code)) - arch_jump_label_transform(entry, enable); + arch_jump_label_transform(entry, jump_label_type(entry)); } } -static enum jump_label_type jump_label_type(struct static_key *key) -{ - bool true_branch = jump_label_get_branch_default(key); - bool state = static_key_enabled(key); - - if ((!true_branch && state) || (true_branch && !state)) - return JUMP_LABEL_ENABLE; - - return JUMP_LABEL_DISABLE; -} - void __init jump_label_init(void) { struct jump_entry *iter_start = __start___jump_table; @@ -202,8 +211,11 @@ void __init jump_label_init(void) for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; - iterk = (struct static_key *)(unsigned long)iter->key; - arch_jump_label_transform_static(iter, jump_label_type(iterk)); + /* rewrite NOPs */ + if (jump_label_type(iter) == JUMP_LABEL_NOP) + arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); + + iterk = jump_entry_key(iter); if (iterk == key) continue; @@ -222,6 +234,16 @@ void __init jump_label_init(void) #ifdef CONFIG_MODULES +static enum jump_label_type jump_label_init_type(struct jump_entry *entry) +{ + struct static_key *key = jump_entry_key(entry); + bool type = static_key_type(key); + bool branch = jump_entry_branch(entry); + + /* See the comment in linux/jump_label.h */ + return type ^ branch; +} + struct static_key_mod { struct static_key_mod *next; struct jump_entry *entries; @@ -243,17 +265,15 @@ static int __jump_label_mod_text_reserved(void *start, void *end) start, end); } -static void __jump_label_mod_update(struct static_key *key, int enable) +static void __jump_label_mod_update(struct static_key *key) { - struct static_key_mod *mod = key->next; + struct static_key_mod *mod; - while (mod) { + for (mod = key->next; mod; mod = mod->next) { struct module *m = mod->mod; __jump_label_update(key, mod->entries, - m->jump_entries + m->num_jump_entries, - enable); - mod = mod->next; + m->jump_entries + m->num_jump_entries); } } @@ -276,7 +296,9 @@ void jump_label_apply_nops(struct module *mod) return; for (iter = iter_start; iter < iter_stop; iter++) { - arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); + /* Only write NOPs for arch_branch_static(). */ + if (jump_label_init_type(iter) == JUMP_LABEL_NOP) + arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); } } @@ -297,7 +319,7 @@ static int jump_label_add_module(struct module *mod) for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; - iterk = (struct static_key *)(unsigned long)iter->key; + iterk = jump_entry_key(iter); if (iterk == key) continue; @@ -318,8 +340,9 @@ static int jump_label_add_module(struct module *mod) jlm->next = key->next; key->next = jlm; - if (jump_label_type(key) == JUMP_LABEL_ENABLE) - __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); + /* Only update if we've changed from our initial state */ + if (jump_label_type(iter) != jump_label_init_type(iter)) + __jump_label_update(key, iter, iter_stop); } return 0; @@ -334,10 +357,10 @@ static void jump_label_del_module(struct module *mod) struct static_key_mod *jlm, **prev; for (iter = iter_start; iter < iter_stop; iter++) { - if (iter->key == (jump_label_t)(unsigned long)key) + if (jump_entry_key(iter) == key) continue; - key = (struct static_key *)(unsigned long)iter->key; + key = jump_entry_key(iter); if (within_module(iter->key, mod)) continue; @@ -439,14 +462,14 @@ int jump_label_text_reserved(void *start, void *end) return ret; } -static void jump_label_update(struct static_key *key, int enable) +static void jump_label_update(struct static_key *key) { struct jump_entry *stop = __stop___jump_table; - struct jump_entry *entry = jump_label_get_entries(key); + struct jump_entry *entry = static_key_entries(key); #ifdef CONFIG_MODULES struct module *mod; - __jump_label_mod_update(key, enable); + __jump_label_mod_update(key); preempt_disable(); mod = __module_address((unsigned long)key); @@ -456,7 +479,44 @@ static void jump_label_update(struct static_key *key, int enable) #endif /* if there are no users, entry can be NULL */ if (entry) - __jump_label_update(key, entry, stop, enable); + __jump_label_update(key, entry, stop); } -#endif +#ifdef CONFIG_STATIC_KEYS_SELFTEST +static DEFINE_STATIC_KEY_TRUE(sk_true); +static DEFINE_STATIC_KEY_FALSE(sk_false); + +static __init int jump_label_test(void) +{ + int i; + + for (i = 0; i < 2; i++) { + WARN_ON(static_key_enabled(&sk_true.key) != true); + WARN_ON(static_key_enabled(&sk_false.key) != false); + + WARN_ON(!static_branch_likely(&sk_true)); + WARN_ON(!static_branch_unlikely(&sk_true)); + WARN_ON(static_branch_likely(&sk_false)); + WARN_ON(static_branch_unlikely(&sk_false)); + + static_branch_disable(&sk_true); + static_branch_enable(&sk_false); + + WARN_ON(static_key_enabled(&sk_true.key) == true); + WARN_ON(static_key_enabled(&sk_false.key) == false); + + WARN_ON(static_branch_likely(&sk_true)); + WARN_ON(static_branch_unlikely(&sk_true)); + WARN_ON(!static_branch_likely(&sk_false)); + WARN_ON(!static_branch_unlikely(&sk_false)); + + static_branch_enable(&sk_true); + static_branch_disable(&sk_false); + } + + return 0; +} +late_initcall(jump_label_test); +#endif /* STATIC_KEYS_SELFTEST */ + +#endif /* HAVE_JUMP_LABEL */ diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 7dd5c9918e4c..36942047ffc0 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o -obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index 6c5da483966b..6a7a3b8d5ac9 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -60,22 +60,23 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts) } /** - * queue_read_lock_slowpath - acquire read lock of a queue rwlock + * queued_read_lock_slowpath - acquire read lock of a queue rwlock * @lock: Pointer to queue rwlock structure + * @cnts: Current qrwlock lock value */ -void queue_read_lock_slowpath(struct qrwlock *lock) +void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts) { - u32 cnts; - /* * Readers come here when they cannot get the lock without waiting */ if (unlikely(in_interrupt())) { /* - * Readers in interrupt context will spin until the lock is - * available without waiting in the queue. + * Readers in interrupt context will get the lock immediately + * if the writer is just waiting (not holding the lock yet). + * The rspin_until_writer_unlock() function returns immediately + * in this case. Otherwise, they will spin until the lock + * is available without waiting in the queue. */ - cnts = smp_load_acquire((u32 *)&lock->cnts); rspin_until_writer_unlock(lock, cnts); return; } @@ -87,15 +88,11 @@ void queue_read_lock_slowpath(struct qrwlock *lock) arch_spin_lock(&lock->lock); /* - * At the head of the wait queue now, wait until the writer state - * goes to 0 and then try to increment the reader count and get - * the lock. It is possible that an incoming writer may steal the - * lock in the interim, so it is necessary to check the writer byte - * to make sure that the write lock isn't taken. + * At the head of the wait queue now, increment the reader count + * and wait until the writer, if it has the lock, has gone away. + * At ths stage, it is not possible for a writer to remain in the + * waiting state (_QW_WAITING). So there won't be any deadlock. */ - while (atomic_read(&lock->cnts) & _QW_WMASK) - cpu_relax_lowlatency(); - cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS; rspin_until_writer_unlock(lock, cnts); @@ -104,13 +101,13 @@ void queue_read_lock_slowpath(struct qrwlock *lock) */ arch_spin_unlock(&lock->lock); } -EXPORT_SYMBOL(queue_read_lock_slowpath); +EXPORT_SYMBOL(queued_read_lock_slowpath); /** - * queue_write_lock_slowpath - acquire write lock of a queue rwlock + * queued_write_lock_slowpath - acquire write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ -void queue_write_lock_slowpath(struct qrwlock *lock) +void queued_write_lock_slowpath(struct qrwlock *lock) { u32 cnts; @@ -149,4 +146,4 @@ void queue_write_lock_slowpath(struct qrwlock *lock) unlock: arch_spin_unlock(&lock->lock); } -EXPORT_SYMBOL(queue_write_lock_slowpath); +EXPORT_SYMBOL(queued_write_lock_slowpath); diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 38c49202d532..337c8818541d 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -239,8 +239,8 @@ static __always_inline void set_locked(struct qspinlock *lock) static __always_inline void __pv_init_node(struct mcs_spinlock *node) { } static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { } -static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { } - +static __always_inline void __pv_kick_node(struct qspinlock *lock, + struct mcs_spinlock *node) { } static __always_inline void __pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) { } @@ -440,7 +440,7 @@ queue: cpu_relax(); arch_mcs_spin_unlock_contended(&next->locked); - pv_kick_node(next); + pv_kick_node(lock, next); release: /* diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 04ab18151cc8..c8e6e9a596f5 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -4,6 +4,7 @@ #include <linux/hash.h> #include <linux/bootmem.h> +#include <linux/debug_locks.h> /* * Implement paravirt qspinlocks; the general idea is to halt the vcpus instead @@ -21,9 +22,14 @@ #define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET) +/* + * Queue node uses: vcpu_running & vcpu_halted. + * Queue head uses: vcpu_running & vcpu_hashed. + */ enum vcpu_state { vcpu_running = 0, - vcpu_halted, + vcpu_halted, /* Used only in pv_wait_node */ + vcpu_hashed, /* = pv_hash'ed + vcpu_halted */ }; struct pv_node { @@ -152,7 +158,8 @@ static void pv_init_node(struct mcs_spinlock *node) /* * Wait for node->locked to become true, halt the vcpu after a short spin. - * pv_kick_node() is used to wake the vcpu again. + * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its + * behalf. */ static void pv_wait_node(struct mcs_spinlock *node) { @@ -171,9 +178,9 @@ static void pv_wait_node(struct mcs_spinlock *node) * * [S] pn->state = vcpu_halted [S] next->locked = 1 * MB MB - * [L] pn->locked [RmW] pn->state = vcpu_running + * [L] pn->locked [RmW] pn->state = vcpu_hashed * - * Matches the xchg() from pv_kick_node(). + * Matches the cmpxchg() from pv_kick_node(). */ smp_store_mb(pn->state, vcpu_halted); @@ -181,9 +188,10 @@ static void pv_wait_node(struct mcs_spinlock *node) pv_wait(&pn->state, vcpu_halted); /* - * Reset the vCPU state to avoid unncessary CPU kicking + * If pv_kick_node() changed us to vcpu_hashed, retain that value + * so that pv_wait_head() knows to not also try to hash this lock. */ - WRITE_ONCE(pn->state, vcpu_running); + cmpxchg(&pn->state, vcpu_halted, vcpu_running); /* * If the locked flag is still not set after wakeup, it is a @@ -193,6 +201,7 @@ static void pv_wait_node(struct mcs_spinlock *node) * MCS lock will be released soon. */ } + /* * By now our node->locked should be 1 and our caller will not actually * spin-wait for it. We do however rely on our caller to do a @@ -201,24 +210,35 @@ static void pv_wait_node(struct mcs_spinlock *node) } /* - * Called after setting next->locked = 1, used to wake those stuck in - * pv_wait_node(). + * Called after setting next->locked = 1 when we're the lock owner. + * + * Instead of waking the waiters stuck in pv_wait_node() advance their state such + * that they're waiting in pv_wait_head(), this avoids a wake/sleep cycle. */ -static void pv_kick_node(struct mcs_spinlock *node) +static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; + struct __qspinlock *l = (void *)lock; /* - * Note that because node->locked is already set, this actual - * mcs_spinlock entry could be re-used already. + * If the vCPU is indeed halted, advance its state to match that of + * pv_wait_node(). If OTOH this fails, the vCPU was running and will + * observe its next->locked value and advance itself. * - * This should be fine however, kicking people for no reason is - * harmless. + * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() + */ + if (cmpxchg(&pn->state, vcpu_halted, vcpu_hashed) != vcpu_halted) + return; + + /* + * Put the lock into the hash table and set the _Q_SLOW_VAL. * - * See the comment in pv_wait_node(). + * As this is the same vCPU that will check the _Q_SLOW_VAL value and + * the hash table later on at unlock time, no atomic instruction is + * needed. */ - if (xchg(&pn->state, vcpu_running) == vcpu_halted) - pv_kick(pn->cpu); + WRITE_ONCE(l->locked, _Q_SLOW_VAL); + (void)pv_hash(lock, pn); } /* @@ -232,6 +252,13 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) struct qspinlock **lp = NULL; int loop; + /* + * If pv_kick_node() already advanced our state, we don't need to + * insert ourselves into the hash table anymore. + */ + if (READ_ONCE(pn->state) == vcpu_hashed) + lp = (struct qspinlock **)1; + for (;;) { for (loop = SPIN_THRESHOLD; loop; loop--) { if (!READ_ONCE(l->locked)) @@ -239,17 +266,22 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) cpu_relax(); } - WRITE_ONCE(pn->state, vcpu_halted); if (!lp) { /* ONCE */ + WRITE_ONCE(pn->state, vcpu_hashed); lp = pv_hash(lock, pn); + /* - * lp must be set before setting _Q_SLOW_VAL + * We must hash before setting _Q_SLOW_VAL, such that + * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock() + * we'll be sure to be able to observe our hash entry. * - * [S] lp = lock [RmW] l = l->locked = 0 - * MB MB - * [S] l->locked = _Q_SLOW_VAL [L] lp + * [S] pn->state + * [S] <hash> [Rmw] l->locked == _Q_SLOW_VAL + * MB RMB + * [RmW] l->locked = _Q_SLOW_VAL [L] <unhash> + * [L] pn->state * - * Matches the cmpxchg() in __pv_queued_spin_unlock(). + * Matches the smp_rmb() in __pv_queued_spin_unlock(). */ if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) { /* @@ -286,14 +318,32 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) { struct __qspinlock *l = (void *)lock; struct pv_node *node; + u8 locked; /* * We must not unlock if SLOW, because in that case we must first * unhash. Otherwise it would be possible to have multiple @lock * entries, which would be BAD. */ - if (likely(cmpxchg(&l->locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL)) + locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + if (likely(locked == _Q_LOCKED_VAL)) + return; + + if (unlikely(locked != _Q_SLOW_VAL)) { + WARN(!debug_locks_silent, + "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n", + (unsigned long)lock, atomic_read(&lock->val)); return; + } + + /* + * A failed cmpxchg doesn't provide any memory-ordering guarantees, + * so we need a barrier to order the read of the node data in + * pv_unhash *after* we've read the lock being _Q_SLOW_VAL. + * + * Matches the cmpxchg() in pv_wait_head() setting _Q_SLOW_VAL. + */ + smp_rmb(); /* * Since the above failed to release, this must be the SLOW path. @@ -310,8 +360,11 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) /* * At this point the memory pointed at by lock can be freed/reused, * however we can still use the pv_node to kick the CPU. + * The other vCPU may not really be halted, but kicking an active + * vCPU is harmless other than the additional latency in completing + * the unlock. */ - if (READ_ONCE(node->state) == vcpu_halted) + if (READ_ONCE(node->state) == vcpu_hashed) pv_kick(node->cpu); } /* diff --git a/kernel/locking/rtmutex-tester.c b/kernel/locking/rtmutex-tester.c deleted file mode 100644 index 1d96dd0d93c1..000000000000 --- a/kernel/locking/rtmutex-tester.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * RT-Mutex-tester: scriptable tester for rt mutexes - * - * started by Thomas Gleixner: - * - * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> - * - */ -#include <linux/device.h> -#include <linux/kthread.h> -#include <linux/export.h> -#include <linux/sched.h> -#include <linux/sched/rt.h> -#include <linux/spinlock.h> -#include <linux/timer.h> -#include <linux/freezer.h> -#include <linux/stat.h> - -#include "rtmutex.h" - -#define MAX_RT_TEST_THREADS 8 -#define MAX_RT_TEST_MUTEXES 8 - -static spinlock_t rttest_lock; -static atomic_t rttest_event; - -struct test_thread_data { - int opcode; - int opdata; - int mutexes[MAX_RT_TEST_MUTEXES]; - int event; - struct device dev; -}; - -static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; -static struct task_struct *threads[MAX_RT_TEST_THREADS]; -static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; - -enum test_opcodes { - RTTEST_NOP = 0, - RTTEST_SCHEDOT, /* 1 Sched other, data = nice */ - RTTEST_SCHEDRT, /* 2 Sched fifo, data = prio */ - RTTEST_LOCK, /* 3 Lock uninterruptible, data = lockindex */ - RTTEST_LOCKNOWAIT, /* 4 Lock uninterruptible no wait in wakeup, data = lockindex */ - RTTEST_LOCKINT, /* 5 Lock interruptible, data = lockindex */ - RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */ - RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */ - RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */ - /* 9, 10 - reserved for BKL commemoration */ - RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */ - RTTEST_RESETEVENT = 98, /* 98 Reset event counter */ - RTTEST_RESET = 99, /* 99 Reset all pending operations */ -}; - -static int handle_op(struct test_thread_data *td, int lockwakeup) -{ - int i, id, ret = -EINVAL; - - switch(td->opcode) { - - case RTTEST_NOP: - return 0; - - case RTTEST_LOCKCONT: - td->mutexes[td->opdata] = 1; - td->event = atomic_add_return(1, &rttest_event); - return 0; - - case RTTEST_RESET: - for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) { - if (td->mutexes[i] == 4) { - rt_mutex_unlock(&mutexes[i]); - td->mutexes[i] = 0; - } - } - return 0; - - case RTTEST_RESETEVENT: - atomic_set(&rttest_event, 0); - return 0; - - default: - if (lockwakeup) - return ret; - } - - switch(td->opcode) { - - case RTTEST_LOCK: - case RTTEST_LOCKNOWAIT: - id = td->opdata; - if (id < 0 || id >= MAX_RT_TEST_MUTEXES) - return ret; - - td->mutexes[id] = 1; - td->event = atomic_add_return(1, &rttest_event); - rt_mutex_lock(&mutexes[id]); - td->event = atomic_add_return(1, &rttest_event); - td->mutexes[id] = 4; - return 0; - - case RTTEST_LOCKINT: - case RTTEST_LOCKINTNOWAIT: - id = td->opdata; - if (id < 0 || id >= MAX_RT_TEST_MUTEXES) - return ret; - - td->mutexes[id] = 1; - td->event = atomic_add_return(1, &rttest_event); - ret = rt_mutex_lock_interruptible(&mutexes[id], 0); - td->event = atomic_add_return(1, &rttest_event); - td->mutexes[id] = ret ? 0 : 4; - return ret ? -EINTR : 0; - - case RTTEST_UNLOCK: - id = td->opdata; - if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4) - return ret; - - td->event = atomic_add_return(1, &rttest_event); - rt_mutex_unlock(&mutexes[id]); - td->event = atomic_add_return(1, &rttest_event); - td->mutexes[id] = 0; - return 0; - - default: - break; - } - return ret; -} - -/* - * Schedule replacement for rtsem_down(). Only called for threads with - * PF_MUTEX_TESTER set. - * - * This allows us to have finegrained control over the event flow. - * - */ -void schedule_rt_mutex_test(struct rt_mutex *mutex) -{ - int tid, op, dat; - struct test_thread_data *td; - - /* We have to lookup the task */ - for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) { - if (threads[tid] == current) - break; - } - - BUG_ON(tid == MAX_RT_TEST_THREADS); - - td = &thread_data[tid]; - - op = td->opcode; - dat = td->opdata; - - switch (op) { - case RTTEST_LOCK: - case RTTEST_LOCKINT: - case RTTEST_LOCKNOWAIT: - case RTTEST_LOCKINTNOWAIT: - if (mutex != &mutexes[dat]) - break; - - if (td->mutexes[dat] != 1) - break; - - td->mutexes[dat] = 2; - td->event = atomic_add_return(1, &rttest_event); - break; - - default: - break; - } - - schedule(); - - - switch (op) { - case RTTEST_LOCK: - case RTTEST_LOCKINT: - if (mutex != &mutexes[dat]) - return; - - if (td->mutexes[dat] != 2) - return; - - td->mutexes[dat] = 3; - td->event = atomic_add_return(1, &rttest_event); - break; - - case RTTEST_LOCKNOWAIT: - case RTTEST_LOCKINTNOWAIT: - if (mutex != &mutexes[dat]) - return; - - if (td->mutexes[dat] != 2) - return; - - td->mutexes[dat] = 1; - td->event = atomic_add_return(1, &rttest_event); - return; - - default: - return; - } - - td->opcode = 0; - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - - if (td->opcode > 0) { - int ret; - - set_current_state(TASK_RUNNING); - ret = handle_op(td, 1); - set_current_state(TASK_INTERRUPTIBLE); - if (td->opcode == RTTEST_LOCKCONT) - break; - td->opcode = ret; - } - - /* Wait for the next command to be executed */ - schedule(); - } - - /* Restore previous command and data */ - td->opcode = op; - td->opdata = dat; -} - -static int test_func(void *data) -{ - struct test_thread_data *td = data; - int ret; - - current->flags |= PF_MUTEX_TESTER; - set_freezable(); - allow_signal(SIGHUP); - - for(;;) { - - set_current_state(TASK_INTERRUPTIBLE); - - if (td->opcode > 0) { - set_current_state(TASK_RUNNING); - ret = handle_op(td, 0); - set_current_state(TASK_INTERRUPTIBLE); - td->opcode = ret; - } - - /* Wait for the next command to be executed */ - schedule(); - try_to_freeze(); - - if (signal_pending(current)) - flush_signals(current); - - if(kthread_should_stop()) - break; - } - return 0; -} - -/** - * sysfs_test_command - interface for test commands - * @dev: thread reference - * @buf: command for actual step - * @count: length of buffer - * - * command syntax: - * - * opcode:data - */ -static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct sched_param schedpar; - struct test_thread_data *td; - char cmdbuf[32]; - int op, dat, tid, ret; - - td = container_of(dev, struct test_thread_data, dev); - tid = td->dev.id; - - /* strings from sysfs write are not 0 terminated! */ - if (count >= sizeof(cmdbuf)) - return -EINVAL; - - /* strip of \n: */ - if (buf[count-1] == '\n') - count--; - if (count < 1) - return -EINVAL; - - memcpy(cmdbuf, buf, count); - cmdbuf[count] = 0; - - if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2) - return -EINVAL; - - switch (op) { - case RTTEST_SCHEDOT: - schedpar.sched_priority = 0; - ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar); - if (ret) - return ret; - set_user_nice(current, 0); - break; - - case RTTEST_SCHEDRT: - schedpar.sched_priority = dat; - ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar); - if (ret) - return ret; - break; - - case RTTEST_SIGNAL: - send_sig(SIGHUP, threads[tid], 0); - break; - - default: - if (td->opcode > 0) - return -EBUSY; - td->opdata = dat; - td->opcode = op; - wake_up_process(threads[tid]); - } - - return count; -} - -/** - * sysfs_test_status - sysfs interface for rt tester - * @dev: thread to query - * @buf: char buffer to be filled with thread status info - */ -static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct test_thread_data *td; - struct task_struct *tsk; - char *curr = buf; - int i; - - td = container_of(dev, struct test_thread_data, dev); - tsk = threads[td->dev.id]; - - spin_lock(&rttest_lock); - - curr += sprintf(curr, - "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:", - td->opcode, td->event, tsk->state, - (MAX_RT_PRIO - 1) - tsk->prio, - (MAX_RT_PRIO - 1) - tsk->normal_prio, - tsk->pi_blocked_on); - - for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--) - curr += sprintf(curr, "%d", td->mutexes[i]); - - spin_unlock(&rttest_lock); - - curr += sprintf(curr, ", T: %p, R: %p\n", tsk, - mutexes[td->dev.id].owner); - - return curr - buf; -} - -static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL); -static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command); - -static struct bus_type rttest_subsys = { - .name = "rttest", - .dev_name = "rttest", -}; - -static int init_test_thread(int id) -{ - thread_data[id].dev.bus = &rttest_subsys; - thread_data[id].dev.id = id; - - threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id); - if (IS_ERR(threads[id])) - return PTR_ERR(threads[id]); - - return device_register(&thread_data[id].dev); -} - -static int init_rttest(void) -{ - int ret, i; - - spin_lock_init(&rttest_lock); - - for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) - rt_mutex_init(&mutexes[i]); - - ret = subsys_system_register(&rttest_subsys, NULL); - if (ret) - return ret; - - for (i = 0; i < MAX_RT_TEST_THREADS; i++) { - ret = init_test_thread(i); - if (ret) - break; - ret = device_create_file(&thread_data[i].dev, &dev_attr_status); - if (ret) - break; - ret = device_create_file(&thread_data[i].dev, &dev_attr_command); - if (ret) - break; - } - - printk("Initializing RT-Tester: %s\n", ret ? "Failed" : "OK" ); - - return ret; -} - -device_initcall(init_rttest); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 5674b073473c..7781d801212f 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1120,7 +1120,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, debug_rt_mutex_print_deadlock(waiter); - schedule_rt_mutex(lock); + schedule(); raw_spin_lock(&lock->wait_lock); set_current_state(state); diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 7844f8f0e639..4f5f83c7d2d3 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -15,28 +15,6 @@ #include <linux/rtmutex.h> /* - * The rtmutex in kernel tester is independent of rtmutex debugging. We - * call schedule_rt_mutex_test() instead of schedule() for the tasks which - * belong to the tester. That way we can delay the wakeup path of those - * threads to provoke lock stealing and testing of complex boosting scenarios. - */ -#ifdef CONFIG_RT_MUTEX_TESTER - -extern void schedule_rt_mutex_test(struct rt_mutex *lock); - -#define schedule_rt_mutex(_lock) \ - do { \ - if (!(current->flags & PF_MUTEX_TESTER)) \ - schedule(); \ - else \ - schedule_rt_mutex_test(_lock); \ - } while (0) - -#else -# define schedule_rt_mutex(_lock) schedule() -#endif - -/* * This is the control structure for tasks blocked on a rt_mutex, * which is allocated on the kernel stack on of the blocked task. * diff --git a/kernel/notifier.c b/kernel/notifier.c index ae9fc7cc360e..980e4330fb59 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -544,6 +544,8 @@ int notrace notify_die(enum die_val val, const char *str, .signr = sig, }; + rcu_lockdep_assert(rcu_is_watching(), + "notify_die called but RCU thinks we're quiescent"); return atomic_notifier_call_chain(&die_chain, val, &args); } NOKPROBE_SYMBOL(notify_die); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78b4bad10081..66ae8baf42fe 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -164,14 +164,12 @@ struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { static void sched_feat_disable(int i) { - if (static_key_enabled(&sched_feat_keys[i])) - static_key_slow_dec(&sched_feat_keys[i]); + static_key_disable(&sched_feat_keys[i]); } static void sched_feat_enable(int i) { - if (!static_key_enabled(&sched_feat_keys[i])) - static_key_slow_inc(&sched_feat_keys[i]); + static_key_enable(&sched_feat_keys[i]); } #else static void sched_feat_disable(int i) { }; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7995ef5868d8..ca7d84f438f1 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -140,6 +140,7 @@ cond_syscall(sys_sgetmask); cond_syscall(sys_ssetmask); cond_syscall(sys_vm86old); cond_syscall(sys_vm86); +cond_syscall(sys_modify_ldt); cond_syscall(sys_ipc); cond_syscall(compat_sys_ipc); cond_syscall(compat_sys_sysctl); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e2894b23efb6..0d859305c556 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -916,12 +916,6 @@ config DEBUG_RT_MUTEXES This allows rt mutex semantics violations and rt mutex related deadlocks (lockups) to be detected and reported automatically. -config RT_MUTEX_TESTER - bool "Built-in scriptable tester for rt-mutexes" - depends on DEBUG_KERNEL && RT_MUTEXES && BROKEN - help - This option enables a rt-mutex tester. - config DEBUG_SPINLOCK bool "Spinlock and rw-lock debugging: basic checks" depends on DEBUG_KERNEL @@ -1542,6 +1536,13 @@ config FAIL_MMC_REQUEST and to test how the mmc host driver handles retries from the block device. +config FAIL_FUTEX + bool "Fault-injection capability for futexes" + select DEBUG_FS + depends on FAULT_INJECTION && FUTEX + help + Provide fault-injection capability for futexes. + config FAULT_INJECTION_DEBUG_FS bool "Debugfs entries for fault-injection capabilities" depends on FAULT_INJECTION && SYSFS && DEBUG_FS @@ -1840,6 +1841,15 @@ config MEMTEST memtest=17, mean do 17 test patterns. If you are unsure how to answer this question, answer N. +config TEST_STATIC_KEYS + tristate "Test static keys" + default n + depends on m + help + Test the static key interfaces. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index 6897b527581a..9f2fc71a14a3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -39,6 +39,8 @@ obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o +obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o +obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_static_key_base.c b/lib/test_static_key_base.c new file mode 100644 index 000000000000..729447aea02f --- /dev/null +++ b/lib/test_static_key_base.c @@ -0,0 +1,68 @@ +/* + * Kernel module for testing static keys. + * + * Copyright 2015 Akamai Technologies Inc. All Rights Reserved + * + * Authors: + * Jason Baron <jbaron@akamai.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/jump_label.h> + +/* old keys */ +struct static_key base_old_true_key = STATIC_KEY_INIT_TRUE; +EXPORT_SYMBOL_GPL(base_old_true_key); +struct static_key base_inv_old_true_key = STATIC_KEY_INIT_TRUE; +EXPORT_SYMBOL_GPL(base_inv_old_true_key); +struct static_key base_old_false_key = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL_GPL(base_old_false_key); +struct static_key base_inv_old_false_key = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL_GPL(base_inv_old_false_key); + +/* new keys */ +DEFINE_STATIC_KEY_TRUE(base_true_key); +EXPORT_SYMBOL_GPL(base_true_key); +DEFINE_STATIC_KEY_TRUE(base_inv_true_key); +EXPORT_SYMBOL_GPL(base_inv_true_key); +DEFINE_STATIC_KEY_FALSE(base_false_key); +EXPORT_SYMBOL_GPL(base_false_key); +DEFINE_STATIC_KEY_FALSE(base_inv_false_key); +EXPORT_SYMBOL_GPL(base_inv_false_key); + +static void invert_key(struct static_key *key) +{ + if (static_key_enabled(key)) + static_key_disable(key); + else + static_key_enable(key); +} + +static int __init test_static_key_base_init(void) +{ + invert_key(&base_inv_old_true_key); + invert_key(&base_inv_old_false_key); + invert_key(&base_inv_true_key.key); + invert_key(&base_inv_false_key.key); + + return 0; +} + +static void __exit test_static_key_base_exit(void) +{ +} + +module_init(test_static_key_base_init); +module_exit(test_static_key_base_exit); + +MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_static_keys.c b/lib/test_static_keys.c new file mode 100644 index 000000000000..c61b299e367f --- /dev/null +++ b/lib/test_static_keys.c @@ -0,0 +1,225 @@ +/* + * Kernel module for testing static keys. + * + * Copyright 2015 Akamai Technologies Inc. All Rights Reserved + * + * Authors: + * Jason Baron <jbaron@akamai.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/jump_label.h> + +/* old keys */ +struct static_key old_true_key = STATIC_KEY_INIT_TRUE; +struct static_key old_false_key = STATIC_KEY_INIT_FALSE; + +/* new api */ +DEFINE_STATIC_KEY_TRUE(true_key); +DEFINE_STATIC_KEY_FALSE(false_key); + +/* external */ +extern struct static_key base_old_true_key; +extern struct static_key base_inv_old_true_key; +extern struct static_key base_old_false_key; +extern struct static_key base_inv_old_false_key; + +/* new api */ +extern struct static_key_true base_true_key; +extern struct static_key_true base_inv_true_key; +extern struct static_key_false base_false_key; +extern struct static_key_false base_inv_false_key; + + +struct test_key { + bool init_state; + struct static_key *key; + bool (*test_key)(void); +}; + +#define test_key_func(key, branch) \ + ({bool func(void) { return branch(key); } func; }) + +static void invert_key(struct static_key *key) +{ + if (static_key_enabled(key)) + static_key_disable(key); + else + static_key_enable(key); +} + +static void invert_keys(struct test_key *keys, int size) +{ + struct static_key *previous = NULL; + int i; + + for (i = 0; i < size; i++) { + if (previous != keys[i].key) { + invert_key(keys[i].key); + previous = keys[i].key; + } + } +} + +static int verify_keys(struct test_key *keys, int size, bool invert) +{ + int i; + bool ret, init; + + for (i = 0; i < size; i++) { + ret = static_key_enabled(keys[i].key); + init = keys[i].init_state; + if (ret != (invert ? !init : init)) + return -EINVAL; + ret = keys[i].test_key(); + if (static_key_enabled(keys[i].key)) { + if (!ret) + return -EINVAL; + } else { + if (ret) + return -EINVAL; + } + } + return 0; +} + +static int __init test_static_key_init(void) +{ + int ret; + int size; + + struct test_key static_key_tests[] = { + /* internal keys - old keys */ + { + .init_state = true, + .key = &old_true_key, + .test_key = test_key_func(&old_true_key, static_key_true), + }, + { + .init_state = false, + .key = &old_false_key, + .test_key = test_key_func(&old_false_key, static_key_false), + }, + /* internal keys - new keys */ + { + .init_state = true, + .key = &true_key.key, + .test_key = test_key_func(&true_key, static_branch_likely), + }, + { + .init_state = true, + .key = &true_key.key, + .test_key = test_key_func(&true_key, static_branch_unlikely), + }, + { + .init_state = false, + .key = &false_key.key, + .test_key = test_key_func(&false_key, static_branch_likely), + }, + { + .init_state = false, + .key = &false_key.key, + .test_key = test_key_func(&false_key, static_branch_unlikely), + }, + /* external keys - old keys */ + { + .init_state = true, + .key = &base_old_true_key, + .test_key = test_key_func(&base_old_true_key, static_key_true), + }, + { + .init_state = false, + .key = &base_inv_old_true_key, + .test_key = test_key_func(&base_inv_old_true_key, static_key_true), + }, + { + .init_state = false, + .key = &base_old_false_key, + .test_key = test_key_func(&base_old_false_key, static_key_false), + }, + { + .init_state = true, + .key = &base_inv_old_false_key, + .test_key = test_key_func(&base_inv_old_false_key, static_key_false), + }, + /* external keys - new keys */ + { + .init_state = true, + .key = &base_true_key.key, + .test_key = test_key_func(&base_true_key, static_branch_likely), + }, + { + .init_state = true, + .key = &base_true_key.key, + .test_key = test_key_func(&base_true_key, static_branch_unlikely), + }, + { + .init_state = false, + .key = &base_inv_true_key.key, + .test_key = test_key_func(&base_inv_true_key, static_branch_likely), + }, + { + .init_state = false, + .key = &base_inv_true_key.key, + .test_key = test_key_func(&base_inv_true_key, static_branch_unlikely), + }, + { + .init_state = false, + .key = &base_false_key.key, + .test_key = test_key_func(&base_false_key, static_branch_likely), + }, + { + .init_state = false, + .key = &base_false_key.key, + .test_key = test_key_func(&base_false_key, static_branch_unlikely), + }, + { + .init_state = true, + .key = &base_inv_false_key.key, + .test_key = test_key_func(&base_inv_false_key, static_branch_likely), + }, + { + .init_state = true, + .key = &base_inv_false_key.key, + .test_key = test_key_func(&base_inv_false_key, static_branch_unlikely), + }, + }; + + size = ARRAY_SIZE(static_key_tests); + + ret = verify_keys(static_key_tests, size, false); + if (ret) + goto out; + + invert_keys(static_key_tests, size); + ret = verify_keys(static_key_tests, size, true); + if (ret) + goto out; + + invert_keys(static_key_tests, size); + ret = verify_keys(static_key_tests, size, false); + if (ret) + goto out; + return 0; +out: + return ret; +} + +static void __exit test_static_key_exit(void) +{ +} + +module_init(test_static_key_init); +module_exit(test_static_key_exit); + +MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 3d0f7d2a0616..ad82324f710f 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2312,6 +2312,10 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) return 1; chan = conn->smp; + if (!chan) { + BT_ERR("SMP security requested but not available"); + return 1; + } if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 0ff6e1bbca91..fa7bfced888e 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -37,15 +37,30 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) { - if (!is_skb_forwardable(skb->dev, skb)) { - kfree_skb(skb); - } else { - skb_push(skb, ETH_HLEN); - br_drop_fake_rtable(skb); - skb_sender_cpu_clear(skb); - dev_queue_xmit(skb); + if (!is_skb_forwardable(skb->dev, skb)) + goto drop; + + skb_push(skb, ETH_HLEN); + br_drop_fake_rtable(skb); + skb_sender_cpu_clear(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL && + (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD))) { + int depth; + + if (!__vlan_get_protocol(skb, skb->protocol, &depth)) + goto drop; + + skb_set_network_header(skb, depth); } + dev_queue_xmit(skb); + + return 0; + +drop: + kfree_skb(skb); return 0; } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 1198a3dbad95..c94321955db7 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -445,6 +445,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (p->port->state == BR_STATE_DISABLED) goto unlock; + entry->state = p->state; rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 79db489cdade..0b39dcc65b94 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1416,8 +1416,7 @@ br_multicast_leave_group(struct net_bridge *br, spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED) || - timer_pending(&other_query->timer)) + (port && port->state == BR_STATE_DISABLED)) goto out; mdb = mlock_dereference(br->mdb, br); @@ -1425,6 +1424,31 @@ br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { + struct net_bridge_port_group __rcu **pp; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port != port) + continue; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + br_mdb_notify(br->dev, port, group, RTM_DELMDB); + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + } + goto out; + } + + if (timer_pending(&other_query->timer)) + goto out; + if (br->multicast_querier) { __br_multicast_send_query(br, port, &mp->addr); @@ -1450,28 +1474,6 @@ br_multicast_leave_group(struct net_bridge *br, } } - if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { - struct net_bridge_port_group __rcu **pp; - - for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; - pp = &p->next) { - if (p->port != port) - continue; - - rcu_assign_pointer(*pp, p->next); - hlist_del_init(&p->mglist); - del_timer(&p->timer); - call_rcu_bh(&p->rcu, br_multicast_free_pg); - br_mdb_notify(br->dev, port, group, RTM_DELMDB); - - if (!mp->ports && !mp->mglist && - netif_running(br->dev)) - mod_timer(&mp->timer, jiffies); - } - goto out; - } - now = jiffies; time = now + br->multicast_last_member_count * br->multicast_last_member_interval; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 364bdc98bd9b..3da5525eb8a2 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -693,9 +693,17 @@ static int br_port_slave_changelink(struct net_device *brdev, struct nlattr *tb[], struct nlattr *data[]) { + struct net_bridge *br = netdev_priv(brdev); + int ret; + if (!data) return 0; - return br_setport(br_port_get_rtnl(dev), data); + + spin_lock_bh(&br->lock); + ret = br_setport(br_port_get_rtnl(dev), data); + spin_unlock_bh(&br->lock); + + return ret; } static int br_port_fill_slave_info(struct sk_buff *skb, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index b4b6dab9c285..ed74ffaa851f 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -209,8 +209,9 @@ void br_transmit_config(struct net_bridge_port *p) br_send_config_bpdu(p, &bpdu); p->topology_change_ack = 0; p->config_pending = 0; - mod_timer(&p->hold_timer, - round_jiffies(jiffies + BR_HOLD_TIME)); + if (p->br->stp_enabled == BR_KERNEL_STP) + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); } } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a2730e7196cd..4ca449a16132 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -48,7 +48,8 @@ void br_stp_enable_bridge(struct net_bridge *br) struct net_bridge_port *p; spin_lock_bh(&br->lock); - mod_timer(&br->hello_timer, jiffies + br->hello_time); + if (br->stp_enabled == BR_KERNEL_STP) + mod_timer(&br->hello_timer, jiffies + br->hello_time); mod_timer(&br->gc_timer, jiffies + HZ/10); br_config_bpdu_generation(br); @@ -127,6 +128,7 @@ static void br_stp_start(struct net_bridge *br) int r; char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; char *envp[] = { NULL }; + struct net_bridge_port *p; r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); @@ -140,6 +142,10 @@ static void br_stp_start(struct net_bridge *br) if (r == 0) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); + /* Stop hello and hold timers */ + del_timer(&br->hello_timer); + list_for_each_entry(p, &br->port_list, list) + del_timer(&p->hold_timer); } else { br->stp_enabled = BR_KERNEL_STP; br_debug(br, "using kernel STP\n"); @@ -156,12 +162,17 @@ static void br_stp_stop(struct net_bridge *br) int r; char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL }; char *envp[] = { NULL }; + struct net_bridge_port *p; if (br->stp_enabled == BR_USER_STP) { r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); br_info(br, "userspace STP stopped, return code %d\n", r); /* To start timers on any ports left in blocking */ + mod_timer(&br->hello_timer, jiffies + br->hello_time); + list_for_each_entry(p, &br->port_list, list) + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); spin_lock_bh(&br->lock); br_port_state_selection(br); spin_unlock_bh(&br->lock); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 7caf7fae2d5b..5f0f5af0ec35 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -40,7 +40,9 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); + if (br->stp_enabled != BR_USER_STP) + mod_timer(&br->hello_timer, + round_jiffies(jiffies + br->hello_time)); } spin_unlock(&br->lock); } diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 1f2a126f4ffa..6441f47b1a8f 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -23,7 +23,8 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state struct cgroup_cls_state *task_cls_state(struct task_struct *p) { - return css_cls_state(task_css(p, net_cls_cgrp_id)); + return css_cls_state(task_css_check(p, net_cls_cgrp_id, + rcu_read_lock_bh_held())); } EXPORT_SYMBOL_GPL(task_cls_state); diff --git a/net/core/sock.c b/net/core/sock.c index 08f16db46070..193901d09757 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1497,7 +1497,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); /* SANITY */ - get_net(sock_net(newsk)); + if (likely(newsk->sk_net_refcnt)) + get_net(sock_net(newsk)); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); @@ -1967,20 +1968,21 @@ static void __release_sock(struct sock *sk) * sk_wait_data - wait for data to arrive at sk_receive_queue * @sk: sock to wait on * @timeo: for how long + * @skb: last skb seen on sk_receive_queue * * Now socket state including sk->sk_err is changed only under lock, * hence we may omit checks after joining wait queue. * We check receive queue before schedule() only as optimization; * it is very likely that release_sock() added new data. */ -int sk_wait_data(struct sock *sk, long *timeo) +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) { int rc; DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); + rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); finish_wait(sk_sleep(sk), &wait); return rc; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 52a94016526d..b5cf13a28009 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -886,7 +886,7 @@ verify_sock_status: break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); continue; found_ok_skb: if (len > skb->len) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index f46e4d1306f2..214d44aef35b 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -207,7 +207,7 @@ found: } else { fq->q.meat += skb->len; } - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { @@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, clone->data_len = clone->len; head->data_len -= clone->len; head->len -= clone->len; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } WARN_ON(head == NULL); @@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 933a92820d26..6c8b1fbafce8 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1017,14 +1017,16 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { - read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); - r->arp_flags = arp_state_to_flags(neigh); - read_unlock_bh(&neigh->lock); - r->arp_ha.sa_family = dev->type; - strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); + if (!(neigh->nud_state & NUD_NOARP)) { + read_lock_bh(&neigh->lock); + memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + r->arp_flags = arp_state_to_flags(neigh); + read_unlock_bh(&neigh->lock); + r->arp_ha.sa_family = dev->type; + strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); + err = 0; + } neigh_release(neigh); - err = 0; } return err; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e813196c91c7..2d9cb1748f81 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -882,7 +882,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); - blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } return 0; } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c6211ed60b03..9c02920725db 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -13,6 +13,7 @@ struct fib_alias { u8 fa_state; u8 fa_slen; u32 tb_id; + s16 fa_default; struct rcu_head rcu; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c7358ea4ae93..3a06586b170c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1202,23 +1202,40 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) } /* Must be invoked inside of an RCU protected region. */ -void fib_select_default(struct fib_result *res) +void fib_select_default(const struct flowi4 *flp, struct fib_result *res) { struct fib_info *fi = NULL, *last_resort = NULL; struct hlist_head *fa_head = res->fa_head; struct fib_table *tb = res->table; + u8 slen = 32 - res->prefixlen; int order = -1, last_idx = -1; - struct fib_alias *fa; + struct fib_alias *fa, *fa1 = NULL; + u32 last_prio = res->fi->fib_priority; + u8 last_tos = 0; hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; + if (fa->fa_slen != slen) + continue; + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; + if (fa->tb_id != tb->tb_id) + continue; + if (next_fi->fib_priority > last_prio && + fa->fa_tos == last_tos) { + if (last_tos) + continue; + break; + } + if (next_fi->fib_flags & RTNH_F_DEAD) + continue; + last_tos = fa->fa_tos; + last_prio = next_fi->fib_priority; + if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) continue; @@ -1228,10 +1245,11 @@ void fib_select_default(struct fib_result *res) if (!fi) { if (next_fi != res->fi) break; + fa1 = fa; } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { + &last_idx, fa1->fa_default)) { fib_result_assign(res, fi); - tb->tb_default = order; + fa1->fa_default = order; goto out; } fi = next_fi; @@ -1239,20 +1257,21 @@ void fib_select_default(struct fib_result *res) } if (order <= 0 || !fi) { - tb->tb_default = -1; + if (fa1) + fa1->fa_default = -1; goto out; } if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { + fa1->fa_default)) { fib_result_assign(res, fi); - tb->tb_default = order; + fa1->fa_default = order; goto out; } if (last_idx >= 0) fib_result_assign(res, last_resort); - tb->tb_default = last_idx; + fa1->fa_default = last_idx; out: return; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 15d32612e3c6..37c4bb89a708 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1171,6 +1171,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; new_fa->tb_id = tb->tb_id; + new_fa->fa_default = -1; err = switchdev_fib_ipv4_add(key, plen, fi, new_fa->fa_tos, @@ -1222,6 +1223,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = 0; new_fa->fa_slen = slen; new_fa->tb_id = tb->tb_id; + new_fa->fa_default = -1; /* (Optionally) offload fib entry to switch hardware. */ err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, @@ -1791,8 +1793,6 @@ void fib_table_flush_external(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } } @@ -1862,8 +1862,6 @@ int fib_table_flush(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } @@ -1990,7 +1988,6 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) return NULL; tb->tb_id = id; - tb->tb_default = -1; tb->tb_num_default = 0; tb->tb_data = (alias ? alias->__data : tb->__data); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5e346a082e5f..d0a7c0319e3d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -131,34 +131,22 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) unsigned int evicted = 0; HLIST_HEAD(expired); -evict_again: spin_lock(&hb->chain_lock); hlist_for_each_entry_safe(fq, n, &hb->chain, list) { if (!inet_fragq_should_evict(fq)) continue; - if (!del_timer(&fq->timer)) { - /* q expiring right now thus increment its refcount so - * it won't be freed under us and wait until the timer - * has finished executing then destroy it - */ - atomic_inc(&fq->refcnt); - spin_unlock(&hb->chain_lock); - del_timer_sync(&fq->timer); - inet_frag_put(fq, f); - goto evict_again; - } + if (!del_timer(&fq->timer)) + continue; - fq->flags |= INET_FRAG_EVICTED; - hlist_del(&fq->list); - hlist_add_head(&fq->list, &expired); + hlist_add_head(&fq->list_evictor, &expired); ++evicted; } spin_unlock(&hb->chain_lock); - hlist_for_each_entry_safe(fq, n, &expired, list) + hlist_for_each_entry_safe(fq, n, &expired, list_evictor) f->frag_expire((unsigned long) fq); return evicted; @@ -240,18 +228,20 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) int i; nf->low_thresh = 0; - local_bh_disable(); evict_again: + local_bh_disable(); seq = read_seqbegin(&f->rnd_seqlock); for (i = 0; i < INETFRAGS_HASHSZ ; i++) inet_evict_bucket(f, &f->hash[i]); - if (read_seqretry(&f->rnd_seqlock, seq)) - goto evict_again; - local_bh_enable(); + cond_resched(); + + if (read_seqretry(&f->rnd_seqlock, seq) || + percpu_counter_sum(&nf->mem)) + goto evict_again; percpu_counter_destroy(&nf->mem); } @@ -284,8 +274,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket *hb; hb = get_frag_bucket_locked(fq, f); - if (!(fq->flags & INET_FRAG_EVICTED)) - hlist_del(&fq->list); + hlist_del(&fq->list); + fq->flags |= INET_FRAG_COMPLETE; spin_unlock(&hb->chain_lock); } @@ -297,7 +287,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) if (!(fq->flags & INET_FRAG_COMPLETE)) { fq_unlink(fq, f); atomic_dec(&fq->refcnt); - fq->flags |= INET_FRAG_COMPLETE; } } EXPORT_SYMBOL(inet_frag_kill); @@ -330,11 +319,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) fp = xp; } sum = sum_truesize + f->qsize; - sub_frag_mem_limit(q, sum); if (f->destructor) f->destructor(q); kmem_cache_free(f->frags_cachep, q); + + sub_frag_mem_limit(nf, sum); } EXPORT_SYMBOL(inet_frag_destroy); @@ -390,7 +380,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, q->net = nf; f->constructor(q, arg); - add_frag_mem_limit(q, f->qsize); + add_frag_mem_limit(nf, f->qsize); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 31f71b15cfba..921138f6c97c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); - if (!(qp->q.flags & INET_FRAG_EVICTED)) { + if (!inet_frag_evicting(&qp->q)) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; @@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp) kfree_skb(fp); fp = xp; } while (fp); - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); qp->q.flags = 0; qp->q.len = 0; @@ -455,7 +455,7 @@ found: qp->q.fragments = next; qp->q.meat -= free_it->len; - sub_frag_mem_limit(&qp->q, free_it->truesize); + sub_frag_mem_limit(qp->q.net, free_it->truesize); kfree_skb(free_it); } } @@ -479,7 +479,7 @@ found: qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - add_frag_mem_limit(&qp->q, skb->truesize); + add_frag_mem_limit(qp->q.net, skb->truesize); if (offset == 0) qp->q.flags |= INET_FRAG_FIRST_IN; @@ -587,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&qp->q, clone->truesize); + add_frag_mem_limit(qp->q.net, clone->truesize); } skb_push(head, head->data - skb_network_header(head)); @@ -615,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d0362a2de3d3..e681b852ced1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2176,7 +2176,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (!res.prefixlen && res.table->tb_num_default > 1 && res.type == RTN_UNICAST && !fl4->flowi4_oif) - fib_select_default(&res); + fib_select_default(fl4, &res); if (!fl4->saddr) fl4->saddr = FIB_RES_PREFSRC(net, res); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7f4056785acc..45534a5ab430 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -780,7 +780,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; @@ -1575,7 +1575,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; - struct sk_buff *skb; + struct sk_buff *skb, *last; u32 urg_hole = 0; if (unlikely(flags & MSG_ERRQUEUE)) @@ -1635,7 +1635,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Next get a buffer. */ + last = skb_peek_tail(&sk->sk_receive_queue); skb_queue_walk(&sk->sk_receive_queue, skb) { + last = skb; /* Now that we have two receive queues this * shouldn't happen. */ @@ -1754,8 +1756,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Do not sleep, just process backlog. */ release_sock(sk); lock_sock(sk); - } else - sk_wait_data(sk, &timeo); + } else { + sk_wait_data(sk, &timeo, last); + } if (user_recv) { int chunk; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0a05b35a90fc..c53331cfed95 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1650,6 +1650,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; @@ -1664,6 +1665,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, ndisc_send_unsol_na(dev); in6_dev_put(idev); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&nd_tbl, dev); + break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); fib6_run_gc(0, net, false); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f187c8d8a1b..6d02498172c1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -348,7 +348,7 @@ found: fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - sub_frag_mem_limit(&fq->q, head->truesize); + sub_frag_mem_limit(fq->q.net, head->truesize); head->ignore_df = 1; head->next = NULL; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 8ffa2c8cce77..f1159bb76e0a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - if (fq->q.flags & INET_FRAG_EVICTED) + if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); @@ -330,7 +330,7 @@ found: fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8fd9febaa5ba..8dab4e569571 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -613,7 +613,7 @@ static int llc_wait_data(struct sock *sk, long timeo) if (signal_pending(current)) break; rc = 0; - if (sk_wait_data(sk, &timeo)) + if (sk_wait_data(sk, &timeo, NULL)) break; } return rc; @@ -802,7 +802,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, release_sock(sk); lock_sock(sk); } else - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n", diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5d2b806a862e..38fbc194b9cb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * return *ignored=0 i.e. ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 285eae3a1454..24c554201a76 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { ip_vs_start_estimator(svc->net, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; - if (sched->add_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->add_dest) sched->add_dest(svc, dest); } else { - if (sched->upd_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->upd_dest) sched->upd_dest(svc, dest); } } @@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, struct ip_vs_scheduler *sched; sched = rcu_dereference_protected(svc->scheduler, 1); - if (sched->del_dest) + if (sched && sched->del_dest) sched->del_dest(svc, dest); } } @@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ip_vs_use_count_inc(); /* Lookup the scheduler by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - ret = -ENOENT; - goto out_err; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + ret = -ENOENT; + goto out_err; + } } if (u->pe_name && *u->pe_name) { @@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) - goto out_err; - sched = NULL; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) + goto out_err; + sched = NULL; + } /* Bind the ct retriever */ RCU_INIT_POINTER(svc->pe, pe); @@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, static int ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { - struct ip_vs_scheduler *sched, *old_sched; + struct ip_vs_scheduler *sched = NULL, *old_sched; struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; /* * Lookup the scheduler, by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - return -ENOENT; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + return -ENOENT; + } } old_sched = sched; @@ -1329,14 +1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = rcu_dereference_protected(svc->scheduler, 1); if (sched != old_sched) { + if (old_sched) { + ip_vs_unbind_scheduler(svc, old_sched); + RCU_INIT_POINTER(svc->scheduler, NULL); + /* Wait all svc->sched_data users */ + synchronize_rcu(); + } /* Bind the new scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) { - old_sched = sched; - goto out; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + ip_vs_scheduler_put(sched); + goto out; + } } - /* Unbind the old scheduler on success */ - ip_vs_unbind_scheduler(svc, old_sched); } /* @@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - sched->name); + sched_name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - sched->name, + sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, sched->name, + svc->fwmark, sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { struct ip_vs_scheduler *sched; struct ip_vs_kstats kstats; + char *sched_name; sched = rcu_dereference_protected(src->scheduler, 1); + sched_name = sched ? sched->name : "none"; dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; struct ip_vs_kstats kstats; + char *sched_name; nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) @@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } sched = rcu_dereference_protected(svc->scheduler, 1); + sched_name = sched ? sched->name : "none"; pe = rcu_dereference_protected(svc->pe, 1); - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 199760c71f39..7e8141647943 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc, if (sched->done_service) sched->done_service(svc); - /* svc->scheduler can not be set to NULL */ + /* svc->scheduler can be set to NULL only by caller */ } @@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { - struct ip_vs_scheduler *sched; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; - sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - sched->name, svc->fwmark, svc->fwmark, msg); + sched_name, svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b08ba9538d12..d99ad93eb855 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, pkts = atomic_add_return(1, &cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); - ip_vs_sync_conn(net, cp->control, pkts); + ip_vs_sync_conn(net, cp, pkts); } } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index bf66a8657a5f..258a0b0e82a2 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; - fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; @@ -505,6 +504,13 @@ err_put: return -1; err_unreach: + /* The ip6_link_failure function requires the dev field to be set + * in order to get the net (further for the sake of fwmark + * reflection). + */ + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + dst_link_failure(skb); return -1; } @@ -523,10 +529,27 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (ret == NF_ACCEPT) { nf_reset(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); } return ret; } +/* In the event of a remote destination, it's possible that we would have + * matches against an old socket (particularly a TIME-WAIT socket). This + * causes havoc down the line (ip_local_out et. al. expect regular sockets + * and invalid memory accesses will happen) so simply drop the association + * in this case. +*/ +static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb) +{ + /* If dev is set, the packet came from the LOCAL_IN callback and + * not from a local TCP socket. + */ + if (skb->dev) + skb_orphan(skb); +} + /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, struct ip_vs_conn *cp, int local) @@ -538,12 +561,23 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 1); + + /* Remove the early_demux association unless it's bound for the + * exact same port and address on this host after translation. + */ + if (!local || cp->vport != cp->dport || + !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr)) + ip_vs_drop_early_demux_sk(skb); + if (!local) { skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; + return ret; } @@ -557,7 +591,10 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) ip_vs_notrack(skb); if (!local) { + ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else @@ -845,6 +882,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, struct ipv6hdr *old_ipv6h = NULL; #endif + ip_vs_drop_early_demux_sk(skb); + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 13fad8668f83..651039ad1681 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -287,6 +287,46 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) spin_unlock(&pcpu->lock); } +/* Released via destroy_conntrack() */ +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) +{ + struct nf_conn *tmpl; + + tmpl = kzalloc(sizeof(struct nf_conn), GFP_KERNEL); + if (tmpl == NULL) + return NULL; + + tmpl->status = IPS_TEMPLATE; + write_pnet(&tmpl->ct_net, net); + +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (zone) { + struct nf_conntrack_zone *nf_ct_zone; + + nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC); + if (!nf_ct_zone) + goto out_free; + nf_ct_zone->id = zone; + } +#endif + atomic_set(&tmpl->ct_general.use, 0); + + return tmpl; +#ifdef CONFIG_NF_CONNTRACK_ZONES +out_free: + kfree(tmpl); + return NULL; +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); + +static void nf_ct_tmpl_free(struct nf_conn *tmpl) +{ + nf_ct_ext_destroy(tmpl); + nf_ct_ext_free(tmpl); + kfree(tmpl); +} + static void destroy_conntrack(struct nf_conntrack *nfct) { @@ -298,6 +338,10 @@ destroy_conntrack(struct nf_conntrack *nfct) NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); + if (unlikely(nf_ct_is_template(ct))) { + nf_ct_tmpl_free(ct); + return; + } rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto && l4proto->destroy) @@ -540,28 +584,6 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); -/* deletion from this larval template list happens via nf_ct_put() */ -void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl) -{ - struct ct_pcpu *pcpu; - - __set_bit(IPS_TEMPLATE_BIT, &tmpl->status); - __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); - nf_conntrack_get(&tmpl->ct_general); - - /* add this conntrack to the (per cpu) tmpl list */ - local_bh_disable(); - tmpl->cpu = smp_processor_id(); - pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu); - - spin_lock(&pcpu->lock); - /* Overload tuple linked list to put us in template list. */ - hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &pcpu->tmpl); - spin_unlock_bh(&pcpu->lock); -} -EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); - /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) @@ -1751,7 +1773,6 @@ int nf_conntrack_init_net(struct net *net) spin_lock_init(&pcpu->lock); INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); - INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL); } net->ct.stat = alloc_percpu(struct ip_conntrack_stat); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 7a17070c5dab..b45a4223cb05 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; } - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); + return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } static inline int expect_matches(const struct nf_conntrack_expect *a, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d1c23940a86a..6b8b0abbfab4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2995,11 +2995,6 @@ ctnetlink_create_expect(struct net *net, u16 zone, } err = nf_ct_expect_related_report(exp, portid, report); - if (err < 0) - goto err_exp; - - return 0; -err_exp: nf_ct_expect_put(exp); err_ct: nf_ct_put(ct); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 789feeae6c44..71f1e9fdfa18 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -349,12 +349,10 @@ static void __net_exit synproxy_proc_exit(struct net *net) static int __net_init synproxy_net_init(struct net *net) { struct synproxy_net *snet = synproxy_pernet(net); - struct nf_conntrack_tuple t; struct nf_conn *ct; int err = -ENOMEM; - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL); if (IS_ERR(ct)) { err = PTR_ERR(ct); goto err1; @@ -365,7 +363,8 @@ static int __net_init synproxy_net_init(struct net *net) if (!nfct_synproxy_ext_add(ct)) goto err2; - nf_conntrack_tmpl_insert(net, ct); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 75747aecdebe..c6630030c912 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -184,7 +184,6 @@ out: static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct xt_ct_target_info_v1 *info) { - struct nf_conntrack_tuple t; struct nf_conn *ct; int ret = -EOPNOTSUPP; @@ -202,8 +201,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err1; - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL); ret = PTR_ERR(ct); if (IS_ERR(ct)) goto err2; @@ -227,8 +225,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err3; } - - nf_conntrack_tmpl_insert(par->net, ct); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); out: info->ct = ct; return 0; diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index f407ebc13481..29d2c31f406c 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -126,6 +126,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) goto out; } + sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { ret = -ENOMEM; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9e8741226c6..ed458b315ef4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2403,7 +2403,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); - if (tp_len > dev->mtu + dev->hard_header_len) { + if (likely(tp_len >= 0) && + tp_len > dev->mtu + dev->hard_header_len) { struct ethhdr *ehdr; /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual @@ -2784,7 +2785,7 @@ static int packet_release(struct socket *sock) static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) { struct packet_sock *po = pkt_sk(sk); - const struct net_device *dev_curr; + struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; @@ -2808,15 +2809,13 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) po->num = proto; po->prot_hook.type = proto; - - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; packet_cached_dev_assign(po, dev); } + if (dev_curr) + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index af427a3dbcba..43ec92680ae8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -45,7 +45,7 @@ void tcf_hash_destroy(struct tc_action *a) } EXPORT_SYMBOL(tcf_hash_destroy); -int tcf_hash_release(struct tc_action *a, int bind) +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict) { struct tcf_common *p = a->priv; int ret = 0; @@ -53,7 +53,7 @@ int tcf_hash_release(struct tc_action *a, int bind) if (p) { if (bind) p->tcfc_bindcnt--; - else if (p->tcfc_bindcnt > 0) + else if (strict && p->tcfc_bindcnt > 0) return -EPERM; p->tcfc_refcnt--; @@ -64,9 +64,10 @@ int tcf_hash_release(struct tc_action *a, int bind) ret = 1; } } + return ret; } -EXPORT_SYMBOL(tcf_hash_release); +EXPORT_SYMBOL(__tcf_hash_release); static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, struct tc_action *a) @@ -136,7 +137,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { a->priv = p; - ret = tcf_hash_release(a, 0); + ret = __tcf_hash_release(a, false, true); if (ret == ACT_P_DELETED) { module_put(a->ops->owner); n_i++; @@ -408,7 +409,7 @@ int tcf_action_destroy(struct list_head *actions, int bind) int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = tcf_hash_release(a, bind); + ret = __tcf_hash_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); else if (ret < 0) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1df78289e248..d0edeb7a1950 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -27,9 +27,10 @@ struct tcf_bpf_cfg { struct bpf_prog *filter; struct sock_filter *bpf_ops; - char *bpf_name; + const char *bpf_name; u32 bpf_fd; u16 bpf_num_ops; + bool is_ebpf; }; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, @@ -207,6 +208,7 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_ops = bpf_ops; cfg->bpf_num_ops = bpf_num_ops; cfg->filter = fp; + cfg->is_ebpf = false; return 0; } @@ -241,18 +243,40 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_fd = bpf_fd; cfg->bpf_name = name; cfg->filter = fp; + cfg->is_ebpf = true; return 0; } +static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) +{ + if (cfg->is_ebpf) + bpf_prog_put(cfg->filter); + else + bpf_prog_destroy(cfg->filter); + + kfree(cfg->bpf_ops); + kfree(cfg->bpf_name); +} + +static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, + struct tcf_bpf_cfg *cfg) +{ + cfg->is_ebpf = tcf_bpf_is_ebpf(prog); + cfg->filter = prog->filter; + + cfg->bpf_ops = prog->bpf_ops; + cfg->bpf_name = prog->bpf_name; +} + static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *act, int replace, int bind) { struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; - struct tcf_bpf_cfg cfg; bool is_bpf, is_ebpf; int ret; @@ -301,6 +325,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog = to_bpf(act); spin_lock_bh(&prog->tcf_lock); + if (ret != ACT_P_CREATED) + tcf_bpf_prog_fill_cfg(prog, &old); + prog->bpf_ops = cfg.bpf_ops; prog->bpf_name = cfg.bpf_name; @@ -316,32 +343,22 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (ret == ACT_P_CREATED) tcf_hash_insert(act); + else + tcf_bpf_cfg_cleanup(&old); return ret; destroy_fp: - if (is_ebpf) - bpf_prog_put(cfg.filter); - else - bpf_prog_destroy(cfg.filter); - - kfree(cfg.bpf_ops); - kfree(cfg.bpf_name); - + tcf_bpf_cfg_cleanup(&cfg); return ret; } static void tcf_bpf_cleanup(struct tc_action *act, int bind) { - const struct tcf_bpf *prog = act->priv; - - if (tcf_bpf_is_ebpf(prog)) - bpf_prog_put(prog->filter); - else - bpf_prog_destroy(prog->filter); + struct tcf_bpf_cfg tmp; - kfree(prog->bpf_ops); - kfree(prog->bpf_name); + tcf_bpf_prog_fill_cfg(act->priv, &tmp); + tcf_bpf_cfg_cleanup(&tmp); } static struct tc_action_ops act_bpf_ops __read_mostly = { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 17e6d6669c7f..ff8b466a73f6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -68,13 +68,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; } else { - p = to_pedit(a); - tcf_hash_release(a, bind); if (bind) return 0; + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; - + p = to_pedit(a); if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 93d5742dc7e0..6a783afe4960 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -385,6 +385,19 @@ static void choke_reset(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); + while (q->head != q->tail) { + struct sk_buff *skb = q->tab[q->head]; + + q->head = (q->head + 1) & q->tab_mask; + if (!skb) + continue; + qdisc_qstats_backlog_dec(sch, skb); + --sch->q.qlen; + qdisc_drop(skb, sch); + } + + memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); + q->head = q->tail = 0; red_restart(&q->vars); } diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 89f8fcf73f18..ade9445a55ab 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -216,6 +216,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = { .peek = qdisc_peek_head, .init = plug_init, .change = plug_change, + .reset = qdisc_reset_queue, .owner = THIS_MODULE, }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1425ec2bbd5a..17bef01b9aa3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2200,12 +2200,6 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval, if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen)) return -EFAULT; - if (sctp_sk(sk)->subscribe.sctp_data_io_event) - pr_warn_ratelimited(DEPRECATED "%s (pid %d) " - "Requested SCTP_SNDRCVINFO event.\n" - "Use SCTP_RCVINFO through SCTP_RECVRCVINFO option instead.\n", - current->comm, task_pid_nr(current)); - /* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT, * if there is no data to be sent or retransmit, the stack will * immediately send up this notification. diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9825ff0f91d6..6255d141133b 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -240,8 +240,8 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) req = xprt_alloc_bc_req(xprt, GFP_ATOMIC); if (!req) goto not_found; - /* Note: this 'free' request adds it to xprt->bc_pa_list */ - xprt_free_bc_request(req); + list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); + xprt->bc_alloc_count++; } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); @@ -336,7 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); - xprt->bc_alloc_count--; + xprt_dec_alloc_count(xprt, 1); spin_unlock(&xprt->bc_pa_lock); req->rq_private_buf.len = copied; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cbc6af923dd1..23608eb0ded2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1902,6 +1902,7 @@ call_transmit_status(struct rpc_task *task) switch (task->tk_status) { case -EAGAIN: + case -ENOBUFS: break; default: dprint_status(task); @@ -1928,7 +1929,6 @@ call_transmit_status(struct rpc_task *task) case -ECONNABORTED: case -EADDRINUSE: case -ENOTCONN: - case -ENOBUFS: case -EPIPE: rpc_task_force_reencode(task); } @@ -2057,12 +2057,13 @@ call_status(struct rpc_task *task) case -ECONNABORTED: rpc_force_rebind(clnt); case -EADDRINUSE: - case -ENOBUFS: rpc_delay(task, 3*HZ); case -EPIPE: case -ENOTCONN: task->tk_action = call_bind; break; + case -ENOBUFS: + rpc_delay(task, HZ>>2); case -EAGAIN: task->tk_action = call_transmit; break; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e193c2b5476b..0030376327b7 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -527,6 +527,10 @@ static int xs_local_send_request(struct rpc_task *task) true, &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - req->rq_bytes_sent, status); + + if (status == -EAGAIN && sock_writeable(transport->inet)) + status = -ENOBUFS; + if (likely(sent > 0) || status == 0) { req->rq_bytes_sent += sent; req->rq_xmit_bytes_sent += sent; @@ -539,6 +543,7 @@ static int xs_local_send_request(struct rpc_task *task) switch (status) { case -ENOBUFS: + break; case -EAGAIN: status = xs_nospace(task); break; @@ -589,6 +594,9 @@ static int xs_udp_send_request(struct rpc_task *task) if (status == -EPERM) goto process_status; + if (status == -EAGAIN && sock_writeable(transport->inet)) + status = -ENOBUFS; + if (sent > 0 || status == 0) { req->rq_xmit_bytes_sent += sent; if (sent >= req->rq_slen) @@ -669,9 +677,6 @@ static int xs_tcp_send_request(struct rpc_task *task) dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); - if (unlikely(sent == 0 && status < 0)) - break; - /* If we've sent the entire packet, immediately * reset the count of bytes sent. */ req->rq_bytes_sent += sent; @@ -681,18 +686,21 @@ static int xs_tcp_send_request(struct rpc_task *task) return 0; } - if (sent != 0) - continue; - status = -EAGAIN; - break; + if (status < 0) + break; + if (sent == 0) { + status = -EAGAIN; + break; + } } + if (status == -EAGAIN && sk_stream_is_writeable(transport->inet)) + status = -ENOBUFS; switch (status) { case -ENOTSOCK: status = -ENOTCONN; /* Should we call xs_close() here? */ break; - case -ENOBUFS: case -EAGAIN: status = xs_nospace(task); break; @@ -703,6 +711,7 @@ static int xs_tcp_send_request(struct rpc_task *task) case -ECONNREFUSED: case -ENOTCONN: case -EADDRINUSE: + case -ENOBUFS: case -EPIPE: clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); } diff --git a/scripts/rt-tester/check-all.sh b/scripts/rt-tester/check-all.sh deleted file mode 100644 index 6b5c83baf148..000000000000 --- a/scripts/rt-tester/check-all.sh +++ /dev/null @@ -1,21 +0,0 @@ - - -function testit () -{ - printf "%-30s: " $1 - ./rt-tester.py $1 | grep Pass -} - -testit t2-l1-2rt-sameprio.tst -testit t2-l1-pi.tst -testit t2-l1-signal.tst -#testit t2-l2-2rt-deadlock.tst -testit t3-l1-pi-1rt.tst -testit t3-l1-pi-2rt.tst -testit t3-l1-pi-3rt.tst -testit t3-l1-pi-signal.tst -testit t3-l1-pi-steal.tst -testit t3-l2-pi.tst -testit t4-l2-pi-deboost.tst -testit t5-l4-pi-boost-deboost.tst -testit t5-l4-pi-boost-deboost-setsched.tst diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py deleted file mode 100755 index 6d916c2a45a5..000000000000 --- a/scripts/rt-tester/rt-tester.py +++ /dev/null @@ -1,218 +0,0 @@ -#!/usr/bin/python -# -# rt-mutex tester -# -# (C) 2006 Thomas Gleixner <tglx@linutronix.de> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -import os -import sys -import getopt -import shutil -import string - -# Globals -quiet = 0 -test = 0 -comments = 0 - -sysfsprefix = "/sys/devices/system/rttest/rttest" -statusfile = "/status" -commandfile = "/command" - -# Command opcodes -cmd_opcodes = { - "schedother" : "1", - "schedfifo" : "2", - "lock" : "3", - "locknowait" : "4", - "lockint" : "5", - "lockintnowait" : "6", - "lockcont" : "7", - "unlock" : "8", - "signal" : "11", - "resetevent" : "98", - "reset" : "99", - } - -test_opcodes = { - "prioeq" : ["P" , "eq" , None], - "priolt" : ["P" , "lt" , None], - "priogt" : ["P" , "gt" , None], - "nprioeq" : ["N" , "eq" , None], - "npriolt" : ["N" , "lt" , None], - "npriogt" : ["N" , "gt" , None], - "unlocked" : ["M" , "eq" , 0], - "trylock" : ["M" , "eq" , 1], - "blocked" : ["M" , "eq" , 2], - "blockedwake" : ["M" , "eq" , 3], - "locked" : ["M" , "eq" , 4], - "opcodeeq" : ["O" , "eq" , None], - "opcodelt" : ["O" , "lt" , None], - "opcodegt" : ["O" , "gt" , None], - "eventeq" : ["E" , "eq" , None], - "eventlt" : ["E" , "lt" , None], - "eventgt" : ["E" , "gt" , None], - } - -# Print usage information -def usage(): - print "rt-tester.py <-c -h -q -t> <testfile>" - print " -c display comments after first command" - print " -h help" - print " -q quiet mode" - print " -t test mode (syntax check)" - print " testfile: read test specification from testfile" - print " otherwise from stdin" - return - -# Print progress when not in quiet mode -def progress(str): - if not quiet: - print str - -# Analyse a status value -def analyse(val, top, arg): - - intval = int(val) - - if top[0] == "M": - intval = intval / (10 ** int(arg)) - intval = intval % 10 - argval = top[2] - elif top[0] == "O": - argval = int(cmd_opcodes.get(arg, arg)) - else: - argval = int(arg) - - # progress("%d %s %d" %(intval, top[1], argval)) - - if top[1] == "eq" and intval == argval: - return 1 - if top[1] == "lt" and intval < argval: - return 1 - if top[1] == "gt" and intval > argval: - return 1 - return 0 - -# Parse the commandline -try: - (options, arguments) = getopt.getopt(sys.argv[1:],'chqt') -except getopt.GetoptError, ex: - usage() - sys.exit(1) - -# Parse commandline options -for option, value in options: - if option == "-c": - comments = 1 - elif option == "-q": - quiet = 1 - elif option == "-t": - test = 1 - elif option == '-h': - usage() - sys.exit(0) - -# Select the input source -if arguments: - try: - fd = open(arguments[0]) - except Exception,ex: - sys.stderr.write("File not found %s\n" %(arguments[0])) - sys.exit(1) -else: - fd = sys.stdin - -linenr = 0 - -# Read the test patterns -while 1: - - linenr = linenr + 1 - line = fd.readline() - if not len(line): - break - - line = line.strip() - parts = line.split(":") - - if not parts or len(parts) < 1: - continue - - if len(parts[0]) == 0: - continue - - if parts[0].startswith("#"): - if comments > 1: - progress(line) - continue - - if comments == 1: - comments = 2 - - progress(line) - - cmd = parts[0].strip().lower() - opc = parts[1].strip().lower() - tid = parts[2].strip() - dat = parts[3].strip() - - try: - # Test or wait for a status value - if cmd == "t" or cmd == "w": - testop = test_opcodes[opc] - - fname = "%s%s%s" %(sysfsprefix, tid, statusfile) - if test: - print fname - continue - - while 1: - query = 1 - fsta = open(fname, 'r') - status = fsta.readline().strip() - fsta.close() - stat = status.split(",") - for s in stat: - s = s.strip() - if s.startswith(testop[0]): - # Separate status value - val = s[2:].strip() - query = analyse(val, testop, dat) - break - if query or cmd == "t": - break - - progress(" " + status) - - if not query: - sys.stderr.write("Test failed in line %d\n" %(linenr)) - sys.exit(1) - - # Issue a command to the tester - elif cmd == "c": - cmdnr = cmd_opcodes[opc] - # Build command string and sys filename - cmdstr = "%s:%s" %(cmdnr, dat) - fname = "%s%s%s" %(sysfsprefix, tid, commandfile) - if test: - print fname - continue - fcmd = open(fname, 'w') - fcmd.write(cmdstr) - fcmd.close() - - except Exception,ex: - sys.stderr.write(str(ex)) - sys.stderr.write("\nSyntax error in line %d\n" %(linenr)) - if not test: - fd.close() - sys.exit(1) - -# Normal exit pass -print "Pass" -sys.exit(0) diff --git a/scripts/rt-tester/t2-l1-2rt-sameprio.tst b/scripts/rt-tester/t2-l1-2rt-sameprio.tst deleted file mode 100644 index 3710c8b2090d..000000000000 --- a/scripts/rt-tester/t2-l1-2rt-sameprio.tst +++ /dev/null @@ -1,94 +0,0 @@ -# -# RT-Mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal 0 -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 2 threads 1 lock -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedfifo: 0: 80 -C: schedfifo: 1: 80 - -# T0 lock L0 -C: locknowait: 0: 0 -C: locknowait: 1: 0 -W: locked: 0: 0 -W: blocked: 1: 0 -T: prioeq: 0: 80 - -# T0 unlock L0 -C: unlock: 0: 0 -W: locked: 1: 0 - -# Verify T0 -W: unlocked: 0: 0 -T: prioeq: 0: 80 - -# Unlock -C: unlock: 1: 0 -W: unlocked: 1: 0 - -# T1,T0 lock L0 -C: locknowait: 1: 0 -C: locknowait: 0: 0 -W: locked: 1: 0 -W: blocked: 0: 0 -T: prioeq: 1: 80 - -# T1 unlock L0 -C: unlock: 1: 0 -W: locked: 0: 0 - -# Verify T1 -W: unlocked: 1: 0 -T: prioeq: 1: 80 - -# Unlock and exit -C: unlock: 0: 0 -W: unlocked: 0: 0 - diff --git a/scripts/rt-tester/t2-l1-pi.tst b/scripts/rt-tester/t2-l1-pi.tst deleted file mode 100644 index b4cc95975adb..000000000000 --- a/scripts/rt-tester/t2-l1-pi.tst +++ /dev/null @@ -1,77 +0,0 @@ -# -# RT-Mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal 0 -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 2 threads 1 lock with priority inversion -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedother: 0: 0 -C: schedfifo: 1: 80 - -# T0 lock L0 -C: locknowait: 0: 0 -W: locked: 0: 0 - -# T1 lock L0 -C: locknowait: 1: 0 -W: blocked: 1: 0 -T: prioeq: 0: 80 - -# T0 unlock L0 -C: unlock: 0: 0 -W: locked: 1: 0 - -# Verify T1 -W: unlocked: 0: 0 -T: priolt: 0: 1 - -# Unlock and exit -C: unlock: 1: 0 -W: unlocked: 1: 0 - diff --git a/scripts/rt-tester/t2-l1-signal.tst b/scripts/rt-tester/t2-l1-signal.tst deleted file mode 100644 index 1b57376cc1f7..000000000000 --- a/scripts/rt-tester/t2-l1-signal.tst +++ /dev/null @@ -1,72 +0,0 @@ -# -# RT-Mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal 0 -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 2 threads 1 lock with priority inversion -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedother: 0: 0 -C: schedother: 1: 0 - -# T0 lock L0 -C: locknowait: 0: 0 -W: locked: 0: 0 - -# T1 lock L0 -C: lockintnowait: 1: 0 -W: blocked: 1: 0 - -# Interrupt T1 -C: signal: 1: 0 -W: unlocked: 1: 0 -T: opcodeeq: 1: -4 - -# Unlock and exit -C: unlock: 0: 0 -W: unlocked: 0: 0 diff --git a/scripts/rt-tester/t2-l2-2rt-deadlock.tst b/scripts/rt-tester/t2-l2-2rt-deadlock.tst deleted file mode 100644 index 68b10629b6f4..000000000000 --- a/scripts/rt-tester/t2-l2-2rt-deadlock.tst +++ /dev/null @@ -1,84 +0,0 @@ -# -# RT-Mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal 0 -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 2 threads 2 lock -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedfifo: 0: 80 -C: schedfifo: 1: 80 - -# T0 lock L0 -C: locknowait: 0: 0 -W: locked: 0: 0 - -# T1 lock L1 -C: locknowait: 1: 1 -W: locked: 1: 1 - -# T0 lock L1 -C: lockintnowait: 0: 1 -W: blocked: 0: 1 - -# T1 lock L0 -C: lockintnowait: 1: 0 -W: blocked: 1: 0 - -# Make deadlock go away -C: signal: 1: 0 -W: unlocked: 1: 0 -C: signal: 0: 0 -W: unlocked: 0: 1 - -# Unlock and exit -C: unlock: 0: 0 -W: unlocked: 0: 0 -C: unlock: 1: 1 -W: unlocked: 1: 1 - diff --git a/scripts/rt-tester/t3-l1-pi-1rt.tst b/scripts/rt-tester/t3-l1-pi-1rt.tst deleted file mode 100644 index 8e6c8b11ae56..000000000000 --- a/scripts/rt-tester/t3-l1-pi-1rt.tst +++ /dev/null @@ -1,87 +0,0 @@ -# -# rt-mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal thread to signal (0-7) -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 3 threads 1 lock PI -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedother: 0: 0 -C: schedother: 1: 0 -C: schedfifo: 2: 82 - -# T0 lock L0 -C: locknowait: 0: 0 -W: locked: 0: 0 - -# T1 lock L0 -C: locknowait: 1: 0 -W: blocked: 1: 0 -T: priolt: 0: 1 - -# T2 lock L0 -C: locknowait: 2: 0 -W: blocked: 2: 0 -T: prioeq: 0: 82 - -# T0 unlock L0 -C: unlock: 0: 0 - -# Wait until T2 got the lock -W: locked: 2: 0 -W: unlocked: 0: 0 -T: priolt: 0: 1 - -# T2 unlock L0 -C: unlock: 2: 0 - -W: unlocked: 2: 0 -W: locked: 1: 0 - -C: unlock: 1: 0 -W: unlocked: 1: 0 diff --git a/scripts/rt-tester/t3-l1-pi-2rt.tst b/scripts/rt-tester/t3-l1-pi-2rt.tst deleted file mode 100644 index 69c2212fc520..000000000000 --- a/scripts/rt-tester/t3-l1-pi-2rt.tst +++ /dev/null @@ -1,88 +0,0 @@ -# -# rt-mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal thread to signal (0-7) -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 3 threads 1 lock PI -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedother: 0: 0 -C: schedfifo: 1: 81 -C: schedfifo: 2: 82 - -# T0 lock L0 -C: locknowait: 0: 0 -W: locked: 0: 0 - -# T1 lock L0 -C: locknowait: 1: 0 -W: blocked: 1: 0 -T: prioeq: 0: 81 - -# T2 lock L0 -C: locknowait: 2: 0 -W: blocked: 2: 0 -T: prioeq: 0: 82 -T: prioeq: 1: 81 - -# T0 unlock L0 -C: unlock: 0: 0 - -# Wait until T2 got the lock -W: locked: 2: 0 -W: unlocked: 0: 0 -T: priolt: 0: 1 - -# T2 unlock L0 -C: unlock: 2: 0 - -W: unlocked: 2: 0 -W: locked: 1: 0 - -C: unlock: 1: 0 -W: unlocked: 1: 0 diff --git a/scripts/rt-tester/t3-l1-pi-3rt.tst b/scripts/rt-tester/t3-l1-pi-3rt.tst deleted file mode 100644 index 9b0f1eb26a88..000000000000 --- a/scripts/rt-tester/t3-l1-pi-3rt.tst +++ /dev/null @@ -1,87 +0,0 @@ -# -# rt-mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal thread to signal (0-7) -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 3 threads 1 lock PI -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedfifo: 0: 80 -C: schedfifo: 1: 81 -C: schedfifo: 2: 82 - -# T0 lock L0 -C: locknowait: 0: 0 -W: locked: 0: 0 - -# T1 lock L0 -C: locknowait: 1: 0 -W: blocked: 1: 0 -T: prioeq: 0: 81 - -# T2 lock L0 -C: locknowait: 2: 0 -W: blocked: 2: 0 -T: prioeq: 0: 82 - -# T0 unlock L0 -C: unlock: 0: 0 - -# Wait until T2 got the lock -W: locked: 2: 0 -W: unlocked: 0: 0 -T: prioeq: 0: 80 - -# T2 unlock L0 -C: unlock: 2: 0 - -W: locked: 1: 0 -W: unlocked: 2: 0 - -C: unlock: 1: 0 -W: unlocked: 1: 0 diff --git a/scripts/rt-tester/t3-l1-pi-signal.tst b/scripts/rt-tester/t3-l1-pi-signal.tst deleted file mode 100644 index 39ec74ab06ee..000000000000 --- a/scripts/rt-tester/t3-l1-pi-signal.tst +++ /dev/null @@ -1,93 +0,0 @@ -# -# rt-mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal thread to signal (0-7) -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# Reset event counter -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set priorities -C: schedother: 0: 0 -C: schedfifo: 1: 80 -C: schedfifo: 2: 81 - -# T0 lock L0 -C: lock: 0: 0 -W: locked: 0: 0 - -# T1 lock L0, no wait in the wakeup path -C: locknowait: 1: 0 -W: blocked: 1: 0 -T: prioeq: 0: 80 -T: prioeq: 1: 80 - -# T2 lock L0 interruptible, no wait in the wakeup path -C: lockintnowait: 2: 0 -W: blocked: 2: 0 -T: prioeq: 0: 81 -T: prioeq: 1: 80 - -# Interrupt T2 -C: signal: 2: 2 -W: unlocked: 2: 0 -T: prioeq: 1: 80 -T: prioeq: 0: 80 - -T: locked: 0: 0 -T: blocked: 1: 0 - -# T0 unlock L0 -C: unlock: 0: 0 - -# Wait until T1 has locked L0 and exit -W: locked: 1: 0 -W: unlocked: 0: 0 -T: priolt: 0: 1 - -C: unlock: 1: 0 -W: unlocked: 1: 0 - - - diff --git a/scripts/rt-tester/t3-l1-pi-steal.tst b/scripts/rt-tester/t3-l1-pi-steal.tst deleted file mode 100644 index e03db7e010fa..000000000000 --- a/scripts/rt-tester/t3-l1-pi-steal.tst +++ /dev/null @@ -1,91 +0,0 @@ -# -# rt-mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal thread to signal (0-7) -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 3 threads 1 lock PI steal pending ownership -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedother: 0: 0 -C: schedfifo: 1: 80 -C: schedfifo: 2: 81 - -# T0 lock L0 -C: lock: 0: 0 -W: locked: 0: 0 - -# T1 lock L0 -C: lock: 1: 0 -W: blocked: 1: 0 -T: prioeq: 0: 80 - -# T0 unlock L0 -C: unlock: 0: 0 - -# Wait until T1 is in the wakeup loop -W: blockedwake: 1: 0 -T: priolt: 0: 1 - -# T2 lock L0 -C: lock: 2: 0 -# T1 leave wakeup loop -C: lockcont: 1: 0 - -# T2 must have the lock and T1 must be blocked -W: locked: 2: 0 -W: blocked: 1: 0 - -# T2 unlock L0 -C: unlock: 2: 0 - -# Wait until T1 is in the wakeup loop and let it run -W: blockedwake: 1: 0 -C: lockcont: 1: 0 -W: locked: 1: 0 -C: unlock: 1: 0 -W: unlocked: 1: 0 diff --git a/scripts/rt-tester/t3-l2-pi.tst b/scripts/rt-tester/t3-l2-pi.tst deleted file mode 100644 index 7b59100d3e48..000000000000 --- a/scripts/rt-tester/t3-l2-pi.tst +++ /dev/null @@ -1,87 +0,0 @@ -# -# rt-mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal thread to signal (0-7) -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 3 threads 2 lock PI -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedother: 0: 0 -C: schedother: 1: 0 -C: schedfifo: 2: 82 - -# T0 lock L0 -C: locknowait: 0: 0 -W: locked: 0: 0 - -# T1 lock L0 -C: locknowait: 1: 0 -W: blocked: 1: 0 -T: priolt: 0: 1 - -# T2 lock L0 -C: locknowait: 2: 0 -W: blocked: 2: 0 -T: prioeq: 0: 82 - -# T0 unlock L0 -C: unlock: 0: 0 - -# Wait until T2 got the lock -W: locked: 2: 0 -W: unlocked: 0: 0 -T: priolt: 0: 1 - -# T2 unlock L0 -C: unlock: 2: 0 - -W: unlocked: 2: 0 -W: locked: 1: 0 - -C: unlock: 1: 0 -W: unlocked: 1: 0 diff --git a/scripts/rt-tester/t4-l2-pi-deboost.tst b/scripts/rt-tester/t4-l2-pi-deboost.tst deleted file mode 100644 index 2f0e049d6443..000000000000 --- a/scripts/rt-tester/t4-l2-pi-deboost.tst +++ /dev/null @@ -1,118 +0,0 @@ -# -# rt-mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal thread to signal (0-7) -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 4 threads 2 lock PI -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedother: 0: 0 -C: schedother: 1: 0 -C: schedfifo: 2: 82 -C: schedfifo: 3: 83 - -# T0 lock L0 -C: locknowait: 0: 0 -W: locked: 0: 0 - -# T1 lock L1 -C: locknowait: 1: 1 -W: locked: 1: 1 - -# T3 lock L0 -C: lockintnowait: 3: 0 -W: blocked: 3: 0 -T: prioeq: 0: 83 - -# T0 lock L1 -C: lock: 0: 1 -W: blocked: 0: 1 -T: prioeq: 1: 83 - -# T1 unlock L1 -C: unlock: 1: 1 - -# Wait until T0 is in the wakeup code -W: blockedwake: 0: 1 - -# Verify that T1 is unboosted -W: unlocked: 1: 1 -T: priolt: 1: 1 - -# T2 lock L1 (T0 is boosted and pending owner !) -C: locknowait: 2: 1 -W: blocked: 2: 1 -T: prioeq: 0: 83 - -# Interrupt T3 and wait until T3 returned -C: signal: 3: 0 -W: unlocked: 3: 0 - -# Verify prio of T0 (still pending owner, -# but T2 is enqueued due to the previous boost by T3 -T: prioeq: 0: 82 - -# Let T0 continue -C: lockcont: 0: 1 -W: locked: 0: 1 - -# Unlock L1 and let T2 get L1 -C: unlock: 0: 1 -W: locked: 2: 1 - -# Verify that T0 is unboosted -W: unlocked: 0: 1 -T: priolt: 0: 1 - -# Unlock everything and exit -C: unlock: 2: 1 -W: unlocked: 2: 1 - -C: unlock: 0: 0 -W: unlocked: 0: 0 - diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst deleted file mode 100644 index 04f4034ff895..000000000000 --- a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst +++ /dev/null @@ -1,178 +0,0 @@ -# -# rt-mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal thread to signal (0-7) -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 5 threads 4 lock PI - modify priority of blocked threads -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedother: 0: 0 -C: schedfifo: 1: 81 -C: schedfifo: 2: 82 -C: schedfifo: 3: 83 -C: schedfifo: 4: 84 - -# T0 lock L0 -C: locknowait: 0: 0 -W: locked: 0: 0 - -# T1 lock L1 -C: locknowait: 1: 1 -W: locked: 1: 1 - -# T1 lock L0 -C: lockintnowait: 1: 0 -W: blocked: 1: 0 -T: prioeq: 0: 81 - -# T2 lock L2 -C: locknowait: 2: 2 -W: locked: 2: 2 - -# T2 lock L1 -C: lockintnowait: 2: 1 -W: blocked: 2: 1 -T: prioeq: 0: 82 -T: prioeq: 1: 82 - -# T3 lock L3 -C: locknowait: 3: 3 -W: locked: 3: 3 - -# T3 lock L2 -C: lockintnowait: 3: 2 -W: blocked: 3: 2 -T: prioeq: 0: 83 -T: prioeq: 1: 83 -T: prioeq: 2: 83 - -# T4 lock L3 -C: lockintnowait: 4: 3 -W: blocked: 4: 3 -T: prioeq: 0: 84 -T: prioeq: 1: 84 -T: prioeq: 2: 84 -T: prioeq: 3: 84 - -# Reduce prio of T4 -C: schedfifo: 4: 80 -T: prioeq: 0: 83 -T: prioeq: 1: 83 -T: prioeq: 2: 83 -T: prioeq: 3: 83 -T: prioeq: 4: 80 - -# Increase prio of T4 -C: schedfifo: 4: 84 -T: prioeq: 0: 84 -T: prioeq: 1: 84 -T: prioeq: 2: 84 -T: prioeq: 3: 84 -T: prioeq: 4: 84 - -# Reduce prio of T3 -C: schedfifo: 3: 80 -T: prioeq: 0: 84 -T: prioeq: 1: 84 -T: prioeq: 2: 84 -T: prioeq: 3: 84 -T: prioeq: 4: 84 - -# Increase prio of T3 -C: schedfifo: 3: 85 -T: prioeq: 0: 85 -T: prioeq: 1: 85 -T: prioeq: 2: 85 -T: prioeq: 3: 85 -T: prioeq: 4: 84 - -# Reduce prio of T3 -C: schedfifo: 3: 83 -T: prioeq: 0: 84 -T: prioeq: 1: 84 -T: prioeq: 2: 84 -T: prioeq: 3: 84 -T: prioeq: 4: 84 - -# Signal T4 -C: signal: 4: 0 -W: unlocked: 4: 3 -T: prioeq: 0: 83 -T: prioeq: 1: 83 -T: prioeq: 2: 83 -T: prioeq: 3: 83 - -# Signal T3 -C: signal: 3: 0 -W: unlocked: 3: 2 -T: prioeq: 0: 82 -T: prioeq: 1: 82 -T: prioeq: 2: 82 - -# Signal T2 -C: signal: 2: 0 -W: unlocked: 2: 1 -T: prioeq: 0: 81 -T: prioeq: 1: 81 - -# Signal T1 -C: signal: 1: 0 -W: unlocked: 1: 0 -T: priolt: 0: 1 - -# Unlock and exit -C: unlock: 3: 3 -C: unlock: 2: 2 -C: unlock: 1: 1 -C: unlock: 0: 0 - -W: unlocked: 3: 3 -W: unlocked: 2: 2 -W: unlocked: 1: 1 -W: unlocked: 0: 0 - diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst deleted file mode 100644 index a48a6ee29ddc..000000000000 --- a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst +++ /dev/null @@ -1,138 +0,0 @@ -# -# rt-mutex test -# -# Op: C(ommand)/T(est)/W(ait) -# | opcode -# | | threadid: 0-7 -# | | | opcode argument -# | | | | -# C: lock: 0: 0 -# -# Commands -# -# opcode opcode argument -# schedother nice value -# schedfifo priority -# lock lock nr (0-7) -# locknowait lock nr (0-7) -# lockint lock nr (0-7) -# lockintnowait lock nr (0-7) -# lockcont lock nr (0-7) -# unlock lock nr (0-7) -# signal thread to signal (0-7) -# reset 0 -# resetevent 0 -# -# Tests / Wait -# -# opcode opcode argument -# -# prioeq priority -# priolt priority -# priogt priority -# nprioeq normal priority -# npriolt normal priority -# npriogt normal priority -# locked lock nr (0-7) -# blocked lock nr (0-7) -# blockedwake lock nr (0-7) -# unlocked lock nr (0-7) -# opcodeeq command opcode or number -# opcodelt number -# opcodegt number -# eventeq number -# eventgt number -# eventlt number - -# -# 5 threads 4 lock PI -# -C: resetevent: 0: 0 -W: opcodeeq: 0: 0 - -# Set schedulers -C: schedother: 0: 0 -C: schedfifo: 1: 81 -C: schedfifo: 2: 82 -C: schedfifo: 3: 83 -C: schedfifo: 4: 84 - -# T0 lock L0 -C: locknowait: 0: 0 -W: locked: 0: 0 - -# T1 lock L1 -C: locknowait: 1: 1 -W: locked: 1: 1 - -# T1 lock L0 -C: lockintnowait: 1: 0 -W: blocked: 1: 0 -T: prioeq: 0: 81 - -# T2 lock L2 -C: locknowait: 2: 2 -W: locked: 2: 2 - -# T2 lock L1 -C: lockintnowait: 2: 1 -W: blocked: 2: 1 -T: prioeq: 0: 82 -T: prioeq: 1: 82 - -# T3 lock L3 -C: locknowait: 3: 3 -W: locked: 3: 3 - -# T3 lock L2 -C: lockintnowait: 3: 2 -W: blocked: 3: 2 -T: prioeq: 0: 83 -T: prioeq: 1: 83 -T: prioeq: 2: 83 - -# T4 lock L3 -C: lockintnowait: 4: 3 -W: blocked: 4: 3 -T: prioeq: 0: 84 -T: prioeq: 1: 84 -T: prioeq: 2: 84 -T: prioeq: 3: 84 - -# Signal T4 -C: signal: 4: 0 -W: unlocked: 4: 3 -T: prioeq: 0: 83 -T: prioeq: 1: 83 -T: prioeq: 2: 83 -T: prioeq: 3: 83 - -# Signal T3 -C: signal: 3: 0 -W: unlocked: 3: 2 -T: prioeq: 0: 82 -T: prioeq: 1: 82 -T: prioeq: 2: 82 - -# Signal T2 -C: signal: 2: 0 -W: unlocked: 2: 1 -T: prioeq: 0: 81 -T: prioeq: 1: 81 - -# Signal T1 -C: signal: 1: 0 -W: unlocked: 1: 0 -T: priolt: 0: 1 - -# Unlock and exit -C: unlock: 3: 3 -C: unlock: 2: 2 -C: unlock: 1: 1 -C: unlock: 0: 0 - -W: unlocked: 3: 3 -W: unlocked: 2: 2 -W: unlocked: 1: 1 -W: unlocked: 0: 0 - diff --git a/security/keys/keyring.c b/security/keys/keyring.c index e72548b5897e..d33437007ad2 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1181,9 +1181,11 @@ void __key_link_end(struct key *keyring, if (index_key->type == &key_type_keyring) up_write(&keyring_serialise_link_sem); - if (edit && !edit->dead_leaf) { - key_payload_reserve(keyring, - keyring->datalen - KEYQUOTA_LINK_BYTES); + if (edit) { + if (!edit->dead_leaf) { + key_payload_reserve(keyring, + keyring->datalen - KEYQUOTA_LINK_BYTES); + } assoc_array_cancel_edit(edit); } up_write(&keyring->sem); diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c index 2682e7e3e5c9..c670db4eee70 100644 --- a/sound/firewire/fireworks/fireworks.c +++ b/sound/firewire/fireworks/fireworks.c @@ -248,6 +248,8 @@ efw_probe(struct fw_unit *unit, err = get_hardware_info(efw); if (err < 0) goto error; + if (entry->model_id == MODEL_ECHO_AUDIOFIRE_2) + efw->is_af2 = true; if (entry->model_id == MODEL_ECHO_AUDIOFIRE_9) efw->is_af9 = true; diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h index 4f0201a95222..c33252b7bc84 100644 --- a/sound/firewire/fireworks/fireworks.h +++ b/sound/firewire/fireworks/fireworks.h @@ -70,6 +70,7 @@ struct snd_efw { bool resp_addr_changable; /* for quirks */ + bool is_af2; bool is_af9; u32 firmware_version; diff --git a/sound/firewire/fireworks/fireworks_stream.c b/sound/firewire/fireworks/fireworks_stream.c index c55db1bddc80..a0762dd6231e 100644 --- a/sound/firewire/fireworks/fireworks_stream.c +++ b/sound/firewire/fireworks/fireworks_stream.c @@ -172,6 +172,9 @@ int snd_efw_stream_init_duplex(struct snd_efw *efw) efw->tx_stream.flags |= CIP_DBC_IS_END_EVENT; /* Fireworks reset dbc at bus reset. */ efw->tx_stream.flags |= CIP_SKIP_DBC_ZERO_CHECK; + /* AudioFire2 starts packets with non-zero dbc. */ + if (efw->is_af2) + efw->tx_stream.flags |= CIP_SKIP_INIT_DBC_CHECK; /* AudioFire9 always reports wrong dbs. */ if (efw->is_af9) efw->tx_stream.flags |= CIP_WRONG_DBS; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 735bdcb04ce8..c38c68f57938 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -867,7 +867,7 @@ static int azx_suspend(struct device *dev) chip = card->private_data; hda = container_of(chip, struct hda_intel, chip); - if (chip->disabled || hda->init_failed) + if (chip->disabled || hda->init_failed || !chip->running) return 0; bus = azx_bus(chip); @@ -902,7 +902,7 @@ static int azx_resume(struct device *dev) chip = card->private_data; hda = container_of(chip, struct hda_intel, chip); - if (chip->disabled || hda->init_failed) + if (chip->disabled || hda->init_failed || !chip->running) return 0; if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL @@ -1027,7 +1027,7 @@ static int azx_runtime_idle(struct device *dev) return 0; if (!power_save_controller || !azx_has_pm_runtime(chip) || - azx_bus(chip)->codec_powered) + azx_bus(chip)->codec_powered || !chip->running) return -EBUSY; return 0; diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 25ccf781fbe7..584a0343ab0c 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -999,9 +999,7 @@ static void cs4210_spdif_automute(struct hda_codec *codec, spec->spdif_present = spdif_present; /* SPDIF TX on/off */ - if (spdif_present) - snd_hda_set_pin_ctl(codec, spdif_pin, - spdif_present ? PIN_OUT : 0); + snd_hda_set_pin_ctl(codec, spdif_pin, spdif_present ? PIN_OUT : 0); cs_automute(codec); } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 742fc626f9e1..c456c04e0928 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2222,7 +2222,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x4300, "iMac 9,1", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4600, "MacbookPro 5,2", ALC889_FIXUP_IMAC91_VREF), SND_PCI_QUIRK(0x106b, 0x4900, "iMac 9,1 Aluminum", ALC889_FIXUP_IMAC91_VREF), - SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_IMAC91_VREF), + SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF), SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), @@ -5185,6 +5185,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13), + SND_PCI_QUIRK(0x1028, 0x069a, "Dell Vostro 5480", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), @@ -5398,8 +5399,6 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {0x19, 0x411111f0}, \ {0x1a, 0x411111f0}, \ {0x1b, 0x411111f0}, \ - {0x1d, 0x40700001}, \ - {0x1e, 0x411111f0}, \ {0x21, 0x02211020} #define ALC282_STANDARD_PINS \ @@ -5473,6 +5472,28 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x1e, 0x411111f0}, {0x21, 0x0221103f}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x40000000}, + {0x14, 0x90170150}, + {0x17, 0x411111f0}, + {0x18, 0x411111f0}, + {0x19, 0x411111f0}, + {0x1a, 0x411111f0}, + {0x1b, 0x02011020}, + {0x1d, 0x4054c029}, + {0x1e, 0x411111f0}, + {0x21, 0x0221105f}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x40000000}, + {0x14, 0x90170110}, + {0x17, 0x411111f0}, + {0x18, 0x411111f0}, + {0x19, 0x411111f0}, + {0x1a, 0x411111f0}, + {0x1b, 0x01014020}, + {0x1d, 0x4054c029}, + {0x1e, 0x411111f0}, + {0x21, 0x0221101f}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, {0x17, 0x90170140}, @@ -5534,10 +5555,19 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x21, 0x02211030}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS, - {0x13, 0x40000000}), + {0x13, 0x40000000}, + {0x1d, 0x40700001}, + {0x1e, 0x411111f0}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS, - {0x13, 0x411111f0}), + {0x13, 0x411111f0}, + {0x1d, 0x40700001}, + {0x1e, 0x411111f0}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC256_STANDARD_PINS, + {0x13, 0x411111f0}, + {0x1d, 0x4077992d}, + {0x1e, 0x411111ff}), SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4, {0x12, 0x90a60130}, {0x13, 0x40000000}, diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index dcc7fe91244c..9d947aef2c8b 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -2920,7 +2920,8 @@ static const struct snd_pci_quirk stac92hd83xxx_fixup_tbl[] = { SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x148a, "HP Mini", STAC_92HD83XXX_HP_LED), SND_PCI_QUIRK_VENDOR(PCI_VENDOR_ID_HP, "HP", STAC_92HD83XXX_HP), - SND_PCI_QUIRK(PCI_VENDOR_ID_TOSHIBA, 0xfa91, + /* match both for 0xfa91 and 0xfa93 */ + SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_TOSHIBA, 0xfffd, 0xfa91, "Toshiba Satellite S50D", STAC_92HD83XXX_GPIO10_EAPD), {} /* terminator */ }; diff --git a/sound/soc/codecs/pcm1681.c b/sound/soc/codecs/pcm1681.c index 477e13d30971..e7ba557979cb 100644 --- a/sound/soc/codecs/pcm1681.c +++ b/sound/soc/codecs/pcm1681.c @@ -102,7 +102,7 @@ static int pcm1681_set_deemph(struct snd_soc_codec *codec) if (val != -1) { regmap_update_bits(priv->regmap, PCM1681_DEEMPH_CONTROL, - PCM1681_DEEMPH_RATE_MASK, val); + PCM1681_DEEMPH_RATE_MASK, val << 3); enable = 1; } else enable = 0; diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 9ce311e088fc..e9cc3aae5366 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -2943,6 +2943,9 @@ static int rt5645_irq_detection(struct rt5645_priv *rt5645) { int val, btn_type, gpio_state = 0, report = 0; + if (!rt5645->codec) + return -EINVAL; + switch (rt5645->pdata.jd_mode) { case 0: /* Not using rt5645 JD */ if (rt5645->gpiod_hp_det) { diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index bd7a344bf8c5..1c317de26176 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -275,7 +275,7 @@ #define SGTL5000_BIAS_CTRL_MASK 0x000e #define SGTL5000_BIAS_CTRL_SHIFT 1 #define SGTL5000_BIAS_CTRL_WIDTH 3 -#define SGTL5000_SMALL_POP 0 +#define SGTL5000_SMALL_POP 1 /* * SGTL5000_CHIP_MIC_CTRL diff --git a/sound/soc/codecs/ssm4567.c b/sound/soc/codecs/ssm4567.c index 938d2cb6d78b..84a4f5ad8064 100644 --- a/sound/soc/codecs/ssm4567.c +++ b/sound/soc/codecs/ssm4567.c @@ -315,7 +315,13 @@ static int ssm4567_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) if (invert_fclk) ctrl1 |= SSM4567_SAI_CTRL_1_FSYNC; - return regmap_write(ssm4567->regmap, SSM4567_REG_SAI_CTRL_1, ctrl1); + return regmap_update_bits(ssm4567->regmap, SSM4567_REG_SAI_CTRL_1, + SSM4567_SAI_CTRL_1_BCLK | + SSM4567_SAI_CTRL_1_FSYNC | + SSM4567_SAI_CTRL_1_LJ | + SSM4567_SAI_CTRL_1_TDM | + SSM4567_SAI_CTRL_1_PDM, + ctrl1); } static int ssm4567_set_power(struct ssm4567 *ssm4567, bool enable) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index c7647e066cfd..c0b940e2019f 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -633,7 +633,7 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, sub *= 100000; do_div(sub, freq); - if (sub < savesub) { + if (sub < savesub && !(i == 0 && psr == 0 && div2 == 0)) { baudrate = tmprate; savesub = sub; pm = i; diff --git a/sound/soc/intel/Makefile b/sound/soc/intel/Makefile index 3853ec2ddbc7..6de5d5cd3280 100644 --- a/sound/soc/intel/Makefile +++ b/sound/soc/intel/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_SND_SOC_INTEL_BAYTRAIL) += baytrail/ obj-$(CONFIG_SND_SST_MFLD_PLATFORM) += atom/ # Machine support -obj-$(CONFIG_SND_SOC_INTEL_SST) += boards/ +obj-$(CONFIG_SND_SOC) += boards/ diff --git a/sound/soc/intel/atom/sst/sst_drv_interface.c b/sound/soc/intel/atom/sst/sst_drv_interface.c index 620da1d1b9e3..0e0e4d9c021f 100644 --- a/sound/soc/intel/atom/sst/sst_drv_interface.c +++ b/sound/soc/intel/atom/sst/sst_drv_interface.c @@ -42,6 +42,11 @@ #define MIN_FRAGMENT_SIZE (50 * 1024) #define MAX_FRAGMENT_SIZE (1024 * 1024) #define SST_GET_BYTES_PER_SAMPLE(pcm_wd_sz) (((pcm_wd_sz + 15) >> 4) << 1) +#ifdef CONFIG_PM +#define GET_USAGE_COUNT(dev) (atomic_read(&dev->power.usage_count)) +#else +#define GET_USAGE_COUNT(dev) 1 +#endif int free_stream_context(struct intel_sst_drv *ctx, unsigned int str_id) { @@ -141,15 +146,9 @@ static int sst_power_control(struct device *dev, bool state) int ret = 0; int usage_count = 0; -#ifdef CONFIG_PM - usage_count = atomic_read(&dev->power.usage_count); -#else - usage_count = 1; -#endif - if (state == true) { ret = pm_runtime_get_sync(dev); - + usage_count = GET_USAGE_COUNT(dev); dev_dbg(ctx->dev, "Enable: pm usage count: %d\n", usage_count); if (ret < 0) { dev_err(ctx->dev, "Runtime get failed with err: %d\n", ret); @@ -164,6 +163,7 @@ static int sst_power_control(struct device *dev, bool state) } } } else { + usage_count = GET_USAGE_COUNT(dev); dev_dbg(ctx->dev, "Disable: pm usage count: %d\n", usage_count); return sst_pm_runtime_put(ctx); } diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c index d604ee80eda4..70f832114a5a 100644 --- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c +++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c @@ -69,12 +69,12 @@ static const struct snd_soc_dapm_route cht_audio_map[] = { {"Headphone", NULL, "HPR"}, {"Ext Spk", NULL, "SPKL"}, {"Ext Spk", NULL, "SPKR"}, - {"AIF1 Playback", NULL, "ssp2 Tx"}, + {"HiFi Playback", NULL, "ssp2 Tx"}, {"ssp2 Tx", NULL, "codec_out0"}, {"ssp2 Tx", NULL, "codec_out1"}, {"codec_in0", NULL, "ssp2 Rx" }, {"codec_in1", NULL, "ssp2 Rx" }, - {"ssp2 Rx", NULL, "AIF1 Capture"}, + {"ssp2 Rx", NULL, "HiFi Capture"}, }; static const struct snd_kcontrol_new cht_mc_controls[] = { diff --git a/sound/soc/mediatek/mt8173-max98090.c b/sound/soc/mediatek/mt8173-max98090.c index 4d44b5803e55..2d2536af141f 100644 --- a/sound/soc/mediatek/mt8173-max98090.c +++ b/sound/soc/mediatek/mt8173-max98090.c @@ -103,7 +103,6 @@ static struct snd_soc_dai_link mt8173_max98090_dais[] = { .name = "MAX98090 Playback", .stream_name = "MAX98090 Playback", .cpu_dai_name = "DL1", - .platform_name = "11220000.mt8173-afe-pcm", .codec_name = "snd-soc-dummy", .codec_dai_name = "snd-soc-dummy-dai", .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST}, @@ -114,7 +113,6 @@ static struct snd_soc_dai_link mt8173_max98090_dais[] = { .name = "MAX98090 Capture", .stream_name = "MAX98090 Capture", .cpu_dai_name = "VUL", - .platform_name = "11220000.mt8173-afe-pcm", .codec_name = "snd-soc-dummy", .codec_dai_name = "snd-soc-dummy-dai", .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST}, @@ -125,7 +123,6 @@ static struct snd_soc_dai_link mt8173_max98090_dais[] = { { .name = "Codec", .cpu_dai_name = "I2S", - .platform_name = "11220000.mt8173-afe-pcm", .no_pcm = 1, .codec_dai_name = "HiFi", .init = mt8173_max98090_init, @@ -152,9 +149,21 @@ static struct snd_soc_card mt8173_max98090_card = { static int mt8173_max98090_dev_probe(struct platform_device *pdev) { struct snd_soc_card *card = &mt8173_max98090_card; - struct device_node *codec_node; + struct device_node *codec_node, *platform_node; int ret, i; + platform_node = of_parse_phandle(pdev->dev.of_node, + "mediatek,platform", 0); + if (!platform_node) { + dev_err(&pdev->dev, "Property 'platform' missing or invalid\n"); + return -EINVAL; + } + for (i = 0; i < card->num_links; i++) { + if (mt8173_max98090_dais[i].platform_name) + continue; + mt8173_max98090_dais[i].platform_of_node = platform_node; + } + codec_node = of_parse_phandle(pdev->dev.of_node, "mediatek,audio-codec", 0); if (!codec_node) { diff --git a/sound/soc/mediatek/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173-rt5650-rt5676.c index 094055323059..6f52eca05e26 100644 --- a/sound/soc/mediatek/mt8173-rt5650-rt5676.c +++ b/sound/soc/mediatek/mt8173-rt5650-rt5676.c @@ -138,7 +138,6 @@ static struct snd_soc_dai_link mt8173_rt5650_rt5676_dais[] = { .name = "rt5650_rt5676 Playback", .stream_name = "rt5650_rt5676 Playback", .cpu_dai_name = "DL1", - .platform_name = "11220000.mt8173-afe-pcm", .codec_name = "snd-soc-dummy", .codec_dai_name = "snd-soc-dummy-dai", .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST}, @@ -149,7 +148,6 @@ static struct snd_soc_dai_link mt8173_rt5650_rt5676_dais[] = { .name = "rt5650_rt5676 Capture", .stream_name = "rt5650_rt5676 Capture", .cpu_dai_name = "VUL", - .platform_name = "11220000.mt8173-afe-pcm", .codec_name = "snd-soc-dummy", .codec_dai_name = "snd-soc-dummy-dai", .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST}, @@ -161,7 +159,6 @@ static struct snd_soc_dai_link mt8173_rt5650_rt5676_dais[] = { { .name = "Codec", .cpu_dai_name = "I2S", - .platform_name = "11220000.mt8173-afe-pcm", .no_pcm = 1, .codecs = mt8173_rt5650_rt5676_codecs, .num_codecs = 2, @@ -209,7 +206,21 @@ static struct snd_soc_card mt8173_rt5650_rt5676_card = { static int mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev) { struct snd_soc_card *card = &mt8173_rt5650_rt5676_card; - int ret; + struct device_node *platform_node; + int i, ret; + + platform_node = of_parse_phandle(pdev->dev.of_node, + "mediatek,platform", 0); + if (!platform_node) { + dev_err(&pdev->dev, "Property 'platform' missing or invalid\n"); + return -EINVAL; + } + + for (i = 0; i < card->num_links; i++) { + if (mt8173_rt5650_rt5676_dais[i].platform_name) + continue; + mt8173_rt5650_rt5676_dais[i].platform_of_node = platform_node; + } mt8173_rt5650_rt5676_codecs[0].of_node = of_parse_phandle(pdev->dev.of_node, "mediatek,audio-codec", 0); diff --git a/sound/soc/mediatek/mtk-afe-pcm.c b/sound/soc/mediatek/mtk-afe-pcm.c index cc228db5fb76..9863da73dfe0 100644 --- a/sound/soc/mediatek/mtk-afe-pcm.c +++ b/sound/soc/mediatek/mtk-afe-pcm.c @@ -1199,6 +1199,8 @@ err_pm_disable: static int mtk_afe_pcm_dev_remove(struct platform_device *pdev) { pm_runtime_disable(&pdev->dev); + if (!pm_runtime_status_suspended(&pdev->dev)) + mtk_afe_runtime_suspend(&pdev->dev); snd_soc_unregister_component(&pdev->dev); snd_soc_unregister_platform(&pdev->dev); return 0; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 3a4a5c0e3f97..0e1e69c7abd5 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1716,6 +1716,7 @@ card_probe_error: if (card->remove) card->remove(card); + snd_soc_dapm_free(&card->dapm); soc_cleanup_card_debugfs(card); snd_card_free(card->snd_card); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index aa327c92480c..e0de8072c514 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -358,9 +358,10 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, data->widget = snd_soc_dapm_new_control_unlocked(widget->dapm, &template); + kfree(name); if (!data->widget) { ret = -ENOMEM; - goto err_name; + goto err_data; } } break; @@ -389,11 +390,12 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, data->value = template.on_val; - data->widget = snd_soc_dapm_new_control(widget->dapm, - &template); + data->widget = snd_soc_dapm_new_control_unlocked( + widget->dapm, &template); + kfree(name); if (!data->widget) { ret = -ENOMEM; - goto err_name; + goto err_data; } snd_soc_dapm_add_path(widget->dapm, data->widget, @@ -408,8 +410,6 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, return 0; -err_name: - kfree(name); err_data: kfree(data); return ret; @@ -418,8 +418,6 @@ err_data: static void dapm_kcontrol_free(struct snd_kcontrol *kctl) { struct dapm_kcontrol_data *data = snd_kcontrol_chip(kctl); - if (data->widget) - kfree(data->widget->name); kfree(data->wlist); kfree(data); } @@ -1952,6 +1950,7 @@ static ssize_t dapm_widget_power_read_file(struct file *file, size_t count, loff_t *ppos) { struct snd_soc_dapm_widget *w = file->private_data; + struct snd_soc_card *card = w->dapm->card; char *buf; int in, out; ssize_t ret; @@ -1961,6 +1960,8 @@ static ssize_t dapm_widget_power_read_file(struct file *file, if (!buf) return -ENOMEM; + mutex_lock(&card->dapm_mutex); + /* Supply widgets are not handled by is_connected_{input,output}_ep() */ if (w->is_supply) { in = 0; @@ -2007,6 +2008,8 @@ static ssize_t dapm_widget_power_read_file(struct file *file, p->sink->name); } + mutex_unlock(&card->dapm_mutex); + ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); kfree(buf); @@ -2281,11 +2284,15 @@ static ssize_t dapm_widget_show(struct device *dev, struct snd_soc_pcm_runtime *rtd = dev_get_drvdata(dev); int i, count = 0; + mutex_lock(&rtd->card->dapm_mutex); + for (i = 0; i < rtd->num_codecs; i++) { struct snd_soc_codec *codec = rtd->codec_dais[i]->codec; count += dapm_widget_show_codec(codec, buf + count); } + mutex_unlock(&rtd->card->dapm_mutex); + return count; } @@ -3334,16 +3341,10 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, } prefix = soc_dapm_prefix(dapm); - if (prefix) { + if (prefix) w->name = kasprintf(GFP_KERNEL, "%s %s", prefix, widget->name); - if (widget->sname) - w->sname = kasprintf(GFP_KERNEL, "%s %s", prefix, - widget->sname); - } else { + else w->name = kasprintf(GFP_KERNEL, "%s", widget->name); - if (widget->sname) - w->sname = kasprintf(GFP_KERNEL, "%s", widget->sname); - } if (w->name == NULL) { kfree(w); return NULL; @@ -3792,7 +3793,7 @@ int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card) break; } - if (!w->sname || !strstr(w->sname, dai_w->name)) + if (!w->sname || !strstr(w->sname, dai_w->sname)) continue; if (dai_w->id == snd_soc_dapm_dai_in) { diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index d0960683c409..59ac211f8fe7 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -144,7 +144,7 @@ static const struct snd_soc_tplg_kcontrol_ops io_ops[] = { {SND_SOC_TPLG_CTL_STROBE, snd_soc_get_strobe, snd_soc_put_strobe, NULL}, {SND_SOC_TPLG_DAPM_CTL_VOLSW, snd_soc_dapm_get_volsw, - snd_soc_dapm_put_volsw, NULL}, + snd_soc_dapm_put_volsw, snd_soc_info_volsw}, {SND_SOC_TPLG_DAPM_CTL_ENUM_DOUBLE, snd_soc_dapm_get_enum_double, snd_soc_dapm_put_enum_double, snd_soc_info_enum_double}, {SND_SOC_TPLG_DAPM_CTL_ENUM_VIRT, snd_soc_dapm_get_enum_double, @@ -580,27 +580,26 @@ static int soc_tplg_init_kcontrol(struct soc_tplg *tplg, } static int soc_tplg_create_tlv(struct soc_tplg *tplg, - struct snd_kcontrol_new *kc, u32 tlv_size) + struct snd_kcontrol_new *kc, struct snd_soc_tplg_ctl_tlv *tplg_tlv) { - struct snd_soc_tplg_ctl_tlv *tplg_tlv; struct snd_ctl_tlv *tlv; + int size; - if (tlv_size == 0) + if (tplg_tlv->count == 0) return 0; - tplg_tlv = (struct snd_soc_tplg_ctl_tlv *) tplg->pos; - tplg->pos += tlv_size; - - tlv = kzalloc(sizeof(*tlv) + tlv_size, GFP_KERNEL); + size = ((tplg_tlv->count + (sizeof(unsigned int) - 1)) & + ~(sizeof(unsigned int) - 1)); + tlv = kzalloc(sizeof(*tlv) + size, GFP_KERNEL); if (tlv == NULL) return -ENOMEM; dev_dbg(tplg->dev, " created TLV type %d size %d bytes\n", - tplg_tlv->numid, tplg_tlv->size); + tplg_tlv->numid, size); tlv->numid = tplg_tlv->numid; - tlv->length = tplg_tlv->size; - memcpy(tlv->tlv, tplg_tlv + 1, tplg_tlv->size); + tlv->length = size; + memcpy(&tlv->tlv[0], tplg_tlv->data, size); kc->tlv.p = (void *)tlv; return 0; @@ -773,7 +772,7 @@ static int soc_tplg_dmixer_create(struct soc_tplg *tplg, unsigned int count, } /* create any TLV data */ - soc_tplg_create_tlv(tplg, &kc, mc->hdr.tlv_size); + soc_tplg_create_tlv(tplg, &kc, &mc->tlv); /* register control here */ err = soc_tplg_add_kcontrol(tplg, &kc, diff --git a/sound/soc/zte/zx296702-i2s.c b/sound/soc/zte/zx296702-i2s.c index 98d96e1b17e0..1930c42e1f55 100644 --- a/sound/soc/zte/zx296702-i2s.c +++ b/sound/soc/zte/zx296702-i2s.c @@ -393,9 +393,9 @@ static int zx_i2s_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); zx_i2s->mapbase = res->start; zx_i2s->reg_base = devm_ioremap_resource(&pdev->dev, res); - if (!zx_i2s->reg_base) { + if (IS_ERR(zx_i2s->reg_base)) { dev_err(&pdev->dev, "ioremap failed!\n"); - return -EIO; + return PTR_ERR(zx_i2s->reg_base); } writel_relaxed(0, zx_i2s->reg_base + ZX_I2S_FIFO_CTRL); diff --git a/sound/soc/zte/zx296702-spdif.c b/sound/soc/zte/zx296702-spdif.c index 11a0e46a1156..26265ce4caca 100644 --- a/sound/soc/zte/zx296702-spdif.c +++ b/sound/soc/zte/zx296702-spdif.c @@ -322,9 +322,9 @@ static int zx_spdif_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); zx_spdif->mapbase = res->start; zx_spdif->reg_base = devm_ioremap_resource(&pdev->dev, res); - if (!zx_spdif->reg_base) { + if (IS_ERR(zx_spdif->reg_base)) { dev_err(&pdev->dev, "ioremap failed!\n"); - return -EIO; + return PTR_ERR(zx_spdif->reg_base); } zx_spdif_dev_init(zx_spdif->reg_base); diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index e5000da9e9d7..6a803eff87f7 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -341,6 +341,20 @@ static const struct usbmix_name_map scms_usb3318_map[] = { { 0 } }; +/* Bose companion 5, the dB conversion factor is 16 instead of 256 */ +static struct usbmix_dB_map bose_companion5_dB = {-5006, -6}; +static struct usbmix_name_map bose_companion5_map[] = { + { 3, NULL, .dB = &bose_companion5_dB }, + { 0 } /* terminator */ +}; + +/* Dragonfly DAC 1.2, the dB conversion factor is 1 instead of 256 */ +static struct usbmix_dB_map dragonfly_1_2_dB = {0, 5000}; +static struct usbmix_name_map dragonfly_1_2_map[] = { + { 7, NULL, .dB = &dragonfly_1_2_dB }, + { 0 } /* terminator */ +}; + /* * Control map entries */ @@ -451,6 +465,16 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x25c4, 0x0003), .map = scms_usb3318_map, }, + { + /* Bose Companion 5 */ + .id = USB_ID(0x05a7, 0x1020), + .map = bose_companion5_map, + }, + { + /* Dragonfly DAC 1.2 */ + .id = USB_ID(0x21b4, 0x0081), + .map = dragonfly_1_2_map, + }, { 0 } /* terminator */ }; diff --git a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c index 5224ee5b392d..6ff8383f2941 100644 --- a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c +++ b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c @@ -81,11 +81,11 @@ static int __init cpufreq_test_tsc(void) printk(KERN_DEBUG "start--> \n"); then = read_pmtmr(); - rdtscll(then_tsc); + then_tsc = rdtsc(); for (i=0;i<20;i++) { mdelay(100); now = read_pmtmr(); - rdtscll(now_tsc); + now_tsc = rdtsc(); diff = (now - then) & 0xFFFFFF; diff_tsc = now_tsc - then_tsc; printk(KERN_DEBUG "t1: %08u t2: %08u diff_pmtmr: %08u diff_tsc: %016llu\n", then, now, diff, diff_tsc); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 24ae9e829e9a..b8f12e0897e6 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -20,6 +20,7 @@ ifneq (1, $(quicktest)) TARGETS += timers endif TARGETS += user +TARGETS += jumplabel TARGETS += vm TARGETS += x86 #Please keep the TARGETS list alphabetically sorted diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c index 7f0c756993af..3d7dc6afc3f8 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c @@ -191,7 +191,7 @@ int main(int argc, char *argv[]) if (res > 0) { atomic_set(&requeued, 1); break; - } else if (res > 0) { + } else if (res < 0) { error("FUTEX_CMP_REQUEUE_PI failed\n", errno); ret = RET_ERROR; break; diff --git a/tools/testing/selftests/static_keys/Makefile b/tools/testing/selftests/static_keys/Makefile new file mode 100644 index 000000000000..9cdadf37f114 --- /dev/null +++ b/tools/testing/selftests/static_keys/Makefile @@ -0,0 +1,8 @@ +# Makefile for static keys selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +TEST_PROGS := test_static_keys.sh + +include ../lib.mk diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh new file mode 100644 index 000000000000..1261e3fa1e3a --- /dev/null +++ b/tools/testing/selftests/static_keys/test_static_keys.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# Runs static keys kernel module tests + +if /sbin/modprobe -q test_static_key_base; then + if /sbin/modprobe -q test_static_keys; then + echo "static_key: ok" + /sbin/modprobe -q -r test_static_keys + /sbin/modprobe -q -r test_static_key_base + else + echo "static_keys: [FAIL]" + /sbin/modprobe -q -r test_static_key_base + fi +else + echo "static_key: [FAIL]" + exit 1 +fi diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index caa60d56d7d1..b70da4acb4e5 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -4,8 +4,8 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs -TARGETS_C_32BIT_ONLY := entry_from_vm86 +TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs ldt_gdt +TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 5c38a187677b..9a43a59a9bb4 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -28,6 +28,55 @@ static unsigned long load_addr = 0x10000; static int nerrs = 0; +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static sig_atomic_t got_signal; + +static void sighandler(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM || + (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) { + printf("[FAIL]\tSignal frame should not reflect vm86 mode\n"); + nerrs++; + } + + const char *signame; + if (sig == SIGSEGV) + signame = "SIGSEGV"; + else if (sig == SIGILL) + signame = "SIGILL"; + else + signame = "unexpected signal"; + + printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame, + (unsigned long)ctx->uc_mcontext.gregs[REG_EFL], + (unsigned short)ctx->uc_mcontext.gregs[REG_CS]); + + got_signal = 1; +} + asm ( ".pushsection .rodata\n\t" ".type vmcode_bound, @object\n\t" @@ -38,6 +87,14 @@ asm ( "int3\n\t" "vmcode_sysenter:\n\t" "sysenter\n\t" + "vmcode_syscall:\n\t" + "syscall\n\t" + "vmcode_sti:\n\t" + "sti\n\t" + "vmcode_int3:\n\t" + "int3\n\t" + "vmcode_int80:\n\t" + "int $0x80\n\t" ".size vmcode, . - vmcode\n\t" "end_vmcode:\n\t" ".code32\n\t" @@ -45,9 +102,12 @@ asm ( ); extern unsigned char vmcode[], end_vmcode[]; -extern unsigned char vmcode_bound[], vmcode_sysenter[]; +extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], + vmcode_sti[], vmcode_int3[], vmcode_int80[]; -static void do_test(struct vm86plus_struct *v86, unsigned long eip, +/* Returns false if the test was skipped. */ +static bool do_test(struct vm86plus_struct *v86, unsigned long eip, + unsigned int rettype, unsigned int retarg, const char *text) { long ret; @@ -58,7 +118,7 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip, if (ret == -1 && errno == ENOSYS) { printf("[SKIP]\tvm86 not supported\n"); - return; + return false; } if (VM86_TYPE(ret) == VM86_INTx) { @@ -73,13 +133,30 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip, else sprintf(trapname, "%d", trapno); - printf("[OK]\tExited vm86 mode due to #%s\n", trapname); + printf("[INFO]\tExited vm86 mode due to #%s\n", trapname); } else if (VM86_TYPE(ret) == VM86_UNKNOWN) { - printf("[OK]\tExited vm86 mode due to unhandled GP fault\n"); + printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n"); + } else if (VM86_TYPE(ret) == VM86_TRAP) { + printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n", + VM86_ARG(ret)); + } else if (VM86_TYPE(ret) == VM86_SIGNAL) { + printf("[INFO]\tExited vm86 mode due to a signal\n"); + } else if (VM86_TYPE(ret) == VM86_STI) { + printf("[INFO]\tExited vm86 mode due to STI\n"); } else { - printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n", + printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n", VM86_TYPE(ret), VM86_ARG(ret)); } + + if (rettype == -1 || + (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) { + printf("[OK]\tReturned correctly\n"); + } else { + printf("[FAIL]\tIncorrect return reason\n"); + nerrs++; + } + + return true; } int main(void) @@ -105,10 +182,52 @@ int main(void) assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */ /* #BR -- should deliver SIG??? */ - do_test(&v86, vmcode_bound - vmcode, "#BR"); - - /* SYSENTER -- should cause #GP or #UD depending on CPU */ - do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER"); + do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR"); + + /* + * SYSENTER -- should cause #GP or #UD depending on CPU. + * Expected return type -1 means that we shouldn't validate + * the vm86 return value. This will avoid problems on non-SEP + * CPUs. + */ + sethandler(SIGILL, sighandler, 0); + do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER"); + clearhandler(SIGILL); + + /* + * SYSCALL would be a disaster in VM86 mode. Fortunately, + * there is no kernel that both enables SYSCALL and sets + * EFER.SCE, so it's #UD on all systems. But vm86 is + * buggy (or has a "feature"), so the SIGILL will actually + * be delivered. + */ + sethandler(SIGILL, sighandler, 0); + do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL"); + clearhandler(SIGILL); + + /* STI with VIP set */ + v86.regs.eflags |= X86_EFLAGS_VIP; + v86.regs.eflags &= ~X86_EFLAGS_IF; + do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set"); + + /* INT3 -- should cause #BP */ + do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3"); + + /* INT80 -- should exit with "INTx 0x80" */ + v86.regs.eax = (unsigned int)-1; + do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80"); + + /* Execute a null pointer */ + v86.regs.cs = 0; + v86.regs.ss = 0; + sethandler(SIGSEGV, sighandler, 0); + got_signal = 0; + if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") && + !got_signal) { + printf("[FAIL]\tDid not receive SIGSEGV\n"); + nerrs++; + } + clearhandler(SIGSEGV); return (nerrs == 0 ? 0 : 1); } diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c new file mode 100644 index 000000000000..31a3035cd4eb --- /dev/null +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -0,0 +1,576 @@ +/* + * ldt_gdt.c - Test cases for LDT and GDT access + * Copyright (c) 2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include <err.h> +#include <stdio.h> +#include <stdint.h> +#include <signal.h> +#include <setjmp.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <asm/ldt.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdbool.h> +#include <pthread.h> +#include <sched.h> +#include <linux/futex.h> + +#define AR_ACCESSED (1<<8) + +#define AR_TYPE_RODATA (0 * (1<<9)) +#define AR_TYPE_RWDATA (1 * (1<<9)) +#define AR_TYPE_RODATA_EXPDOWN (2 * (1<<9)) +#define AR_TYPE_RWDATA_EXPDOWN (3 * (1<<9)) +#define AR_TYPE_XOCODE (4 * (1<<9)) +#define AR_TYPE_XRCODE (5 * (1<<9)) +#define AR_TYPE_XOCODE_CONF (6 * (1<<9)) +#define AR_TYPE_XRCODE_CONF (7 * (1<<9)) + +#define AR_DPL3 (3 * (1<<13)) + +#define AR_S (1 << 12) +#define AR_P (1 << 15) +#define AR_AVL (1 << 20) +#define AR_L (1 << 21) +#define AR_DB (1 << 22) +#define AR_G (1 << 23) + +static int nerrs; + +static void check_invalid_segment(uint16_t index, int ldt) +{ + uint32_t has_limit = 0, has_ar = 0, limit, ar; + uint32_t selector = (index << 3) | (ldt << 2) | 3; + + asm ("lsl %[selector], %[limit]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_limit]\n\t" + "1:" + : [limit] "=r" (limit), [has_limit] "+rm" (has_limit) + : [selector] "r" (selector)); + asm ("larl %[selector], %[ar]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_ar]\n\t" + "1:" + : [ar] "=r" (ar), [has_ar] "+rm" (has_ar) + : [selector] "r" (selector)); + + if (has_limit || has_ar) { + printf("[FAIL]\t%s entry %hu is valid but should be invalid\n", + (ldt ? "LDT" : "GDT"), index); + nerrs++; + } else { + printf("[OK]\t%s entry %hu is invalid\n", + (ldt ? "LDT" : "GDT"), index); + } +} + +static void check_valid_segment(uint16_t index, int ldt, + uint32_t expected_ar, uint32_t expected_limit, + bool verbose) +{ + uint32_t has_limit = 0, has_ar = 0, limit, ar; + uint32_t selector = (index << 3) | (ldt << 2) | 3; + + asm ("lsl %[selector], %[limit]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_limit]\n\t" + "1:" + : [limit] "=r" (limit), [has_limit] "+rm" (has_limit) + : [selector] "r" (selector)); + asm ("larl %[selector], %[ar]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_ar]\n\t" + "1:" + : [ar] "=r" (ar), [has_ar] "+rm" (has_ar) + : [selector] "r" (selector)); + + if (!has_limit || !has_ar) { + printf("[FAIL]\t%s entry %hu is invalid but should be valid\n", + (ldt ? "LDT" : "GDT"), index); + nerrs++; + return; + } + + if (ar != expected_ar) { + printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, ar, expected_ar); + nerrs++; + } else if (limit != expected_limit) { + printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, limit, expected_limit); + nerrs++; + } else if (verbose) { + printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, ar, limit); + } +} + +static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, + bool oldmode) +{ + int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, + desc, sizeof(*desc)); + if (ret < -1) + errno = -ret; + if (ret == 0) { + uint32_t limit = desc->limit; + if (desc->limit_in_pages) + limit = (limit << 12) + 4095; + check_valid_segment(desc->entry_number, 1, ar, limit, true); + return true; + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + return false; + } else { + if (desc->seg_32bit) { + printf("[FAIL]\tUnexpected modify_ldt failure %d\n", + errno); + nerrs++; + return false; + } else { + printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); + return false; + } + } +} + +static bool install_valid(const struct user_desc *desc, uint32_t ar) +{ + return install_valid_mode(desc, ar, false); +} + +static void install_invalid(const struct user_desc *desc, bool oldmode) +{ + int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, + desc, sizeof(*desc)); + if (ret < -1) + errno = -ret; + if (ret == 0) { + check_invalid_segment(desc->entry_number, 1); + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + } else { + if (desc->seg_32bit) { + printf("[FAIL]\tUnexpected modify_ldt failure %d\n", + errno); + nerrs++; + } else { + printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); + } + } +} + +static int safe_modify_ldt(int func, struct user_desc *ptr, + unsigned long bytecount) +{ + int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount); + if (ret < -1) + errno = -ret; + return ret; +} + +static void fail_install(struct user_desc *desc) +{ + if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) { + printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n"); + nerrs++; + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + } else { + printf("[OK]\tmodify_ldt failure %d\n", errno); + } +} + +static void do_simple_tests(void) +{ + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 10, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB); + + desc.limit_in_pages = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + check_invalid_segment(1, 1); + + desc.entry_number = 2; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + check_invalid_segment(1, 1); + + desc.base_addr = 0xf0000000; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + desc.useable = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G | AR_AVL); + + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.seg_32bit = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_G | AR_AVL); + + desc.seg_32bit = 1; + desc.contents = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.contents = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.read_exec_only = 0; + desc.limit_in_pages = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | + AR_S | AR_DB | AR_AVL); + + desc.contents = 3; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 0; + desc.contents = 2; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 1; + +#ifdef __x86_64__ + desc.lm = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL); + desc.lm = 0; +#endif + + bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL); + + if (entry1_okay) { + printf("[RUN]\tTest fork\n"); + pid_t child = fork(); + if (child == 0) { + nerrs = 0; + check_valid_segment(desc.entry_number, 1, + AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL, desc.limit, + true); + check_invalid_segment(1, 1); + exit(nerrs ? 1 : 0); + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + printf("[RUN]\tTest size\n"); + int i; + for (i = 0; i < 8192; i++) { + desc.entry_number = i; + desc.limit = i; + if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) { + printf("[FAIL]\tFailed to install entry %d\n", i); + nerrs++; + break; + } + } + for (int j = 0; j < i; j++) { + check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL, j, false); + } + printf("[DONE]\tSize test\n"); + } else { + printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n"); + } + + /* Test entry_number too high. */ + desc.entry_number = 8192; + fail_install(&desc); + + /* Test deletion and actions mistakeable for deletion. */ + memset(&desc, 0, sizeof(desc)); + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P); + + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S); + + desc.seg_not_present = 0; + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P); + + desc.read_exec_only = 0; + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S); + + desc.read_exec_only = 1; + desc.limit = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S); + + desc.limit = 0; + desc.base_addr = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S); + + desc.base_addr = 0; + install_invalid(&desc, false); + + desc.seg_not_present = 0; + desc.read_exec_only = 0; + desc.seg_32bit = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB); + install_invalid(&desc, true); +} + +/* + * 0: thread is idle + * 1: thread armed + * 2: thread should clear LDT entry 0 + * 3: thread should exit + */ +static volatile unsigned int ftx; + +static void *threadproc(void *ctx) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 1"); /* should never fail */ + + while (1) { + syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0); + while (ftx != 2) { + if (ftx >= 3) + return NULL; + } + + /* clear LDT entry 0 */ + const struct user_desc desc = {}; + if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0) + err(1, "modify_ldt"); + + /* If ftx == 2, set it to zero. If ftx == 100, quit. */ + unsigned int x = -2; + asm volatile ("lock xaddl %[x], %[ftx]" : + [x] "+r" (x), [ftx] "+m" (ftx)); + if (x != 2) + return NULL; + } +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static void do_multicpu_tests(void) +{ + cpu_set_t cpuset; + pthread_t thread; + int failures = 0, iters = 5, i; + unsigned short orig_ss; + + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tCannot set affinity to CPU 1\n"); + return; + } + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tCannot set affinity to CPU 0\n"); + return; + } + + sethandler(SIGSEGV, sigsegv, 0); +#ifdef __i386__ + /* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */ + sethandler(SIGILL, sigsegv, 0); +#endif + + printf("[RUN]\tCross-CPU LDT invalidation\n"); + + if (pthread_create(&thread, 0, threadproc, 0) != 0) + err(1, "pthread_create"); + + asm volatile ("mov %%ss, %0" : "=rm" (orig_ss)); + + for (i = 0; i < 5; i++) { + if (sigsetjmp(jmpbuf, 1) != 0) + continue; + + /* Make sure the thread is ready after the last test. */ + while (ftx != 0) + ; + + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 0xfffff, + .seg_32bit = 1, + .contents = 0, /* Data */ + .read_exec_only = 0, + .limit_in_pages = 1, + .seg_not_present = 0, + .useable = 0 + }; + + if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) { + if (errno != ENOSYS) + err(1, "modify_ldt"); + printf("[SKIP]\tmodify_ldt unavailable\n"); + break; + } + + /* Arm the thread. */ + ftx = 1; + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + + asm volatile ("mov %0, %%ss" : : "r" (0x7)); + + /* Go! */ + ftx = 2; + + while (ftx != 0) + ; + + /* + * On success, modify_ldt will segfault us synchronously, + * and we'll escape via siglongjmp. + */ + + failures++; + asm volatile ("mov %0, %%ss" : : "rm" (orig_ss)); + }; + + ftx = 100; /* Kill the thread. */ + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + + if (pthread_join(thread, NULL) != 0) + err(1, "pthread_join"); + + if (failures) { + printf("[FAIL]\t%d of %d iterations failed\n", failures, iters); + nerrs++; + } else { + printf("[OK]\tAll %d iterations succeeded\n", iters); + } +} + +static int finish_exec_test(void) +{ + /* + * In a sensible world, this would be check_invalid_segment(0, 1); + * For better or for worse, though, the LDT is inherited across exec. + * We can probably change this safely, but for now we test it. + */ + check_valid_segment(0, 1, + AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB, + 42, true); + + return nerrs ? 1 : 0; +} + +static void do_exec_test(void) +{ + printf("[RUN]\tTest exec\n"); + + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 42, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB); + + pid_t child = fork(); + if (child == 0) { + execl("/proc/self/exe", "ldt_gdt_test_exec", NULL); + printf("[FAIL]\tCould not exec self\n"); + exit(1); /* exec failed */ + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } +} + +int main(int argc, char **argv) +{ + if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec")) + return finish_exec_test(); + + do_simple_tests(); + + do_multicpu_tests(); + + do_exec_test(); + + return nerrs ? 1 : 0; +} diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c new file mode 100644 index 000000000000..7db4fc9fa09f --- /dev/null +++ b/tools/testing/selftests/x86/syscall_arg_fault.c @@ -0,0 +1,130 @@ +/* + * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/signal.h> +#include <sys/ucontext.h> +#include <err.h> +#include <setjmp.h> +#include <errno.h> + +/* Our sigaltstack scratch space. */ +static unsigned char altstack_data[SIGSTKSZ]; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static volatile sig_atomic_t sig_traps; +static sigjmp_buf jmpbuf; + +static volatile sig_atomic_t n_errs; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) { + printf("[FAIL]\tAX had the wrong value: 0x%x\n", + ctx->uc_mcontext.gregs[REG_EAX]); + n_errs++; + } else { + printf("[OK]\tSeems okay\n"); + } + + siglongjmp(jmpbuf, 1); +} + +static void sigill(int sig, siginfo_t *info, void *ctx_void) +{ + printf("[SKIP]\tIllegal instruction\n"); + siglongjmp(jmpbuf, 1); +} + +int main() +{ + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + + sethandler(SIGSEGV, sigsegv, SA_ONSTACK); + sethandler(SIGILL, sigill, SA_ONSTACK); + + /* + * Exercise another nasty special case. The 32-bit SYSCALL + * and SYSENTER instructions (even in compat mode) each + * clobber one register. A Linux system call has a syscall + * number and six arguments, and the user stack pointer + * needs to live in some register on return. That means + * that we need eight registers, but SYSCALL and SYSENTER + * only preserve seven registers. As a result, one argument + * ends up on the stack. The stack is user memory, which + * means that the kernel can fail to read it. + * + * The 32-bit fast system calls don't have a defined ABI: + * we're supposed to invoke them through the vDSO. So we'll + * fudge it: we set all regs to invalid pointer values and + * invoke the entry instruction. The return will fail no + * matter what, and we completely lose our program state, + * but we can fix it up with a signal handler. + */ + + printf("[RUN]\tSYSENTER with invalid state\n"); + if (sigsetjmp(jmpbuf, 1) == 0) { + asm volatile ( + "movl $-1, %%eax\n\t" + "movl $-1, %%ebx\n\t" + "movl $-1, %%ecx\n\t" + "movl $-1, %%edx\n\t" + "movl $-1, %%esi\n\t" + "movl $-1, %%edi\n\t" + "movl $-1, %%ebp\n\t" + "movl $-1, %%esp\n\t" + "sysenter" + : : : "memory", "flags"); + } + + printf("[RUN]\tSYSCALL with invalid state\n"); + if (sigsetjmp(jmpbuf, 1) == 0) { + asm volatile ( + "movl $-1, %%eax\n\t" + "movl $-1, %%ebx\n\t" + "movl $-1, %%ecx\n\t" + "movl $-1, %%edx\n\t" + "movl $-1, %%esi\n\t" + "movl $-1, %%edi\n\t" + "movl $-1, %%ebp\n\t" + "movl $-1, %%esp\n\t" + "syscall\n\t" + "pushl $0" /* make sure we segfault cleanly */ + : : : "memory", "flags"); + } + + return 0; +} |