From 0b9a29add43273e64ef45472e08d38116fee1d82 Mon Sep 17 00:00:00 2001 From: Paul E. McKenney Date: Sun, 24 Apr 2016 14:30:16 -0700 Subject: arm: Use _rcuidle tracepoint to allow use from idle Testing on ARM encountered the following pair of lockdep-RCU splats: ------------------------------------------------------------------------ =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc4-next-20160422 #1 Not tainted ------------------------------- include/trace/events/power.h:328 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc4-next-20160422 #1 Hardware name: Generic OMAP3-GP (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xa8/0xe0) [] (dump_stack) from [] (pwrdm_set_next_pwrst+0xf8/0x1cc) [] (pwrdm_set_next_pwrst) from [] (omap3_enter_idle_bm+0x1b8/0x1e8) [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x84/0x408) [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x1c8/0x3f0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3cc) ------------------------------------------------------------------------ [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xa8/0xe0) [] (dump_stack) from [] (_pwrdm_state_switch+0x188/0x32c) [] (_pwrdm_state_switch) from [] (_pwrdm_post_transition_cb+0xc/0x14) [] (_pwrdm_post_transition_cb) from [] (pwrdm_for_each+0x30/0x5c) [] (pwrdm_for_each) from [] (pwrdm_post_transition+0x24/0x30) [] (pwrdm_post_transition) from [] (omap_sram_idle+0xfc/0x240) [] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xf0/0x1e8) [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x84/0x408) [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x1c8/0x3f0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3cc) ------------------------------------------------------------------------ These are caused by event tracing from the idle loop, and they were exposed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()"), which suppressed some false negatives. The current commit therefore adds the _rcuidle suffix to make RCU aware of this implicit use of RCU by event tracing, thus preventing both splats. Reported-by: Guenter Roeck Signed-off-by: Paul E. McKenney Tested-by: Guenter Roeck Tested-by: Tony Lindgren Cc: Russell King Reviewed-by: Steven Rostedt Cc: Cc: --- arch/arm/mach-omap2/powerdomain.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 78af6d8cf2e2..daf2753de7aa 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -186,8 +186,9 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag) trace_state = (PWRDM_TRACE_STATES_FLAG | ((next & OMAP_POWERSTATE_MASK) << 8) | ((prev & OMAP_POWERSTATE_MASK) << 0)); - trace_power_domain_target(pwrdm->name, trace_state, - smp_processor_id()); + trace_power_domain_target_rcuidle(pwrdm->name, + trace_state, + smp_processor_id()); } break; default: @@ -523,8 +524,8 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst) if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) { /* Trace the pwrdm desired target state */ - trace_power_domain_target(pwrdm->name, pwrst, - smp_processor_id()); + trace_power_domain_target_rcuidle(pwrdm->name, pwrst, + smp_processor_id()); /* Program the pwrdm desired target state */ ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst); } -- cgit v1.2.3 From 6b4e941875ca464e0bf737d7cdc9b72b008df6eb Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 5 May 2016 15:33:37 -0700 Subject: ARM: dts: omap5-board-common: Describe the voltage supply mapping accurately OMAP5uEVM based platforms share a similar voltage rail map. This should be properly described in device tree, without this regulator core will be unable to determine the source voltage of LDOs such as LDO9 and SMPS10 which could be configured for bypass depending on the voltage requested of them. This results in conditions such as: ldo9: bypassed regulator has no supply! ldo9: failed to get the current voltage(-517) palmas-pmic 48070000.i2c:palmas@48:palmas_pmic: failed to register 48070000.i2c:palmas@48:palmas_pmic regulator Cc: Agustí Fontquerni Cc: Eduard Gavin Cc: Enric Balletbo i Serra Cc: Peter Ujfalusi Signed-off-by: Nishanth Menon [tony@atomide.com: fixed to use palmas style in-supply] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index d0990606d16e..ebbaa140355b 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -14,6 +14,29 @@ display0 = &hdmi0; }; + vmain: fixedregulator-vmain { + compatible = "regulator-fixed"; + regulator-name = "vmain"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + + vsys_cobra: fixedregulator-vsys_cobra { + compatible = "regulator-fixed"; + regulator-name = "vsys_cobra"; + vin-supply = <&vmain>; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + }; + + vdds_1v8_main: fixedregulator-vdds_1v8_main { + compatible = "regulator-fixed"; + regulator-name = "vdds_1v8_main"; + vin-supply = <&smps7_reg>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + vmmcsd_fixed: fixedregulator-mmcsd { compatible = "regulator-fixed"; regulator-name = "vmmcsd_fixed"; @@ -409,6 +432,26 @@ ti,ldo6-vibrator; + smps123-in-supply = <&vsys_cobra>; + smps45-in-supply = <&vsys_cobra>; + smps6-in-supply = <&vsys_cobra>; + smps7-in-supply = <&vsys_cobra>; + smps8-in-supply = <&vsys_cobra>; + smps9-in-supply = <&vsys_cobra>; + smps10_out2-in-supply = <&vsys_cobra>; + smps10_out1-in-supply = <&vsys_cobra>; + ldo1-in-supply = <&vsys_cobra>; + ldo2-in-supply = <&vsys_cobra>; + ldo3-in-supply = <&vdds_1v8_main>; + ldo4-in-supply = <&vdds_1v8_main>; + ldo5-in-supply = <&vsys_cobra>; + ldo6-in-supply = <&vdds_1v8_main>; + ldo7-in-supply = <&vsys_cobra>; + ldo8-in-supply = <&vsys_cobra>; + ldo9-in-supply = <&vmmcsd_fixed>; + ldoln-in-supply = <&vsys_cobra>; + ldousb-in-supply = <&vsys_cobra>; + regulators { smps123_reg: smps123 { /* VDD_OPP_MPU */ -- cgit v1.2.3 From c0053bd50af57c4ebf032a9de1b07ca78c982452 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 6 Aug 2015 10:54:24 -0500 Subject: ARM: OMAP5 / DRA7: Introduce workaround for 801819 Add workaround for Cortex-A15 ARM erratum 801819 which says in summary that "A livelock can occur in the L2 cache arbitration that might prevent a snoop from completing. Under certain conditions this can cause the system to deadlock. " Recommended workaround is as follows: Do both of the following: 1) Do not use the write-back no-allocate memory type. 2) Do not issue write-back cacheable stores at any time when the cache is disabled (SCTLR.C=0) and the MMU is enabled (SCTLR.M=1). Because it is implementation defined whether cacheable stores update the cache when the cache is disabled it is not expected that any portable code will execute cacheable stores when the cache is disabled. For implementations of Cortex-A15 configured without the “L2 arbitration register slice” option (typically one or two core systems), you must also do the following: 3) Disable write-streaming in each CPU by setting ACTLR[28:25] = 0b1111 So, we provide an option to disable write streaming on OMAP5 and DRA7. It is a rare condition to occur and may be enabled selectively based on platform acceptance of risk. Applies to: A15 revisions r2p0, r2p1, r2p2, r2p3 or r2p4 and REVIDR[3] is set to 0. Based on ARM errata Document revision 18.0 (22 Nov 2013) Note: the configuration for the workaround needs to be done with each CPU bringup, since CPU0 bringup is done by bootloader, it is recommended to have the workaround in the bootloader, kernel also does ensure that CPU0 has the workaround and makes the workaround active when CPU1 gets active. With CONFIG_SMP disabled, it is expected to be done by the bootloader. This does show significant degradation in synthetic tests such as mbw (https://packages.qa.debian.org/m/mbw.html) mbw -n 100 100|grep AVG (on a test platform) Without enabling the erratum: AVG Method: MEMCPY Elapsed: 0.13406 MiB: 100.00000 Copy: 745.913 MiB/s AVG Method: DUMB Elapsed: 0.06746 MiB: 100.00000 Copy: 1482.357 MiB/s AVG Method: MCBLOCK Elapsed: 0.03058 MiB: 100.00000 Copy: 3270.569 MiB/s After enabling the erratum: AVG Method: MEMCPY Elapsed: 0.13757 MiB: 100.00000 Copy: 726.913 MiB/s AVG Method: DUMB Elapsed: 0.12024 MiB: 100.00000 Copy: 831.668 MiB/s AVG Method: MCBLOCK Elapsed: 0.09243 MiB: 100.00000 Copy: 1081.942 MiB/s Most benchmarks are designed for specific performance analysis, so overall usecase must be considered before making a decision to enable/disable the erratum workaround. Pending internal investigation, the erratum is kept disabled by default. Cc: Russell King Cc: Catalin Marinas Cc: Tony Lindgren Suggested-by: Richard Woodruff Suggested-by: Brad Griffis Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 8 +++++++ arch/arm/mach-omap2/omap-secure.h | 1 + arch/arm/mach-omap2/omap-smp.c | 48 +++++++++++++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 0517f0c1581a..a63d3fe2ca46 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -240,4 +240,12 @@ endmenu endif +config OMAP5_ERRATA_801819 + bool "Errata 801819: An eviction from L1 data cache might stall indefinitely" + depends on SOC_OMAP5 || SOC_DRA7XX + help + A livelock can occur in the L2 cache arbitration that might prevent + a snoop from completing. Under certain conditions this can cause the + system to deadlock. + endmenu diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index af2851fbcdf0..bae263fba640 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -46,6 +46,7 @@ #define OMAP5_DRA7_MON_SET_CNTFRQ_INDEX 0x109 #define OMAP5_MON_AMBA_IF_INDEX 0x108 +#define OMAP5_DRA7_MON_SET_ACR_INDEX 0x107 /* Secure PPA(Primary Protected Application) APIs */ #define OMAP4_PPA_L2_POR_INDEX 0x23 diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index c625cc10d9f9..8cd1de914ee4 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -50,6 +50,39 @@ void __iomem *omap4_get_scu_base(void) return scu_base; } +#ifdef CONFIG_OMAP5_ERRATA_801819 +void omap5_erratum_workaround_801819(void) +{ + u32 acr, revidr; + u32 acr_mask; + + /* REVIDR[3] indicates erratum fix available on silicon */ + asm volatile ("mrc p15, 0, %0, c0, c0, 6" : "=r" (revidr)); + if (revidr & (0x1 << 3)) + return; + + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + /* + * BIT(27) - Disables streaming. All write-allocate lines allocate in + * the L1 or L2 cache. + * BIT(25) - Disables streaming. All write-allocate lines allocate in + * the L1 cache. + */ + acr_mask = (0x3 << 25) | (0x3 << 27); + /* do we already have it done.. if yes, skip expensive smc */ + if ((acr & acr_mask) == acr_mask) + return; + + acr |= acr_mask; + omap_smc1(OMAP5_DRA7_MON_SET_ACR_INDEX, acr); + + pr_debug("%s: ARM erratum workaround 801819 applied on CPU%d\n", + __func__, smp_processor_id()); +} +#else +static inline void omap5_erratum_workaround_801819(void) { } +#endif + static void omap4_secondary_init(unsigned int cpu) { /* @@ -64,12 +97,15 @@ static void omap4_secondary_init(unsigned int cpu) omap_secure_dispatcher(OMAP4_PPA_CPU_ACTRL_SMP_INDEX, 4, 0, 0, 0, 0, 0); - /* - * Configure the CNTFRQ register for the secondary cpu's which - * indicates the frequency of the cpu local timers. - */ - if (soc_is_omap54xx() || soc_is_dra7xx()) + if (soc_is_omap54xx() || soc_is_dra7xx()) { + /* + * Configure the CNTFRQ register for the secondary cpu's which + * indicates the frequency of the cpu local timers. + */ set_cntfreq(); + /* Configure ACR to disable streaming WA for 801819 */ + omap5_erratum_workaround_801819(); + } /* * Synchronise with the boot thread. @@ -218,6 +254,8 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus) if (cpu_is_omap446x()) startup_addr = omap4460_secondary_startup; + if (soc_is_dra74x() || soc_is_omap54xx()) + omap5_erratum_workaround_801819(); /* * Write the address of secondary startup routine into the -- cgit v1.2.3 From 49111cd1c5498d2a356b0e51d74987dad0e88530 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 12 May 2016 13:29:48 -0700 Subject: ARM: dts: Fix igepv5 audiopwon-gpio Playing audio works on omap5-uevm, but produces an "Unhandled fault: imprecise external abort (0x1406) at 0x00000000" error on igepv5. Looks like the twl6040 audpwron GPIO pin is different for these boards. Let's fix the issue by configuring the audpwron in the board specific dts file. Cc: Agustí Fontquerni Cc: Eduard Gavin Cc: Enric Balletbo i Serra Acked-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 3 ++- arch/arm/boot/dts/omap5-igep0050.dts | 10 ++++++++++ arch/arm/boot/dts/omap5-uevm.dts | 10 ++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index ebbaa140355b..c6aa65a68642 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -643,7 +643,8 @@ pinctrl-0 = <&twl6040_pins>; interrupts = ; /* IRQ_SYS_2N cascaded to gic */ - ti,audpwron-gpio = <&gpio5 13 GPIO_ACTIVE_HIGH>; /* gpio line 141 */ + + /* audpwron gpio defined in the board specific dts */ vio-supply = <&smps7_reg>; v2v1-supply = <&smps9_reg>; diff --git a/arch/arm/boot/dts/omap5-igep0050.dts b/arch/arm/boot/dts/omap5-igep0050.dts index 46ecb1dd3b5c..700966b85575 100644 --- a/arch/arm/boot/dts/omap5-igep0050.dts +++ b/arch/arm/boot/dts/omap5-igep0050.dts @@ -52,3 +52,13 @@ <&gpio7 3 0>; /* 195, SDA */ }; +&twl6040 { + ti,audpwron-gpio = <&gpio5 16 GPIO_ACTIVE_HIGH>; /* gpio line 144 */ +}; + +&twl6040_pins { + pinctrl-single,pins = < + OMAP5_IOPAD(0x1c4, PIN_OUTPUT | MUX_MODE6) /* mcspi1_somi.gpio5_144 */ + OMAP5_IOPAD(0x1ca, PIN_OUTPUT | MUX_MODE6) /* perslimbus2_clock.gpio5_145 */ + >; +}; diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index 60b3fbb3bf07..a51e60518eb6 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -51,3 +51,13 @@ <&gpio9 1 GPIO_ACTIVE_HIGH>, /* TCA6424A P00, LS OE */ <&gpio7 1 GPIO_ACTIVE_HIGH>; /* GPIO 193, HPD */ }; + +&twl6040 { + ti,audpwron-gpio = <&gpio5 13 GPIO_ACTIVE_HIGH>; /* gpio line 141 */ +}; + +&twl6040_pins { + pinctrl-single,pins = < + OMAP5_IOPAD(0x1be, PIN_OUTPUT | MUX_MODE6) /* mcspi1_somi.gpio5_141 */ + >; +}; -- cgit v1.2.3 From e0e80d43fc0c5aace76a6c5f9462a1aec3e004f8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 12 May 2016 13:29:48 -0700 Subject: ARM: dts: Fix uart wakeirq on omap5 by removing WAKEUP_EN for omaps The padconf register WAKEUP_EN is now handled in a generic way using Linux wakeirqs where pinctrl-single toggles the WAKEUP_EN bit when a wakeirq is enabled or disabled. At least omap5 gets confused if the WAKEUP_EN bit is set and the pin is not claimed as a wakeirq. The end result is that wakeirqs don't work properly as there is nothing handling the wakeirq. So let's just remove the WAKEUP_EN usage from dts files. Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-evm-37xx.dts | 2 +- arch/arm/boot/dts/omap3-n900.dts | 4 ++-- arch/arm/boot/dts/omap3-n950-n9.dtsi | 6 +++--- arch/arm/boot/dts/omap3-zoom3.dts | 6 +++--- arch/arm/boot/dts/omap5-board-common.dtsi | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-evm-37xx.dts b/arch/arm/boot/dts/omap3-evm-37xx.dts index 76056ba92ced..ed449827c3d3 100644 --- a/arch/arm/boot/dts/omap3-evm-37xx.dts +++ b/arch/arm/boot/dts/omap3-evm-37xx.dts @@ -85,7 +85,7 @@ OMAP3_CORE1_IOPAD(0x2158, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_clk.sdmmc2_clk */ OMAP3_CORE1_IOPAD(0x215a, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_cmd.sdmmc2_cmd */ OMAP3_CORE1_IOPAD(0x215c, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat0.sdmmc2_dat0 */ - OMAP3_CORE1_IOPAD(0x215e, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat1.sdmmc2_dat1 */ + OMAP3_CORE1_IOPAD(0x215e, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat1.sdmmc2_dat1 */ OMAP3_CORE1_IOPAD(0x2160, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat2.sdmmc2_dat2 */ OMAP3_CORE1_IOPAD(0x2162, PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc2_dat3.sdmmc2_dat3 */ >; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index b3c26a96a726..9c9e2ae6d6aa 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -288,7 +288,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLUP | MUX_MODE1) /* ssi1_rdy_tx */ OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE1) /* ssi1_flag_tx */ - OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */ + OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4) /* ssi1_wake_tx (cawake) */ OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE1) /* ssi1_dat_tx */ OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE1) /* ssi1_dat_rx */ OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE1) /* ssi1_flag_rx */ @@ -300,7 +300,7 @@ modem_pins: pinmux_modem { pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x20dc, PIN_OUTPUT | MUX_MODE4) /* gpio 70 => cmt_apeslpx */ - OMAP3_CORE1_IOPAD(0x20e0, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* gpio 72 => ape_rst_rq */ + OMAP3_CORE1_IOPAD(0x20e0, PIN_INPUT | MUX_MODE4) /* gpio 72 => ape_rst_rq */ OMAP3_CORE1_IOPAD(0x20e2, PIN_OUTPUT | MUX_MODE4) /* gpio 73 => cmt_rst_rq */ OMAP3_CORE1_IOPAD(0x20e4, PIN_OUTPUT | MUX_MODE4) /* gpio 74 => cmt_en */ OMAP3_CORE1_IOPAD(0x20e6, PIN_OUTPUT | MUX_MODE4) /* gpio 75 => cmt_rst */ diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index a00ca761675d..927b17fc4ed8 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -97,7 +97,7 @@ OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE1) /* ssi1_dat_tx */ OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE1) /* ssi1_flag_tx */ OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLUP | MUX_MODE1) /* ssi1_rdy_tx */ - OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */ + OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4) /* ssi1_wake_tx (cawake) */ OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE1) /* ssi1_dat_rx */ OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE1) /* ssi1_flag_rx */ OMAP3_CORE1_IOPAD(0x2188, PIN_OUTPUT | MUX_MODE1) /* ssi1_rdy_rx */ @@ -110,7 +110,7 @@ OMAP3_CORE1_IOPAD(0x217c, PIN_OUTPUT | MUX_MODE7) /* ssi1_dat_tx */ OMAP3_CORE1_IOPAD(0x217e, PIN_OUTPUT | MUX_MODE7) /* ssi1_flag_tx */ OMAP3_CORE1_IOPAD(0x2180, PIN_INPUT_PULLDOWN | MUX_MODE7) /* ssi1_rdy_tx */ - OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* ssi1_wake_tx (cawake) */ + OMAP3_CORE1_IOPAD(0x2182, PIN_INPUT | MUX_MODE4) /* ssi1_wake_tx (cawake) */ OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT | MUX_MODE7) /* ssi1_dat_rx */ OMAP3_CORE1_IOPAD(0x2186, PIN_INPUT | MUX_MODE7) /* ssi1_flag_rx */ OMAP3_CORE1_IOPAD(0x2188, PIN_OUTPUT | MUX_MODE4) /* ssi1_rdy_rx */ @@ -120,7 +120,7 @@ modem_pins1: pinmux_modem_core1_pins { pinctrl-single,pins = < - OMAP3_CORE1_IOPAD(0x207a, PIN_INPUT | WAKEUP_EN | MUX_MODE4) /* gpio_34 (ape_rst_rq) */ + OMAP3_CORE1_IOPAD(0x207a, PIN_INPUT | MUX_MODE4) /* gpio_34 (ape_rst_rq) */ OMAP3_CORE1_IOPAD(0x2100, PIN_OUTPUT | MUX_MODE4) /* gpio_88 (cmt_rst_rq) */ OMAP3_CORE1_IOPAD(0x210a, PIN_OUTPUT | MUX_MODE4) /* gpio_93 (cmt_apeslpx) */ >; diff --git a/arch/arm/boot/dts/omap3-zoom3.dts b/arch/arm/boot/dts/omap3-zoom3.dts index f19170bdcc1f..c29b41dc7b95 100644 --- a/arch/arm/boot/dts/omap3-zoom3.dts +++ b/arch/arm/boot/dts/omap3-zoom3.dts @@ -98,7 +98,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2174, PIN_INPUT_PULLUP | MUX_MODE0) /* uart2_cts.uart2_cts */ OMAP3_CORE1_IOPAD(0x2176, PIN_OUTPUT | MUX_MODE0) /* uart2_rts.uart2_rts */ - OMAP3_CORE1_IOPAD(0x217a, WAKEUP_EN | PIN_INPUT | MUX_MODE0) /* uart2_rx.uart2_rx */ + OMAP3_CORE1_IOPAD(0x217a, PIN_INPUT | MUX_MODE0) /* uart2_rx.uart2_rx */ OMAP3_CORE1_IOPAD(0x2178, PIN_OUTPUT | MUX_MODE0) /* uart2_tx.uart2_tx */ >; }; @@ -107,7 +107,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x219a, PIN_INPUT_PULLDOWN | MUX_MODE0) /* uart3_cts_rctx.uart3_cts_rctx */ OMAP3_CORE1_IOPAD(0x219c, PIN_OUTPUT | MUX_MODE0) /* uart3_rts_sd.uart3_rts_sd */ - OMAP3_CORE1_IOPAD(0x219e, WAKEUP_EN | PIN_INPUT | MUX_MODE0) /* uart3_rx_irrx.uart3_rx_irrx */ + OMAP3_CORE1_IOPAD(0x219e, PIN_INPUT | MUX_MODE0) /* uart3_rx_irrx.uart3_rx_irrx */ OMAP3_CORE1_IOPAD(0x21a0, PIN_OUTPUT | MUX_MODE0) /* uart3_tx_irtx.uart3_tx_irtx */ >; }; @@ -125,7 +125,7 @@ pinctrl-single,pins = < OMAP3630_CORE2_IOPAD(0x25d8, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_clk.sdmmc3_clk */ OMAP3630_CORE2_IOPAD(0x25e4, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d4.sdmmc3_dat0 */ - OMAP3630_CORE2_IOPAD(0x25e6, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d5.sdmmc3_dat1 */ + OMAP3630_CORE2_IOPAD(0x25e6, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d5.sdmmc3_dat1 */ OMAP3630_CORE2_IOPAD(0x25e8, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d6.sdmmc3_dat2 */ OMAP3630_CORE2_IOPAD(0x25e2, PIN_INPUT_PULLUP | MUX_MODE2) /* etk_d3.sdmmc3_dat3 */ >; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index c6aa65a68642..9851b5767ec4 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -332,7 +332,7 @@ wlcore_irq_pin: pinmux_wlcore_irq_pin { pinctrl-single,pins = < - OMAP5_IOPAD(0x40, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE6) /* llia_wakereqin.gpio1_wk14 */ + OMAP5_IOPAD(0x40, PIN_INPUT_PULLUP | MUX_MODE6) /* llia_wakereqin.gpio1_wk14 */ >; }; }; -- cgit v1.2.3 From 6c05495d6d0507a6e7886265f82339bebf25cfed Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 12 May 2016 13:29:48 -0700 Subject: ARM: dts: Fix ldo7 source for HDMI on igepv5 Fix ldo7 source for HDMI on igepv5. Suggested-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-igep0050.dts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap5-igep0050.dts b/arch/arm/boot/dts/omap5-igep0050.dts index 700966b85575..f75ce02fb398 100644 --- a/arch/arm/boot/dts/omap5-igep0050.dts +++ b/arch/arm/boot/dts/omap5-igep0050.dts @@ -35,6 +35,22 @@ }; }; +/* LDO4 is VPP1 - ball AD9 */ +&ldo4_reg { + regulator-min-microvolt = <2000000>; + regulator-max-microvolt = <2000000>; +}; + +/* + * LDO7 is used for HDMI: VDDA_DSIPORTA - ball AA33, VDDA_DSIPORTC - ball AE33, + * VDDA_HDMI - ball AN25 + */ +&ldo7_reg { + status = "okay"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; +}; + &omap5_pmx_core { i2c4_pins: pinmux_i2c4_pins { pinctrl-single,pins = < -- cgit v1.2.3 From 54c78870e4d70420b887d712a3e73a6783d5b51d Mon Sep 17 00:00:00 2001 From: Nicolas Chauvet Date: Tue, 10 May 2016 12:14:57 +0200 Subject: ARM: dts: Add non-removable to hsmmc on hp-t410 This will clean-up warnings at boot, since either that or cd-gpio{,s} are mandated by the dts specification of_get_named_gpiod_flags: can't parse 'cd-gpios' property of node '/ocp/mmc@47810000[0]' of_get_named_gpiod_flags: can't parse 'cd-gpio' property of node '/ocp/mmc@47810000[0]' v2: use the generic non-removable instead of ti,non-removable Signed-off-by: Nicolas Chauvet Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dm8148-t410.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts index 5d4313fd5a46..97000b521cea 100644 --- a/arch/arm/boot/dts/dm8148-t410.dts +++ b/arch/arm/boot/dts/dm8148-t410.dts @@ -53,6 +53,7 @@ dmas = <&edma_xbar 8 0 1 /* use SDTXEVT1 instead of MCASP0TX */ &edma_xbar 9 0 2>; /* use SDRXEVT1 instead of MCASP0RX */ dma-names = "tx", "rx"; + non-removable; }; &pincntl { -- cgit v1.2.3 From 1ddbef4d596ce51bf25825549bc5865de08a7e62 Mon Sep 17 00:00:00 2001 From: Nicolas Chauvet Date: Tue, 10 May 2016 12:14:58 +0200 Subject: ARM: dts: disable mmc by default and enable when needed for dm814x This patch disable mmc nodes by default in the dm814x.dtsi and enable only when needed on a given dts v2: Disable un-used mmc nodes on the related boards dts files instead of from the included SOC dts Signed-off-by: Nicolas Chauvet Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dm8148-evm.dts | 8 ++++++++ arch/arm/boot/dts/dm8148-t410.dts | 8 ++++++++ 2 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts index cbc17b0794b1..4128fa91823c 100644 --- a/arch/arm/boot/dts/dm8148-evm.dts +++ b/arch/arm/boot/dts/dm8148-evm.dts @@ -93,6 +93,10 @@ }; }; +&mmc1 { + status = "disabled"; +}; + &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&sd1_pins>; @@ -101,6 +105,10 @@ cd-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; }; +&mmc3 { + status = "disabled"; +}; + &pincntl { sd1_pins: pinmux_sd1_pins { pinctrl-single,pins = < diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts index 97000b521cea..3f184863e0c5 100644 --- a/arch/arm/boot/dts/dm8148-t410.dts +++ b/arch/arm/boot/dts/dm8148-t410.dts @@ -45,6 +45,14 @@ phy-mode = "rgmii"; }; +&mmc1 { + status = "disabled"; +}; + +&mmc2 { + status = "disabled"; +}; + &mmc3 { pinctrl-names = "default"; pinctrl-0 = <&sd2_pins>; -- cgit v1.2.3 From 10ce2404ccab6828f86fb038e4888837f49f9f48 Mon Sep 17 00:00:00 2001 From: Franklin S Cooper Jr Date: Wed, 4 May 2016 12:43:55 -0500 Subject: ARM: dts: dra7: Add gpmc dma channel Add dma channel information to the gpmc. Signed-off-by: Franklin S Cooper Jr Signed-off-by: Sekhar Nori Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7.dtsi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index e0074014385a..3a8f3976f6f9 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1451,6 +1451,8 @@ ti,hwmods = "gpmc"; reg = <0x50000000 0x37c>; /* device IO registers */ interrupts = ; + dmas = <&edma_xbar 4 0>; + dma-names = "rxtx"; gpmc,num-cs = <8>; gpmc,num-waitpins = <2>; #address-cells = <2>; -- cgit v1.2.3 From 3d9f77be066ccf8d364928154552ed6ca910793b Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 6 May 2016 08:37:57 -0500 Subject: ARM: dts: am57xx-idk-common: Fix input supply names Palmas Regulator is an exception and does not follow the standard "vin-supply" common definitions for all regulators, as a result of this, the input supplies are not reported to regulator framework, with the obvious result of not being appropriately mapped. Fix the same. Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am57xx-idk-common.dtsi | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index b01a5948cdd0..0e63b9dff6e7 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -60,10 +60,26 @@ tps659038_pmic { compatible = "ti,tps659038-pmic"; + + smps12-in-supply = <&vmain>; + smps3-in-supply = <&vmain>; + smps45-in-supply = <&vmain>; + smps6-in-supply = <&vmain>; + smps7-in-supply = <&vmain>; + smps8-in-supply = <&vmain>; + smps9-in-supply = <&vmain>; + ldo1-in-supply = <&vmain>; + ldo2-in-supply = <&vmain>; + ldo3-in-supply = <&vmain>; + ldo4-in-supply = <&vmain>; + ldo9-in-supply = <&vmain>; + ldoln-in-supply = <&vmain>; + ldousb-in-supply = <&vmain>; + ldortc-in-supply = <&vmain>; + regulators { smps12_reg: smps12 { /* VDD_MPU */ - vin-supply = <&vmain>; regulator-name = "smps12"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1250000>; @@ -73,7 +89,6 @@ smps3_reg: smps3 { /* VDD_DDR EMIF1 EMIF2 */ - vin-supply = <&vmain>; regulator-name = "smps3"; regulator-min-microvolt = <1350000>; regulator-max-microvolt = <1350000>; @@ -84,7 +99,6 @@ smps45_reg: smps45 { /* VDD_DSPEVE on AM572 */ /* VDD_IVA + VDD_DSP on AM571 */ - vin-supply = <&vmain>; regulator-name = "smps45"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1250000>; @@ -94,7 +108,6 @@ smps6_reg: smps6 { /* VDD_GPU */ - vin-supply = <&vmain>; regulator-name = "smps6"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1250000>; @@ -104,7 +117,6 @@ smps7_reg: smps7 { /* VDD_CORE */ - vin-supply = <&vmain>; regulator-name = "smps7"; regulator-min-microvolt = <850000>; regulator-max-microvolt = <1150000>; @@ -115,13 +127,11 @@ smps8_reg: smps8 { /* 5728 - VDD_IVAHD */ /* 5718 - N.C. test point */ - vin-supply = <&vmain>; regulator-name = "smps8"; }; smps9_reg: smps9 { /* VDD_3_3D */ - vin-supply = <&vmain>; regulator-name = "smps9"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; @@ -132,7 +142,6 @@ ldo1_reg: ldo1 { /* VDDSHV8 - VSDMMC */ /* NOTE: on rev 1.3a, data supply */ - vin-supply = <&vmain>; regulator-name = "ldo1"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; @@ -142,7 +151,6 @@ ldo2_reg: ldo2 { /* VDDSH18V */ - vin-supply = <&vmain>; regulator-name = "ldo2"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -152,7 +160,6 @@ ldo3_reg: ldo3 { /* R1.3a 572x V1_8PHY_LDO3: USB, SATA */ - vin-supply = <&vmain>; regulator-name = "ldo3"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -162,7 +169,6 @@ ldo4_reg: ldo4 { /* R1.3a 572x V1_8PHY_LDO4: PCIE, HDMI*/ - vin-supply = <&vmain>; regulator-name = "ldo4"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -174,7 +180,6 @@ ldo9_reg: ldo9 { /* VDD_RTC */ - vin-supply = <&vmain>; regulator-name = "ldo9"; regulator-min-microvolt = <840000>; regulator-max-microvolt = <1160000>; @@ -184,7 +189,6 @@ ldoln_reg: ldoln { /* VDDA_1V8_PLL */ - vin-supply = <&vmain>; regulator-name = "ldoln"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -194,7 +198,6 @@ ldousb_reg: ldousb { /* VDDA_3V_USB: VDDA_USBHS33 */ - vin-supply = <&vmain>; regulator-name = "ldousb"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; @@ -204,7 +207,6 @@ ldortc_reg: ldortc { /* VDDA_RTC */ - vin-supply = <&vmain>; regulator-name = "ldortc"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; -- cgit v1.2.3 From 65db875d110b8e517f81fa71c57a65fc6d0019ac Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 12 May 2016 13:20:52 -0500 Subject: ARM: OMAP2+: AM43XX: Enable fixes for Cortex-A9 errata This patch explicitly enables the fixes for the below errata applicable for AM43x Socs as was done for OMAP4. 754322: Faulty MMU translations following ASID switch 775420: A data cache maintenance operation which aborts, followed by an ISB, without any DSB in-between, might lead to deadlock Signed-off-by: Dave Gerlach Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index a63d3fe2ca46..415a0bd0cda6 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -67,6 +67,8 @@ config SOC_AM43XX select HAVE_ARM_SCU select GENERIC_CLOCKEVENTS_BROADCAST select HAVE_ARM_TWD + select ARM_ERRATA_754322 + select ARM_ERRATA_775420 config SOC_DRA7XX bool "TI DRA7XX" -- cgit v1.2.3 From b44f788cce4086da00fd2f14b6b4d5abcf85f842 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Fri, 6 May 2016 23:02:33 +0200 Subject: ARM: dts: igep00x0: Add SD card-detect. Fix SD card remove/insert detection by adding the correct card-detect pin. All IGEP OMAP3 based boards use the same card-detect pin. Signed-off-by: Enric Balletbo i Serra Signed-off-by: Ladislav Michl Reviewed-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-igep.dtsi | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-igep.dtsi b/arch/arm/boot/dts/omap3-igep.dtsi index 41f5d386f21f..f4f2ce46d681 100644 --- a/arch/arm/boot/dts/omap3-igep.dtsi +++ b/arch/arm/boot/dts/omap3-igep.dtsi @@ -188,6 +188,7 @@ vmmc-supply = <&vmmc1>; vmmc_aux-supply = <&vsim>; bus-width = <4>; + cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_LOW>; }; &mmc3 { -- cgit v1.2.3 From a1f6ad14176d466c00e6a687f9c78ec6c7ad6bf8 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Fri, 6 May 2016 23:02:34 +0200 Subject: ARM: dts: igep0020: Add SD card write-protect pin. A host device that supports write protection should refuse to write to an SD card that is designated read-only when write-protect is set. This is an optional feature of the SD specification. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-igep0020-common.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-igep0020-common.dtsi b/arch/arm/boot/dts/omap3-igep0020-common.dtsi index d6f839cab649..b6971060648a 100644 --- a/arch/arm/boot/dts/omap3-igep0020-common.dtsi +++ b/arch/arm/boot/dts/omap3-igep0020-common.dtsi @@ -194,6 +194,12 @@ OMAP3630_CORE2_IOPAD(0x25f8, PIN_OUTPUT | MUX_MODE4) /* etk_d14.gpio_28 */ >; }; + + mmc1_wp_pins: pinmux_mmc1_cd_pins { + pinctrl-single,pins = < + OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT | MUX_MODE4) /* etk_d15.gpio_29 */ + >; + }; }; &i2c3 { @@ -250,3 +256,8 @@ }; }; }; + +&mmc1 { + pinctrl-0 = <&mmc1_pins &mmc1_wp_pins>; + wp-gpios = <&gpio1 29 GPIO_ACTIVE_LOW>; /* gpio_29 */ +}; -- cgit v1.2.3 From 4cb54493ab2003568222e6ad7449747354f22ce3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 12 May 2016 10:38:31 +0200 Subject: ARM: samsung: improve static dma_mask definition When no DMA master devices are part of the kernel configuration, we get a warning about the unused dma mask definition: arch/arm/plat-samsung/devs.c:71:12: error: 'samsung_device_dma_mask' defined but not used [-Werror=unused-variable] static u64 samsung_device_dma_mask = DMA_BIT_MASK(32); We could simply mark this as __maybe_unused to shut up that warning, but a nicer solution seems to be to have a separate mask for each device. The advantage is that a driver that happens to call dma_set_mask() on one device doesn't implicitly change the mask for the other devices as well. This is more of a theoretical problem, as obviously nothing does it for the devices in this file (or they would have always been broken), but it feels cleaner that way. The definition works by creating an array in place so we can take the address of it and let the compiler generate a hidden symbol for it at compile time. Signed-off-by: Arnd Bergmann Signed-off-by: Krzysztof Kozlowski --- arch/arm/plat-samsung/devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c index 84baa16f4c0b..e93aa6734147 100644 --- a/arch/arm/plat-samsung/devs.c +++ b/arch/arm/plat-samsung/devs.c @@ -68,7 +68,7 @@ #include #include -static u64 samsung_device_dma_mask = DMA_BIT_MASK(32); +#define samsung_device_dma_mask (*((u64[]) { DMA_BIT_MASK(32) })) /* AC97 */ #ifdef CONFIG_CPU_S3C2440 -- cgit v1.2.3 From 8a6f71ccb6c39ca5ae442d1478d3a076e2b1361e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 May 2016 16:17:36 +0200 Subject: ARM: exynos: don't select keyboard driver The samsung-keypad driver is implicitly selected by ARCH_EXYNOS4 (why?), but this fails if CONFIG_INPUT is a loadable module: drivers/input/built-in.o: In function `samsung_keypad_remove': drivers/input/keyboard/samsung-keypad.c:461: undefined reference to `input_unregister_device' drivers/input/built-in.o: In function `samsung_keypad_irq': drivers/input/keyboard/samsung-keypad.c:137: undefined reference to `input_event' drivers/input/built-in.o: In function `samsung_keypad_irq': include/linux/input.h:389: undefined reference to `input_event' drivers/input/built-in.o: In function `samsung_keypad_probe': drivers/input/keyboard/samsung-keypad.c:358: undefined reference to `devm_input_allocate_device' drivers/input/built-in.o:(.debug_addr+0x34): undefined reference to `input_set_capability' This removes the 'select' as suggested by Krzysztof Kozlowski and instead enables the driver from the defconfig files. The problem does not happen on mainline kernels, as we don't normally build built-in input drivers when CONFIG_INPUT=m, but I am experimenting with a patch to change this, and the samsung keypad driver showed up as one example that was silently broken before. Signed-off-by: Arnd Bergmann Link: https://lkml.org/lkml/2016/2/14/55 Signed-off-by: Krzysztof Kozlowski --- arch/arm/configs/exynos_defconfig | 1 + arch/arm/configs/multi_v7_defconfig | 1 + arch/arm/mach-exynos/Kconfig | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 6ffd7e76f3ce..1a197fc8fdc7 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -77,6 +77,7 @@ CONFIG_TOUCHSCREEN_ATMEL_MXT=y CONFIG_INPUT_MISC=y CONFIG_INPUT_MAX77693_HAPTIC=y CONFIG_INPUT_MAX8997_HAPTIC=y +CONFIG_KEYBOARD_SAMSUNG=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_SAMSUNG=y CONFIG_SERIAL_SAMSUNG_CONSOLE=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 28234906a064..ad97a41ceb35 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -260,6 +260,7 @@ CONFIG_KEYBOARD_TEGRA=y CONFIG_KEYBOARD_SPEAR=y CONFIG_KEYBOARD_ST_KEYSCAN=y CONFIG_KEYBOARD_CROS_EC=m +CONFIG_KEYBOARD_SAMSUNG=m CONFIG_MOUSE_PS2_ELANTECH=y CONFIG_MOUSE_CYAPA=m CONFIG_MOUSE_ELAN_I2C=y diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 207fa2c737a6..c0d9f334d1d0 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -58,7 +58,6 @@ config ARCH_EXYNOS4 select CLKSRC_SAMSUNG_PWM if CPU_EXYNOS4210 select CPU_EXYNOS4210 select GIC_NON_BANKED - select KEYBOARD_SAMSUNG if INPUT_KEYBOARD select MIGHT_HAVE_CACHE_L2X0 help Samsung EXYNOS4 (Cortex-A9) SoC based systems -- cgit v1.2.3 From 15b7cc78f0951e418c940d8b3b6a7a3b962b7748 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 30 May 2016 11:12:33 +0900 Subject: arm64: dts: drop "arm,amba-bus" in favor of "simple-bus" part 2 Tree-wide replacement was done by commit 2ef7d5f342c1 (ARM, ARM64: dts: drop "arm,amba-bus" in favor of "simple-bus"), but we have some new users of "arm,amba-bus" at Linux 4.7-rc1. Eliminate them now. Signed-off-by: Masahiro Yamada Acked-by: Heiko Stuebner Acked-by: Chanho Min Signed-off-by: Olof Johansson --- arch/arm64/boot/dts/lg/lg1312.dtsi | 2 +- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 3a4e9a2ab313..fbafa24cd533 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -125,7 +125,7 @@ #size-cells = <1>; #interrupts-cells = <3>; - compatible = "arm,amba-bus"; + compatible = "simple-bus"; interrupt-parent = <&gic>; ranges; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 46f325a143b0..d7f8e06910bc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -163,7 +163,7 @@ }; amba { - compatible = "arm,amba-bus"; + compatible = "simple-bus"; #address-cells = <2>; #size-cells = <2>; ranges; -- cgit v1.2.3 From 5fdb884267890b26fdfd5ff5ddaf596745b9ab43 Mon Sep 17 00:00:00 2001 From: Olliver Schinagl Date: Fri, 13 May 2016 21:57:16 +0200 Subject: ARM: dts: sunxi: Add OLinuXino Lime2 eMMC to the Makefile commit 27dd9af6bc000ab21fd ("ARM: dts: sunxi: Add a olinuxino-lime2-emmc") added the new emmc equipped lime2 but forgot its Makefile. This patch adds an entry to the Makefile. Signed-off-by: Olliver Schinagl Acked-by: Maxime Ripard Signed-off-by: Olof Johansson --- arch/arm/boot/dts/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 06b6c2d695bf..414b42710a36 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -741,6 +741,7 @@ dtb-$(CONFIG_MACH_SUN7I) += \ sun7i-a20-olimex-som-evb.dtb \ sun7i-a20-olinuxino-lime.dtb \ sun7i-a20-olinuxino-lime2.dtb \ + sun7i-a20-olinuxino-lime2-emmc.dtb \ sun7i-a20-olinuxino-micro.dtb \ sun7i-a20-orangepi.dtb \ sun7i-a20-orangepi-mini.dtb \ -- cgit v1.2.3 From cb84f6c0a25eb76b1f838eb63e212705c0c06b5f Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 11 May 2016 13:23:13 +0800 Subject: ARM: dts: sun6i: primo81: Drop constraints on dc1sw regulator This is the same issue fixed in commit dcf5341f0150 ("ARM: dts: sun8i-q8-common: Do not set constraints on dc1sw regulator"). Commit message copied: dc1sw is an on/off only regulator and as such it cannot have constraints. This is a limitation of the kernel regulator implementation which resolves supplies on the first regulator_get(), which is done after applying constraints, and applying the constrains will fail because it calls _regulator_get_voltage() and _regulator_do_set_voltage() both of which will fail on a switch regulator when there is no supply (yet). This causes registering of all axp22x regulators to fail with the following errors: [ 1.395249] vcc-lcd: failed to get the current voltage(-22) [ 1.405131] axp20x-regulator axp20x-regulator: Failed to register dc1sw [ 1.412436] axp20x-regulator: probe of axp20x-regulator failed with error -22 This commit removes the constrains on dc1sw / vcc-lcd fixing this problem. Note that dcdc1 itself is contrained to the exact same values, so this does not change anything. Cc: Hans de Goede Signed-off-by: Chen-Yu Tsai Cc: # 4.6 Acked-by: Maxime Ripard Signed-off-by: Olof Johansson --- arch/arm/boot/dts/sun6i-a31s-primo81.dts | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun6i-a31s-primo81.dts b/arch/arm/boot/dts/sun6i-a31s-primo81.dts index 68b479b8772c..73c133f5e79c 100644 --- a/arch/arm/boot/dts/sun6i-a31s-primo81.dts +++ b/arch/arm/boot/dts/sun6i-a31s-primo81.dts @@ -176,8 +176,6 @@ }; ®_dc1sw { - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; regulator-name = "vcc-lcd"; }; -- cgit v1.2.3 From b223d6242c372a81cca3bb81998f53d3b3e3fb70 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 11 May 2016 13:23:14 +0800 Subject: ARM: dts: sun6i: yones-toptech-bs1078-v2: Drop constraints on dc1sw regulator This is the same issue fixed in commit dcf5341f0150 ("ARM: dts: sun8i-q8-common: Do not set constraints on dc1sw regulator"). Commit message copied: dc1sw is an on/off only regulator and as such it cannot have constraints. This is a limitation of the kernel regulator implementation which resolves supplies on the first regulator_get(), which is done after applying constraints, and applying the constrains will fail because it calls _regulator_get_voltage() and _regulator_do_set_voltage() both of which will fail on a switch regulator when there is no supply (yet). This causes registering of all axp22x regulators to fail with the following errors: [ 1.395249] vcc-lcd: failed to get the current voltage(-22) [ 1.405131] axp20x-regulator axp20x-regulator: Failed to register dc1sw [ 1.412436] axp20x-regulator: probe of axp20x-regulator failed with error -22 This commit removes the constrains on dc1sw / vcc-lcd fixing this problem. Note that dcdc1 itself is contrained to the exact same values, so this does not change anything. Cc: Hans de Goede Signed-off-by: Chen-Yu Tsai Cc: # 4.6 Acked-by: Maxime Ripard Signed-off-by: Olof Johansson --- arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts b/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts index 360adfb1e9ca..d6ad6196a768 100644 --- a/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts +++ b/arch/arm/boot/dts/sun6i-a31s-yones-toptech-bs1078-v2.dts @@ -135,8 +135,6 @@ ®_dc1sw { regulator-name = "vcc-lcd-usb2"; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; }; ®_dc5ldo { -- cgit v1.2.3 From 2969c03763b40e946b46aee57ef083527711c69f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 6 Jun 2016 16:24:43 -0400 Subject: ARM: dts: exynos: Fix port nodes names for Exynos5250 Snow board Commit 5c9cbade0629 ("ARM: dts: exynos: Fix DTC unit name warnings in Exynos5250") fixed all the DTC warnings about mismatchs between unit names and reg properties in Exynos5250 boards DTS. But unfortunately it also added a regression on the Exynos5250 Snow Chromebook when changing the port node names since the OF graph logic expects the port nodes to be always named 'port'. The Documentation/devicetree/bindings/graph.txt binding document says that when there is more than one port, '#address-cells', '#size-cells' and 'reg' properties should be used to number the port nodes. Fixes: 5c9cbade0629 ("ARM: dts: exynos: Fix DTC unit name warnings in Exynos5250") Reported-by: Marc Zyngier Signed-off-by: Javier Martinez Canillas Tested-by: Marc Zyngier Signed-off-by: Krzysztof Kozlowski --- arch/arm/boot/dts/exynos5250-snow-common.dtsi | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/exynos5250-snow-common.dtsi b/arch/arm/boot/dts/exynos5250-snow-common.dtsi index ddfe1f558c10..fa14f77df563 100644 --- a/arch/arm/boot/dts/exynos5250-snow-common.dtsi +++ b/arch/arm/boot/dts/exynos5250-snow-common.dtsi @@ -242,7 +242,7 @@ hpd-gpios = <&gpx0 7 GPIO_ACTIVE_HIGH>; ports { - port0 { + port { dp_out: endpoint { remote-endpoint = <&bridge_in>; }; @@ -485,13 +485,20 @@ edid-emulation = <5>; ports { - port0 { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + bridge_out: endpoint { remote-endpoint = <&panel_in>; }; }; - port1 { + port@1 { + reg = <1>; + bridge_in: endpoint { remote-endpoint = <&dp_out>; }; -- cgit v1.2.3 From a7d7865fecd83adb98b20eca5eddef7efc94831d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 6 Jun 2016 16:24:44 -0400 Subject: ARM: dts: exynos: Fix port nodes names for Exynos5420 Peach Pit board Commit bea7eef6949c ("ARM: dts: exynos: Fix DTC unit name warnings in Peach Pit") fixed the DTC warnings about mismatches between unit names and reg properties in the Exynos5420 Peach Pit DTS. But unfortunately it also added a regression on the Peach Pit when changing the port node names since the OF graph logic expects the port nodes to be always named 'port'. The Documentation/devicetree/bindings/graph.txt binding document says that when there is more than one port, '#address-cells', '#size-cells' and 'reg' properties should be used to number the port nodes. Fixes: bea7eef6949c ("ARM: dts: exynos: Fix DTC unit name warnings in Peach Pit") Reported-by: Marc Zyngier Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski --- arch/arm/boot/dts/exynos5420-peach-pit.dts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index f9d2e4f1a0e0..1de972d46a87 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -163,7 +163,7 @@ hpd-gpios = <&gpx2 6 GPIO_ACTIVE_HIGH>; ports { - port0 { + port { dp_out: endpoint { remote-endpoint = <&bridge_in>; }; @@ -631,13 +631,20 @@ use-external-pwm; ports { - port0 { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + bridge_out: endpoint { remote-endpoint = <&panel_in>; }; }; - port1 { + port@1 { + reg = <1>; + bridge_in: endpoint { remote-endpoint = <&dp_out>; }; -- cgit v1.2.3 From c106c21ce02e366f3dc887bff7c48f3f140c45c9 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 25 May 2016 22:40:42 +0000 Subject: ARM: dts: socfpga: Add missing PHY phandle Add missing PHY phandle into the DT, otherwise the stmmac code won't detect the PHY correctly anymore. Signed-off-by: Marek Vasut Cc: Dinh Nguyen Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts index a3601e4c0a2e..b844473601d2 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts @@ -136,6 +136,7 @@ &gmac1 { status = "okay"; phy-mode = "rgmii"; + phy-handle = <&phy1>; snps,reset-gpio = <&porta 0 GPIO_ACTIVE_LOW>; snps,reset-active-low; -- cgit v1.2.3 From 2e4094bdaa3ef295abbebb31b978e3344ee64257 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 19 May 2016 18:20:17 -0500 Subject: ARM: OMAP2: Enable Errata 430973 for OMAP3 Enable Erratum 430973 similar to commit 5c86c5339c56 ("ARM: omap2plus_defconfig: Enable ARM erratum 430973 for omap3") - Since multiple defconfigs can exist from various points of view (multi_v7, omap2plus etc.. it is always better to enable the erratum from the Kconfig selection point of view so that downstream kernels dont have to rediscover this all over again. Reported-by: Grygorii Strashko Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 0517f0c1581a..b9b71328249b 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -17,6 +17,7 @@ config ARCH_OMAP3 select PM_OPP if PM select PM if CPU_IDLE select SOC_HAS_OMAP2_SDRC + select ARM_ERRATA_430973 config ARCH_OMAP4 bool "TI OMAP4" -- cgit v1.2.3 From 4c88c1c72f86dab63d8219c0aa9e9a398f2efaa9 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 20 May 2016 13:13:33 +0300 Subject: ARM: dts: DRA74x: fix DSS PLL2 addresses DSS's 'pll2_clkctrl' and 'pll2' have wrong addresses in the dra74x.dtsi file. Video PLL2 has not been used so wrong addresses went unnoticed. Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra74x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra74x.dtsi b/arch/arm/boot/dts/dra74x.dtsi index 4220eeffc65a..5e06020f450b 100644 --- a/arch/arm/boot/dts/dra74x.dtsi +++ b/arch/arm/boot/dts/dra74x.dtsi @@ -107,8 +107,8 @@ reg = <0x58000000 0x80>, <0x58004054 0x4>, <0x58004300 0x20>, - <0x58005054 0x4>, - <0x58005300 0x20>; + <0x58009054 0x4>, + <0x58009300 0x20>; reg-names = "dss", "pll1_clkctrl", "pll1", "pll2_clkctrl", "pll2"; -- cgit v1.2.3 From 8d29bdba7291f9f939bc17ac088ab650d106d451 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 24 May 2016 11:12:29 -0500 Subject: ARM: OMAP2+: Select OMAP_INTERCONNECT for SOC_AM43XX AM43XX SoCs make use of the omap_l3_noc driver so explicitly select OMAP_INTERCONNECT in the Kconfig for SOC_AM43XX to ensure it always gets enabled for AM43XX only builds. Signed-off-by: Dave Gerlach Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index b9b71328249b..e6405c094f37 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -37,6 +37,7 @@ config ARCH_OMAP4 select PM if CPU_IDLE select ARM_ERRATA_754322 select ARM_ERRATA_775420 + select OMAP_INTERCONNECT config SOC_OMAP5 bool "TI OMAP5" -- cgit v1.2.3 From 20c15226d1c73150c4d9107301cac5dda0b7f995 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 11 May 2016 16:39:30 -0300 Subject: ARM: imx6ul: Fix Micrel PHY mask The value used for Micrel PHY mask is not correct. Use the MICREL_PHY_ID_MASK definition instead. Thanks to Jiri Luznicky for proposing the fix at https://community.freescale.com/thread/387739 Cc: Fixes: 709bc0657fe6f9f55 ("ARM: imx6ul: add fec MAC refrence clock and phy fixup init") Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mach-imx6ul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/mach-imx6ul.c b/arch/arm/mach-imx/mach-imx6ul.c index a38b16b69923..b56de4b8cdf2 100644 --- a/arch/arm/mach-imx/mach-imx6ul.c +++ b/arch/arm/mach-imx/mach-imx6ul.c @@ -46,7 +46,7 @@ static int ksz8081_phy_fixup(struct phy_device *dev) static void __init imx6ul_enet_phy_init(void) { if (IS_BUILTIN(CONFIG_PHYLIB)) - phy_register_fixup_for_uid(PHY_ID_KSZ8081, 0xffffffff, + phy_register_fixup_for_uid(PHY_ID_KSZ8081, MICREL_PHY_ID_MASK, ksz8081_phy_fixup); } -- cgit v1.2.3 From 624531886987f0f1b5d01fb598034d039198e090 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Jun 2016 17:57:54 +0100 Subject: ARM: 8578/1: mm: ensure pmd_present only checks the valid bit In a subsequent patch, pmd_mknotpresent will clear the valid bit of the pmd entry, resulting in a not-present entry from the hardware's perspective. Unfortunately, pmd_present simply checks for a non-zero pmd value and will therefore continue to return true even after a pmd_mknotpresent operation. Since pmd_mknotpresent is only used for managing huge entries, this is only an issue for the 3-level case. This patch fixes the 3-level pmd_present implementation to take into account the valid bit. For bisectability, the change is made before the fix to pmd_mknotpresent. [catalin.marinas@arm.com: comment update regarding pmd_mknotpresent patch] Fixes: 8d9625070073 ("ARM: mm: Transparent huge page support for LPAE systems.") Cc: # 3.11+ Cc: Russell King Cc: Steve Capper Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 1 + arch/arm/include/asm/pgtable.h | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index aeddd28b3595..92fd2c8a9af0 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -193,6 +193,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pmd_large(pmd) (pmd_val(pmd) & 2) #define pmd_bad(pmd) (pmd_val(pmd) & 2) +#define pmd_present(pmd) (pmd_val(pmd)) #define copy_pmd(pmdpd,pmdps) \ do { \ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index fa70db7c714b..4dce1580bc15 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -211,6 +211,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) : !!(pmd_val(pmd) & (val))) #define pmd_isclear(pmd, val) (!(pmd_val(pmd) & (val))) +#define pmd_present(pmd) (pmd_isset((pmd), L_PMD_SECT_VALID)) #define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF)) #define pte_special(pte) (pte_isset((pte), L_PTE_SPECIAL)) static inline pte_t pte_mkspecial(pte_t pte) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 348caabb7625..d62204060cbe 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -182,7 +182,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) #define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_present(pmd) (pmd_val(pmd)) static inline pte_t *pmd_page_vaddr(pmd_t pmd) { -- cgit v1.2.3 From 56530f5d2ddc9b9fade7ef8db9cb886e9dc689b5 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 7 Jun 2016 17:58:06 +0100 Subject: ARM: 8579/1: mm: Fix definition of pmd_mknotpresent Currently pmd_mknotpresent will use a zero entry to respresent an invalidated pmd. Unfortunately this definition clashes with pmd_none, thus it is possible for a race condition to occur if zap_pmd_range sees pmd_none whilst __split_huge_pmd_locked is running too with pmdp_invalidate just called. This patch fixes the race condition by modifying pmd_mknotpresent to create non-zero faulting entries (as is done in other architectures), removing the ambiguity with pmd_none. [catalin.marinas@arm.com: using L_PMD_SECT_VALID instead of PMD_TYPE_SECT] Fixes: 8d9625070073 ("ARM: mm: Transparent huge page support for LPAE systems.") Cc: # 3.11+ Reported-by: Kirill A. Shutemov Acked-by: Will Deacon Cc: Russell King Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-3level.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 4dce1580bc15..2a029bceaf2f 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -250,10 +250,10 @@ PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); #define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) #define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) -/* represent a notpresent pmd by zero, this is used by pmdp_invalidate */ +/* represent a notpresent pmd by faulting entry, this is used by pmdp_invalidate */ static inline pmd_t pmd_mknotpresent(pmd_t pmd) { - return __pmd(0); + return __pmd(pmd_val(pmd) & ~L_PMD_SECT_VALID); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) -- cgit v1.2.3 From 36194812a4063dd2a72070aec3ee7890d135a587 Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Wed, 8 Jun 2016 19:55:50 +0530 Subject: powerpc/mm/radix: Update to tlb functions ric argument Radix invalidate control (RIC) is used to control which cache to flush using tlb instructions. When doing a PID flush, we currently flush everything including page walk cache. For address range flush, we flush only the TLB. In the next patch, we add support for flushing only the page walk cache. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 54efba2fd66e..71083621884e 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -18,16 +18,20 @@ static DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbiel_pid(unsigned long pid, int set) +#define RIC_FLUSH_TLB 0 +#define RIC_FLUSH_PWC 1 +#define RIC_FLUSH_ALL 2 + +static inline void __tlbiel_pid(unsigned long pid, int set, + unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rb |= set << PPC_BITLSHIFT(51); rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" @@ -39,25 +43,24 @@ static inline void __tlbiel_pid(unsigned long pid, int set) /* * We use 128 set in radix mode and 256 set in hpt mode. */ -static inline void _tlbiel_pid(unsigned long pid) +static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { - __tlbiel_pid(pid, set); + __tlbiel_pid(pid, set, ric); } return; } -static inline void _tlbie_pid(unsigned long pid) +static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" @@ -67,16 +70,15 @@ static inline void _tlbie_pid(unsigned long pid) } static inline void _tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" @@ -86,16 +88,15 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, } static inline void _tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" @@ -122,7 +123,7 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) preempt_disable(); pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_tlb_mm); @@ -135,7 +136,7 @@ void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); pid = mm ? mm->context.id : 0; if (pid != MMU_NO_CONTEXT) - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); preempt_enable(); } @@ -172,11 +173,11 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(pid); + _tlbie_pid(pid, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); } @@ -196,11 +197,11 @@ void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_va(vmaddr, pid, ap); + _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); bail: preempt_enable(); } @@ -224,7 +225,7 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(0); + _tlbie_pid(0, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } -- cgit v1.2.3 From a145abf12c9f7d30d8c330c9d8a97428cbf0589b Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Wed, 8 Jun 2016 19:55:51 +0530 Subject: powerpc/mm/radix: Flush page walk cache when freeing page table Even though a tlb_flush() does a flush with invalidate all cache, we can end up doing an RCU page table free before calling tlb_flush(). That means we can have page walk cache entries even after we free the page table pages. This can result in us doing wrong page table walk. Avoid this by doing pwc flush on every page table free. We can't batch the pwc flush, because the rcu call back function where we free the page table pages doesn't have information of the mmu gather. Thus we have to do a pwc on every page table page freed. Note: I also removed the dummy tlb_flush_pgtable call functions for hash 32. Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 1 - arch/powerpc/include/asm/book3s/64/pgalloc.h | 16 ++++++++- .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 3 ++ arch/powerpc/include/asm/book3s/64/tlbflush.h | 14 ++++++++ arch/powerpc/include/asm/book3s/pgalloc.h | 5 --- arch/powerpc/mm/tlb-radix.c | 41 ++++++++++++++++++++++ 6 files changed, 73 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index a2350194fc76..8e21bb492dca 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -102,7 +102,6 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - tlb_flush_pgtable(tlb, address); pgtable_page_dtor(table); pgtable_free_tlb(tlb, page_address(table), 0); } diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 488279edb1f0..26eb2cb80c4e 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -110,6 +110,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); } @@ -127,6 +132,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX); } @@ -198,7 +208,11 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - tlb_flush_pgtable(tlb, address); + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, table, 0); } diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 13ef38828dfe..3fa94fcac628 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -18,16 +18,19 @@ extern void radix__local_flush_tlb_mm(struct mm_struct *mm); extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid); +extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_SMP extern void radix__flush_tlb_mm(struct mm_struct *mm); extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid); +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); #else #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) #define radix___flush_tlb_page(mm,addr,p,i) radix___local_flush_tlb_page(mm,addr,p,i) +#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index d98424ae356c..96e5769b18b0 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -72,5 +72,19 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) #define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) #endif /* CONFIG_SMP */ +/* + * flush the page walk cache for the address + */ +static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address) +{ + /* + * Flush the page table walk cache on freeing a page table. We already + * have marked the upper/higher level page table entry none by now. + * So it is safe to flush PWC here. + */ + if (!radix_enabled()) + return; + radix__flush_tlb_pwc(tlb, address); +} #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h index 54f591e9572e..c0a69ae92256 100644 --- a/arch/powerpc/include/asm/book3s/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/pgalloc.h @@ -4,11 +4,6 @@ #include extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -static inline void tlb_flush_pgtable(struct mmu_gather *tlb, - unsigned long address) -{ - -} #ifdef CONFIG_PPC64 #include diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 71083621884e..ab2f60e812e2 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -128,6 +128,21 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) } EXPORT_SYMBOL(radix__local_flush_tlb_mm); +void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (pid != MMU_NO_CONTEXT) + _tlbiel_pid(pid, RIC_FLUSH_PWC); + + preempt_enable(); +} +EXPORT_SYMBOL(radix__local_flush_tlb_pwc); + void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { @@ -183,6 +198,32 @@ no_context: } EXPORT_SYMBOL(radix__flush_tlb_mm); +void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + + if (!mm_is_core_local(mm)) { + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + raw_spin_lock(&native_tlbie_lock); + _tlbie_pid(pid, RIC_FLUSH_PWC); + if (lock_tlbie) + raw_spin_unlock(&native_tlbie_lock); + } else + _tlbiel_pid(pid, RIC_FLUSH_PWC); +no_context: + preempt_enable(); +} +EXPORT_SYMBOL(radix__flush_tlb_pwc); + void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { -- cgit v1.2.3 From 0487c44d1e1070b636ba3f3a12494ec456c2d005 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 10 Jun 2016 09:22:31 +0200 Subject: KVM: s390: ignore IBC if zero Looks like we forgot about the special IBC value of 0 meaning "no IBC". Let's fix that, otherwise it gets rounded up and suddenly an IBC is active with the lowest possible machine. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Fixes: commit 053dd2308d81 ("KVM: s390: force ibc into valid range") Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6d8ec3ac9dd8..c9ae53924a69 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -657,7 +657,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) kvm->arch.model.cpuid = proc->cpuid; lowest_ibc = sclp.ibc >> 16 & 0xfff; unblocked_ibc = sclp.ibc & 0xfff; - if (lowest_ibc) { + if (lowest_ibc && proc->ibc) { if (proc->ibc > unblocked_ibc) kvm->arch.model.ibc = unblocked_ibc; else if (proc->ibc < lowest_ibc) -- cgit v1.2.3 From 9ec6de19235889ab0118e970ee732cb33c9efc06 Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Fri, 6 May 2016 16:33:06 +0300 Subject: KVM: s390: Add stats for PEI events Add partial execution intercepted events in kvm_stats_debugfs. Signed-off-by: Alexander Yarygin Acked-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/intercept.c | 2 ++ arch/s390/kvm/kvm-s390.c | 1 + 3 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 37b9017c6a96..ac82e8eb936d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -245,6 +245,7 @@ struct kvm_vcpu_stat { u32 exit_stop_request; u32 exit_validity; u32 exit_instruction; + u32 exit_pei; u32 halt_successful_poll; u32 halt_attempted_poll; u32 halt_poll_invalid; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 2e6b54e4d3f9..252157181302 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -341,6 +341,8 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) static int handle_partial_execution(struct kvm_vcpu *vcpu) { + vcpu->stat.exit_pei++; + if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */ return handle_mvpg_pei(vcpu); if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c9ae53924a69..43f2a2b80490 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -61,6 +61,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_external_request", VCPU_STAT(exit_external_request) }, { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, { "exit_instruction", VCPU_STAT(exit_instruction) }, + { "exit_pei", VCPU_STAT(exit_pei) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, -- cgit v1.2.3 From d16c0d722d09496a03222dc27ee3071b7b1051e5 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 24 May 2016 08:35:38 -0500 Subject: ARM: OMAP: DRA7: powerdomain data: Set L3init and L4per to ON As per the latest revision F of public TRM for DRA7/AM57xx SoCs SPRUHZ6F[1] (April 2016), L4Per and L3init power domains now operate in always "ON" mode due to asymmetric aging limitations. Update the same [1] http://www.ti.com/lit/pdf/spruhz6 Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/powerdomains7xx_data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c index 0ec2d00f4237..8ea447ed4dc4 100644 --- a/arch/arm/mach-omap2/powerdomains7xx_data.c +++ b/arch/arm/mach-omap2/powerdomains7xx_data.c @@ -111,7 +111,7 @@ static struct powerdomain l4per_7xx_pwrdm = { .name = "l4per_pwrdm", .prcm_offs = DRA7XX_PRM_L4PER_INST, .prcm_partition = DRA7XX_PRM_PARTITION, - .pwrsts = PWRSTS_RET_ON, + .pwrsts = PWRSTS_ON, .pwrsts_logic_ret = PWRSTS_RET, .banks = 2, .pwrsts_mem_ret = { @@ -260,7 +260,7 @@ static struct powerdomain l3init_7xx_pwrdm = { .name = "l3init_pwrdm", .prcm_offs = DRA7XX_PRM_L3INIT_INST, .prcm_partition = DRA7XX_PRM_PARTITION, - .pwrsts = PWRSTS_RET_ON, + .pwrsts = PWRSTS_ON, .pwrsts_logic_ret = PWRSTS_RET, .banks = 3, .pwrsts_mem_ret = { -- cgit v1.2.3 From 9ffb668f268c79f2f58b56bbd63208440b31260f Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 24 May 2016 08:35:39 -0500 Subject: ARM: OMAP: DRA7: powerdomain data: Remove unused pwrsts_logic_ret As per the latest revision F of public TRM for DRA7/AM57xx SoCs SPRUHZ6F[1] (April 2016), with the exception of MPU power domain (and CPUx sub power domains), all other power domains can either operate in "ON" mode OR in some cases, "OFF" mode. For these power states, the logic retention state is basically ignored by PRCM and does not require to be programmed. [1] http://www.ti.com/lit/pdf/spruhz6 Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/powerdomains7xx_data.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c index 8ea447ed4dc4..88107b449710 100644 --- a/arch/arm/mach-omap2/powerdomains7xx_data.c +++ b/arch/arm/mach-omap2/powerdomains7xx_data.c @@ -36,7 +36,6 @@ static struct powerdomain iva_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_IVA_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 4, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* hwa_mem */ @@ -76,7 +75,6 @@ static struct powerdomain ipu_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_IPU_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 2, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* aessmem */ @@ -95,7 +93,6 @@ static struct powerdomain dss_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_DSS_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 1, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* dss_mem */ @@ -112,7 +109,6 @@ static struct powerdomain l4per_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_L4PER_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, - .pwrsts_logic_ret = PWRSTS_RET, .banks = 2, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* nonretained_bank */ @@ -161,7 +157,6 @@ static struct powerdomain core_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_CORE_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, - .pwrsts_logic_ret = PWRSTS_RET, .banks = 5, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* core_nret_bank */ @@ -226,7 +221,6 @@ static struct powerdomain vpe_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_VPE_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, - .pwrsts_logic_ret = PWRSTS_OFF, .banks = 1, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* vpe_bank */ @@ -261,7 +255,6 @@ static struct powerdomain l3init_7xx_pwrdm = { .prcm_offs = DRA7XX_PRM_L3INIT_INST, .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, - .pwrsts_logic_ret = PWRSTS_RET, .banks = 3, .pwrsts_mem_ret = { [0] = PWRSTS_OFF_RET, /* gmac_bank */ -- cgit v1.2.3 From 6b41d44862e8f3a4b95102c6ff6cad3fccc7994b Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 24 May 2016 08:35:40 -0500 Subject: ARM: OMAP: DRA7: powerdomain data: Remove unused pwrsts_mem_ret As per the latest revision F of public TRM for DRA7/AM57xx SoCs SPRUHZ6F[1] (April 2016), with the exception of MPU power domain, all other power domains do not have memories capable of retention since they all operate in either "ON" or "OFF" mode. For these power states, the retention state for memories are basically ignored by PRCM and does not require to be programmed. [1] http://www.ti.com/lit/pdf/spruhz6 Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/powerdomains7xx_data.c | 65 ------------------------------ 1 file changed, 65 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c index 88107b449710..eb350a673133 100644 --- a/arch/arm/mach-omap2/powerdomains7xx_data.c +++ b/arch/arm/mach-omap2/powerdomains7xx_data.c @@ -37,12 +37,6 @@ static struct powerdomain iva_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 4, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* hwa_mem */ - [1] = PWRSTS_OFF_RET, /* sl2_mem */ - [2] = PWRSTS_OFF_RET, /* tcm1_mem */ - [3] = PWRSTS_OFF_RET, /* tcm2_mem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* hwa_mem */ [1] = PWRSTS_ON, /* sl2_mem */ @@ -76,10 +70,6 @@ static struct powerdomain ipu_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 2, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* aessmem */ - [1] = PWRSTS_OFF_RET, /* periphmem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* aessmem */ [1] = PWRSTS_ON, /* periphmem */ @@ -94,9 +84,6 @@ static struct powerdomain dss_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* dss_mem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* dss_mem */ }, @@ -110,10 +97,6 @@ static struct powerdomain l4per_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 2, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* nonretained_bank */ - [1] = PWRSTS_OFF_RET, /* retained_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* nonretained_bank */ [1] = PWRSTS_ON, /* retained_bank */ @@ -128,9 +111,6 @@ static struct powerdomain gpu_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* gpu_mem */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* gpu_mem */ }, @@ -144,8 +124,6 @@ static struct powerdomain wkupaon_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 1, - .pwrsts_mem_ret = { - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* wkup_bank */ }, @@ -158,13 +136,6 @@ static struct powerdomain core_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 5, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* core_nret_bank */ - [1] = PWRSTS_OFF_RET, /* core_ocmram */ - [2] = PWRSTS_OFF_RET, /* core_other_bank */ - [3] = PWRSTS_OFF_RET, /* ipu_l2ram */ - [4] = PWRSTS_OFF_RET, /* ipu_unicache */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* core_nret_bank */ [1] = PWRSTS_ON, /* core_ocmram */ @@ -222,9 +193,6 @@ static struct powerdomain vpe_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* vpe_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* vpe_bank */ }, @@ -256,11 +224,6 @@ static struct powerdomain l3init_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_ON, .banks = 3, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* gmac_bank */ - [1] = PWRSTS_OFF_RET, /* l3init_bank1 */ - [2] = PWRSTS_OFF_RET, /* l3init_bank2 */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* gmac_bank */ [1] = PWRSTS_ON, /* l3init_bank1 */ @@ -276,9 +239,6 @@ static struct powerdomain eve3_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve3_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve3_bank */ }, @@ -292,9 +252,6 @@ static struct powerdomain emu_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* emu_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* emu_bank */ }, @@ -307,11 +264,6 @@ static struct powerdomain dsp2_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 3, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* dsp2_edma */ - [1] = PWRSTS_OFF_RET, /* dsp2_l1 */ - [2] = PWRSTS_OFF_RET, /* dsp2_l2 */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* dsp2_edma */ [1] = PWRSTS_ON, /* dsp2_l1 */ @@ -327,11 +279,6 @@ static struct powerdomain dsp1_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 3, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* dsp1_edma */ - [1] = PWRSTS_OFF_RET, /* dsp1_l1 */ - [2] = PWRSTS_OFF_RET, /* dsp1_l2 */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* dsp1_edma */ [1] = PWRSTS_ON, /* dsp1_l1 */ @@ -347,9 +294,6 @@ static struct powerdomain cam_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* vip_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* vip_bank */ }, @@ -363,9 +307,6 @@ static struct powerdomain eve4_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve4_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve4_bank */ }, @@ -379,9 +320,6 @@ static struct powerdomain eve2_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve2_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve2_bank */ }, @@ -395,9 +333,6 @@ static struct powerdomain eve1_7xx_pwrdm = { .prcm_partition = DRA7XX_PRM_PARTITION, .pwrsts = PWRSTS_OFF_ON, .banks = 1, - .pwrsts_mem_ret = { - [0] = PWRSTS_OFF_RET, /* eve1_bank */ - }, .pwrsts_mem_on = { [0] = PWRSTS_ON, /* eve1_bank */ }, -- cgit v1.2.3 From 1c343f7b0e177e8ca7f4d4a5dd1fa790f85abbcc Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 13 Jun 2016 13:14:56 +0200 Subject: KVM: s390/mm: Fix CMMA reset during reboot commit 1e133ab296f ("s390/mm: split arch/s390/mm/pgtable.c") factored out the page table handling code from __gmap_zap and __s390_reset_cmma into ptep_zap_unused and added a simple flag that tells which one of the function (reset or not) is to be made. This also changed the behaviour, as it also zaps unused page table entries on reset. Turns out that this is wrong as s390_reset_cmma uses the page walker, which DOES NOT take the ptl lock. The most simple fix is to not do the zapping part on reset (which uses the walker) Signed-off-by: Christian Borntraeger Fixes: 1e133ab296f ("s390/mm: split arch/s390/mm/pgtable.c") Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4324b87f9398..9f0ce0e6eeb4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -437,7 +437,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); pte = *ptep; - if (pte_swap(pte) && + if (!reset && pte_swap(pte) && ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || (pgstev & _PGSTE_GPS_ZERO))) { ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); -- cgit v1.2.3 From 8550e2fa34f077c8a87cf1ba2453102bbbc9ade9 Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Wed, 8 Jun 2016 19:55:55 +0530 Subject: powerpc/mm/hash: Use the correct PPP mask when updating HPTE With commit e58e87adc8bf9 "powerpc/mm: Update _PAGE_KERNEL_RO" we now use all the three PPP bits. The top bit is now used to have a PPP value of 0b110 which will be mapped to kernel read only. When updating the hpte entry use right mask such that we update the 63rd bit (top 'P' bit) too. Prior to e58e87adc8bf we didn't support KERNEL_RO at all (it was == KERNEL_RW), so this isn't a regression as such. Fixes: e58e87adc8bf ("powerpc/mm: Update _PAGE_KERNEL_RO") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 + arch/powerpc/mm/hash_native_64.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 290157e8d5b2..74839f24f412 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -88,6 +88,7 @@ #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_PP ASM_CONST(0x0000000000000003) +#define HPTE_R_PPP ASM_CONST(0x8000000000000003) #define HPTE_R_N ASM_CONST(0x0000000000000004) #define HPTE_R_G ASM_CONST(0x0000000000000008) #define HPTE_R_M ASM_CONST(0x0000000000000010) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 40e05e7f43de..f8a871a72985 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -316,8 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> hit\n"); /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N | HPTE_R_C))); } native_unlock_hpte(hptep); @@ -385,8 +385,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N))); + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N))); /* * Ensure it is out of the tlb too. Bolted entries base and * actual page size will be same. -- cgit v1.2.3 From 797179bc4fe06c89e47a9f36f886f68640b423f8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:43 +0100 Subject: MIPS: KVM: Fix modular KVM under QEMU Copy __kvm_mips_vcpu_run() into unmapped memory, so that we can never get a TLB refill exception in it when KVM is built as a module. This was observed to happen with the host MIPS kernel running under QEMU, due to a not entirely transparent optimisation in the QEMU TLB handling where TLB entries replaced with TLBWR are copied to a separate part of the TLB array. Code in those pages continue to be executable, but those mappings persist only until the next ASID switch, even if they are marked global. An ASID switch happens in __kvm_mips_vcpu_run() at exception level after switching to the guest exception base. Subsequent TLB mapped kernel instructions just prior to switching to the guest trigger a TLB refill exception, which enters the guest exception handlers without updating EPC. This appears as a guest triggered TLB refill on a host kernel mapped (host KSeg2) address, which is not handled correctly as user (guest) mode accesses to kernel (host) segments always generate address error exceptions. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: # 3.10.x- Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 1 + arch/mips/kvm/interrupt.h | 1 + arch/mips/kvm/locore.S | 1 + arch/mips/kvm/mips.c | 11 ++++++++++- 4 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 6733ac575da4..2d5bb133d11a 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -338,6 +338,7 @@ struct kvm_mips_tlb { #define KVM_MIPS_GUEST_TLB_SIZE 64 struct kvm_vcpu_arch { void *host_ebase, *guest_ebase; + int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); unsigned long host_stack; unsigned long host_gp; diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h index 4ab4bdfad703..2143884709e4 100644 --- a/arch/mips/kvm/interrupt.h +++ b/arch/mips/kvm/interrupt.h @@ -28,6 +28,7 @@ #define MIPS_EXC_MAX 12 /* XXXSL More to follow */ +extern char __kvm_mips_vcpu_run_end[]; extern char mips32_exception[], mips32_exceptionEnd[]; extern char mips32_GuestException[], mips32_GuestExceptionEnd[]; diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index 3ef03009de5f..828fcfc1cd7f 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S @@ -202,6 +202,7 @@ FEXPORT(__kvm_mips_load_k0k1) /* Jump to guest */ eret +EXPORT(__kvm_mips_vcpu_run_end) VECTOR(MIPSX(exception), unknown) /* Find out what mode we came from and jump to the proper handler. */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index dc052fb5c7a2..44da5259f390 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -315,6 +315,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) memcpy(gebase + offset, mips32_GuestException, mips32_GuestExceptionEnd - mips32_GuestException); +#ifdef MODULE + offset += mips32_GuestExceptionEnd - mips32_GuestException; + memcpy(gebase + offset, (char *)__kvm_mips_vcpu_run, + __kvm_mips_vcpu_run_end - (char *)__kvm_mips_vcpu_run); + vcpu->arch.vcpu_run = gebase + offset; +#else + vcpu->arch.vcpu_run = __kvm_mips_vcpu_run; +#endif + /* Invalidate the icache for these ranges */ local_flush_icache_range((unsigned long)gebase, (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); @@ -404,7 +413,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Disable hardware page table walking while in guest */ htw_stop(); - r = __kvm_mips_vcpu_run(run, vcpu); + r = vcpu->arch.vcpu_run(run, vcpu); /* Re-enable HTW before enabling interrupts */ htw_start(); -- cgit v1.2.3 From 7f5a1ddc792901249c2060e165bcb3ca779cde35 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:44 +0100 Subject: MIPS: KVM: Include bit 31 in segment matches When faulting guest addresses are matched against guest segments with the KVM_GUEST_KSEGX() macro, change the mask to 0xe0000000 so as to include bit 31. This is mainly for safety's sake, as it prevents a rogue BadVAddr in the host kseg2/kseg3 segments (e.g. 0xC*******) after a TLB exception from matching the guest kseg0 segment (e.g. 0x4*******), triggering an internal KVM error instead of allowing the corresponding guest kseg0 page to be mapped into the host vmalloc space. Such a rogue BadVAddr was observed to happen with the host MIPS kernel running under QEMU with KVM built as a module, due to a not entirely transparent optimisation in the QEMU TLB handling. This has already been worked around properly in a previous commit. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 2d5bb133d11a..36a391d289aa 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -74,7 +74,7 @@ #define KVM_GUEST_KUSEG 0x00000000UL #define KVM_GUEST_KSEG0 0x40000000UL #define KVM_GUEST_KSEG23 0x60000000UL -#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0x60000000) +#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0xe0000000) #define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff) #define KVM_GUEST_CKSEG0ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG0) -- cgit v1.2.3 From cc81e9486202345d6ca56495cf8b5f3d03fbc563 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:45 +0100 Subject: MIPS: KVM: Don't unwind PC when emulating CACHE When a CACHE instruction is emulated by kvm_mips_emulate_cache(), the PC is first updated to point to the next instruction, and afterwards it falls through the "dont_update_pc" label, which rewinds the PC back to its original address. This works when dynamic translation of emulated instructions is enabled, since the CACHE instruction is replaced with a SYNCI which works without trapping, however when dynamic translation is disabled the guest hangs on CACHE instructions as they always trap and are never stepped over. Roughly swap the meanings of the "done" and "dont_update_pc" to match kvm_mips_emulate_CP0(), so that "done" will roll back the PC on failure, and "dont_update_pc" won't change PC at all (for the sake of exceptions that have already modified the PC). Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 396df6eb0a12..52bec0fe2fbb 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1666,7 +1666,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, cache, op, base, arch->gprs[base], offset); er = EMULATE_FAIL; preempt_enable(); - goto dont_update_pc; + goto done; } @@ -1694,16 +1694,20 @@ skip_fault: kvm_err("NO-OP CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", cache, op, base, arch->gprs[base], offset); er = EMULATE_FAIL; - preempt_enable(); - goto dont_update_pc; } preempt_enable(); +done: + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) + vcpu->arch.pc = curr_pc; dont_update_pc: - /* Rollback PC */ - vcpu->arch.pc = curr_pc; -done: + /* + * This is for exceptions whose emulation updates the PC, so do not + * overwrite the PC under any circumstances + */ + return er; } -- cgit v1.2.3 From 6df82a7b88dc9b0b519765562b005ef9196d812a Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 9 Jun 2016 10:50:46 +0100 Subject: MIPS: KVM: Fix CACHE triggered exception emulation When emulating TLB miss / invalid exceptions during CACHE instruction emulation, be sure to set up the correct PC and host_cp0_badvaddr state for the kvm_mips_emlulate_tlb*_ld() function to pick up for guest EPC and BadVAddr. PC needs to be rewound otherwise the guest EPC will end up pointing at the next instruction after the faulting CACHE instruction. host_cp0_badvaddr must be set because guest CACHE instructions trap with a Coprocessor Unusable exception, which doesn't update the host BadVAddr as a TLB exception would. This doesn't tend to get hit when dynamic translation of emulated instructions is enabled, since only the first execution of each CACHE instruction actually goes through this code path, with subsequent executions hitting the SYNCI instruction that it gets replaced with. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 52bec0fe2fbb..645c8a1982a7 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1636,6 +1636,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, if (index < 0) { vcpu->arch.host_cp0_entryhi = (va & VPN2_MASK); vcpu->arch.host_cp0_badvaddr = va; + vcpu->arch.pc = curr_pc; er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, vcpu); preempt_enable(); @@ -1647,6 +1648,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, * invalid exception to the guest */ if (!TLB_IS_VALID(*tlb, va)) { + vcpu->arch.host_cp0_badvaddr = va; + vcpu->arch.pc = curr_pc; er = kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu); preempt_enable(); -- cgit v1.2.3 From dcfc47248d3f7d28df6f531e6426b933de94370d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 11 Jun 2016 23:06:53 +0900 Subject: kprobes/x86: Clear TF bit in fault on single-stepping Fix kprobe_fault_handler() to clear the TF (trap flag) bit of the flags register in the case of a fault fixup on single-stepping. If we put a kprobe on the instruction which caused a page fault (e.g. actual mov instructions in copy_user_*), that fault happens on the single-stepping buffer. In this case, kprobes resets running instance so that the CPU can retry execution on the original ip address. However, current code forgets to reset the TF bit. Since this fault happens with TF bit set for enabling single-stepping, when it retries, it causes a debug exception and kprobes can not handle it because it already reset itself. On the most of x86-64 platform, it can be easily reproduced by using kprobe tracer. E.g. # cd /sys/kernel/debug/tracing # echo p copy_user_enhanced_fast_string+5 > kprobe_events # echo 1 > events/kprobes/enable And you'll see a kernel panic on do_debug(), since the debug trap is not handled by kprobes. To fix this problem, we just need to clear the TF bit when resetting running kprobe. Signed-off-by: Masami Hiramatsu Reviewed-by: Ananth N Mavinakayanahalli Acked-by: Steven Rostedt Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: systemtap@sourceware.org Cc: stable@vger.kernel.org # All the way back to ancient kernels Link: http://lkml.kernel.org/r/20160611140648.25885.37482.stgit@devbox [ Updated the comments. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 38cf7a741250..7847e5c0e0b5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -961,7 +961,19 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * normal page fault. */ regs->ip = (unsigned long)cur->addr; + /* + * Trap flag (TF) has been set here because this fault + * happened where the single stepping will be done. + * So clear it by resetting the current kprobe: + */ + regs->flags &= ~X86_EFLAGS_TF; + + /* + * If the TF flag was set before the kprobe hit, + * don't touch it: + */ regs->flags |= kcb->kprobe_old_flags; + if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else -- cgit v1.2.3 From c5cea06be060f38e5400d796e61cfc8c36e52924 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 13 Jun 2016 11:15:14 +0100 Subject: arm64: fix dump_instr when PAN and UAO are in use If the kernel is set to show unhandled signals, and a user task does not handle a SIGILL as a result of an instruction abort, we will attempt to log the offending instruction with dump_instr before killing the task. We use dump_instr to log the encoding of the offending userspace instruction. However, dump_instr is also used to dump instructions from kernel space, and internally always switches to KERNEL_DS before dumping the instruction with get_user. When both PAN and UAO are in use, reading a user instruction via get_user while in KERNEL_DS will result in a permission fault, which leads to an Oops. As we have regs corresponding to the context of the original instruction abort, we can inspect this and only flip to KERNEL_DS if the original abort was taken from the kernel, avoiding this issue. At the same time, remove the redundant (and incorrect) comments regarding the order dump_mem and dump_instr are called in. Cc: Catalin Marinas Cc: James Morse Cc: Robin Murphy Cc: #4.6+ Signed-off-by: Mark Rutland Reported-by: Vladimir Murzin Tested-by: Vladimir Murzin Fixes: 57f4959bad0a154a ("arm64: kernel: Add support for User Access Override") Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f7cf463107df..2a43012616b7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -64,8 +64,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, /* * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. + * to safely read from kernel space. */ fs = get_fs(); set_fs(KERNEL_DS); @@ -111,21 +110,12 @@ static void dump_backtrace_entry(unsigned long where) print_ip_sym(where); } -static void dump_instr(const char *lvl, struct pt_regs *regs) +static void __dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); - mm_segment_t fs; char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; - /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. - */ - fs = get_fs(); - set_fs(KERNEL_DS); - for (i = -4; i < 1; i++) { unsigned int val, bad; @@ -139,8 +129,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } printk("%sCode: %s\n", lvl, str); +} - set_fs(fs); +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + if (!user_mode(regs)) { + mm_segment_t fs = get_fs(); + set_fs(KERNEL_DS); + __dump_instr(lvl, regs); + set_fs(fs); + } else { + __dump_instr(lvl, regs); + } } static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) -- cgit v1.2.3 From bbb1681ee3653bdcfc6a4ba31902738118311fd4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 13 Jun 2016 17:57:02 +0100 Subject: arm64: mm: mark fault_info table const Unlike the debug_fault_info table, we never intentionally alter the fault_info table at runtime, and all derived pointers are treated as const currently. Make the table const so that it can be placed in .rodata and protected from unintentional writes, as we do for the syscall tables. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ba3fc12bd272..013e2cbe7924 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -441,7 +441,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } -static struct fault_info { +static const struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); int sig; int code; -- cgit v1.2.3 From 7c64cc0531fa0d9720f9e15a0a0d97bcad1d1cd1 Mon Sep 17 00:00:00 2001 From: Paul E. McKenney Date: Tue, 26 Apr 2016 10:42:25 -0700 Subject: arm: Use _rcuidle for smp_cross_call() tracepoints Further testing with false negatives suppressed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()") identified another unprotected use of RCU from the idle loop. Because RCU actively ignores idle-loop code (for energy-efficiency reasons, among other things), using RCU from the idle loop can result in too-short grace periods, in turn resulting in arbitrary misbehavior. The resulting lockdep-RCU splat is as follows: ------------------------------------------------------------------------ =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc5-next-20160426+ #1112 Not tainted ------------------------------- include/trace/events/ipi.h:35 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112 Hardware name: Generic OMAP4 (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xb0/0xe4) [] (dump_stack) from [] (smp_cross_call+0xbc/0x188) [] (smp_cross_call) from [] (generic_exec_single+0x9c/0x15c) [] (generic_exec_single) from [] (smp_call_function_single_async+0 x38/0x9c) [] (smp_call_function_single_async) from [] (cpuidle_coupled_poke_others+0x8c/0xa8) [] (cpuidle_coupled_poke_others) from [] (cpuidle_enter_state_coupled+0x26c/0x390) [] (cpuidle_enter_state_coupled) from [] (cpu_startup_entry+0x198/0x3a0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8) [] (start_kernel) from [<8000807c>] (0x8000807c) ------------------------------------------------------------------------ Reported-by: Tony Lindgren Signed-off-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Guenter Roeck Cc: Russell King Cc: Steven Rostedt Cc: Cc: --- arch/arm/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index baee70267f29..7afe48ae5d76 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -486,7 +486,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) { - trace_ipi_raise(target, ipi_types[ipinr]); + trace_ipi_raise_rcuidle(target, ipi_types[ipinr]); __smp_cross_call(target, ipinr); } -- cgit v1.2.3 From 38b850a73034f075c4088e7511b36ebbef9dce00 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Jun 2016 15:27:04 +0100 Subject: arm64: spinlock: order spin_{is_locked,unlock_wait} against local locks spin_is_locked has grown two very different use-cases: (1) [The sane case] API functions may require a certain lock to be held by the caller and can therefore use spin_is_locked as part of an assert statement in order to verify that the lock is indeed held. For example, usage of assert_spin_locked. (2) [The insane case] There are two locks, where a CPU takes one of the locks and then checks whether or not the other one is held before accessing some shared state. For example, the "optimized locking" in ipc/sem.c. In the latter case, the sequence looks like: spin_lock(&sem->lock); if (!spin_is_locked(&sma->sem_perm.lock)) /* Access shared state */ and requires that the spin_is_locked check is ordered after taking the sem->lock. Unfortunately, since our spinlocks are implemented using a LDAXR/STXR sequence, the read of &sma->sem_perm.lock can be speculated before the STXR and consequently return a stale value. Whilst this hasn't been seen to cause issues in practice, PowerPC fixed the same issue in 51d7d5205d33 ("powerpc: Add smp_mb() to arch_spin_is_locked()") and, although we did something similar for spin_unlock_wait in d86b8da04dfa ("arm64: spinlock: serialise spin_unlock_wait against concurrent lockers") that doesn't actually take care of ordering against local acquisition of a different lock. This patch adds an smp_mb() to the start of our arch_spin_is_locked and arch_spin_unlock_wait routines to ensure that the lock value is always loaded after any other locks have been taken by the current CPU. Reported-by: Peter Zijlstra Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index fc9682bfe002..aac64d55cb22 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -31,6 +31,12 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) unsigned int tmp; arch_spinlock_t lockval; + /* + * Ensure prior spin_lock operations to other locks have completed + * on this CPU before we test whether "lock" is locked. + */ + smp_mb(); + asm volatile( " sevl\n" "1: wfe\n" @@ -148,6 +154,7 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { + smp_mb(); /* See arch_spin_unlock_wait */ return !arch_spin_value_unlocked(READ_ONCE(*lock)); } -- cgit v1.2.3 From 3a5facd09da848193f5bcb0dea098a298bc1a29d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Jun 2016 15:10:57 +0100 Subject: arm64: spinlock: fix spin_unlock_wait for LSE atomics Commit d86b8da04dfa ("arm64: spinlock: serialise spin_unlock_wait against concurrent lockers") fixed spin_unlock_wait for LL/SC-based atomics under the premise that the LSE atomics (in particular, the LDADDA instruction) are indivisible. Unfortunately, these instructions are only indivisible when used with the -AL (full ordering) suffix and, consequently, the same issue can theoretically be observed with LSE atomics, where a later (in program order) load can be speculated before the write portion of the atomic operation. This patch fixes the issue by performing a CAS of the lock once we've established that it's unlocked, in much the same way as the LL/SC code. Fixes: d86b8da04dfa ("arm64: spinlock: serialise spin_unlock_wait against concurrent lockers") Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index aac64d55cb22..d5c894253e73 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -43,13 +43,17 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) "2: ldaxr %w0, %2\n" " eor %w1, %w0, %w0, ror #16\n" " cbnz %w1, 1b\n" + /* Serialise against any concurrent lockers */ ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" -" cbnz %w1, 2b\n", /* Serialise against any concurrent lockers */ - /* LSE atomics */ " nop\n" -" nop\n") +" nop\n", + /* LSE atomics */ +" mov %w1, %w0\n" +" cas %w0, %w0, %2\n" +" eor %w1, %w1, %w0\n") +" cbnz %w1, 2b\n" : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) : : "memory"); -- cgit v1.2.3 From c56bdcac153e60d96a619a59c7981f2a78cba598 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Jun 2016 18:40:07 +0100 Subject: arm64: spinlock: Ensure forward-progress in spin_unlock_wait Rather than wait until we observe the lock being free (which might never happen), we can also return from spin_unlock_wait if we observe that the lock is now held by somebody else, which implies that it was unlocked but we just missed seeing it in that state. Furthermore, in such a scenario there is no longer a need to write back the value that we loaded, since we know that there has been a lock hand-off, which is sufficient to publish any stores prior to the unlock_wait because the ARm architecture ensures that a Store-Release instruction is multi-copy atomic when observed by a Load-Acquire instruction. The litmus test is something like: AArch64 { 0:X1=x; 0:X3=y; 1:X1=y; 2:X1=y; 2:X3=x; } P0 | P1 | P2 ; MOV W0,#1 | MOV W0,#1 | LDAR W0,[X1] ; STR W0,[X1] | STLR W0,[X1] | LDR W2,[X3] ; DMB SY | | ; LDR W2,[X3] | | ; exists (0:X2=0 /\ 2:X0=1 /\ 2:X2=0) where P0 is doing spin_unlock_wait, P1 is doing spin_unlock and P2 is doing spin_lock. Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index d5c894253e73..e875a5a551d7 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -30,20 +30,39 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { unsigned int tmp; arch_spinlock_t lockval; + u32 owner; /* * Ensure prior spin_lock operations to other locks have completed * on this CPU before we test whether "lock" is locked. */ smp_mb(); + owner = READ_ONCE(lock->owner) << 16; asm volatile( " sevl\n" "1: wfe\n" "2: ldaxr %w0, %2\n" + /* Is the lock free? */ " eor %w1, %w0, %w0, ror #16\n" -" cbnz %w1, 1b\n" - /* Serialise against any concurrent lockers */ +" cbz %w1, 3f\n" + /* Lock taken -- has there been a subsequent unlock->lock transition? */ +" eor %w1, %w3, %w0, lsl #16\n" +" cbz %w1, 1b\n" + /* + * The owner has been updated, so there was an unlock->lock + * transition that we missed. That means we can rely on the + * store-release of the unlock operation paired with the + * load-acquire of the lock operation to publish any of our + * previous stores to the new lock owner and therefore don't + * need to bother with the writeback below. + */ +" b 4f\n" +"3:\n" + /* + * Serialise against any concurrent lockers by writing back the + * unlocked lock value + */ ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" @@ -53,9 +72,11 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) " mov %w1, %w0\n" " cas %w0, %w0, %2\n" " eor %w1, %w1, %w0\n") + /* Somebody else wrote to the lock, GOTO 10 and reload the value */ " cbnz %w1, 2b\n" +"4:" : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) - : + : "r" (owner) : "memory"); } -- cgit v1.2.3 From 7d669f50847481c52faf0656aea7b4be63113210 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 15 Jun 2016 17:23:45 -0500 Subject: kvm: svm: Fix implicit declaration for __default_cpu_present_to_apicid() The commit 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC") introduces a build error due to implicit function declaration when #ifdef CONFIG_X86_32 and #ifndef CONFIG_X86_LOCAL_APIC (as reported by Kbuild test robot i386-randconfig-x0-06121009). So, this patch introduces kvm_cpu_get_apicid() wrapper around __default_cpu_present_to_apicid() with additional handling if CONFIG_X86_LOCAL_APIC is not defined. Reported-by: kbuild test robot Fixes: commit 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC") Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 11 +++++++++++ arch/x86/kvm/svm.c | 6 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e0fbe7e70dc1..69e62862b622 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -1368,4 +1369,14 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} +static inline int kvm_cpu_get_apicid(int mps_cpu) +{ +#ifdef CONFIG_X86_LOCAL_APIC + return __default_cpu_present_to_apicid(mps_cpu); +#else + WARN_ON_ONCE(1); + return BAD_APICID; +#endif +} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1163e8173e5a..8a3c571e4a86 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1324,7 +1324,7 @@ free_avic: static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) { u64 entry; - int h_physical_id = __default_cpu_present_to_apicid(vcpu->cpu); + int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu); struct vcpu_svm *svm = to_svm(vcpu); if (!kvm_vcpu_apicv_active(vcpu)) @@ -1349,7 +1349,7 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { u64 entry; /* ID = 0xff (broadcast), ID > 0xff (reserved) */ - int h_physical_id = __default_cpu_present_to_apicid(cpu); + int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); if (!kvm_vcpu_apicv_active(vcpu)) @@ -4236,7 +4236,7 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) if (avic_vcpu_is_running(vcpu)) wrmsrl(SVM_AVIC_DOORBELL, - __default_cpu_present_to_apicid(vcpu->cpu)); + kvm_cpu_get_apicid(vcpu->cpu)); else kvm_vcpu_wake_up(vcpu); } -- cgit v1.2.3 From 5b8abf1f33ccd9f1cbc4248ade3cd507d9319c48 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 15 Jun 2016 17:24:36 -0500 Subject: kvm: svm: Do not support AVIC if not CONFIG_X86_LOCAL_APIC Add logic to disable AVIC #ifndef CONFIG_X86_LOCAL_APIC. Suggested-by: Paolo Bonzini Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8a3c571e4a86..16ef31b87452 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -238,7 +238,9 @@ module_param(nested, int, S_IRUGO); /* enable / disable AVIC */ static int avic; +#ifdef CONFIG_X86_LOCAL_APIC module_param(avic, int, S_IRUGO); +#endif static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu); @@ -981,11 +983,14 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); - if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC))) - avic = false; - - if (avic) - pr_info("AVIC enabled\n"); + if (avic) { + if (!npt_enabled || + !boot_cpu_has(X86_FEATURE_AVIC) || + !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) + avic = false; + else + pr_info("AVIC enabled\n"); + } return 0; -- cgit v1.2.3 From a0052191624e9bf8a8f9dc41b92ab5f252566c3c Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Mon, 13 Jun 2016 09:56:56 +0800 Subject: kvm: vmx: check apicv is active before using VT-d posted interrupt VT-d posted interrupt is relying on the CPU side's posted interrupt. Need to check whether VCPU's APICv is active before enabing VT-d posted interrupt. Fixes: d62caabb41f33d96333f9ef15e09cd26e1c12760 Cc: stable@vger.kernel.org Signed-off-by: Yang Zhang Signed-off-by: Shengge Ding Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fb93010beaa4..003618e324ce 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2072,7 +2072,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) unsigned int dest; if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return; do { @@ -2180,7 +2181,8 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return; /* Set SN when the vCPU is preempted */ @@ -10714,7 +10716,8 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return 0; vcpu->pre_pcpu = vcpu->cpu; @@ -10780,7 +10783,8 @@ static void vmx_post_block(struct kvm_vcpu *vcpu) unsigned long flags; if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(vcpu)) return; do { @@ -10833,7 +10837,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, int idx, ret = -EINVAL; if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + !irq_remapping_cap(IRQ_POSTING_CAP) || + !kvm_vcpu_apicv_active(kvm->vcpus[0])) return 0; idx = srcu_read_lock(&kvm->irq_srcu); -- cgit v1.2.3 From 9254e70c4ef1fee2e5c43feded4433d19cbb6177 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 9 Jun 2016 12:28:13 +0200 Subject: s390/cpum_cf: use perf software context for hardware counters On s390, there are two different hardware PMUs for counting and sampling. Previously, both PMUs have shared the perf_hw_context which is not correct and, recently, results in this warning: ------------[ cut here ]------------ WARNING: CPU: 5 PID: 1 at kernel/events/core.c:8485 perf_pmu_register+0x420/0x428 Modules linked in: CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc1+ #2 task: 00000009c5240000 ti: 00000009c5234000 task.ti: 00000009c5234000 Krnl PSW : 0704c00180000000 0000000000220c50 (perf_pmu_register+0x420/0x428) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Krnl GPRS: ffffffffffffffff 0000000000b15ac6 0000000000000000 00000009cb440000 000000000022087a 0000000000000000 0000000000b78fa0 0000000000000000 0000000000a9aa90 0000000000000084 0000000000000005 000000000088a97a 0000000000000004 0000000000749dd0 000000000022087a 00000009c5237cc0 Krnl Code: 0000000000220c44: a7f4ff54 brc 15,220aec 0000000000220c48: 92011000 mvi 0(%r1),1 #0000000000220c4c: a7f40001 brc 15,220c4e >0000000000220c50: a7f4ff12 brc 15,220a74 0000000000220c54: 0707 bcr 0,%r7 0000000000220c56: 0707 bcr 0,%r7 0000000000220c58: ebdff0800024 stmg %r13,%r15,128(%r15) 0000000000220c5e: a7f13fe0 tmll %r15,16352 Call Trace: ([<000000000022087a>] perf_pmu_register+0x4a/0x428) ([<0000000000b2c25c>] init_cpum_sampling_pmu+0x14c/0x1f8) ([<0000000000100248>] do_one_initcall+0x48/0x140) ([<0000000000b25d26>] kernel_init_freeable+0x1e6/0x2a0) ([<000000000072bda4>] kernel_init+0x24/0x138) ([<000000000073495e>] kernel_thread_starter+0x6/0xc) ([<0000000000734958>] kernel_thread_starter+0x0/0xc) Last Breaking-Event-Address: [<0000000000220c4c>] perf_pmu_register+0x41c/0x428 ---[ end trace 0c6ef9f5b771ad97 ]--- Using the perf_sw_context is an option because the cpum_cf PMU does not use interrupts. To make this more clear, initialize the capabilities in the PMU structure. Signed-off-by: Hendrik Brueckner Suggested-by: Peter Zijlstra Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 59215c518f37..7ec63b1d920d 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -649,6 +649,8 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) /* Performance monitoring unit for s390x */ static struct pmu cpumf_pmu = { + .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, .pmu_enable = cpumf_pmu_enable, .pmu_disable = cpumf_pmu_disable, .event_init = cpumf_pmu_event_init, @@ -708,12 +710,6 @@ static int __init cpumf_pmu_init(void) goto out; } - /* The CPU measurement counter facility does not have overflow - * interrupts to do sampling. Sampling must be provided by - * external means, for example, by timers. - */ - cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); if (rc) { -- cgit v1.2.3 From 0d15ef677839dab8313fbb86c007c3175b638d03 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 16 Jun 2016 16:51:52 +0100 Subject: arm64: kgdb: Match pstate size with gdbserver protocol Current versions of gdb do not interoperate cleanly with kgdb on arm64 systems because gdb and kgdb do not use the same register description. This patch modifies kgdb to work with recent releases of gdb (>= 7.8.1). Compatibility with gdb (after the patch is applied) is as follows: gdb-7.6 and earlier Ok gdb-7.7 series Works if user provides custom target description gdb-7.8(.0) Works if user provides custom target description gdb-7.8.1 and later Ok When commit 44679a4f142b ("arm64: KGDB: Add step debugging support") was introduced it was paired with a gdb patch that made an incompatible change to the gdbserver protocol. This patch was eventually merged into the gdb sources: https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commit;h=a4d9ba85ec5597a6a556afe26b712e878374b9dd The change to the protocol was mostly made to simplify big-endian support inside the kernel gdb stub. Unfortunately the gdb project released gdb-7.7.x and gdb-7.8.0 before the protocol incompatibility was identified and reversed: https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commit;h=bdc144174bcb11e808b4e73089b850cf9620a7ee This leaves us in a position where kgdb still uses the no-longer-used protocol; gdb-7.8.1, which restored the original behaviour, was released on 2014-10-29. I don't believe it is possible to detect/correct the protocol incompatiblity which means the kernel must take a view about which version of the gdb remote protocol is "correct". This patch takes the view that the original/current version of the protocol is correct and that version found in gdb-7.7.x and gdb-7.8.0 is anomalous. Signed-off-by: Daniel Thompson Signed-off-by: Will Deacon --- arch/arm64/include/asm/kgdb.h | 45 +++++++++++++++++++++++++++++++++++-------- arch/arm64/kernel/kgdb.c | 14 +++++++++++++- 2 files changed, 50 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h index f69f69c8120c..da84645525b9 100644 --- a/arch/arm64/include/asm/kgdb.h +++ b/arch/arm64/include/asm/kgdb.h @@ -38,25 +38,54 @@ extern int kgdb_fault_expected; #endif /* !__ASSEMBLY__ */ /* - * gdb is expecting the following registers layout. + * gdb remote procotol (well most versions of it) expects the following + * register layout. * * General purpose regs: * r0-r30: 64 bit * sp,pc : 64 bit - * pstate : 64 bit - * Total: 34 + * pstate : 32 bit + * Total: 33 + 1 * FPU regs: * f0-f31: 128 bit - * Total: 32 - * Extra regs * fpsr & fpcr: 32 bit - * Total: 2 + * Total: 32 + 2 * + * To expand a little on the "most versions of it"... when the gdb remote + * protocol for AArch64 was developed it depended on a statement in the + * Architecture Reference Manual that claimed "SPSR_ELx is a 32-bit register". + * and, as a result, allocated only 32-bits for the PSTATE in the remote + * protocol. In fact this statement is still present in ARM DDI 0487A.i. + * + * Unfortunately "is a 32-bit register" has a very special meaning for + * system registers. It means that "the upper bits, bits[63:32], are + * RES0.". RES0 is heavily used in the ARM architecture documents as a + * way to leave space for future architecture changes. So to translate a + * little for people who don't spend their spare time reading ARM architecture + * manuals, what "is a 32-bit register" actually means in this context is + * "is a 64-bit register but one with no meaning allocated to any of the + * upper 32-bits... *yet*". + * + * Perhaps then we should not be surprised that this has led to some + * confusion. Specifically a patch, influenced by the above translation, + * that extended PSTATE to 64-bit was accepted into gdb-7.7 but the patch + * was reverted in gdb-7.8.1 and all later releases, when this was + * discovered to be an undocumented protocol change. + * + * So... it is *not* wrong for us to only allocate 32-bits to PSTATE + * here even though the kernel itself allocates 64-bits for the same + * state. That is because this bit of code tells the kernel how the gdb + * remote protocol (well most versions of it) describes the register state. + * + * Note that if you are using one of the versions of gdb that supports + * the gdb-7.7 version of the protocol you cannot use kgdb directly + * without providing a custom register description (gdb can load new + * protocol descriptions at runtime). */ -#define _GP_REGS 34 +#define _GP_REGS 33 #define _FP_REGS 32 -#define _EXTRA_REGS 2 +#define _EXTRA_REGS 3 /* * general purpose registers size in bytes. * pstate is only 4 bytes. subtract 4 bytes diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index b67531a13136..b5f063e5eff7 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -58,7 +58,17 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { { "x30", 8, offsetof(struct pt_regs, regs[30])}, { "sp", 8, offsetof(struct pt_regs, sp)}, { "pc", 8, offsetof(struct pt_regs, pc)}, - { "pstate", 8, offsetof(struct pt_regs, pstate)}, + /* + * struct pt_regs thinks PSTATE is 64-bits wide but gdb remote + * protocol disagrees. Therefore we must extract only the lower + * 32-bits. Look for the big comment in asm/kgdb.h for more + * detail. + */ + { "pstate", 4, offsetof(struct pt_regs, pstate) +#ifdef CONFIG_CPU_BIG_ENDIAN + + 4 +#endif + }, { "v0", 16, -1 }, { "v1", 16, -1 }, { "v2", 16, -1 }, @@ -128,6 +138,8 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) memset((char *)gdb_regs, 0, NUMREGBYTES); thread_regs = task_pt_regs(task); memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); + /* Special case for PSTATE (check comments in asm/kgdb.h for details) */ + dbg_get_reg(33, gdb_regs + GP_REG_BYTES, thread_regs); } void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) -- cgit v1.2.3 From ef5bdccf6d4363fd934035e0b1ca8445975e1d89 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Thu, 16 Jun 2016 21:56:30 +0200 Subject: ARM: OMAP1: fix ams-delta FIQ handler to work with sparse IRQ After OMAP1 IRQ definitions have been changed by commit 685e2d08c54b ("ARM: OMAP1: Change interrupt numbering for sparse IRQ") introduced in v4.2, ams-delta FIQ handler which depends on them no longer works as expected. Fix it. Created and tested on Amstrad Delta against Linux-4.7-rc3 Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/ams-delta-fiq-handler.S | 6 +++--- arch/arm/mach-omap1/ams-delta-fiq.c | 5 +++-- arch/arm/mach-omap1/include/mach/ams-delta-fiq.h | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S index 5d7fb596bf4a..bf608441b357 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S +++ b/arch/arm/mach-omap1/ams-delta-fiq-handler.S @@ -43,8 +43,8 @@ #define OTHERS_MASK (MODEM_IRQ_MASK | HOOK_SWITCH_MASK) /* IRQ handler register bitmasks */ -#define DEFERRED_FIQ_MASK (0x1 << (INT_DEFERRED_FIQ % IH2_BASE)) -#define GPIO_BANK1_MASK (0x1 << INT_GPIO_BANK1) +#define DEFERRED_FIQ_MASK OMAP_IRQ_BIT(INT_DEFERRED_FIQ) +#define GPIO_BANK1_MASK OMAP_IRQ_BIT(INT_GPIO_BANK1) /* Driver buffer byte offsets */ #define BUF_MASK (FIQ_MASK * 4) @@ -110,7 +110,7 @@ ENTRY(qwerty_fiqin_start) mov r8, #2 @ reset FIQ agreement str r8, [r12, #IRQ_CONTROL_REG_OFFSET] - cmp r10, #INT_GPIO_BANK1 @ is it GPIO bank interrupt? + cmp r10, #(INT_GPIO_BANK1 - NR_IRQS_LEGACY) @ is it GPIO interrupt? beq gpio @ yes - process it mov r8, #1 diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index d1f12095f315..ec760ae2f917 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -109,7 +109,8 @@ void __init ams_delta_init_fiq(void) * Since no set_type() method is provided by OMAP irq chip, * switch to edge triggered interrupt type manually. */ - offset = IRQ_ILR0_REG_OFFSET + INT_DEFERRED_FIQ * 0x4; + offset = IRQ_ILR0_REG_OFFSET + + ((INT_DEFERRED_FIQ - NR_IRQS_LEGACY) & 0x1f) * 0x4; val = omap_readl(DEFERRED_FIQ_IH_BASE + offset) & ~(1 << 1); omap_writel(val, DEFERRED_FIQ_IH_BASE + offset); @@ -149,7 +150,7 @@ void __init ams_delta_init_fiq(void) /* * Redirect GPIO interrupts to FIQ */ - offset = IRQ_ILR0_REG_OFFSET + INT_GPIO_BANK1 * 0x4; + offset = IRQ_ILR0_REG_OFFSET + (INT_GPIO_BANK1 - NR_IRQS_LEGACY) * 0x4; val = omap_readl(OMAP_IH1_BASE + offset) | 1; omap_writel(val, OMAP_IH1_BASE + offset); } diff --git a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h b/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h index adb5e7649659..6dfc3e1210a3 100644 --- a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h +++ b/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h @@ -14,6 +14,8 @@ #ifndef __AMS_DELTA_FIQ_H #define __AMS_DELTA_FIQ_H +#include + /* * Interrupt number used for passing control from FIQ to IRQ. * IRQ12, described as reserved, has been selected. -- cgit v1.2.3 From 970f9091d25df14e9540ec7ff48a2f709e284cd1 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 16 Jun 2016 15:25:18 +0300 Subject: ARM: OMAP2+: timer: add probe for clocksources A few platforms are currently missing clocksource_probe() completely in their time_init functionality. On OMAP3430 for example, this is causing cpuidle to be pretty much dead, as the counter32k is not going to be registered and instead a gptimer is used as a clocksource. This will tick in periodic mode, preventing any deeper idle states. While here, also drop one unnecessary check for populated DT before existing clocksource_probe() call. Signed-off-by: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/timer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 5b385bb8aff9..cb9497a20fb3 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -496,8 +496,7 @@ void __init omap_init_time(void) __omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon", 2, "timer_sys_ck", NULL, false); - if (of_have_populated_dt()) - clocksource_probe(); + clocksource_probe(); } #if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM43XX) @@ -505,6 +504,8 @@ void __init omap3_secure_sync32k_timer_init(void) { __omap_sync32k_timer_init(12, "secure_32k_fck", "ti,timer-secure", 2, "timer_sys_ck", NULL, false); + + clocksource_probe(); } #endif /* CONFIG_ARCH_OMAP3 */ @@ -513,6 +514,8 @@ void __init omap3_gptimer_timer_init(void) { __omap_sync32k_timer_init(2, "timer_sys_ck", NULL, 1, "timer_sys_ck", "ti,timer-alwon", true); + + clocksource_probe(); } #endif -- cgit v1.2.3 From e568006b9d828403397668864d9797dc4bfefd28 Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Fri, 17 Jun 2016 11:32:00 +0530 Subject: powerpc/mm/hash: Don't add memory coherence if cache inhibited is set H_ENTER hcall handling in qemu had assumptions that a cache inhibited hpte entry won't have memory conference set. Also older kernel mentioned that some version of pHyp required this (the code removed by the below commit says: /* Make pHyp happy */ if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU)) hpte_r &= ~HPTE_R_M; But with older kernel we had some inconsistent memory conherence mapping. We always enabled memory conherence in the page fault path and removed memory conherence is _PAGE_NO_CACHE was set when we mapped the page via htab_bolt_mapping. The commit mentioned below tried to consolidate that by always enabling memory conherence. But as mentioned above that breaks Qemu H_ENTER handling. This patch update this such that we enable memory conherence only if cache inhibited is not set and bring fault handling, lpar and bolt mapping in sync. Fixes: commit 30bda41aba4e("powerpc/mm: Drop WIMG in favour of new constant") Reported-by: Darrick J. Wong Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b2740c67e172..5b22ba0b58bc 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -201,9 +201,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) /* * We can't allow hardware to update hpte bits. Hence always * set 'R' bit and set 'C' if it is a write fault - * Memory coherence is always enabled */ - rflags |= HPTE_R_R | HPTE_R_M; + rflags |= HPTE_R_R; if (pteflags & _PAGE_DIRTY) rflags |= HPTE_R_C; @@ -213,10 +212,15 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) rflags |= HPTE_R_I; - if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT) + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); - if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) - rflags |= (HPTE_R_I | HPTE_R_W); + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) + rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); + else + /* + * Add memory coherence if cache inhibited is not set + */ + rflags |= HPTE_R_M; return rflags; } -- cgit v1.2.3 From b23d9c5b9c83c05e013aa52460f12a8365062cf4 Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Fri, 17 Jun 2016 11:40:36 +0530 Subject: powerpc/mm/radix: Update Radix tree size as per ISA 3.0 ISA 3.0 updated it to be encoded as Radix tree size = 2^(RTS + 31). We have it encoded as 2^(RTS + 28). Add a helper with the correct encoding and use it instead of opencoding. Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init routines") Reviewed-by: Balbir Singh Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/radix.h | 15 +++++++++++++++ arch/powerpc/mm/mmu_context_book3s64.c | 2 +- arch/powerpc/mm/pgtable-radix.c | 9 +++------ 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 937d4e247ac3..df294224e280 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -228,5 +228,20 @@ extern void radix__vmemmap_remove_mapping(unsigned long start, extern int radix__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int psz); + +static inline unsigned long radix__get_tree_size(void) +{ + unsigned long rts_field; + /* + * we support 52 bits, hence 52-31 = 21, 0b10101 + * RTS encoding details + * bits 0 - 3 of rts -> bits 6 - 8 unsigned long + * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long + */ + rts_field = (0x5UL << 5); /* 6 - 8 bits */ + rts_field |= (0x2UL << 61); + + return rts_field; +} #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 227b2a6c4544..196222227e82 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -65,7 +65,7 @@ static int radix__init_new_context(struct mm_struct *mm, int index) /* * set the process table entry, */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE); return 0; } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index c939e6e57a9e..e58707deef5c 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -160,9 +160,8 @@ redo: process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. - * we support 52 bits, hence 52-28 = 24, 11000 */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE); /* * Fill in the partition table. We are suppose to use effective address @@ -176,10 +175,8 @@ redo: static void __init radix_init_partition_table(void) { unsigned long rts_field; - /* - * we support 52 bits, hence 52-28 = 24, 11000 - */ - rts_field = 3ull << PPC_BITLSHIFT(2); + + rts_field = radix__get_tree_size(); BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); -- cgit v1.2.3 From a3aa256b7258b3d19f8b44557cc64525a993b941 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 17 Jun 2016 13:05:11 +1000 Subject: powerpc/eeh: Fix invalid cached PE primary bus The PE primary bus cannot be got from its child devices when having full hotplug in error recovery. The PE primary bus is cached, which is done in commit <05ba75f84864> ("powerpc/eeh: Fix stale cached primary bus"). In eeh_reset_device(), the flag (EEH_PE_PRI_BUS) is cleared before the PCI hot remove. eeh_pe_bus_get() then returns NULL as the PE primary bus in pnv_eeh_reset() and it crashes the kernel eventually. This fixes the issue by clearing the flag (EEH_PE_PRI_BUS) before the PCI hot add. With it, the PowerNV EEH reset backend (pnv_eeh_reset()) can get valid PE primary bus through eeh_pe_bus_get(). Fixes: 67086e32b564 ("powerpc/eeh: powerpc/eeh: Support error recovery for VF PE") Reported-by: Pridhiviraj Paidipeddi Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2714a3b81d24..b5f73cb5eeb6 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -642,7 +642,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) { eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); pci_hp_remove_devices(bus); pci_unlock_rescan_remove(); @@ -692,10 +691,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - if (pe->type & EEH_PE_VF) + if (pe->type & EEH_PE_VF) { eeh_add_virt_device(edev, NULL); - else + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_hp_add_devices(bus); + } } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); -- cgit v1.2.3 From d279f7a7e95af6bb4b5eaea3527d1f85a28c5cf6 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 15 Jun 2016 11:45:28 +0530 Subject: ARM: dts: am437x-sk-evm: Reduce i2c0 bus speed for tps65218 Based on the latest timing specifications for the TPS65218 from the data sheet, http://www.ti.com/lit/ds/symlink/tps65218.pdf, document SLDS206 from November 2014, we must change the i2c bus speed to better fit within the minimum high SCL time required for proper i2c transfer. When running at 400khz, measurements show that SCL spends 0.8125 uS/1.666 uS high/low which violates the requirement for minimum high period of SCL provided in datasheet Table 7.6 which is 1 uS. Switching to 100khz gives us 5 uS/5 uS high/low which both fall above the minimum given values for 100 khz, 4.0 uS/4.7 uS high/low. Without this patch occasionally a voltage set operation from the kernel will appear to have worked but the actual voltage reflected on the PMIC will not have updated, causing problems especially with cpufreq that may update to a higher OPP without actually raising the voltage on DCDC2, leading to a hang. Signed-off-by: Dave Gerlach Signed-off-by: Nishanth Menon Signed-off-by: Franklin S Cooper Jr Signed-off-by: Aparna Balasubramanian Signed-off-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-sk-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index d82dd6e3f9b1..5687d6b4da60 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -418,7 +418,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins>; - clock-frequency = <400000>; + clock-frequency = <100000>; tps@24 { compatible = "ti,tps65218"; -- cgit v1.2.3 From 3a4955111ad46a022f05b51f91306d864f989625 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 27 May 2016 18:08:27 -0400 Subject: isa: Allow ISA-style drivers on modern systems Several modern devices, such as PC/104 cards, are expected to run on modern systems via an ISA bus interface. Since ISA is a legacy interface for most modern architectures, ISA support should remain disabled in general. Support for ISA-style drivers should be enabled on a per driver basis. To allow ISA-style drivers on modern systems, this patch introduces the ISA_BUS_API and ISA_BUS Kconfig options. The ISA bus driver will now build conditionally on the ISA_BUS_API Kconfig option, which defaults to the legacy ISA Kconfig option. The ISA_BUS Kconfig option allows the ISA_BUS_API Kconfig option to be selected on architectures which do not enable ISA (e.g. X86_64). The ISA_BUS Kconfig option is currently only implemented for X86 architectures. Other architectures may have their own ISA_BUS Kconfig options added as required. Reviewed-by: Guenter Roeck Signed-off-by: William Breathitt Gray Acked-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 3 +++ arch/x86/Kconfig | 9 +++++++++ drivers/base/Makefile | 2 +- include/linux/isa.h | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index d794384a0404..e9734796531f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -606,6 +606,9 @@ config HAVE_ARCH_HASH file which provides platform-specific implementations of some functions in or fs/namei.c. +config ISA_BUS_API + def_bool ISA + # # ABI hall of shame # diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885964ba..d9a94da0c29f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2439,6 +2439,15 @@ config PCI_CNB20LE_QUIRK source "drivers/pci/Kconfig" +config ISA_BUS + bool "ISA-style bus support on modern systems" if EXPERT + select ISA_BUS_API + help + Enables ISA-style drivers on modern systems. This is necessary to + support PC/104 devices on X86_64 platforms. + + If unsure, say N. + # x86_64 have no ISA slots, but can have ISA-style DMA. config ISA_DMA_API bool "ISA-style DMA support" if (X86_64 && EXPERT) diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 6b2a84e7f2be..2609ba20b396 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o -obj-$(CONFIG_ISA) += isa.o +obj-$(CONFIG_ISA_BUS_API) += isa.o obj-$(CONFIG_FW_LOADER) += firmware_class.o obj-$(CONFIG_NUMA) += node.o obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o diff --git a/include/linux/isa.h b/include/linux/isa.h index 5ab85281230b..384ab9b7d79a 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -22,7 +22,7 @@ struct isa_driver { #define to_isa_driver(x) container_of((x), struct isa_driver, driver) -#ifdef CONFIG_ISA +#ifdef CONFIG_ISA_BUS_API int isa_register_driver(struct isa_driver *, unsigned int); void isa_unregister_driver(struct isa_driver *); #else -- cgit v1.2.3 From 0e289e534af1b20417a1940c8eba549588671cd8 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 17 Jun 2016 13:44:18 +0200 Subject: ARM: dts: STi: stih407-family: Disable reserved-memory co-processor nodes This patch fixes a non-booting issue in Mainline. When booting with a compressed kernel, we need to be careful how we populate memory close to DDR start. AUTO_ZRELADDR is enabled by default in multi-arch enabled configurations, which place some restrictions on where the kernel is placed and where it will be uncompressed to on boot. AUTO_ZRELADDR takes the decompressor code's start address and masks out the bottom 28 bits to obtain an address to uncompress the kernel to (thus a load address of 0x42000000 means that the kernel will be uncompressed to 0x40000000 i.e. DDR START on this platform). Even changing the load address to after the co-processor's shared memory won't render a booting platform, since the AUTO_ZRELADDR algorithm still ensures the kernel is uncompressed into memory shared with the first co-processor (0x40000000). Another option would be to move loading to 0x4A000000, since this will mean the decompressor will decompress the kernel to 0x48000000. However, this would mean a large chunk (0x44000000 => 0x48000000 (64MB)) of memory would essentially be wasted for no good reason. Until we can work with ST to find a suitable memory location to relocate co-processor shared memory, let's disable the shared memory nodes. This will ensure a working platform in the mean time. NB: The more observant of you will notice that we're leaving the DMU shared memory node enabled; this is because a) it is the only one in active use at the time of this writing and b) it is not affected by the current default behaviour which is causing issues. Fixes: fe135c6 (ARM: dts: STiH407: Move over to using the 'reserved-memory' API for obtaining DMA memory) Signed-off-by: Lee Jones Reviewed-by Peter Griffin Signed-off-by: Maxime Coquelin Signed-off-by: Olof Johansson --- arch/arm/boot/dts/stih407-family.dtsi | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index ad8ba10764a3..d294e82447a2 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -24,18 +24,21 @@ compatible = "shared-dma-pool"; reg = <0x40000000 0x01000000>; no-map; + status = "disabled"; }; gp1_reserved: rproc@41000000 { compatible = "shared-dma-pool"; reg = <0x41000000 0x01000000>; no-map; + status = "disabled"; }; audio_reserved: rproc@42000000 { compatible = "shared-dma-pool"; reg = <0x42000000 0x01000000>; no-map; + status = "disabled"; }; dmu_reserved: rproc@43000000 { -- cgit v1.2.3 From f7e0efc9b50266f5317b50732efff98d40fc879a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 17 Jun 2016 18:33:00 +0100 Subject: arm64: update ASID limit During a rollover, we mark the active ASID on each CPU as reserved, before allocating a new ID for the task that caused the rollover. This means that with N CPUs, we can only guarantee the new task to obtain a valid ASID if we have at least N+1 ASIDs. Update this limit in the initcall check. Note that this restriction was introduced by commit 8e648066 on the arch/arm side, which disallow re-using the previously active ASID on the local CPU, as it would introduce a TLB race. In addition, we only dispose of NUM_USER_ASIDS-1, since ASID 0 is reserved. Add this restriction as well. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- arch/arm64/mm/context.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b7b397802088..efcf1f7ef1e4 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -179,7 +179,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) &asid_generation); flush_context(cpu); - /* We have at least 1 ASID per CPU, so this will always succeed */ + /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); set_asid: @@ -227,8 +227,11 @@ switch_mm_fastpath: static int asids_init(void) { asid_bits = get_cpu_asid_bits(); - /* If we end up with more CPUs than ASIDs, expect things to crash */ - WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + /* + * Expect allocation after rollover to fail if we don't have at least + * one more ASID than CPUs. ASID #0 is reserved for init_mm. + */ + WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus()); atomic64_set(&asid_generation, ASID_FIRST_VERSION); asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), GFP_KERNEL); -- cgit v1.2.3 From 9ca4e58c20d5cb2a5bc378238188c71317aceac5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 21 Jun 2016 10:44:00 +0900 Subject: arm64: fix boot image dependencies to not generate invalid images I fixed boot image dependencies for arch/arm in commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images"). I see a similar problem for arch/arm64; "make -jN Image Image.gz" would sometimes end up generating bad images where N > 1. Fix the dependency in arch/arm64/Makefile to avoid the race between "make Image" and "make Image.*". Acked-by: Catalin Marinas Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7085e322dc42..648a32c89541 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -95,7 +95,7 @@ boot := arch/arm64/boot Image: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -Image.%: vmlinux +Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zinstall install: -- cgit v1.2.3 From 20c27a4270c775d7ed661491af8ac03264d60fc6 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 21 Jun 2016 15:32:57 +0800 Subject: arm64: mm: remove page_mapping check in __sync_icache_dcache __sync_icache_dcache unconditionally skips the cache maintenance for anonymous pages, under the assumption that flushing is only required in the presence of D-side aliases [see 7249b79f6b4cc ("arm64: Do not flush the D-cache for anonymous pages")]. Unfortunately, this breaks migration of anonymous pages holding self-modifying code, where userspace cannot be reasonably expected to reissue maintenance instructions in response to a migration. This patch fixes the problem by removing the broken page_mapping(page) check from the cache syncing code, otherwise we may end up fetching and executing stale instructions from the PoU. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Reviewed-by: Catalin Marinas Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- arch/arm64/mm/flush.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index dbd12ea8ce68..43a76b07eb32 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -71,10 +71,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) { struct page *page = pte_page(pte); - /* no flushing needed for anonymous pages */ - if (!page_mapping(page)) - return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) sync_icache_aliases(page_address(page), PAGE_SIZE << compound_order(page)); -- cgit v1.2.3 From 5c492c3f5255bd34f7ff8867515ecf98dcba2a2e Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 Jun 2016 10:06:12 +0100 Subject: arm64: smp: Add function to determine if cpus are stuck in the kernel kernel/smp.c has a fancy counter that keeps track of the number of CPUs it marked as not-present and left in cpu_park_loop(). If there are any CPUs spinning in here, features like kexec or hibernate may release them by overwriting this memory. This problem also occurs on machines using spin-tables to release secondary cores. After commit 44dbcc93ab67 ("arm64: Fix behavior of maxcpus=N") we bring all known cpus into the secondary holding pen, meaning this memory can't be re-used by kexec or hibernate. Add a function cpus_are_stuck_in_kernel() to determine if either of these cases have occurred. Signed-off-by: James Morse Acked-by: Mark Rutland Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/smp.h | 12 ++++++++++++ arch/arm64/kernel/smp.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 433e50405274..022644704a93 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -124,6 +124,18 @@ static inline void cpu_panic_kernel(void) cpu_park_loop(); } +/* + * If a secondary CPU enters the kernel but fails to come online, + * (e.g. due to mismatched features), and cannot exit the kernel, + * we increment cpus_stuck_in_kernel and leave the CPU in a + * quiesecent loop within the kernel text. The memory containing + * this loop must not be re-used for anything else as the 'stuck' + * core is executing it. + * + * This function is used to inhibit features like kexec and hibernate. + */ +bool cpus_are_stuck_in_kernel(void); + #endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 678e0842cb3b..62ff3c0622e2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -909,3 +909,21 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +static bool have_cpu_die(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int any_cpu = raw_smp_processor_id(); + + if (cpu_ops[any_cpu]->cpu_die) + return true; +#endif + return false; +} + +bool cpus_are_stuck_in_kernel(void) +{ + bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die()); + + return !!cpus_stuck_in_kernel || smp_spin_tables; +} -- cgit v1.2.3 From d74b4e4f1a6dbe27acce723e071c86a6ed154bf2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 Jun 2016 10:06:13 +0100 Subject: arm64: hibernate: Don't hibernate on systems with stuck CPUs Hibernate relies on cpu hotplug to prevent secondary cores executing the kernel text while it is being restored. Add a call to cpus_are_stuck_in_kernel() to determine if there are CPUs not counted by 'num_online_cpus()', and prevent hibernate in this case. Fixes: 82869ac57b5 ("arm64: kernel: Add support for hibernate/suspend-to-disk") Acked-by: Mark Rutland Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index f8df75d740f4..21ab5df9fa76 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -236,6 +237,11 @@ int swsusp_arch_suspend(void) unsigned long flags; struct sleep_stack_data state; + if (cpus_are_stuck_in_kernel()) { + pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n"); + return -EBUSY; + } + local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { -- cgit v1.2.3 From 6e914ee629c411d9c6d160399ce7d3472d2c0ec7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 20 Jun 2016 19:23:43 +1000 Subject: powerpc: Fix faults caused by radix patching of SLB miss handler As part of the Radix MMU support we added some feature sections in the SLB miss handler. These are intended to catch the case that we incorrectly take an SLB miss when Radix is enabled, and instead of crashing weirdly they bail out to a well defined exit path and trigger an oops. However the way they were written meant the bailout case was enabled by default until we did CPU feature patching. On powermacs the early debug prints in setup_system() can cause an SLB miss, which happens before code patching, and so the SLB miss handler would incorrectly bailout and crash during boot. Fix it by inverting the sense of the feature section, so that the code which is in place at boot is correct for the hash case. Once we determine we are using Radix - which will never happen on a powermac - only then do we patch in the bailout case which unconditionally jumps. Fixes: caca285e5ab4 ("powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code") Reported-by: Denis Kirjanov Tested-by: Denis Kirjanov Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4c9440629128..8bcc1b457115 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1399,11 +1399,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ mtlr r10 -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) andi. r10,r12,MSR_RI /* check for unrecoverable exception */ +BEGIN_MMU_FTR_SECTION beq- 2f +FTR_SECTION_ELSE + b 2f +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) .machine push .machine "power4" -- cgit v1.2.3 From 844e3be47693f92a108cb1fb3b0606bf25e9c7a6 Mon Sep 17 00:00:00 2001 From: Naveen N. Rao Date: Wed, 22 Jun 2016 21:55:01 +0530 Subject: powerpc/bpf/jit: Disable classic BPF JIT on ppc64le Classic BPF JIT was never ported completely to work on little endian powerpc. However, it can be enabled and will crash the system when used. As such, disable use of BPF JIT on ppc64le. Fixes: 7c105b63bd98 ("powerpc: Add CONFIG_CPU_LITTLE_ENDIAN kernel config option.") Reported-by: Thadeu Lima de Souza Cascardo Signed-off-by: Naveen N. Rao Acked-by: Thadeu Lima de Souza Cascardo Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 01f7464d9fea..0a9d439bcda6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -128,7 +128,7 @@ config PPC select IRQ_FORCED_THREADING select HAVE_RCU_TABLE_FREE if SMP select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT + select HAVE_CBPF_JIT if CPU_BIG_ENDIAN select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAS_GCOV_PROFILE_ALL -- cgit v1.2.3 From 1cf38741308c64d08553602b3374fb39224eeb5a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 23 Jun 2016 07:12:27 +0200 Subject: x86/xen: fix upper bound of pmd loop in xen_cleanhighmap() xen_cleanhighmap() is operating on level2_kernel_pgt only. The upper bound of the loop setting non-kernel-image entries to zero should not exceed the size of level2_kernel_pgt. Reported-by: Linus Torvalds Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 478a2de543a5..2693b7ed5a6f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1113,7 +1113,7 @@ static void __init xen_cleanhighmap(unsigned long vaddr, /* NOTE: The loop is more greedy than the cleanup_highmap variant. * We include the PMD passed in on _both_ boundaries. */ - for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD)); pmd++, vaddr += PMD_SIZE) { if (pmd_none(*pmd)) continue; -- cgit v1.2.3 From d6b186c1e2d852a92c43f090d0d8fad4704d51ef Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 17 May 2016 15:54:50 +0100 Subject: x86/xen: avoid m2p lookup when setting early page table entries When page tables entries are set using xen_set_pte_init() during early boot there is no page fault handler that could handle a fault when performing an M2P lookup. In 64 bit guests (usually dom0) early_ioremap() would fault in xen_set_pte_init() because an M2P lookup faults because the MFN is in MMIO space and not mapped in the M2P. This lookup is done to see if the PFN in in the range used for the initial page table pages, so that the PTE may be set as read-only. The M2P lookup can be avoided by moving the check (and clear of RW) earlier when the PFN is still available. Reported-by: Kevin Moraga Signed-off-by: David Vrabel Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross --- arch/x86/xen/mmu.c | 72 +++++++++++++++++++++++------------------------------- 1 file changed, 31 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2693b7ed5a6f..67433714b791 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1551,41 +1551,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } -#ifdef CONFIG_X86_32 -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} -#else /* CONFIG_X86_64 */ -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - unsigned long pfn; - - if (xen_feature(XENFEAT_writable_page_tables) || - xen_feature(XENFEAT_auto_translated_physmap) || - xen_start_info->mfn_list >= __START_KERNEL_map) - return pte; - - /* - * Pages belonging to the initial p2m list mapped outside the default - * address range must be mapped read-only. This region contains the - * page tables for mapping the p2m list, too, and page tables MUST be - * mapped read-only. - */ - pfn = pte_pfn(pte); - if (pfn >= xen_start_info->first_p2m_pfn && - pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) - pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW); - - return pte; -} -#endif /* CONFIG_X86_64 */ - /* * Init-time set_pte while constructing initial pagetables, which * doesn't allow RO page table pages to be remapped RW. @@ -1600,13 +1565,37 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) * so always write the PTE directly and rely on Xen trapping and * emulating any updates as necessary. */ -static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) +__visible pte_t xen_make_pte_init(pteval_t pte) { - if (pte_mfn(pte) != INVALID_P2M_ENTRY) - pte = mask_rw_pte(ptep, pte); - else - pte = __pte_ma(0); +#ifdef CONFIG_X86_64 + unsigned long pfn; + + /* + * Pages belonging to the initial p2m list mapped outside the default + * address range must be mapped read-only. This region contains the + * page tables for mapping the p2m list, too, and page tables MUST be + * mapped read-only. + */ + pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT; + if (xen_start_info->mfn_list < __START_KERNEL_map && + pfn >= xen_start_info->first_p2m_pfn && + pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) + pte &= ~_PAGE_RW; +#endif + pte = pte_pfn_to_mfn(pte); + return native_make_pte(pte); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init); +static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) +{ +#ifdef CONFIG_X86_32 + /* If there's an existing pte, then don't allow _PAGE_RW to be set */ + if (pte_mfn(pte) != INVALID_P2M_ENTRY + && pte_val_ma(*ptep) & _PAGE_PRESENT) + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & + pte_val_ma(pte)); +#endif native_set_pte(ptep, pte); } @@ -2407,6 +2396,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.alloc_pud = xen_alloc_pud; pv_mmu_ops.release_pud = xen_release_pud; #endif + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte); #ifdef CONFIG_X86_64 pv_mmu_ops.write_cr3 = &xen_write_cr3; @@ -2455,7 +2445,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .pte_val = PV_CALLEE_SAVE(xen_pte_val), .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), - .make_pte = PV_CALLEE_SAVE(xen_make_pte), + .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), #ifdef CONFIG_X86_PAE -- cgit v1.2.3 From da01e18a37a57f360222d3a123b8f6994aa1ad14 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 23 Jun 2016 12:20:01 -0700 Subject: x86: avoid avoid passing around 'thread_info' in stack dumping code None of the code actually wants a thread_info, it all wants a task_struct, and it's just converting to a thread_info pointer much too early. No semantic change. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/stacktrace.h | 6 +++--- arch/x86/kernel/dumpstack.c | 22 ++++++++++------------ arch/x86/kernel/dumpstack_32.c | 4 +--- arch/x86/kernel/dumpstack_64.c | 8 +++----- 4 files changed, 17 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 7c247e7404be..0944218af9e2 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -14,7 +14,7 @@ extern int kstack_depth_to_print; struct thread_info; struct stacktrace_ops; -typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, +typedef unsigned long (*walk_stack_t)(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, @@ -23,13 +23,13 @@ typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, int *graph); extern unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); extern unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2bb25c3fe2e8..d6209f3a69cb 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -42,16 +42,14 @@ void printk_address(unsigned long address) static void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { - struct task_struct *task; unsigned long ret_addr; int index; if (addr != (unsigned long)return_to_handler) return; - task = tinfo->task; index = task->curr_ret_stack; if (!task->ret_stack || index < *graph) @@ -68,7 +66,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data, static inline void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { } #endif @@ -79,10 +77,10 @@ print_ftrace_graph_addr(unsigned long addr, void *data, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -static inline int valid_stack_ptr(struct thread_info *tinfo, +static inline int valid_stack_ptr(struct task_struct *task, void *p, unsigned int size, void *end) { - void *t = tinfo; + void *t = task_thread_info(task); if (end) { if (p < end && p >= (end-THREAD_SIZE)) return 1; @@ -93,14 +91,14 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, } unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) { struct stack_frame *frame = (struct stack_frame *)bp; - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + while (valid_stack_ptr(task, stack, sizeof(*stack), end)) { unsigned long addr; addr = *stack; @@ -112,7 +110,7 @@ print_context_stack(struct thread_info *tinfo, } else { ops->address(data, addr, 0); } - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } stack++; } @@ -121,7 +119,7 @@ print_context_stack(struct thread_info *tinfo, EXPORT_SYMBOL_GPL(print_context_stack); unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) @@ -129,7 +127,7 @@ print_context_stack_bp(struct thread_info *tinfo, struct stack_frame *frame = (struct stack_frame *)bp; unsigned long *ret_addr = &frame->return_address; - while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { + while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) { unsigned long addr = *ret_addr; if (!__kernel_text_address(addr)) @@ -139,7 +137,7 @@ print_context_stack_bp(struct thread_info *tinfo, break; frame = frame->next_frame; ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } return (unsigned long)frame; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 464ffd69b92e..fef917e79b9d 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -61,15 +61,13 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, bp = stack_frame(task, regs); for (;;) { - struct thread_info *context; void *end_stack; end_stack = is_hardirq_stack(stack, cpu); if (!end_stack) end_stack = is_softirq_stack(stack, cpu); - context = task_thread_info(task); - bp = ops->walk_stack(context, stack, bp, ops, data, + bp = ops->walk_stack(task, stack, bp, ops, data, end_stack, &graph); /* Stop if not on irq stack */ diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5f1c6266eb30..d558a8a49016 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -153,7 +153,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - struct thread_info *tinfo; unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned long dummy; unsigned used = 0; @@ -179,7 +178,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * current stack address. If the stacks consist of nested * exceptions */ - tinfo = task_thread_info(task); while (!done) { unsigned long *stack_end; enum stack_type stype; @@ -202,7 +200,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, id) < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, ops, + bp = ops->walk_stack(task, stack, bp, ops, data, stack_end, &graph); ops->stack(data, ""); /* @@ -218,7 +216,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, "IRQ") < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, + bp = ops->walk_stack(task, stack, bp, ops, data, stack_end, &graph); /* * We link to the next stack (which would be @@ -240,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); + bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); put_cpu(); } EXPORT_SYMBOL(dump_trace); -- cgit v1.2.3 From b235beea9e996a4d36fed6cfef4801a3e7d7a9a5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 15:09:37 -0700 Subject: Clarify naming of thread info/stack allocators We've had the thread info allocated together with the thread stack for most architectures for a long time (since the thread_info was split off from the task struct), but that is about to change. But the patches that move the thread info to be off-stack (and a part of the task struct instead) made it clear how confused the allocator and freeing functions are. Because the common case was that we share an allocation with the thread stack and the thread_info, the two pointers were identical. That identity then meant that we would have things like ti = alloc_thread_info_node(tsk, node); ... tsk->stack = ti; which certainly _worked_ (since stack and thread_info have the same value), but is rather confusing: why are we assigning a thread_info to the stack? And if we move the thread_info away, the "confusing" code just gets to be entirely bogus. So remove all this confusion, and make it clear that we are doing the stack allocation by renaming and clarifying the function names to be about the stack. The fact that the thread_info then shares the allocation is an implementation detail, and not really about the allocation itself. This is a pure renaming and type fix: we pass in the same pointer, it's just that we clarify what the pointer means. The ia64 code that actually only has one single allocation (for all of task_struct, thread_info and kernel thread stack) now looks a bit odd, but since "tsk->stack" is actually not even used there, that oddity doesn't matter. It would be a separate thing to clean that up, I intentionally left the ia64 changes as a pure brute-force renaming and type change. Acked-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- arch/Kconfig | 4 +-- arch/ia64/Kconfig | 2 +- arch/ia64/include/asm/thread_info.h | 8 +++--- arch/mn10300/include/asm/thread_info.h | 2 +- arch/mn10300/kernel/kgdb.c | 3 +- arch/tile/include/asm/thread_info.h | 2 +- arch/tile/kernel/process.c | 3 +- include/linux/sched.h | 2 +- init/main.c | 4 +-- kernel/fork.c | 50 +++++++++++++++++----------------- 10 files changed, 41 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index e9734796531f..15996290fed4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -226,8 +226,8 @@ config ARCH_INIT_TASK config ARCH_TASK_STRUCT_ALLOCATOR bool -# Select if arch has its private alloc_thread_info() function -config ARCH_THREAD_INFO_ALLOCATOR +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR bool # Select if arch wants to size task_struct dynamically via arch_task_struct_size: diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index f80758cb7157..e109ee95e919 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -45,7 +45,7 @@ config IA64 select GENERIC_SMP_IDLE_THREAD select ARCH_INIT_TASK select ARCH_TASK_STRUCT_ALLOCATOR - select ARCH_THREAD_INFO_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA select GENERIC_TIME_VSYSCALL_OLD select SYSCTL_ARCH_UNALIGN_NO_WARN diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index aa995b67c3f5..d1212b84fb83 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,15 +48,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_info_node(tsk, node) \ - ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define alloc_thread_stack_node(tsk, node) \ + ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) +#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_info(ti) /* nothing */ +#define free_thread_stack(ti) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 4861a78c7160..f5f90bbf019d 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void) } #ifndef CONFIG_KGDB -void arch_release_thread_info(struct thread_info *ti); +void arch_release_thread_stack(unsigned long *stack); #endif #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c index 99770823451a..2d7986c386fe 100644 --- a/arch/mn10300/kernel/kgdb.c +++ b/arch/mn10300/kernel/kgdb.c @@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs) * single-step state is cleared. At this point the breakpoints should have * been removed by __switch_to(). */ -void arch_release_thread_info(struct thread_info *ti) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *ti = (void *)stack; if (kgdb_sstep_thread == ti) { kgdb_sstep_thread = NULL; diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 4b7cef9e94e0..c1467ac59ce6 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -78,7 +78,7 @@ struct thread_info { #ifndef __ASSEMBLY__ -void arch_release_thread_info(struct thread_info *info); +void arch_release_thread_stack(unsigned long *stack); /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 6b705ccc9cc1..a465d8372edd 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -73,8 +73,9 @@ void arch_cpu_idle(void) /* * Release a thread_info structure */ -void arch_release_thread_info(struct thread_info *info) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *info = (void *)stack; struct single_step_state *step_state = info->step_state; if (step_state) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e42ada26345..253538f29ade 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3007,7 +3007,7 @@ static inline int object_is_on_stack(void *obj) return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } -extern void thread_info_cache_init(void); +extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE static inline unsigned long stack_not_used(struct task_struct *p) diff --git a/init/main.c b/init/main.c index 4c17fda5c2ff..826fd57fa3aa 100644 --- a/init/main.c +++ b/init/main.c @@ -453,7 +453,7 @@ void __init __weak smp_setup_processor_id(void) } # if THREAD_SIZE >= PAGE_SIZE -void __init __weak thread_info_cache_init(void) +void __init __weak thread_stack_cache_init(void) { } #endif @@ -627,7 +627,7 @@ asmlinkage __visible void __init start_kernel(void) /* Should be run before the first non-init thread is created */ init_espfix_bsp(); #endif - thread_info_cache_init(); + thread_stack_cache_init(); cred_init(); fork_init(); proc_caches_init(); diff --git a/kernel/fork.c b/kernel/fork.c index 5c2c355aa97f..37b9439b8c07 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -148,18 +148,18 @@ static inline void free_task_struct(struct task_struct *tsk) } #endif -void __weak arch_release_thread_info(struct thread_info *ti) +void __weak arch_release_thread_stack(unsigned long *stack) { } -#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR +#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a * kmemcache based allocator. */ # if THREAD_SIZE >= PAGE_SIZE -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP, @@ -172,33 +172,33 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, return page ? page_address(page) : NULL; } -static inline void free_thread_info(struct thread_info *ti) +static inline void free_thread_stack(unsigned long *stack) { - struct page *page = virt_to_page(ti); + struct page *page = virt_to_page(stack); memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK, -(1 << THREAD_SIZE_ORDER)); __free_kmem_pages(page, THREAD_SIZE_ORDER); } # else -static struct kmem_cache *thread_info_cache; +static struct kmem_cache *thread_stack_cache; -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static struct thread_info *alloc_thread_stack_node(struct task_struct *tsk, int node) { - return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node); + return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); } -static void free_thread_info(struct thread_info *ti) +static void free_stack(unsigned long *stack) { - kmem_cache_free(thread_info_cache, ti); + kmem_cache_free(thread_stack_cache, stack); } -void thread_info_cache_init(void) +void thread_stack_cache_init(void) { - thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, + thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE, THREAD_SIZE, 0, NULL); - BUG_ON(thread_info_cache == NULL); + BUG_ON(thread_stack_cache == NULL); } # endif #endif @@ -221,9 +221,9 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -static void account_kernel_stack(struct thread_info *ti, int account) +static void account_kernel_stack(unsigned long *stack, int account) { - struct zone *zone = page_zone(virt_to_page(ti)); + struct zone *zone = page_zone(virt_to_page(stack)); mod_zone_page_state(zone, NR_KERNEL_STACK, account); } @@ -231,8 +231,8 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { account_kernel_stack(tsk->stack, -1); - arch_release_thread_info(tsk->stack); - free_thread_info(tsk->stack); + arch_release_thread_stack(tsk->stack); + free_thread_stack(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); put_seccomp_filter(tsk); @@ -343,7 +343,7 @@ void set_task_stack_end_magic(struct task_struct *tsk) static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; - struct thread_info *ti; + unsigned long *stack; int err; if (node == NUMA_NO_NODE) @@ -352,15 +352,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (!tsk) return NULL; - ti = alloc_thread_info_node(tsk, node); - if (!ti) + stack = alloc_thread_stack_node(tsk, node); + if (!stack) goto free_tsk; err = arch_dup_task_struct(tsk, orig); if (err) - goto free_ti; + goto free_stack; - tsk->stack = ti; + tsk->stack = stack; #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under @@ -392,14 +392,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; - account_kernel_stack(ti, 1); + account_kernel_stack(stack, 1); kcov_task_init(tsk); return tsk; -free_ti: - free_thread_info(ti); +free_stack: + free_thread_stack(stack); free_tsk: free_task_struct(tsk); return NULL; -- cgit v1.2.3 From aca9c293d098292579e345b2b39b394778d41526 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 16:55:53 -0700 Subject: x86: fix up a few misc stack pointer vs thread_info confusions As the actual pointer value is the same for the thread stack allocation and the thread_info, code that confused the two worked fine, but will break when the thread info is moved away from the stack allocation. It also looks very confusing. For example, the kprobe code wanted to know the current top of stack. To do that, it used this: (unsigned long)current_thread_info() + THREAD_SIZE which did indeed give the correct value. But it's not only a fairly nonsensical expression, it's also rather complex, especially since we actually have this: static inline unsigned long current_top_of_stack(void) which not only gives us the value we are interested in, but happens to be how "current_thread_info()" is currently defined as: (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); so using current_thread_info() to figure out the top of the stack really is a very round-about thing to do. The other cases are just simpler confusion about task_thread_info() vs task_stack_page(), which currently return the same pointer - but if you want the stack page, you really should be using the latter one. And there was one entirely unused assignment of the current stack to a thread_info pointer. All cleaned up to make more sense today, and make it easier to move the thread_info away from the stack in the future. No semantic changes. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/kprobes.h | 11 +++++------ arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/irq_32.c | 2 -- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4421b5da409d..d1d1e5094c28 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -38,12 +38,11 @@ typedef u8 kprobe_opcode_t; #define RELATIVECALL_OPCODE 0xe8 #define RELATIVE_ADDR_SIZE 4 #define MAX_STACK_SIZE 64 -#define MIN_STACK_SIZE(ADDR) \ - (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) \ - ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) +#define CUR_STACK_SIZE(ADDR) \ + (current_top_of_stack() - (unsigned long)(ADDR)) +#define MIN_STACK_SIZE(ADDR) \ + (MAX_STACK_SIZE < CUR_STACK_SIZE(ADDR) ? \ + MAX_STACK_SIZE : CUR_STACK_SIZE(ADDR)) #define flush_insn_slot(p) do { } while (0) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index d6209f3a69cb..ef8017ca5ba9 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -80,7 +80,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data, static inline int valid_stack_ptr(struct task_struct *task, void *p, unsigned int size, void *end) { - void *t = task_thread_info(task); + void *t = task_stack_page(task); if (end) { if (p < end && p >= (end-THREAD_SIZE)) return 1; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 38da8f29a9c8..c627bf8d98ad 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -130,11 +130,9 @@ void irq_ctx_init(int cpu) void do_softirq_own_stack(void) { - struct thread_info *curstk; struct irq_stack *irqstk; u32 *isp, *prev_esp; - curstk = current_stack(); irqstk = __this_cpu_read(softirq_stack); /* build the stack frame on the softirq stack */ -- cgit v1.2.3 From 7f1a00b6fcd0e3c19beba2e92d157dc0c2cf3494 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 17:07:33 -0700 Subject: fix up initial thread stack pointer vs thread_info confusion The INIT_TASK() initializer was similarly confused about the stack vs thread_info allocation that the allocators had, and that were fixed in commit b235beea9e99 ("Clarify naming of thread info/stack allocators"). The task ->stack pointer only incidentally ends up having the same value as the thread_info, and in fact that will change. So fix the initial task struct initializer to point to 'init_stack' instead of 'init_thread_info', and make sure the ia64 definition for that exists. This actually makes the ia64 tsk->stack pointer be sensible for the initial task, but not for any other task. As mentioned in commit b235beea9e99, that whole pointer isn't actually used on ia64, since task_stack_page() there just points to the (single) allocation. All the other architectures seem to have copied the 'init_stack' definition, even if it tended to be generally unusued. Signed-off-by: Linus Torvalds --- arch/ia64/kernel/init_task.c | 1 + include/linux/init_task.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index f9efe9739d3f..0eaa89f3defd 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * handled. This is done by having a special ".data..init_task" section... */ #define init_thread_info init_task_mem.s.thread_info +#define init_stack init_task_mem.stack union { struct { diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f2cb8d45513d..f8834f820ec2 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -190,7 +190,7 @@ extern struct task_group root_task_group; #define INIT_TASK(tsk) \ { \ .state = 0, \ - .stack = &init_thread_info, \ + .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ .prio = MAX_PRIO-20, \ -- cgit v1.2.3 From 32d6bd9059f265f617f6502c68dfbcae7e515add Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:47 -0700 Subject: tree wide: get rid of __GFP_REPEAT for order-0 allocations part I This is the third version of the patchset previously sent [1]. I have basically only rebased it on top of 4.7-rc1 tree and dropped "dm: get rid of superfluous gfp flags" which went through dm tree. I am sending it now because it is tree wide and chances for conflicts are reduced considerably when we want to target rc2. I plan to send the next step and rename the flag and move to a better semantic later during this release cycle so we will have a new semantic ready for 4.8 merge window hopefully. Motivation: While working on something unrelated I've checked the current usage of __GFP_REPEAT in the tree. It seems that a majority of the usage is and always has been bogus because __GFP_REPEAT has always been about costly high order allocations while we are using it for order-0 or very small orders very often. It seems that a big pile of them is just a copy&paste when a code has been adopted from one arch to another. I think it makes some sense to get rid of them because they are just making the semantic more unclear. Please note that GFP_REPEAT is documented as * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt * _might_ fail. This depends upon the particular VM implementation. while !costly requests have basically nofail semantic. So one could reasonably expect that order-0 request with __GFP_REPEAT will not loop for ever. This is not implemented right now though. I would like to move on with __GFP_REPEAT and define a better semantic for it. $ git grep __GFP_REPEAT origin/master | wc -l 111 $ git grep __GFP_REPEAT | wc -l 36 So we are down to the third after this patch series. The remaining places really seem to be relying on __GFP_REPEAT due to large allocation requests. This still needs some double checking which I will do later after all the simple ones are sorted out. I am touching a lot of arch specific code here and I hope I got it right but as a matter of fact I even didn't compile test for some archs as I do not have cross compiler for them. Patches should be quite trivial to review for stupid compile mistakes though. The tricky parts are usually hidden by macro definitions and thats where I would appreciate help from arch maintainers. [1] http://lkml.kernel.org/r/1461849846-27209-1-git-send-email-mhocko@kernel.org This patch (of 19): __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. Yet we have the full kernel tree with its usage for apparently order-0 allocations. This is really confusing because __GFP_REPEAT is explicitly documented to allow allocation failures which is a weaker semantic than the current order-0 has (basically nofail). Let's simply drop __GFP_REPEAT from those places. This would allow to identify place which really need allocator to retry harder and formulate a more specific semantic for what the flag is supposed to do actually. Link: http://lkml.kernel.org/r/1464599699-30131-2-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: "Theodore Ts'o" Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf [for tile] Cc: Guan Xuetao Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Jan Kara Cc: John Crispin Cc: Lennox Wu Cc: Ley Foon Tan Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Ralf Baechle Cc: Rich Felker Cc: Russell King Cc: Thomas Gleixner Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgalloc.h | 4 ++-- arch/arm/include/asm/pgalloc.h | 2 +- arch/avr32/include/asm/pgalloc.h | 6 +++--- arch/cris/include/asm/pgalloc.h | 4 ++-- arch/frv/mm/pgalloc.c | 6 +++--- arch/hexagon/include/asm/pgalloc.h | 4 ++-- arch/m68k/include/asm/mcf_pgalloc.h | 4 ++-- arch/m68k/include/asm/motorola_pgalloc.h | 4 ++-- arch/m68k/include/asm/sun3_pgalloc.h | 4 ++-- arch/metag/include/asm/pgalloc.h | 5 ++--- arch/microblaze/include/asm/pgalloc.h | 4 ++-- arch/microblaze/mm/pgtable.c | 3 +-- arch/mn10300/mm/pgtable.c | 6 +++--- arch/openrisc/include/asm/pgalloc.h | 2 +- arch/openrisc/mm/ioremap.c | 2 +- arch/parisc/include/asm/pgalloc.h | 4 ++-- arch/powerpc/include/asm/book3s/64/pgalloc.h | 2 +- arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- arch/powerpc/mm/pgtable_32.c | 4 ++-- arch/powerpc/mm/pgtable_64.c | 3 +-- arch/sh/include/asm/pgalloc.h | 4 ++-- arch/sparc/mm/init_64.c | 6 ++---- arch/um/kernel/mem.c | 4 ++-- arch/x86/include/asm/pgalloc.h | 4 ++-- arch/x86/xen/p2m.c | 2 +- arch/xtensa/include/asm/pgalloc.h | 2 +- drivers/block/aoe/aoecmd.c | 2 +- 27 files changed, 47 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index aab14a019c20..c2ebb6f36c9d 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -40,7 +40,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return ret; } @@ -53,7 +53,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 19cfab526d13..20febb368844 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -29,7 +29,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pmd_t *)get_zeroed_page(GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h index 1aba19d68c5e..db039cb368be 100644 --- a/arch/avr32/include/asm/pgalloc.h +++ b/arch/avr32/include/asm/pgalloc.h @@ -43,7 +43,7 @@ static inline void pgd_ctor(void *x) */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return quicklist_alloc(QUICK_PGD, GFP_KERNEL | __GFP_REPEAT, pgd_ctor); + return quicklist_alloc(QUICK_PGD, GFP_KERNEL, pgd_ctor); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -54,7 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -63,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); if (!pg) return NULL; diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h index 235ece437ddd..42f1affb9c2d 100644 --- a/arch/cris/include/asm/pgalloc.h +++ b/arch/cris/include/asm/pgalloc.h @@ -24,14 +24,14 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index 41907d25ed38..c9ed14f6c67d 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -22,7 +22,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE))); pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); if (pte) clear_page(pte); return pte; @@ -33,9 +33,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *page; #ifdef CONFIG_HIGHPTE - page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0); #else - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + page = alloc_pages(GFP_KERNEL, 0); #endif if (!page) return NULL; diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 77da3b0ae3c2..eeebf862c46c 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -64,7 +64,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + pte = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { @@ -78,7 +78,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; return (pte_t *) __get_free_page(flags); } diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index f9924fbcfe42..fb95aed5f428 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -14,7 +14,7 @@ extern const char bad_pmd_string[]; extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_DMA|__GFP_REPEAT); + unsigned long page = __get_free_page(GFP_DMA); if (!page) return NULL; @@ -51,7 +51,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_DMA|__GFP_REPEAT, 0); + struct page *page = alloc_pages(GFP_DMA, 0); pte_t *pte; if (!page) diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index 24bcba496c75..c895b987202c 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad { pte_t *pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); if (pte) { __flush_page_to_ram(pte); flush_tlb_kernel_page(pte); @@ -32,7 +32,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres struct page *page; pte_t *pte; - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + page = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); if(!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 0931388de47f..1901f61f926f 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -37,7 +37,7 @@ do { \ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT); + unsigned long page = __get_free_page(GFP_KERNEL); if (!page) return NULL; @@ -49,7 +49,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + struct page *page = alloc_pages(GFP_KERNEL, 0); if (page == NULL) return NULL; diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h index 3104df0a4822..c2caa1ee4360 100644 --- a/arch/metag/include/asm/pgalloc.h +++ b/arch/metag/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | - __GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); return pte; } @@ -51,7 +50,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index 61436d69775c..7c89390c0c13 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -116,9 +116,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *ptepage; #ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; + int flags = GFP_KERNEL | __GFP_HIGHMEM; #else - int flags = GFP_KERNEL | __GFP_REPEAT; + int flags = GFP_KERNEL; #endif ptepage = alloc_pages(flags, 0); diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 4f4520e779a5..eb99fcc76088 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -239,8 +239,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; if (mem_init_done) { - pte = (pte_t *)__get_free_page(GFP_KERNEL | - __GFP_REPEAT | __GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } else { pte = (pte_t *)early_get_page(); if (pte) diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c index e77a7c728081..9577cf768875 100644 --- a/arch/mn10300/mm/pgtable.c +++ b/arch/mn10300/mm/pgtable.c @@ -63,7 +63,7 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); if (pte) clear_page(pte); return pte; @@ -74,9 +74,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *pte; #ifdef CONFIG_HIGHPTE - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0); #else - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL, 0); #endif if (!pte) return NULL; diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 21484e5b9e9a..87eebd185089 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -77,7 +77,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL, 0); if (!pte) return NULL; clear_page(page_address(pte)); diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 62b08ef392be..5b2a95116e8f 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -122,7 +122,7 @@ pte_t __init_refok *pte_alloc_one_kernel(struct mm_struct *mm, pte_t *pte; if (likely(mem_init_done)) { - pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT); + pte = (pte_t *) __get_free_page(GFP_KERNEL); } else { pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); #if 0 diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index f2fd327dce2e..52c3defb40c9 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -124,7 +124,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!page) return NULL; if (!pgtable_page_ctor(page)) { @@ -137,7 +137,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 488279edb1f0..049b80359db6 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -151,7 +151,7 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 069369f6414b..8a8a7d991249 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -88,7 +88,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index bf7bf32b54f8..7f922f557936 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -84,7 +84,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add pte_t *pte; if (slab_is_available()) { - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); } else { pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) @@ -97,7 +97,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *ptepage; - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; ptepage = alloc_pages(flags, 0); if (!ptepage) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e009e0604a8a..f5e8d4edb808 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -350,8 +350,7 @@ static pte_t *get_from_cache(struct mm_struct *mm) static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) { void *ret = NULL; - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); if (!page) return NULL; if (!kernel && !pgtable_page_ctor(page)) { diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index a33673b3687d..f3f42c84c40f 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -34,7 +34,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -43,7 +43,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); if (!pg) return NULL; page = virt_to_page(pg); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 14bb0d5ed3c6..aec508e37490 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2704,8 +2704,7 @@ void __flush_tlb_all(void) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pte_t *pte = NULL; if (page) @@ -2717,8 +2716,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); if (!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index b2a2dff50b4e..e7437ec62710 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -204,7 +204,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } @@ -212,7 +212,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index bf7f8b55b0f9..574c23cf761a 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -81,7 +81,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page *page; - page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); if (!page) return NULL; if (!pgtable_pmd_page_ctor(page)) { @@ -125,7 +125,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + return (pud_t *)get_zeroed_page(GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index cab9f766bb06..dd2a49a8aacc 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -182,7 +182,7 @@ static void * __ref alloc_p2m_page(void) if (unlikely(!slab_is_available())) return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); + return (void *)__get_free_page(GFP_KERNEL); } static void __ref free_p2m_page(void *p) diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index d38eb9237e64..1065bc8bcae5 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -44,7 +44,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pte_t *ptep; int i; - ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + ptep = (pte_t *)__get_free_page(GFP_KERNEL); if (!ptep) return NULL; for (i = 0; i < 1024; i++) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d597e432e195..ab19adb07a12 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1750,7 +1750,7 @@ aoecmd_init(void) int ret; /* get_zeroed_page returns page with ref count 1 */ - p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + p = (void *) get_zeroed_page(GFP_KERNEL); if (!p) return -ENOMEM; empty_page = virt_to_page(p); -- cgit v1.2.3 From a3a9a59d206779dc0c4ca5a6de6a2ff40382732b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:50 -0700 Subject: x86: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. PGALLOC_GFP uses __GFP_REPEAT but none of the allocation which uses this flag is for more than order-0. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-3-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/espfix_64.c | 2 +- arch/x86/mm/pgtable.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 4d38416e2a7f..04f89caef9c4 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -57,7 +57,7 @@ # error "Need more than one PGD for the ESPFIX hack" #endif -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* This contains the *bottom* address of the espfix stack */ DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 4eb287e25043..aa0ff4b02a96 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -6,7 +6,7 @@ #include #include -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO #ifdef CONFIG_HIGHPTE #define PGALLOC_USER_GFP __GFP_HIGHMEM -- cgit v1.2.3 From f58f230a832ba8220a64f44aaafcce4b7358d826 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:53 -0700 Subject: x86/efi: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. efi_alloc_page_tables uses __GFP_REPEAT but it allocates an order-0 page. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-4-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6e7242be1c87..b226b3f497f1 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -139,7 +139,7 @@ int __init efi_alloc_page_tables(void) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_page(gfp_mask); if (!efi_pgd) return -ENOMEM; -- cgit v1.2.3 From f3610a6aff7dd70b788364255c0cbc128488ef72 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:56 -0700 Subject: arm64: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. {pte,pmd,pud}_alloc_one{_kernel}, late_pgtable_alloc use PGALLOC_GFP for __get_free_page (aka order-0). pgd_alloc is slightly more complex because it allocates from pgd_cache if PGD_SIZE != PAGE_SIZE and PGD_SIZE depends on the configuration (CONFIG_ARM64_VA_BITS, PAGE_SHIFT and CONFIG_PGTABLE_LEVELS). As per config PGTABLE_LEVELS int default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36 default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47 default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48 we should have the following options CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:4 PAGE_SIZE:4k size:4096 pages:1 CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:4 PAGE_SIZE:16k size:16 pages:1 CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:64k size:512 pages:1 CONFIG_ARM64_VA_BITS:47 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:16k size:16384 pages:1 CONFIG_ARM64_VA_BITS:42 CONFIG_PGTABLE_LEVELS:2 PAGE_SIZE:64k size:65536 pages:1 CONFIG_ARM64_VA_BITS:39 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:4k size:4096 pages:1 CONFIG_ARM64_VA_BITS:36 CONFIG_PGTABLE_LEVELS:2 PAGE_SIZE:16k size:16384 pages:1 All of them fit into a single page (aka order-0). This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-6-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Will Deacon Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index ff98585d085a..d25f4f137c2a 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,7 +26,7 @@ #define check_pgt_cache() do { } while (0) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 -- cgit v1.2.3 From 54d87d600adbe9889bccaff38420cec02250993b Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:48:58 -0700 Subject: arc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one_kernel uses __get_order_pte but this is obviously always zero because BITS_FOR_PTE is not larger than 9 yet the page size is always larger than 4K. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-7-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/include/asm/pgalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 86ed671286df..3749234b7419 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -95,7 +95,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, + pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, __get_order_pte()); return pte; @@ -107,7 +107,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pgtable_t pte_pg; struct page *page; - pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); + pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte()); if (!pte_pg) return 0; memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); -- cgit v1.2.3 From 65f84656ff7c24177c43652bc88cc2a06f9a48b1 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:01 -0700 Subject: mips: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one{_kernel}, pmd_alloc_one allocate PTE_ORDER resp. PMD_ORDER but both are not larger than 1. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-8-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: John Crispin Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/pgalloc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index b336037e8768..93c079a1cfc8 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -69,7 +69,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -79,7 +79,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (!pte) return NULL; clear_highpage(pte); @@ -113,7 +113,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - pmd = (pmd_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PMD_ORDER); + pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ORDER); if (pmd) pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; -- cgit v1.2.3 From 565299d03363c71d5bcf7edabb41b2b36a9ea36e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:04 -0700 Subject: nios2: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pte_alloc_one{_kernel} allocate PTE_ORDER which is 0. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-9-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Ley Foon Tan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nios2/include/asm/pgalloc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 6e2985e0a7b9..bb47d08c8ef7 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, - PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -53,7 +52,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (pte) { if (!pgtable_page_ctor(pte)) { __free_page(pte); -- cgit v1.2.3 From aade311a50b0be5d5ee93bac7ebc2da9a16556d7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:06 -0700 Subject: parisc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pmd_alloc_one allocate PMD_ORDER which is 1. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-10-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: "James E.J. Bottomley" Cc: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/include/asm/pgalloc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 52c3defb40c9..f08dda3f0995 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -63,8 +63,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, - PMD_ORDER); + pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL, PMD_ORDER); if (pmd) memset(pmd, 0, PAGE_SIZE< Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/score/include/asm/pgalloc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h index 2e067657db98..49b012d78c1a 100644 --- a/arch/score/include/asm/pgalloc.h +++ b/arch/score/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, - PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -53,7 +52,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (!pte) return NULL; clear_highpage(pte); -- cgit v1.2.3 From 2379a23e34b58520dfc8f4909f116a08393138e4 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:12 -0700 Subject: powerpc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. {pud,pmd}_alloc_one are allocating from {PGT,PUD}_CACHE initialized in pgtable_cache_init which doesn't have larger than sizeof(void *) << 12 size and that fits into !costly allocation request size. PGALLOC_GFP is used only in radix__pgd_alloc which uses either order-0 or order-4 requests. The first one doesn't need the flag while the second does. Drop __GFP_REPEAT from PGALLOC_GFP and add it for the order-4 one. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-12-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 10 ++++------ arch/powerpc/include/asm/nohash/64/pgalloc.h | 6 ++---- arch/powerpc/mm/hugetlbpage.c | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 049b80359db6..d14fcf82c00c 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -41,7 +41,7 @@ extern struct kmem_cache *pgtable_cache[]; pgtable_cache[(shift) - 1]; \ }) -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int); extern void pte_fragment_free(unsigned long *, int); @@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) return (pgd_t *)__get_free_page(PGALLOC_GFP); #else struct page *page; - page = alloc_pages(PGALLOC_GFP, 4); + page = alloc_pages(PGALLOC_GFP | __GFP_REPEAT, 4); if (!page) return NULL; return (pgd_t *) page_address(page); @@ -93,8 +93,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -115,8 +114,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 8a8a7d991249..897d2e1c8a9b 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -57,8 +57,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -190,8 +189,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 5aac1a3f86cd..119d18611500 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -73,7 +73,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, cachep = PGT_CACHE(pdshift - pshift); #endif - new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); + new = kmem_cache_zalloc(cachep, GFP_KERNEL); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); -- cgit v1.2.3 From 45eeff260d40ff02af3d5b8e2919033ee59f9ff6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:14 -0700 Subject: sparc: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. {pud,pmd}_alloc_one is using __GFP_REPEAT but it always allocates from pgtable_cache which is initialzed to PAGE_SIZE objects. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-13-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgalloc_64.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 5e3187185b4a..3529f1378cd8 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -41,8 +41,7 @@ static inline void __pud_populate(pud_t *pud, pmd_t *pmd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache, - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -52,8 +51,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache, - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) -- cgit v1.2.3 From 10d58bf297e2cba0cfa2cd143d4f0df26e129040 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:17 -0700 Subject: s390: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. page_table_alloc then uses the flag for a single page allocation. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-14-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/pgalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index e8b5962ac12a..e2565d2d0c32 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) return table; } /* Allocate a fresh page */ - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + page = alloc_page(GFP_KERNEL); if (!page) return NULL; if (!pgtable_page_ctor(page)) { -- cgit v1.2.3 From 884ed4cb8aa19ccff32f5c5586257c56e56f91a4 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:20 -0700 Subject: sh: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. PGALLOC_GFP uses __GFP_REPEAT but {pgd,pmd}_alloc allocate from {pgd,pmd}_cache but both caches are allocating up to PAGE_SIZE objects. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-15-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c index 26e03a1f7ca4..a62bd8696779 100644 --- a/arch/sh/mm/pgtable.c +++ b/arch/sh/mm/pgtable.c @@ -1,7 +1,7 @@ #include #include -#define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO static struct kmem_cache *pgd_cachep; #if PAGETABLE_LEVELS > 2 -- cgit v1.2.3 From f45eebc25e78991ef6a6d784ab54151d3003cfdf Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:22 -0700 Subject: tile: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. pgtable_alloc_one uses __GFP_REPEAT flag for L2_USER_PGTABLE_ORDER but the order is either 0 or 3 if L2_KERNEL_PGTABLE_SHIFT for HPAGE_SHIFT. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-16-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Chris Metcalf [for tile] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 7bf2491a9c1f..c4d5bf841a7f 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -231,7 +231,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, int order) { - gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; + gfp_t flags = GFP_KERNEL|__GFP_ZERO; struct page *p; int i; -- cgit v1.2.3 From a830627b01b26452a13abb7e7b37d39365be4b05 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:25 -0700 Subject: unicore32: get rid of superfluous __GFP_REPEAT __GFP_REPEAT has a rather weak semantic but since it has been introduced around 2.6.12 it has been ignored for low order allocations. PGALLOC_GFP uses __GFP_REPEAT but it is only used in pte_alloc_one, pte_alloc_one_kernel which does order-0 request. This means that this flag has never been actually useful here because it has always been used only for PAGE_ALLOC_COSTLY requests. Link: http://lkml.kernel.org/r/1464599699-30131-17-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/unicore32/include/asm/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index 2e02d1356fdf..26775793c204 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -28,7 +28,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); #define pgd_alloc(mm) get_pgd_slow(mm) #define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* * Allocate one PTE table. -- cgit v1.2.3