diff options
author | Linus Torvalds | 2013-09-03 15:59:39 -0700 |
---|---|---|
committer | Linus Torvalds | 2013-09-03 15:59:39 -0700 |
commit | 40031da445fb4d269af9c7c445b2adf674f171e7 (patch) | |
tree | 021df7906708e939dee9978669a5461b12ff1296 /drivers/cpuidle | |
parent | dcaaaeac871ff73043c616db3b2f91482637801d (diff) | |
parent | f41b83126cba53849dd2353476a7715613af648f (diff) |
Merge tag 'pm+acpi-3.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull ACPI and power management updates from Rafael Wysocki:
1) ACPI-based PCI hotplug (ACPIPHP) subsystem rework and introduction
of Intel Thunderbolt support on systems that use ACPI for signalling
Thunderbolt hotplug events. This also should make ACPIPHP work in
some cases in which it was known to have problems. From
Rafael J Wysocki, Mika Westerberg and Kirill A Shutemov.
2) ACPI core code cleanups and dock station support cleanups from
Jiang Liu and Rafael J Wysocki.
3) Fixes for locking problems related to ACPI device hotplug from
Rafael J Wysocki.
4) ACPICA update to version 20130725 includig fixes, cleanups, support
for more than 256 GPEs per GPE block and a change to make the ACPI
PM Timer optional (we've seen systems without the PM Timer in the
field already). One of the fixes, related to the DeRefOf operator,
is necessary to prevent some Windows 8 oriented AML from causing
problems to happen. From Bob Moore, Lv Zheng, and Jung-uk Kim.
5) Removal of the old and long deprecated /proc/acpi/event interface
and related driver changes from Thomas Renninger.
6) ACPI and Xen changes to make the reduced hardware sleep work with
the latter from Ben Guthro.
7) ACPI video driver cleanups and a blacklist of systems that should
not tell the BIOS that they are compatible with Windows 8 (or ACPI
backlight and possibly other things will not work on them). From
Felipe Contreras.
8) Assorted ACPI fixes and cleanups from Aaron Lu, Hanjun Guo,
Kuppuswamy Sathyanarayanan, Lan Tianyu, Sachin Kamat, Tang Chen,
Toshi Kani, and Wei Yongjun.
9) cpufreq ondemand governor target frequency selection change to
reduce oscillations between min and max frequencies (essentially,
it causes the governor to choose target frequencies proportional
to load) from Stratos Karafotis.
10) cpufreq fixes allowing sysfs attributes file permissions to be
preserved over suspend/resume cycles Srivatsa S Bhat.
11) Removal of Device Tree parsing for CPU device nodes from multiple
cpufreq drivers that required some changes related to
of_get_cpu_node() to be made in a few architectures and in the
driver core. From Sudeep KarkadaNagesha.
12) cpufreq core fixes and cleanups related to mutual exclusion and
driver module references from Viresh Kumar, Lukasz Majewski and
Rafael J Wysocki.
13) Assorted cpufreq fixes and cleanups from Amit Daniel Kachhap,
Bartlomiej Zolnierkiewicz, Hanjun Guo, Jingoo Han, Joseph Lo,
Julia Lawall, Li Zhong, Mark Brown, Sascha Hauer, Stephen Boyd,
Stratos Karafotis, and Viresh Kumar.
14) Fixes to prevent race conditions in coupled cpuidle from happening
from Colin Cross.
15) cpuidle core fixes and cleanups from Daniel Lezcano and
Tuukka Tikkanen.
16) Assorted cpuidle fixes and cleanups from Daniel Lezcano,
Geert Uytterhoeven, Jingoo Han, Julia Lawall, Linus Walleij,
and Sahara.
17) System sleep tracing changes from Todd E Brandt and Shuah Khan.
18) PNP subsystem conversion to using struct dev_pm_ops for power
management from Shuah Khan.
* tag 'pm+acpi-3.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (217 commits)
cpufreq: Don't use smp_processor_id() in preemptible context
cpuidle: coupled: fix race condition between pokes and safe state
cpuidle: coupled: abort idle if pokes are pending
cpuidle: coupled: disable interrupts after entering safe state
ACPI / hotplug: Remove containers synchronously
driver core / ACPI: Avoid device hot remove locking issues
cpufreq: governor: Fix typos in comments
cpufreq: governors: Remove duplicate check of target freq in supported range
cpufreq: Fix timer/workqueue corruption due to double queueing
ACPI / EC: Add ASUSTEK L4R to quirk list in order to validate ECDT
ACPI / thermal: Add check of "_TZD" availability and evaluating result
cpufreq: imx6q: Fix clock enable balance
ACPI: blacklist win8 OSI for buggy laptops
cpufreq: tegra: fix the wrong clock name
cpuidle: Change struct menu_device field types
cpuidle: Add a comment warning about possible overflow
cpuidle: Fix variable domains in get_typical_interval()
cpuidle: Fix menu_device->intervals type
cpuidle: CodingStyle: Break up multiple assignments on single line
cpuidle: Check called function parameter in get_typical_interval()
...
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r-- | drivers/cpuidle/Kconfig | 20 | ||||
-rw-r--r-- | drivers/cpuidle/Kconfig.arm | 29 | ||||
-rw-r--r-- | drivers/cpuidle/Makefile | 9 | ||||
-rw-r--r-- | drivers/cpuidle/coupled.c | 129 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-calxeda.c | 2 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-kirkwood.c | 5 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-ux500.c | 131 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle.c | 94 | ||||
-rw-r--r-- | drivers/cpuidle/governors/ladder.c | 12 | ||||
-rw-r--r-- | drivers/cpuidle/governors/menu.c | 108 | ||||
-rw-r--r-- | drivers/cpuidle/sysfs.c | 101 |
11 files changed, 463 insertions, 177 deletions
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index 0e2cd5cab4d0..b3fb81d7cf04 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -1,5 +1,6 @@ +menu "CPU Idle" -menuconfig CPU_IDLE +config CPU_IDLE bool "CPU idle PM support" default y if ACPI || PPC_PSERIES select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE) @@ -29,20 +30,13 @@ config CPU_IDLE_GOV_MENU bool "Menu governor (for tickless system)" default y -config CPU_IDLE_CALXEDA - bool "CPU Idle Driver for Calxeda processors" - depends on ARCH_HIGHBANK - select ARM_CPU_SUSPEND - help - Select this to enable cpuidle on Calxeda processors. - -config CPU_IDLE_ZYNQ - bool "CPU Idle Driver for Xilinx Zynq processors" - depends on ARCH_ZYNQ - help - Select this to enable cpuidle on Xilinx Zynq processors. +menu "ARM CPU Idle Drivers" +depends on ARM +source "drivers/cpuidle/Kconfig.arm" +endmenu endif config ARCH_NEEDS_CPU_IDLE_COUPLED def_bool n +endmenu diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm new file mode 100644 index 000000000000..b3302193c15a --- /dev/null +++ b/drivers/cpuidle/Kconfig.arm @@ -0,0 +1,29 @@ +# +# ARM CPU Idle drivers +# + +config ARM_HIGHBANK_CPUIDLE + bool "CPU Idle Driver for Calxeda processors" + depends on ARCH_HIGHBANK + select ARM_CPU_SUSPEND + help + Select this to enable cpuidle on Calxeda processors. + +config ARM_KIRKWOOD_CPUIDLE + bool "CPU Idle Driver for Marvell Kirkwood SoCs" + depends on ARCH_KIRKWOOD + help + This adds the CPU Idle driver for Marvell Kirkwood SoCs. + +config ARM_ZYNQ_CPUIDLE + bool "CPU Idle Driver for Xilinx Zynq processors" + depends on ARCH_ZYNQ + help + Select this to enable cpuidle on Xilinx Zynq processors. + +config ARM_U8500_CPUIDLE + bool "Cpu Idle Driver for the ST-E u8500 processors" + depends on ARCH_U8500 + help + Select this to enable cpuidle for ST-E u8500 processors + diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 8767a7b3eb91..0b9d200c7e45 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -5,6 +5,9 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o -obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o -obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o -obj-$(CONFIG_CPU_IDLE_ZYNQ) += cpuidle-zynq.o +################################################################################## +# ARM SoC drivers +obj-$(CONFIG_ARM_HIGHBANK_CPUIDLE) += cpuidle-calxeda.o +obj-$(CONFIG_ARM_KIRKWOOD_CPUIDLE) += cpuidle-kirkwood.o +obj-$(CONFIG_ARM_ZYNQ_CPUIDLE) += cpuidle-zynq.o +obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 2a297f86dbad..f8a86364c6b6 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -106,6 +106,7 @@ struct cpuidle_coupled { cpumask_t coupled_cpus; int requested_state[NR_CPUS]; atomic_t ready_waiting_counts; + atomic_t abort_barrier; int online_count; int refcnt; int prevent; @@ -122,12 +123,19 @@ static DEFINE_MUTEX(cpuidle_coupled_lock); static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); /* - * The cpuidle_coupled_poked_mask mask is used to avoid calling + * The cpuidle_coupled_poke_pending mask is used to avoid calling * __smp_call_function_single with the per cpu call_single_data struct already * in use. This prevents a deadlock where two cpus are waiting for each others * call_single_data struct to be available */ -static cpumask_t cpuidle_coupled_poked_mask; +static cpumask_t cpuidle_coupled_poke_pending; + +/* + * The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked + * once to minimize entering the ready loop with a poke pending, which would + * require aborting and retrying. + */ +static cpumask_t cpuidle_coupled_poked; /** * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus @@ -291,10 +299,11 @@ static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev, return state; } -static void cpuidle_coupled_poked(void *info) +static void cpuidle_coupled_handle_poke(void *info) { int cpu = (unsigned long)info; - cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask); + cpumask_set_cpu(cpu, &cpuidle_coupled_poked); + cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending); } /** @@ -313,7 +322,7 @@ static void cpuidle_coupled_poke(int cpu) { struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); - if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask)) + if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) __smp_call_function_single(cpu, csd, 0); } @@ -340,30 +349,19 @@ static void cpuidle_coupled_poke_others(int this_cpu, * @coupled: the struct coupled that contains the current cpu * @next_state: the index in drv->states of the requested state for this cpu * - * Updates the requested idle state for the specified cpuidle device, - * poking all coupled cpus out of idle if necessary to let them see the new - * state. + * Updates the requested idle state for the specified cpuidle device. + * Returns the number of waiting cpus. */ -static void cpuidle_coupled_set_waiting(int cpu, +static int cpuidle_coupled_set_waiting(int cpu, struct cpuidle_coupled *coupled, int next_state) { - int w; - coupled->requested_state[cpu] = next_state; /* - * If this is the last cpu to enter the waiting state, poke - * all the other cpus out of their waiting state so they can - * enter a deeper state. This can race with one of the cpus - * exiting the waiting state due to an interrupt and - * decrementing waiting_count, see comment below. - * * The atomic_inc_return provides a write barrier to order the write * to requested_state with the later write that increments ready_count. */ - w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; - if (w == coupled->online_count) - cpuidle_coupled_poke_others(cpu, coupled); + return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; } /** @@ -410,19 +408,33 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled) * been processed and the poke bit has been cleared. * * Other interrupts may also be processed while interrupts are enabled, so - * need_resched() must be tested after turning interrupts off again to make sure + * need_resched() must be tested after this function returns to make sure * the interrupt didn't schedule work that should take the cpu out of idle. * - * Returns 0 if need_resched was false, -EINTR if need_resched was true. + * Returns 0 if no poke was pending, 1 if a poke was cleared. */ static int cpuidle_coupled_clear_pokes(int cpu) { + if (!cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) + return 0; + local_irq_enable(); - while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask)) + while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) cpu_relax(); local_irq_disable(); - return need_resched() ? -EINTR : 0; + return 1; +} + +static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled) +{ + cpumask_t cpus; + int ret; + + cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus); + ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus); + + return ret; } /** @@ -449,31 +461,56 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, { int entered_state = -1; struct cpuidle_coupled *coupled = dev->coupled; + int w; if (!coupled) return -EINVAL; while (coupled->prevent) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { local_irq_enable(); return entered_state; } entered_state = cpuidle_enter_state(dev, drv, dev->safe_state_index); + local_irq_disable(); } /* Read barrier ensures online_count is read after prevent is cleared */ smp_rmb(); - cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); +reset: + cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked); + + w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); + /* + * If this is the last cpu to enter the waiting state, poke + * all the other cpus out of their waiting state so they can + * enter a deeper state. This can race with one of the cpus + * exiting the waiting state due to an interrupt and + * decrementing waiting_count, see comment below. + */ + if (w == coupled->online_count) { + cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked); + cpuidle_coupled_poke_others(dev->cpu, coupled); + } retry: /* * Wait for all coupled cpus to be idle, using the deepest state - * allowed for a single cpu. + * allowed for a single cpu. If this was not the poking cpu, wait + * for at least one poke before leaving to avoid a race where + * two cpus could arrive at the waiting loop at the same time, + * but the first of the two to arrive could skip the loop without + * processing the pokes from the last to arrive. */ - while (!cpuidle_coupled_cpus_waiting(coupled)) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + while (!cpuidle_coupled_cpus_waiting(coupled) || + !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) { + if (cpuidle_coupled_clear_pokes(dev->cpu)) + continue; + + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } @@ -485,14 +522,22 @@ retry: entered_state = cpuidle_enter_state(dev, drv, dev->safe_state_index); + local_irq_disable(); } - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } /* + * Make sure final poke status for this cpu is visible before setting + * cpu as ready. + */ + smp_wmb(); + + /* * All coupled cpus are probably idle. There is a small chance that * one of the other cpus just became active. Increment the ready count, * and spin until all coupled cpus have incremented the counter. Once a @@ -511,6 +556,28 @@ retry: cpu_relax(); } + /* + * Make sure read of all cpus ready is done before reading pending pokes + */ + smp_rmb(); + + /* + * There is a small chance that a cpu left and reentered idle after this + * cpu saw that all cpus were waiting. The cpu that reentered idle will + * have sent this cpu a poke, which will still be pending after the + * ready loop. The pending interrupt may be lost by the interrupt + * controller when entering the deep idle state. It's not possible to + * clear a pending interrupt without turning interrupts on and handling + * it, and it's too late to turn on interrupts here, so reset the + * coupled idle state of all cpus and retry. + */ + if (cpuidle_coupled_any_pokes_pending(coupled)) { + cpuidle_coupled_set_done(dev->cpu, coupled); + /* Wait for all cpus to see the pending pokes */ + cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier); + goto reset; + } + /* all cpus have acked the coupled state */ next_state = cpuidle_coupled_get_state(dev, coupled); @@ -596,7 +663,7 @@ have_coupled: coupled->refcnt++; csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu); - csd->func = cpuidle_coupled_poked; + csd->func = cpuidle_coupled_handle_poke; csd->info = (void *)(unsigned long)dev->cpu; return 0; diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index 0e6e408c0a63..346058479572 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -35,7 +35,7 @@ #include <asm/cp15.h> extern void highbank_set_cpu_jump(int cpu, void *jump_addr); -extern void *scu_base_addr; +extern void __iomem *scu_base_addr; static noinline void calxeda_idle_restore(void) { diff --git a/drivers/cpuidle/cpuidle-kirkwood.c b/drivers/cpuidle/cpuidle-kirkwood.c index 521b0a7fdd89..41ba843251b8 100644 --- a/drivers/cpuidle/cpuidle-kirkwood.c +++ b/drivers/cpuidle/cpuidle-kirkwood.c @@ -60,9 +60,6 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev) struct resource *res; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) - return -EINVAL; - ddr_operation_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(ddr_operation_base)) return PTR_ERR(ddr_operation_base); @@ -70,7 +67,7 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev) return cpuidle_register(&kirkwood_idle_driver, NULL); } -int kirkwood_cpuidle_remove(struct platform_device *pdev) +static int kirkwood_cpuidle_remove(struct platform_device *pdev) { cpuidle_unregister(&kirkwood_idle_driver); return 0; diff --git a/drivers/cpuidle/cpuidle-ux500.c b/drivers/cpuidle/cpuidle-ux500.c new file mode 100644 index 000000000000..e0564652af35 --- /dev/null +++ b/drivers/cpuidle/cpuidle-ux500.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2012 Linaro : Daniel Lezcano <daniel.lezcano@linaro.org> (IBM) + * + * Based on the work of Rickard Andersson <rickard.andersson@stericsson.com> + * and Jonas Aaberg <jonas.aberg@stericsson.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/cpuidle.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/smp.h> +#include <linux/mfd/dbx500-prcmu.h> +#include <linux/platform_data/arm-ux500-pm.h> +#include <linux/platform_device.h> + +#include <asm/cpuidle.h> +#include <asm/proc-fns.h> + +static atomic_t master = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(master_lock); + +static inline int ux500_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + int this_cpu = smp_processor_id(); + bool recouple = false; + + if (atomic_inc_return(&master) == num_online_cpus()) { + + /* With this lock, we prevent the other cpu to exit and enter + * this function again and become the master */ + if (!spin_trylock(&master_lock)) + goto wfi; + + /* decouple the gic from the A9 cores */ + if (prcmu_gic_decouple()) { + spin_unlock(&master_lock); + goto out; + } + + /* If an error occur, we will have to recouple the gic + * manually */ + recouple = true; + + /* At this state, as the gic is decoupled, if the other + * cpu is in WFI, we have the guarantee it won't be wake + * up, so we can safely go to retention */ + if (!prcmu_is_cpu_in_wfi(this_cpu ? 0 : 1)) + goto out; + + /* The prcmu will be in charge of watching the interrupts + * and wake up the cpus */ + if (prcmu_copy_gic_settings()) + goto out; + + /* Check in the meantime an interrupt did + * not occur on the gic ... */ + if (prcmu_gic_pending_irq()) + goto out; + + /* ... and the prcmu */ + if (prcmu_pending_irq()) + goto out; + + /* Go to the retention state, the prcmu will wait for the + * cpu to go WFI and this is what happens after exiting this + * 'master' critical section */ + if (prcmu_set_power_state(PRCMU_AP_IDLE, true, true)) + goto out; + + /* When we switch to retention, the prcmu is in charge + * of recoupling the gic automatically */ + recouple = false; + + spin_unlock(&master_lock); + } +wfi: + cpu_do_idle(); +out: + atomic_dec(&master); + + if (recouple) { + prcmu_gic_recouple(); + spin_unlock(&master_lock); + } + + return index; +} + +static struct cpuidle_driver ux500_idle_driver = { + .name = "ux500_idle", + .owner = THIS_MODULE, + .states = { + ARM_CPUIDLE_WFI_STATE, + { + .enter = ux500_enter_idle, + .exit_latency = 70, + .target_residency = 260, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "ApIdle", + .desc = "ARM Retention", + }, + }, + .safe_state_index = 0, + .state_count = 2, +}; + +static int __init dbx500_cpuidle_probe(struct platform_device *pdev) +{ + /* Configure wake up reasons */ + prcmu_enable_wakeups(PRCMU_WAKEUP(ARM) | PRCMU_WAKEUP(RTC) | + PRCMU_WAKEUP(ABB)); + + return cpuidle_register(&ux500_idle_driver, NULL); +} + +static struct platform_driver dbx500_cpuidle_plat_driver = { + .driver = { + .name = "cpuidle-dbx500", + .owner = THIS_MODULE, + }, + .probe = dbx500_cpuidle_probe, +}; + +module_platform_driver(dbx500_cpuidle_plat_driver); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index fdc432f18022..d75040ddd2b3 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -42,8 +42,6 @@ void disable_cpuidle(void) off = 1; } -static int __cpuidle_register_device(struct cpuidle_device *dev); - /** * cpuidle_play_dead - cpu off-lining * @@ -278,7 +276,7 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} */ int cpuidle_enable_device(struct cpuidle_device *dev) { - int ret, i; + int ret; struct cpuidle_driver *drv; if (!dev) @@ -292,15 +290,12 @@ int cpuidle_enable_device(struct cpuidle_device *dev) if (!drv || !cpuidle_curr_governor) return -EIO; + if (!dev->registered) + return -EINVAL; + if (!dev->state_count) dev->state_count = drv->state_count; - if (dev->registered == 0) { - ret = __cpuidle_register_device(dev); - if (ret) - return ret; - } - poll_idle_init(drv); ret = cpuidle_add_device_sysfs(dev); @@ -311,12 +306,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev) (ret = cpuidle_curr_governor->enable(drv, dev))) goto fail_sysfs; - for (i = 0; i < dev->state_count; i++) { - dev->states_usage[i].usage = 0; - dev->states_usage[i].time = 0; - } - dev->last_residency = 0; - smp_wmb(); dev->enabled = 1; @@ -360,6 +349,23 @@ void cpuidle_disable_device(struct cpuidle_device *dev) EXPORT_SYMBOL_GPL(cpuidle_disable_device); +static void __cpuidle_unregister_device(struct cpuidle_device *dev) +{ + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + + list_del(&dev->device_list); + per_cpu(cpuidle_devices, dev->cpu) = NULL; + module_put(drv->owner); +} + +static int __cpuidle_device_init(struct cpuidle_device *dev) +{ + memset(dev->states_usage, 0, sizeof(dev->states_usage)); + dev->last_residency = 0; + + return 0; +} + /** * __cpuidle_register_device - internal register function called before register * and enable routines @@ -377,24 +383,15 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); - ret = cpuidle_add_sysfs(dev); - if (ret) - goto err_sysfs; ret = cpuidle_coupled_register_device(dev); - if (ret) - goto err_coupled; + if (ret) { + __cpuidle_unregister_device(dev); + return ret; + } dev->registered = 1; return 0; - -err_coupled: - cpuidle_remove_sysfs(dev); -err_sysfs: - list_del(&dev->device_list); - per_cpu(cpuidle_devices, dev->cpu) = NULL; - module_put(drv->owner); - return ret; } /** @@ -403,25 +400,44 @@ err_sysfs: */ int cpuidle_register_device(struct cpuidle_device *dev) { - int ret; + int ret = -EBUSY; if (!dev) return -EINVAL; mutex_lock(&cpuidle_lock); - if ((ret = __cpuidle_register_device(dev))) { - mutex_unlock(&cpuidle_lock); - return ret; - } + if (dev->registered) + goto out_unlock; + + ret = __cpuidle_device_init(dev); + if (ret) + goto out_unlock; + + ret = __cpuidle_register_device(dev); + if (ret) + goto out_unlock; + + ret = cpuidle_add_sysfs(dev); + if (ret) + goto out_unregister; + + ret = cpuidle_enable_device(dev); + if (ret) + goto out_sysfs; - cpuidle_enable_device(dev); cpuidle_install_idle_handler(); +out_unlock: mutex_unlock(&cpuidle_lock); - return 0; + return ret; +out_sysfs: + cpuidle_remove_sysfs(dev); +out_unregister: + __cpuidle_unregister_device(dev); + goto out_unlock; } EXPORT_SYMBOL_GPL(cpuidle_register_device); @@ -432,8 +448,6 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); */ void cpuidle_unregister_device(struct cpuidle_device *dev) { - struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); - if (dev->registered == 0) return; @@ -442,14 +456,12 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_disable_device(dev); cpuidle_remove_sysfs(dev); - list_del(&dev->device_list); - per_cpu(cpuidle_devices, dev->cpu) = NULL; + + __cpuidle_unregister_device(dev); cpuidle_coupled_unregister_device(dev); cpuidle_resume_and_unlock(); - - module_put(drv->owner); } EXPORT_SYMBOL_GPL(cpuidle_unregister_device); diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 9b784051ec12..9f08e8cce1af 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -192,14 +192,4 @@ static int __init init_ladder(void) return cpuidle_register_governor(&ladder_governor); } -/** - * exit_ladder - exits the governor - */ -static void __exit exit_ladder(void) -{ - cpuidle_unregister_governor(&ladder_governor); -} - -MODULE_LICENSE("GPL"); -module_init(init_ladder); -module_exit(exit_ladder); +postcore_initcall(init_ladder); diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index bc580b67a652..cf7f2f0e4ef5 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -21,6 +21,15 @@ #include <linux/math64.h> #include <linux/module.h> +/* + * Please note when changing the tuning values: + * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of + * a scaling operation multiplication may overflow on 32 bit platforms. + * In that case, #define RESOLUTION as ULL to get 64 bit result: + * #define RESOLUTION 1024ULL + * + * The default values do not overflow. + */ #define BUCKETS 12 #define INTERVALS 8 #define RESOLUTION 1024 @@ -114,11 +123,11 @@ struct menu_device { int needs_update; unsigned int expected_us; - u64 predicted_us; + unsigned int predicted_us; unsigned int exit_us; unsigned int bucket; - u64 correction_factor[BUCKETS]; - u32 intervals[INTERVALS]; + unsigned int correction_factor[BUCKETS]; + unsigned int intervals[INTERVALS]; int interval_ptr; }; @@ -199,16 +208,20 @@ static u64 div_round64(u64 dividend, u32 divisor) */ static void get_typical_interval(struct menu_device *data) { - int i = 0, divisor = 0; - uint64_t max = 0, avg = 0, stddev = 0; - int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */ + int i, divisor; + unsigned int max, thresh; + uint64_t avg, stddev; + + thresh = UINT_MAX; /* Discard outliers above this value */ again: - /* first calculate average and standard deviation of the past */ - max = avg = divisor = stddev = 0; + /* First calculate the average of past intervals */ + max = 0; + avg = 0; + divisor = 0; for (i = 0; i < INTERVALS; i++) { - int64_t value = data->intervals[i]; + unsigned int value = data->intervals[i]; if (value <= thresh) { avg += value; divisor++; @@ -218,15 +231,38 @@ again: } do_div(avg, divisor); + /* Then try to determine standard deviation */ + stddev = 0; for (i = 0; i < INTERVALS; i++) { - int64_t value = data->intervals[i]; + unsigned int value = data->intervals[i]; if (value <= thresh) { int64_t diff = value - avg; stddev += diff * diff; } } do_div(stddev, divisor); - stddev = int_sqrt(stddev); + /* + * The typical interval is obtained when standard deviation is small + * or standard deviation is small compared to the average interval. + * + * int_sqrt() formal parameter type is unsigned long. When the + * greatest difference to an outlier exceeds ~65 ms * sqrt(divisor) + * the resulting squared standard deviation exceeds the input domain + * of int_sqrt on platforms where unsigned long is 32 bits in size. + * In such case reject the candidate average. + * + * Use this result only if there is no timer to wake us up sooner. + */ + if (likely(stddev <= ULONG_MAX)) { + stddev = int_sqrt(stddev); + if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) + || stddev <= 20) { + if (data->expected_us > avg) + data->predicted_us = avg; + return; + } + } + /* * If we have outliers to the upside in our distribution, discard * those by setting the threshold to exclude these outliers, then @@ -235,20 +271,12 @@ again: * * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. - * - * The typical interval is obtained when standard deviation is small - * or standard deviation is small compared to the average interval. */ - if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) - || stddev <= 20) { - data->predicted_us = avg; + if ((divisor * 4) <= INTERVALS * 3) return; - } else if ((divisor * 4) > INTERVALS * 3) { - /* Exclude the max interval */ - thresh = max - 1; - goto again; - } + thresh = max - 1; + goto again; } /** @@ -293,8 +321,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (data->correction_factor[data->bucket] == 0) data->correction_factor[data->bucket] = RESOLUTION * DECAY; - /* Make sure to round up for half microseconds */ - data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], + /* + * Force the result of multiplication to be 64 bits even if both + * operands are 32 bits. + * Make sure to round up for half microseconds. + */ + data->predicted_us = div_round64((uint64_t)data->expected_us * + data->correction_factor[data->bucket], RESOLUTION * DECAY); get_typical_interval(data); @@ -360,7 +393,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) unsigned int last_idle_us = cpuidle_get_last_residency(dev); struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; - u64 new_factor; + unsigned int new_factor; /* * Ugh, this idle state doesn't support residency measurements, so we @@ -381,10 +414,9 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) measured_us -= data->exit_us; - /* update our correction ratio */ - - new_factor = data->correction_factor[data->bucket] - * (DECAY - 1) / DECAY; + /* Update our correction ratio */ + new_factor = data->correction_factor[data->bucket]; + new_factor -= new_factor / DECAY; if (data->expected_us > 0 && measured_us < MAX_INTERESTING) new_factor += RESOLUTION * measured_us / data->expected_us; @@ -397,9 +429,11 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* * We don't want 0 as factor; we always want at least - * a tiny bit of estimated time. + * a tiny bit of estimated time. Fortunately, due to rounding, + * new_factor will stay nonzero regardless of measured_us values + * and the compiler can eliminate this test as long as DECAY > 1. */ - if (new_factor == 0) + if (DECAY == 1 && unlikely(new_factor == 0)) new_factor = 1; data->correction_factor[data->bucket] = new_factor; @@ -442,14 +476,4 @@ static int __init init_menu(void) return cpuidle_register_governor(&menu_governor); } -/** - * exit_menu - exits the governor - */ -static void __exit exit_menu(void) -{ - cpuidle_unregister_governor(&menu_governor); -} - -MODULE_LICENSE("GPL"); -module_init(init_menu); -module_exit(exit_menu); +postcore_initcall(init_menu); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 428754af6236..8739cc05228c 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -11,8 +11,10 @@ #include <linux/sysfs.h> #include <linux/slab.h> #include <linux/cpu.h> +#include <linux/completion.h> #include <linux/capability.h> #include <linux/device.h> +#include <linux/kobject.h> #include "cpuidle.h" @@ -33,7 +35,8 @@ static ssize_t show_available_governors(struct device *dev, mutex_lock(&cpuidle_lock); list_for_each_entry(tmp, &cpuidle_governors, governor_list) { - if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2)) + if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - + CPUIDLE_NAME_LEN - 2)) goto out; i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name); } @@ -166,13 +169,28 @@ struct cpuidle_attr { #define define_one_rw(_name, show, store) \ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store) -#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj) #define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr) -static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf) + +struct cpuidle_device_kobj { + struct cpuidle_device *dev; + struct completion kobj_unregister; + struct kobject kobj; +}; + +static inline struct cpuidle_device *to_cpuidle_device(struct kobject *kobj) +{ + struct cpuidle_device_kobj *kdev = + container_of(kobj, struct cpuidle_device_kobj, kobj); + + return kdev->dev; +} + +static ssize_t cpuidle_show(struct kobject *kobj, struct attribute *attr, + char *buf) { int ret = -EIO; - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); - struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + struct cpuidle_device *dev = to_cpuidle_device(kobj); + struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr); if (cattr->show) { mutex_lock(&cpuidle_lock); @@ -182,12 +200,12 @@ static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char return ret; } -static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) +static ssize_t cpuidle_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) { int ret = -EIO; - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); - struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + struct cpuidle_device *dev = to_cpuidle_device(kobj); + struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr); if (cattr->store) { mutex_lock(&cpuidle_lock); @@ -204,9 +222,10 @@ static const struct sysfs_ops cpuidle_sysfs_ops = { static void cpuidle_sysfs_release(struct kobject *kobj) { - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); + struct cpuidle_device_kobj *kdev = + container_of(kobj, struct cpuidle_device_kobj, kobj); - complete(&dev->kobj_unregister); + complete(&kdev->kobj_unregister); } static struct kobj_type ktype_cpuidle = { @@ -237,8 +256,8 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ #define define_store_state_ull_function(_name) \ static ssize_t store_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, \ - const char *buf, size_t size) \ + struct cpuidle_state_usage *state_usage, \ + const char *buf, size_t size) \ { \ unsigned long long value; \ int err; \ @@ -256,14 +275,16 @@ static ssize_t store_state_##_name(struct cpuidle_state *state, \ #define define_show_state_ull_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, char *buf) \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ { \ return sprintf(buf, "%llu\n", state_usage->_name);\ } #define define_show_state_str_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, char *buf) \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ { \ if (state->_name[0] == '\0')\ return sprintf(buf, "<null>\n");\ @@ -309,8 +330,9 @@ struct cpuidle_state_kobj { #define kobj_to_state(k) (kobj_to_state_obj(k)->state) #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage) #define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr) -static ssize_t cpuidle_state_show(struct kobject * kobj, - struct attribute * attr ,char * buf) + +static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr, + char * buf) { int ret = -EIO; struct cpuidle_state *state = kobj_to_state(kobj); @@ -323,8 +345,8 @@ static ssize_t cpuidle_state_show(struct kobject * kobj, return ret; } -static ssize_t cpuidle_state_store(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t size) +static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) { int ret = -EIO; struct cpuidle_state *state = kobj_to_state(kobj); @@ -371,6 +393,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) { int i, ret = -ENOMEM; struct cpuidle_state_kobj *kobj; + struct cpuidle_device_kobj *kdev = device->kobj_dev; struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); /* state statistics */ @@ -383,7 +406,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) init_completion(&kobj->kobj_unregister); ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, - &device->kobj, "state%d", i); + &kdev->kobj, "state%d", i); if (ret) { kfree(kobj); goto error_state; @@ -449,8 +472,8 @@ static void cpuidle_driver_sysfs_release(struct kobject *kobj) complete(&driver_kobj->kobj_unregister); } -static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute * attr, - char * buf) +static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute *attr, + char *buf) { int ret = -EIO; struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); @@ -500,6 +523,7 @@ static struct kobj_type ktype_driver_cpuidle = { static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) { struct cpuidle_driver_kobj *kdrv; + struct cpuidle_device_kobj *kdev = dev->kobj_dev; struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int ret; @@ -511,7 +535,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) init_completion(&kdrv->kobj_unregister); ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle, - &dev->kobj, "driver"); + &kdev->kobj, "driver"); if (ret) { kfree(kdrv); return ret; @@ -580,16 +604,28 @@ void cpuidle_remove_device_sysfs(struct cpuidle_device *device) */ int cpuidle_add_sysfs(struct cpuidle_device *dev) { + struct cpuidle_device_kobj *kdev; struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; - init_completion(&dev->kobj_unregister); + kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); + if (!kdev) + return -ENOMEM; + kdev->dev = dev; + dev->kobj_dev = kdev; - error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj, - "cpuidle"); - if (!error) - kobject_uevent(&dev->kobj, KOBJ_ADD); - return error; + init_completion(&kdev->kobj_unregister); + + error = kobject_init_and_add(&kdev->kobj, &ktype_cpuidle, &cpu_dev->kobj, + "cpuidle"); + if (error) { + kfree(kdev); + return error; + } + + kobject_uevent(&kdev->kobj, KOBJ_ADD); + + return 0; } /** @@ -598,6 +634,9 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) */ void cpuidle_remove_sysfs(struct cpuidle_device *dev) { - kobject_put(&dev->kobj); - wait_for_completion(&dev->kobj_unregister); + struct cpuidle_device_kobj *kdev = dev->kobj_dev; + + kobject_put(&kdev->kobj); + wait_for_completion(&kdev->kobj_unregister); + kfree(kdev); } |