author		Rafael J. Wysocki	2016-10-02 01:42:45 +0200
committer	Rafael J. Wysocki	2016-10-02 01:42:45 +0200
commit		7005f6dc69948685b846c895e80fca2646476b83
tree		b30009dd75caed344a5d03d791398ce5bb88496b	/kernel/sched
parent		2dc3c72cd0a3ea85b8b7ae469904cfc24af1de60
parent		b6e251178286eff8bbd2c01a1b6aa7a317eb3c67
Merge branch 'pm-cpufreq'
* pm-cpufreq: (24 commits)
cpufreq: st: add missing \n to end of dev_err message
cpufreq: kirkwood: add missing \n to end of dev_err messages
cpufreq: CPPC: Avoid overflow when calculating desired_perf
cpufreq: ti: Use generic platdev driver
cpufreq: intel_pstate: Add io_boost trace
cpufreq: intel_pstate: Use IOWAIT flag in Atom algorithm
cpufreq: schedutil: Add iowait boosting
cpufreq / sched: SCHED_CPUFREQ_IOWAIT flag to indicate iowait condition
cpufreq: CPPC: Force reporting values in KHz to fix user space interface
cpufreq: create link to policy only for registered CPUs
intel_pstate: constify local structures
cpufreq: dt: Support governor tunables per policy
cpufreq: dt: Update kconfig description
cpufreq: dt: Remove unused code
MAINTAINERS: Add Documentation/cpu-freq/
cpufreq: dt: Add support for r8a7792
cpufreq / sched: ignore SMT when determining max cpu capacity
cpufreq: Drop unnecessary check from cpufreq_policy_alloc()
ARM: multi_v7_defconfig: Don't attempt to enable schedutil governor as module
ARM: exynos_defconfig: Don't attempt to enable schedutil governor as module
...
Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/cpufreq.c           |   2
-rw-r--r--	kernel/sched/cpufreq_schedutil.c | 122
-rw-r--r--	kernel/sched/deadline.c          |   5
-rw-r--r--	kernel/sched/fair.c              |  23
-rw-r--r--	kernel/sched/rt.c                |   5
-rw-r--r--	kernel/sched/sched.h             |  40
6 files changed, 122 insertions(+), 75 deletions(-)
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 1141954e73b4..dbc51442ecbc 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
  */
 void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
 			void (*func)(struct update_util_data *data, u64 time,
-				     unsigned long util, unsigned long max))
+				     unsigned int flags))
 {
 	if (WARN_ON(!data || !func))
 		return;
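The hunk above changes the update-hook prototype: instead of receiving pre-computed util/max values, a hook now only gets a timestamp and a flags word, and is expected to pull utilization out of the runqueue itself. A minimal sketch of a governor-side hook under the new prototype (the "demo" names are hypothetical; cpufreq_add_update_util_hook(), struct update_util_data and the SCHED_CPUFREQ_* flags are the ones this series introduces):

	/* Hypothetical per-CPU hook illustrating the new callback shape;
	 * would live in kernel code with the usual sched/percpu headers. */
	struct demo_cpu {
		struct update_util_data update_util;	/* must be embedded */
		unsigned int last_flags;
	};
	static DEFINE_PER_CPU(struct demo_cpu, demo_cpu);

	static void demo_update(struct update_util_data *data, u64 time,
				unsigned int flags)
	{
		struct demo_cpu *dc = container_of(data, struct demo_cpu,
						   update_util);

		dc->last_flags = flags;
		if (flags & SCHED_CPUFREQ_RT_DL) {
			/* RT or DL activity: a governor would typically go
			 * straight to the maximum frequency here, as
			 * schedutil does below. */
		}
	}

	/* Registration, e.g. from a governor's ->start() callback: */
	cpufreq_add_update_util_hook(cpu, &per_cpu(demo_cpu, cpu).update_util,
				     demo_update);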
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index a84641b222c1..69e06898997d 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -12,7 +12,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/cpufreq.h>
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <trace/events/power.h>
 
@@ -48,11 +47,14 @@ struct sugov_cpu {
 	struct sugov_policy *sg_policy;
 
 	unsigned int cached_raw_freq;
+	unsigned long iowait_boost;
+	unsigned long iowait_boost_max;
+	u64 last_update;
 
 	/* The fields below are only needed when sharing a policy. */
 	unsigned long util;
 	unsigned long max;
-	u64 last_update;
+	unsigned int flags;
 };
 
 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -144,24 +146,75 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
 	return cpufreq_driver_resolve_freq(policy, freq);
 }
 
+static void sugov_get_util(unsigned long *util, unsigned long *max)
+{
+	struct rq *rq = this_rq();
+	unsigned long cfs_max;
+
+	cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
+
+	*util = min(rq->cfs.avg.util_avg, cfs_max);
+	*max = cfs_max;
+}
+
+static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
+				   unsigned int flags)
+{
+	if (flags & SCHED_CPUFREQ_IOWAIT) {
+		sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+	} else if (sg_cpu->iowait_boost) {
+		s64 delta_ns = time - sg_cpu->last_update;
+
+		/* Clear iowait_boost if the CPU appears to have been idle. */
+		if (delta_ns > TICK_NSEC)
+			sg_cpu->iowait_boost = 0;
+	}
+}
+
+static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
+			       unsigned long *max)
+{
+	unsigned long boost_util = sg_cpu->iowait_boost;
+	unsigned long boost_max = sg_cpu->iowait_boost_max;
+
+	if (!boost_util)
+		return;
+
+	if (*util * boost_max < *max * boost_util) {
+		*util = boost_util;
+		*max = boost_max;
+	}
+	sg_cpu->iowait_boost >>= 1;
+}
+
 static void sugov_update_single(struct update_util_data *hook, u64 time,
-				unsigned long util, unsigned long max)
+				unsigned int flags)
 {
 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
+	unsigned long util, max;
 	unsigned int next_f;
 
+	sugov_set_iowait_boost(sg_cpu, time, flags);
+	sg_cpu->last_update = time;
+
 	if (!sugov_should_update_freq(sg_policy, time))
 		return;
 
-	next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
-			get_next_freq(sg_cpu, util, max);
+	if (flags & SCHED_CPUFREQ_RT_DL) {
+		next_f = policy->cpuinfo.max_freq;
+	} else {
+		sugov_get_util(&util, &max);
+		sugov_iowait_boost(sg_cpu, &util, &max);
+		next_f = get_next_freq(sg_cpu, util, max);
+	}
 	sugov_update_commit(sg_policy, time, next_f);
 }
 
 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
-					   unsigned long util, unsigned long max)
+					   unsigned long util, unsigned long max,
+					   unsigned int flags)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
@@ -169,9 +222,11 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
 	u64 last_freq_update_time = sg_policy->last_freq_update_time;
 	unsigned int j;
 
-	if (util == ULONG_MAX)
+	if (flags & SCHED_CPUFREQ_RT_DL)
 		return max_f;
 
+	sugov_iowait_boost(sg_cpu, &util, &max);
+
 	for_each_cpu(j, policy->cpus) {
 		struct sugov_cpu *j_sg_cpu;
 		unsigned long j_util, j_max;
@@ -186,41 +241,50 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
 		 * frequency update and the time elapsed between the last update
 		 * of the CPU utilization and the last frequency update is long
 		 * enough, don't take the CPU into account as it probably is
-		 * idle now.
+		 * idle now (and clear iowait_boost for it).
 		 */
 		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
-		if (delta_ns > TICK_NSEC)
+		if (delta_ns > TICK_NSEC) {
+			j_sg_cpu->iowait_boost = 0;
 			continue;
-
-		j_util = j_sg_cpu->util;
-		if (j_util == ULONG_MAX)
+		}
+		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
 			return max_f;
 
+		j_util = j_sg_cpu->util;
 		j_max = j_sg_cpu->max;
 		if (j_util * max > j_max * util) {
 			util = j_util;
 			max = j_max;
 		}
+
+		sugov_iowait_boost(j_sg_cpu, &util, &max);
 	}
 
 	return get_next_freq(sg_cpu, util, max);
 }
 
 static void sugov_update_shared(struct update_util_data *hook, u64 time,
-				unsigned long util, unsigned long max)
+				unsigned int flags)
 {
 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+	unsigned long util, max;
 	unsigned int next_f;
 
+	sugov_get_util(&util, &max);
+
 	raw_spin_lock(&sg_policy->update_lock);
 
 	sg_cpu->util = util;
 	sg_cpu->max = max;
+	sg_cpu->flags = flags;
+
+	sugov_set_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
 
 	if (sugov_should_update_freq(sg_policy, time)) {
-		next_f = sugov_next_freq_shared(sg_cpu, util, max);
+		next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
 		sugov_update_commit(sg_policy, time, next_f);
 	}
@@ -444,10 +508,13 @@ static int sugov_start(struct cpufreq_policy *policy)
 
 		sg_cpu->sg_policy = sg_policy;
 		if (policy_is_shared(policy)) {
-			sg_cpu->util = ULONG_MAX;
+			sg_cpu->util = 0;
 			sg_cpu->max = 0;
+			sg_cpu->flags = SCHED_CPUFREQ_RT;
 			sg_cpu->last_update = 0;
 			sg_cpu->cached_raw_freq = 0;
+			sg_cpu->iowait_boost = 0;
+			sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
 			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
 						     sugov_update_shared);
 		} else {
@@ -495,28 +562,15 @@ static struct cpufreq_governor schedutil_gov = {
 	.limits = sugov_limits,
 };
 
-static int __init sugov_module_init(void)
-{
-	return cpufreq_register_governor(&schedutil_gov);
-}
-
-static void __exit sugov_module_exit(void)
-{
-	cpufreq_unregister_governor(&schedutil_gov);
-}
-
-MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
-MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
-MODULE_LICENSE("GPL");
-
 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
 struct cpufreq_governor *cpufreq_default_governor(void)
 {
 	return &schedutil_gov;
 }
-
-fs_initcall(sugov_module_init);
-#else
-module_init(sugov_module_init);
 #endif
-module_exit(sugov_module_exit);
+
+static int __init sugov_register(void)
+{
+	return cpufreq_register_governor(&schedutil_gov);
+}
+fs_initcall(sugov_register);
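Two details of sugov_iowait_boost() above are worth calling out. The comparison *util * boost_max < *max * boost_util is a cross-multiplied test for util/max < boost_util/boost_max, kept in integers so no division (or floating point) is needed in the hot path; and the boost is halved every time it is consumed, so a single SCHED_CPUFREQ_IOWAIT event decays geometrically instead of pinning the frequency. A standalone userspace sketch of that decay (the numbers are illustrative, chosen so the products fit a 32-bit unsigned long; in the kernel, boost_max is policy->cpuinfo.max_freq and util/max come from PELT):

	#include <stdio.h>

	int main(void)
	{
		unsigned long util = 200, max = 1024;	/* ~20% utilization */
		unsigned long boost_max = 3000000;	/* e.g. 3 GHz in kHz */
		unsigned long boost = boost_max;	/* IOWAIT just seen */

		while (boost) {
			/* Same integer cross-multiplication as the patch. */
			if (util * boost_max < max * boost) {
				printf("boost wins: effective ratio %lu%%\n",
				       boost * 100 / boost_max);
			} else {
				printf("boost %lu no longer dominates\n", boost);
				break;
			}
			boost >>= 1;		/* halve on every update */
		}
		return 0;
	}

With these inputs the boosted ratio wins at 100%, 50% and 25%, then loses to the real ~20% utilization at 12.5% and the boost stops mattering, which is exactly the intended short-lived kick after I/O completion.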
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 1ce8867283dc..974779656999 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -735,9 +735,8 @@ static void update_curr_dl(struct rq *rq)
 		return;
 	}
 
-	/* kick cpufreq (see the comment in linux/cpufreq.h). */
-	if (cpu_of(rq) == smp_processor_id())
-		cpufreq_trigger_update(rq_clock(rq));
+	/* kick cpufreq (see the comment in kernel/sched/sched.h). */
+	cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
 
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 039de34f1521..a5cd07b25aa1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2875,12 +2875,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
 
 static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 {
-	struct rq *rq = rq_of(cfs_rq);
-	int cpu = cpu_of(rq);
-
-	if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
-		unsigned long max = rq->cpu_capacity_orig;
-
+	if (&this_rq()->cfs == cfs_rq) {
 		/*
 		 * There are a few boundary cases this might miss but it should
 		 * get called often enough that that should (hopefully) not be
@@ -2897,8 +2892,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 		 *
 		 * See cpu_util().
 		 */
-		cpufreq_update_util(rq_clock(rq),
-				    min(cfs_rq->avg.util_avg, max), max);
+		cpufreq_update_util(rq_of(cfs_rq), 0);
 	}
 }
 
@@ -3159,10 +3153,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 
 static inline void update_load_avg(struct sched_entity *se, int not_used)
 {
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	struct rq *rq = rq_of(cfs_rq);
-
-	cpufreq_trigger_update(rq_clock(rq));
+	cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
 }
 
 static inline void
@@ -4509,6 +4500,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
 
+	/*
+	 * If in_iowait is set, the code below may not trigger any cpufreq
+	 * utilization updates, so do it here explicitly with the IOWAIT flag
+	 * passed.
+	 */
+	if (p->in_iowait)
+		cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
+
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
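The SCHED_CPUFREQ_DL and SCHED_CPUFREQ_IOWAIT values used by the deadline.c and fair.c hunks above (and SCHED_CPUFREQ_RT below) are not visible in this view because the diffstat is limited to kernel/sched; in this series they are bit flags defined in include/linux/sched.h, roughly as follows (reproduced here for reference, not part of the displayed diff):

	#define SCHED_CPUFREQ_RT	(1U << 0)
	#define SCHED_CPUFREQ_DL	(1U << 1)
	#define SCHED_CPUFREQ_IOWAIT	(1U << 2)

	#define SCHED_CPUFREQ_RT_DL	(SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)

Passing 0, as the fair.c utilization-update paths do, simply means "CFS utilization changed, no special condition".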
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d5690b722691..2516b8df6dbb 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -957,9 +957,8 @@ static void update_curr_rt(struct rq *rq)
 	if (unlikely((s64)delta_exec <= 0))
 		return;
 
-	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
-	if (cpu_of(rq) == smp_processor_id())
-		cpufreq_trigger_update(rq_clock(rq));
+	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+	cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
 
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c64fc5114004..b7fc1ced4380 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1763,27 +1763,13 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
 
 /**
  * cpufreq_update_util - Take a note about CPU utilization changes.
- * @time: Current time.
- * @util: Current utilization.
- * @max: Utilization ceiling.
+ * @rq: Runqueue to carry out the update for.
+ * @flags: Update reason flags.
  *
- * This function is called by the scheduler on every invocation of
- * update_load_avg() on the CPU whose utilization is being updated.
+ * This function is called by the scheduler on the CPU whose utilization is
+ * being updated.
  *
  * It can only be called from RCU-sched read-side critical sections.
- */
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
-{
-	struct update_util_data *data;
-
-	data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
-	if (data)
-		data->func(data, time, util, max);
-}
-
-/**
- * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
- * @time: Current time.
  *
  * The way cpufreq is currently arranged requires it to evaluate the CPU
  * performance state (frequency/voltage) on a regular basis to prevent it from
@@ -1797,13 +1783,23 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
 * but that really is a band-aid. Going forward it should be replaced with
 * solutions targeted more specifically at RT and DL tasks.
 */
-static inline void cpufreq_trigger_update(u64 time)
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
+{
+	struct update_util_data *data;
+
+	data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
+	if (data)
+		data->func(data, rq_clock(rq), flags);
+}
+
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
 {
-	cpufreq_update_util(time, ULONG_MAX, 0);
+	if (cpu_of(rq) == smp_processor_id())
+		cpufreq_update_util(rq, flags);
 }
 #else
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
-static inline void cpufreq_trigger_update(u64 time) {}
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
 #ifdef arch_scale_freq_capacity
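The sched.h rework above splits the old behaviour in two: cpufreq_update_util() now takes the runqueue and a flags word and must only be called for the local CPU, while cpufreq_update_this_cpu() wraps it with the cpu_of(rq) == smp_processor_id() check that update_curr_rt() and update_curr_dl() previously open-coded. A caller that may hold a remote runqueue therefore uses the wrapper (the caller below is hypothetical, for illustration only):

	/* Hypothetical scheduler-side caller: safe even when rq belongs
	 * to another CPU, because the wrapper drops non-local updates. */
	static void demo_note_rt_runtime(struct rq *rq)
	{
		cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
	}

The local-only restriction exists because the hook pointer is per-CPU data reached via this_cpu_ptr(), and because callbacks such as sugov_get_util() read this_rq() directly, so firing the hook for a remote runqueue would sample the wrong CPU.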