From 58919e83c85c3a3c5fb34025dc0e95ddd998c478 Mon Sep 17 00:00:00 2001 From: Rafael J. Wysocki Date: Tue, 16 Aug 2016 22:14:55 +0200 Subject: cpufreq / sched: Pass flags to cpufreq_update_util() It is useful to know the reason why cpufreq_update_util() has just been called and that can be passed as flags to cpufreq_update_util() and to the ->func() callback in struct update_util_data. However, doing that in addition to passing the util and max arguments they already take would be clumsy, so avoid it. Instead, use the observation that the schedutil governor is part of the scheduler proper, so it can access scheduler data directly. This allows the util and max arguments of cpufreq_update_util() and the ->func() callback in struct update_util_data to be replaced with a flags one, but schedutil has to be modified to follow. Thus make the schedutil governor obtain the CFS utilization information from the scheduler and use the "RT" and "DL" flags instead of the special utilization value of ULONG_MAX to track updates from the RT and DL sched classes. Make it non-modular too to avoid having to export scheduler variables to modules at large. Next, update all of the other users of cpufreq_update_util() and the ->func() callback in struct update_util_data accordingly. Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar --- kernel/sched/cpufreq.c | 2 +- kernel/sched/cpufreq_schedutil.c | 67 ++++++++++++++++++++++------------------ kernel/sched/deadline.c | 4 +-- kernel/sched/fair.c | 12 +++---- kernel/sched/rt.c | 4 +-- kernel/sched/sched.h | 31 ++++++------------- 6 files changed, 56 insertions(+), 64 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c index 1141954e73b4..dbc51442ecbc 100644 --- a/kernel/sched/cpufreq.c +++ b/kernel/sched/cpufreq.c @@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); */ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, void (*func)(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max)) + unsigned int flags)) { if (WARN_ON(!data || !func)) return; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index a84641b222c1..60d985f4dc47 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -12,7 +12,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include @@ -53,6 +52,7 @@ struct sugov_cpu { unsigned long util; unsigned long max; u64 last_update; + unsigned int flags; }; static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); @@ -144,24 +144,39 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, return cpufreq_driver_resolve_freq(policy, freq); } +static void sugov_get_util(unsigned long *util, unsigned long *max) +{ + struct rq *rq = this_rq(); + unsigned long cfs_max = rq->cpu_capacity_orig; + + *util = min(rq->cfs.avg.util_avg, cfs_max); + *max = cfs_max; +} + static void sugov_update_single(struct update_util_data *hook, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; + unsigned long util, max; unsigned int next_f; if (!sugov_should_update_freq(sg_policy, time)) return; - next_f = util == ULONG_MAX ? 
policy->cpuinfo.max_freq : - get_next_freq(sg_cpu, util, max); + if (flags & SCHED_CPUFREQ_RT_DL) { + next_f = policy->cpuinfo.max_freq; + } else { + sugov_get_util(&util, &max); + next_f = get_next_freq(sg_cpu, util, max); + } sugov_update_commit(sg_policy, time, next_f); } static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, - unsigned long util, unsigned long max) + unsigned long util, unsigned long max, + unsigned int flags) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; @@ -169,7 +184,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 last_freq_update_time = sg_policy->last_freq_update_time; unsigned int j; - if (util == ULONG_MAX) + if (flags & SCHED_CPUFREQ_RT_DL) return max_f; for_each_cpu(j, policy->cpus) { @@ -192,10 +207,10 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, if (delta_ns > TICK_NSEC) continue; - j_util = j_sg_cpu->util; - if (j_util == ULONG_MAX) + if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) return max_f; + j_util = j_sg_cpu->util; j_max = j_sg_cpu->max; if (j_util * max > j_max * util) { util = j_util; @@ -207,20 +222,24 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, } static void sugov_update_shared(struct update_util_data *hook, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; + unsigned long util, max; unsigned int next_f; + sugov_get_util(&util, &max); + raw_spin_lock(&sg_policy->update_lock); sg_cpu->util = util; sg_cpu->max = max; + sg_cpu->flags = flags; sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_cpu, util, max); + next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); sugov_update_commit(sg_policy, time, next_f); } @@ -444,8 +463,9 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->sg_policy = sg_policy; if (policy_is_shared(policy)) { - sg_cpu->util = ULONG_MAX; + sg_cpu->util = 0; sg_cpu->max = 0; + sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; sg_cpu->cached_raw_freq = 0; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, @@ -495,28 +515,15 @@ static struct cpufreq_governor schedutil_gov = { .limits = sugov_limits, }; -static int __init sugov_module_init(void) -{ - return cpufreq_register_governor(&schedutil_gov); -} - -static void __exit sugov_module_exit(void) -{ - cpufreq_unregister_governor(&schedutil_gov); -} - -MODULE_AUTHOR("Rafael J. Wysocki "); -MODULE_DESCRIPTION("Utilization-based CPU frequency selection"); -MODULE_LICENSE("GPL"); - #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL struct cpufreq_governor *cpufreq_default_governor(void) { return &schedutil_gov; } - -fs_initcall(sugov_module_init); -#else -module_init(sugov_module_init); #endif -module_exit(sugov_module_exit); + +static int __init sugov_register(void) +{ + return cpufreq_register_governor(&schedutil_gov); +} +fs_initcall(sugov_register); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1ce8867283dc..4464cc3e4f3d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -735,9 +735,9 @@ static void update_curr_dl(struct rq *rq) return; } - /* kick cpufreq (see the comment in linux/cpufreq.h). */ + /* kick cpufreq (see the comment in kernel/sched/sched.h). 
*/ if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); + cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_DL); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 039de34f1521..f91fa5796e50 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2875,11 +2875,8 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {} static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) { - struct rq *rq = rq_of(cfs_rq); - int cpu = cpu_of(rq); - - if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) { - unsigned long max = rq->cpu_capacity_orig; + if (&this_rq()->cfs == cfs_rq) { + struct rq *rq = rq_of(cfs_rq); /* * There are a few boundary cases this might miss but it should @@ -2897,8 +2894,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) * * See cpu_util(). */ - cpufreq_update_util(rq_clock(rq), - min(cfs_rq->avg.util_avg, max), max); + cpufreq_update_util(rq_clock(rq), 0); } } @@ -3162,7 +3158,7 @@ static inline void update_load_avg(struct sched_entity *se, int not_used) struct cfs_rq *cfs_rq = cfs_rq_of(se); struct rq *rq = rq_of(cfs_rq); - cpufreq_trigger_update(rq_clock(rq)); + cpufreq_update_util(rq_clock(rq), 0); } static inline void diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d5690b722691..8a9cd9ba5153 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -957,9 +957,9 @@ static void update_curr_rt(struct rq *rq) if (unlikely((s64)delta_exec <= 0)) return; - /* Kick cpufreq (see the comment in linux/cpufreq.h). */ + /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ if (cpu_of(rq) == smp_processor_id()) - cpufreq_trigger_update(rq_clock(rq)); + cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_RT); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c64fc5114004..82fc5542708c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1764,26 +1764,12 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); /** * cpufreq_update_util - Take a note about CPU utilization changes. * @time: Current time. - * @util: Current utilization. - * @max: Utilization ceiling. + * @flags: Update reason flags. * - * This function is called by the scheduler on every invocation of - * update_load_avg() on the CPU whose utilization is being updated. + * This function is called by the scheduler on the CPU whose utilization is + * being updated. * * It can only be called from RCU-sched read-side critical sections. - */ -static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) -{ - struct update_util_data *data; - - data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); - if (data) - data->func(data, time, util, max); -} - -/** - * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. - * @time: Current time. * * The way cpufreq is currently arranged requires it to evaluate the CPU * performance state (frequency/voltage) on a regular basis to prevent it from @@ -1797,13 +1783,16 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo * but that really is a band-aid. Going forward it should be replaced with * solutions targeted more specifically at RT and DL tasks. 
*/ -static inline void cpufreq_trigger_update(u64 time) +static inline void cpufreq_update_util(u64 time, unsigned int flags) { - cpufreq_update_util(time, ULONG_MAX, 0); + struct update_util_data *data; + + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data) + data->func(data, time, flags); } #else -static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {} -static inline void cpufreq_trigger_update(u64 time) {} +static inline void cpufreq_update_util(u64 time, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ #ifdef arch_scale_freq_capacity -- cgit v1.2.3 From 12bde33dbb3eadd60343a8a71c39766073c1d752 Mon Sep 17 00:00:00 2001 From: Rafael J. Wysocki Date: Wed, 10 Aug 2016 03:11:17 +0200 Subject: cpufreq / sched: Pass runqueue pointer to cpufreq_update_util() All of the callers of cpufreq_update_util() pass rq_clock(rq) to it as the time argument and some of them check whether or not cpu_of(rq) is equal to smp_processor_id() before calling it, so rework it to take a runqueue pointer as the argument and move the rq_clock(rq) evaluation into it. Additionally, provide a wrapper checking cpu_of(rq) against smp_processor_id() for the cpufreq_update_util() callers that need it. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar --- kernel/sched/deadline.c | 3 +-- kernel/sched/fair.c | 9 ++------- kernel/sched/rt.c | 3 +-- kernel/sched/sched.h | 15 +++++++++++---- 4 files changed, 15 insertions(+), 15 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 4464cc3e4f3d..974779656999 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -736,8 +736,7 @@ static void update_curr_dl(struct rq *rq) } /* kick cpufreq (see the comment in kernel/sched/sched.h). */ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_DL); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f91fa5796e50..5d558cc91f08 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2876,8 +2876,6 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {} static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) { if (&this_rq()->cfs == cfs_rq) { - struct rq *rq = rq_of(cfs_rq); - /* * There are a few boundary cases this might miss but it should * get called often enough that that should (hopefully) not be @@ -2894,7 +2892,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) * * See cpu_util(). */ - cpufreq_update_util(rq_clock(rq), 0); + cpufreq_update_util(rq_of(cfs_rq), 0); } } @@ -3155,10 +3153,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) static inline void update_load_avg(struct sched_entity *se, int not_used) { - struct cfs_rq *cfs_rq = cfs_rq_of(se); - struct rq *rq = rq_of(cfs_rq); - - cpufreq_update_util(rq_clock(rq), 0); + cpufreq_update_util(rq_of(cfs_rq_of(se)), 0); } static inline void diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8a9cd9ba5153..2516b8df6dbb 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -958,8 +958,7 @@ static void update_curr_rt(struct rq *rq) return; /* Kick cpufreq (see the comment in kernel/sched/sched.h). 
*/ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_RT); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 82fc5542708c..b7fc1ced4380 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1763,7 +1763,7 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); /** * cpufreq_update_util - Take a note about CPU utilization changes. - * @time: Current time. + * @rq: Runqueue to carry out the update for. * @flags: Update reason flags. * * This function is called by the scheduler on the CPU whose utilization is @@ -1783,16 +1783,23 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); * but that really is a band-aid. Going forward it should be replaced with * solutions targeted more specifically at RT and DL tasks. */ -static inline void cpufreq_update_util(u64 time, unsigned int flags) +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { struct update_util_data *data; data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); if (data) - data->func(data, time, flags); + data->func(data, rq_clock(rq), flags); +} + +static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) +{ + if (cpu_of(rq) == smp_processor_id()) + cpufreq_update_util(rq, flags); } #else -static inline void cpufreq_update_util(u64 time, unsigned int flags) {} +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} +static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ #ifdef arch_scale_freq_capacity -- cgit v1.2.3 From 8314bc83f6a33958a033955e9bdc48e8dd4d5fb0 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Fri, 26 Aug 2016 11:40:47 -0700 Subject: cpufreq / sched: ignore SMT when determining max cpu capacity PELT does not consider SMT when scaling its utilization values via arch_scale_cpu_capacity(). The value in rq->cpu_capacity_orig does take SMT into consideration though and therefore may be smaller than the utilization reported by PELT. On an Intel i7-3630QM for example rq->cpu_capacity_orig is 589 but util_avg scales up to 1024. This means that a 50% utilized CPU will show up in schedutil as ~86% busy. Fix this by using the same CPU scaling value in schedutil as that which is used by PELT. Signed-off-by: Steve Muckle Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/sched') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 60d985f4dc47..cb8a77b1ef1b 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -147,7 +147,9 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, static void sugov_get_util(unsigned long *util, unsigned long *max) { struct rq *rq = this_rq(); - unsigned long cfs_max = rq->cpu_capacity_orig; + unsigned long cfs_max; + + cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id()); *util = min(rq->cfs.avg.util_avg, cfs_max); *max = cfs_max; -- cgit v1.2.3 From 8c34ab1910a79319731107ec8ecd2e80893ea30c Mon Sep 17 00:00:00 2001 From: Rafael J. 
Wysocki Date: Fri, 9 Sep 2016 23:59:33 +0200 Subject: cpufreq / sched: SCHED_CPUFREQ_IOWAIT flag to indicate iowait condition Testing indicates that it is possible to improve performance significantly without increasing energy consumption too much by teaching cpufreq governors to bump up the CPU performance level if the in_iowait flag is set for the task in enqueue_task_fair(). For this purpose, define a new cpufreq_update_util() flag SCHED_CPUFREQ_IOWAIT and modify enqueue_task_fair() to pass that flag to cpufreq_update_util() in the in_iowait case. That generally requires cpufreq_update_util() to be called directly from there, because update_load_avg() may not be invoked in that case. Signed-off-by: Rafael J. Wysocki Looks-good-to: Steve Muckle Acked-by: Peter Zijlstra (Intel) --- include/linux/sched.h | 1 + kernel/sched/fair.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'kernel/sched') diff --git a/include/linux/sched.h b/include/linux/sched.h index b0fa726b7f31..98fe95fea30c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3471,6 +3471,7 @@ static inline unsigned long rlimit_max(unsigned int limit) #define SCHED_CPUFREQ_RT (1U << 0) #define SCHED_CPUFREQ_DL (1U << 1) +#define SCHED_CPUFREQ_IOWAIT (1U << 2) #define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5d558cc91f08..a5cd07b25aa1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4500,6 +4500,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; + /* + * If in_iowait is set, the code below may not trigger any cpufreq + * utilization updates, so do it here explicitly with the IOWAIT flag + * passed. + */ + if (p->in_iowait) + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); + for_each_sched_entity(se) { if (se->on_rq) break; -- cgit v1.2.3 From 21ca6d2c52f8ca8638129c1dfc489d0b0ae68532 Mon Sep 17 00:00:00 2001 From: Rafael J. Wysocki Date: Sat, 10 Sep 2016 00:00:31 +0200 Subject: cpufreq: schedutil: Add iowait boosting Modify the schedutil cpufreq governor to boost the CPU frequency if the SCHED_CPUFREQ_IOWAIT flag is passed to it via cpufreq_update_util(). If that happens, the frequency is set to the maximum during the first update after receiving the SCHED_CPUFREQ_IOWAIT flag and then the boost is reduced by half during each following update. Signed-off-by: Rafael J. Wysocki Looks-good-to: Steve Muckle Acked-by: Peter Zijlstra (Intel) --- kernel/sched/cpufreq_schedutil.c | 53 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 4 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index cb8a77b1ef1b..69e06898997d 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -47,11 +47,13 @@ struct sugov_cpu { struct sugov_policy *sg_policy; unsigned int cached_raw_freq; + unsigned long iowait_boost; + unsigned long iowait_boost_max; + u64 last_update; /* The fields below are only needed when sharing a policy. 
*/ unsigned long util; unsigned long max; - u64 last_update; unsigned int flags; }; @@ -155,6 +157,36 @@ static void sugov_get_util(unsigned long *util, unsigned long *max) *max = cfs_max; } +static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, + unsigned int flags) +{ + if (flags & SCHED_CPUFREQ_IOWAIT) { + sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + } else if (sg_cpu->iowait_boost) { + s64 delta_ns = time - sg_cpu->last_update; + + /* Clear iowait_boost if the CPU appears to have been idle. */ + if (delta_ns > TICK_NSEC) + sg_cpu->iowait_boost = 0; + } +} + +static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, + unsigned long *max) +{ + unsigned long boost_util = sg_cpu->iowait_boost; + unsigned long boost_max = sg_cpu->iowait_boost_max; + + if (!boost_util) + return; + + if (*util * boost_max < *max * boost_util) { + *util = boost_util; + *max = boost_max; + } + sg_cpu->iowait_boost >>= 1; +} + static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned int flags) { @@ -164,6 +196,9 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned long util, max; unsigned int next_f; + sugov_set_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + if (!sugov_should_update_freq(sg_policy, time)) return; @@ -171,6 +206,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, next_f = policy->cpuinfo.max_freq; } else { sugov_get_util(&util, &max); + sugov_iowait_boost(sg_cpu, &util, &max); next_f = get_next_freq(sg_cpu, util, max); } sugov_update_commit(sg_policy, time, next_f); @@ -189,6 +225,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, if (flags & SCHED_CPUFREQ_RT_DL) return max_f; + sugov_iowait_boost(sg_cpu, &util, &max); + for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu; unsigned long j_util, j_max; @@ -203,12 +241,13 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, * frequency update and the time elapsed between the last update * of the CPU utilization and the last frequency update is long * enough, don't take the CPU into account as it probably is - * idle now. + * idle now (and clear iowait_boost for it). */ delta_ns = last_freq_update_time - j_sg_cpu->last_update; - if (delta_ns > TICK_NSEC) + if (delta_ns > TICK_NSEC) { + j_sg_cpu->iowait_boost = 0; continue; - + } if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) return max_f; @@ -218,6 +257,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, util = j_util; max = j_max; } + + sugov_iowait_boost(j_sg_cpu, &util, &max); } return get_next_freq(sg_cpu, util, max); @@ -238,6 +279,8 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->util = util; sg_cpu->max = max; sg_cpu->flags = flags; + + sugov_set_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { @@ -470,6 +513,8 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; sg_cpu->cached_raw_freq = 0; + sg_cpu->iowait_boost = 0; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, sugov_update_shared); } else { -- cgit v1.2.3
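
The iowait boosting introduced by the last two commits can be summarised as follows: an update carrying SCHED_CPUFREQ_IOWAIT arms the boost at the policy's maximum frequency, the boost is cleared again if the CPU looks idle for more than a tick, it only overrides the CFS utilization when it implies a higher util/max ratio (compared by cross-multiplication to avoid a division), and it is halved on every update it is consulted. The standalone C sketch below is a minimal userspace model of that logic, not kernel code; the tick length, the 2,000,000 kHz maximum frequency, the 1024 capacity scale, the model_* names and the main() driver are illustrative assumptions rather than values or interfaces taken from the patches.

/*
 * Minimal userspace model of the schedutil iowait boost shown above.
 * The constants below are illustrative assumptions, not kernel values.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_TICK_NSEC   1000000ULL   /* assume a 1 ms scheduler tick */
#define MODEL_MAX_FREQ    2000000UL    /* assume policy->cpuinfo.max_freq in kHz */
#define MODEL_CFS_MAX     1024UL       /* assume the usual capacity scale */

struct model_cpu {
	unsigned long iowait_boost;
	unsigned long iowait_boost_max;
	uint64_t last_update;
};

/*
 * Arm or decay-clear the boost as sugov_set_iowait_boost() does; the
 * last_update bookkeeping done by its callers is folded in here.
 */
static void model_set_iowait_boost(struct model_cpu *cpu, uint64_t time, bool iowait)
{
	if (iowait) {
		cpu->iowait_boost = cpu->iowait_boost_max;
	} else if (cpu->iowait_boost) {
		int64_t delta_ns = (int64_t)(time - cpu->last_update);

		/* Clear the boost if the CPU appears to have been idle. */
		if (delta_ns > (int64_t)MODEL_TICK_NSEC)
			cpu->iowait_boost = 0;
	}
	cpu->last_update = time;
}

/* Mirrors sugov_iowait_boost(): use the boost if it implies a higher ratio. */
static void model_iowait_boost(struct model_cpu *cpu, unsigned long *util,
			       unsigned long *max)
{
	unsigned long boost_util = cpu->iowait_boost;
	unsigned long boost_max = cpu->iowait_boost_max;

	if (!boost_util)
		return;

	/* Cross-multiplication: is boost_util / boost_max > *util / *max ? */
	if (*util * boost_max < *max * boost_util) {
		*util = boost_util;
		*max = boost_max;
	}
	/* Reduce the boost by half for the next update. */
	cpu->iowait_boost >>= 1;
}

int main(void)
{
	struct model_cpu cpu = { 0, MODEL_MAX_FREQ, 0 };
	unsigned long cfs_util = 100, cfs_max = MODEL_CFS_MAX;	/* ~10% busy CPU */
	uint64_t now = 0;

	/* First update after an iowait wakeup: ratio jumps to the maximum. */
	model_set_iowait_boost(&cpu, now, true);

	for (int i = 0; i < 6; i++) {
		unsigned long util = cfs_util, max = cfs_max;

		model_iowait_boost(&cpu, &util, &max);
		printf("update %d: util/max = %lu/%lu\n", i, util, max);

		now += MODEL_TICK_NSEC / 2;	/* next update well within a tick */
		model_set_iowait_boost(&cpu, now, false);
	}
	return 0;
}

With a CPU that is only ~10% busy (util 100 out of 1024), this model reports the full boosted ratio on the first update, then a ratio that halves on each subsequent update, and finally falls back to the plain CFS utilization once the decayed boost implies a lower ratio, which is the behaviour the changelog of commit 21ca6d2c52f8 describes; the cross-multiplied comparison is what lets the kernel code make that decision without a division.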