From 7a17e1db1265471f7718af100cfc5e41280d53a7 Mon Sep 17 00:00:00 2001
From: Qais Yousef
Date: Thu, 16 Dec 2021 22:53:19 +0000
Subject: sched/sugov: Ignore 'busy' filter when rq is capped by uclamp_max

sugov_update_single_{freq, perf}() contains a 'busy' filter that ensures
we don't bring the frequency down if there's no idle time (CPU is busy).

The problem is that with uclamp_max we will have scenarios where a busy
task is capped to run at a lower frequency and this filter prevents
applying the capping when this task starts running.

We handle this by skipping the filter when uclamp is enabled and the rq
is being capped by uclamp_max.

We introduce a new function uclamp_rq_is_capped() to help detect when
this capping is taking effect. Some code shuffling was required to allow
using cpu_util_{cfs, rt}() in this new function.

On a 2-core SMT2 Intel laptop I see:

Without this patch:

	uclampset -M 0 sysbench --test=cpu --threads = 4 run

	produces a score of ~3200 consistently, which is the highest possible.

	Compiling the kernel also results in the frequency running at the
	3.1GHz max all the time - running uclampset -M 400 to cap it has no
	effect without this patch.

With this patch:

	uclampset -M 0 sysbench --test=cpu --threads = 4 run

	produces a score of ~1100 with some outliers around ~1700. uclamp_max
	aggregates the performance requirements across runnable tasks, so
	occasional high values are expected if some other task that requires a
	higher frequency happens to start running at the same time.

	When compiling the kernel with uclampset -M 400 I can see the
	frequencies mostly in the ~2GHz region. This helps conserve power and
	prevent overheating when not plugged in.

Fixes: 982d9cdc22c9 ("sched/cpufreq, sched/uclamp: Add clamps for FAIR and RT tasks")
Signed-off-by: Qais Yousef
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20211216225320.2957053-2-qais.yousef@arm.com
---
 kernel/sched/cpufreq_schedutil.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'kernel/sched/cpufreq_schedutil.c')

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 26778884d9ab..62d98b09aaa5 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -348,8 +348,11 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
 	/*
 	 * Do not reduce the frequency if the CPU has not been idle
 	 * recently, as the reduction is likely to be premature then.
+	 *
+	 * Except when the rq is capped by uclamp_max.
 	 */
-	if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
+	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
+	    sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
 		next_f = sg_policy->next_freq;
 
 		/* Restore cached freq as next_freq has changed */
@@ -395,8 +398,11 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
 	/*
 	 * Do not reduce the target performance level if the CPU has not been
 	 * idle recently, as the reduction is likely to be premature then.
+	 *
+	 * Except when the rq is capped by uclamp_max.
 	 */
-	if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
+	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
+	    sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
 		sg_cpu->util = prev_util;
 
 	cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
-- cgit v1.2.3
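The uclamp_rq_is_capped() helper referenced above is added to kernel/sched/sched.h,
which is outside the path this listing is limited to, so its definition does not
appear in the diff. As a rough sketch of the check it performs, based on the
changelog description (not a verbatim copy of the kernel code):

	/*
	 * Rough sketch: the rq counts as "capped" when uclamp is in use and
	 * the aggregated CFS + RT utilization of the rq exceeds the rq-wide
	 * UCLAMP_MAX value, i.e. the cap is actually biting.
	 */
	static inline bool uclamp_rq_is_capped(struct rq *rq)
	{
		unsigned long rq_util;
		unsigned long max_util;

		if (!static_branch_likely(&sched_uclamp_used))
			return false;

		rq_util  = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq);
		max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);

		return max_util != SCHED_CAPACITY_SCALE && rq_util > max_util;
	}

When this returns true, the busy filter in sugov_update_single_{freq, perf}() is
skipped, so a freshly capped rq is allowed to drop its frequency even though it
has had no idle time.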
From d37aee9018e68b0d356195caefbb651910e0bbfa Mon Sep 17 00:00:00 2001
From: Qais Yousef
Date: Thu, 16 Dec 2021 22:53:20 +0000
Subject: sched/uclamp: Fix iowait boost escaping uclamp restriction

The iowait_boost signal is applied independently of util and doesn't take
into account the uclamp settings of the rq. An IO-heavy task that is capped
by uclamp_max could still request a higher frequency because
sugov_iowait_apply() doesn't clamp the boost via uclamp_rq_util_with()
like effective_cpu_util() does.

Make sure that iowait_boost honours uclamp requests by calling
uclamp_rq_util_with() when applying the boost.

Fixes: 982d9cdc22c9 ("sched/cpufreq, sched/uclamp: Add clamps for FAIR and RT tasks")
Signed-off-by: Qais Yousef
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Rafael J. Wysocki
Link: https://lore.kernel.org/r/20211216225320.2957053-3-qais.yousef@arm.com
---
 kernel/sched/cpufreq_schedutil.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/sched/cpufreq_schedutil.c')

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 62d98b09aaa5..6d65ab6e484e 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -289,6 +289,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
 	 * into the same scale so we can compare.
 	 */
 	boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
+	boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
 	if (sg_cpu->util < boost)
 		sg_cpu->util = boost;
 }
-- cgit v1.2.3
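uclamp_rq_util_with() is the existing helper that effective_cpu_util() already
uses; passing the boost through it simply clamps the value into the rq's
aggregated uclamp range before it can raise sg_cpu->util. A simplified sketch of
that clamping for the NULL-task case used here (the real helper also merges the
clamps of a specific task when one is passed and short-circuits when uclamp is
unused; the helper name below is made up for illustration):

	static inline unsigned long rq_uclamp_clamp(struct rq *rq, unsigned long util)
	{
		unsigned long min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
		unsigned long max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);

		/* Clamps are max-aggregated per rq, so the range can invert. */
		if (min_util >= max_util)
			return min_util;

		return clamp(util, min_util, max_util);
	}

With the added line, an IO-boosted value can never exceed the rq's UCLAMP_MAX,
so a uclamp_max-capped but IO-heavy task no longer drives the frequency above
its cap.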
From 801c141955108fb7cf1244dda76e6de8b16fd3ae Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Tue, 22 Feb 2022 13:23:24 +0100
Subject: sched/headers: Introduce kernel/sched/build_utility.c and build
 multiple .c files there

Collect all utility functionality source code files into a single
kernel/sched/build_utility.c file, via #include-ing the .c files:

    kernel/sched/clock.c
    kernel/sched/completion.c
    kernel/sched/loadavg.c
    kernel/sched/swait.c
    kernel/sched/wait_bit.c
    kernel/sched/wait.c

    CONFIG_CPU_FREQ:               kernel/sched/cpufreq.c
    CONFIG_CPU_FREQ_GOV_SCHEDUTIL: kernel/sched/cpufreq_schedutil.c
    CONFIG_CGROUP_CPUACCT:         kernel/sched/cpuacct.c
    CONFIG_SCHED_DEBUG:            kernel/sched/debug.c
    CONFIG_SCHEDSTATS:             kernel/sched/stats.c

    CONFIG_SMP:
        kernel/sched/cpupri.c
        kernel/sched/stop_task.c
        kernel/sched/topology.c

    CONFIG_SCHED_CORE:             kernel/sched/core_sched.c
    CONFIG_PSI:                    kernel/sched/psi.c
    CONFIG_MEMBARRIER:             kernel/sched/membarrier.c
    CONFIG_CPU_ISOLATION:          kernel/sched/isolation.c
    CONFIG_SCHED_AUTOGROUP:        kernel/sched/autogroup.c

The goal is to amortize the 60+ KLOC header bloat from over a dozen build
units into a single build unit.

The build time of build_utility.c also roughly matches the build time of
core.c and fair.c - allowing better load-balancing of scheduler-only
rebuilds.

Signed-off-by: Ingo Molnar
Reviewed-by: Peter Zijlstra
---
 kernel/sched/Makefile            | 25 ++++++--------
 kernel/sched/autogroup.c         |  3 +-
 kernel/sched/build_utility.c     | 70 ++++++++++++++++++++++++++++++++++++++++
 kernel/sched/clock.c             |  2 --
 kernel/sched/completion.c        |  2 +-
 kernel/sched/core_sched.c        |  3 --
 kernel/sched/cpuacct.c           |  3 +-
 kernel/sched/cpufreq.c           |  3 --
 kernel/sched/cpufreq_schedutil.c |  7 ----
 kernel/sched/cpupri.c            |  1 -
 kernel/sched/debug.c             |  1 -
 kernel/sched/isolation.c         |  1 -
 kernel/sched/loadavg.c           |  1 -
 kernel/sched/membarrier.c        |  1 -
 kernel/sched/psi.c               | 15 ---------
 kernel/sched/sched.h             | 57 ++++++++++++++++++++++++++++++--
 kernel/sched/stats.c             |  1 -
 kernel/sched/stop_task.c         |  1 -
 kernel/sched/swait.c             |  1 -
 kernel/sched/topology.c          |  1 -
 kernel/sched/wait.c              |  1 -
 kernel/sched/wait_bit.c          |  2 +-
 22 files changed, 139 insertions(+), 63 deletions(-)
 create mode 100644 kernel/sched/build_utility.c

(limited to 'kernel/sched/cpufreq_schedutil.c')

diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index c0c52026ad0d..4a4785cb3cd2 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -22,18 +22,13 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
 endif
 
-obj-y += core.o loadavg.o clock.o cputime.o
-obj-y += idle.o fair.o rt.o deadline.o
-obj-y += wait.o wait_bit.o swait.o completion.o
-
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
-obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
-obj-$(CONFIG_SCHEDSTATS) += stats.o
-obj-$(CONFIG_SCHED_DEBUG) += debug.o
-obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
-obj-$(CONFIG_CPU_FREQ) += cpufreq.o
-obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
-obj-$(CONFIG_MEMBARRIER) += membarrier.o
-obj-$(CONFIG_CPU_ISOLATION) += isolation.o
-obj-$(CONFIG_PSI) += psi.o
-obj-$(CONFIG_SCHED_CORE) += core_sched.o
+#
+# Build efficiency:
+#
+# These compilation units have roughly the same size and complexity - so their
+# build parallelizes well and finishes roughly at once:
+#
+obj-y += core.o
+obj-y += fair.o
+obj-y += build_utility.o
+obj-y += idle.o rt.o deadline.o cputime.o cpudeadline.o pelt.o

diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 31dd2593145e..16092b49ff6a 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -1,9 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
+
 /*
  * Auto-group scheduling implementation:
  */
-#include
-#include "sched.h"
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
 static struct autogroup autogroup_default;
diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c
new file mode 100644
index 000000000000..31216ce0b4b3
--- /dev/null
+++ b/kernel/sched/build_utility.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * These are various utility functions of the scheduler,
+ * built in a single compilation unit for build efficiency reasons.
+ *
+ * ( Incidentally, the size of the compilation unit is roughly
+ *   comparable to core.c, fair.c, smp.c and policy.c, the other
+ *   big compilation units. This helps balance build time, while
+ *   coalescing source files to amortize header inclusion
+ *   cost. )
+ */
+
+#include "sched.h"
+#include "sched-pelt.h"
+
+#include
+
+#include "clock.c"
+
+#ifdef CONFIG_CGROUP_CPUACCT
+# include "cpuacct.c"
+#endif
+
+#ifdef CONFIG_CPU_FREQ
+# include "cpufreq.c"
+#endif
+
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+# include "cpufreq_schedutil.c"
+#endif
+
+#ifdef CONFIG_SCHED_DEBUG
+# include "debug.c"
+#endif
+
+#ifdef CONFIG_SCHEDSTATS
+# include "stats.c"
+#endif
+
+#include "loadavg.c"
+#include "completion.c"
+#include "swait.c"
+#include "wait_bit.c"
+#include "wait.c"
+
+#ifdef CONFIG_SMP
+# include "cpupri.c"
+# include "stop_task.c"
+# include "topology.c"
+#endif
+
+#ifdef CONFIG_SCHED_CORE
+# include "core_sched.c"
+#endif
+
+#ifdef CONFIG_PSI
+# include "psi.c"
+#endif
+
+#ifdef CONFIG_MEMBARRIER
+# include "membarrier.c"
+#endif
+
+#ifdef CONFIG_CPU_ISOLATION
+# include "isolation.c"
+#endif
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+# include "autogroup.c"
+#endif

diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index 540d0e50e31c..d9272d9061a3 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -53,8 +53,6 @@
  * that is otherwise invisible (TSC gets stopped).
  *
  */
-#include "sched.h"
-#include
 
 /*
  * Scheduler clock - returns current time in nanosec units.

diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index a778554f9dad..35f15c26ed54 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+
 /*
  * Generic wait-for-completion handler;
  *
@@ -11,7 +12,6 @@
  * typically be used for exclusion which gives rise to priority inversion.
  * Waiting for completion is a typically sync point, but not an exclusion point.
  */
-#include "sched.h"
 
 /**
  * complete: - signals a single thread waiting on this completion

diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index c8746a9a7ada..38a2cec21014 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -1,8 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
-#include
-#include "sched.h"
-
 /*
  * A simple wrapper around refcount. An allocated sched_core_cookie's
  * address is used to compute the cookie of the task.

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 3d06c5e4220d..2c505cf800aa 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -1,12 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
+
 /*
  * CPU accounting code for task groups.
  *
  * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
  * (balbir@in.ibm.com).
  */
-#include
-#include "sched.h"
 
 /* Time spent by the tasks of the CPU accounting group executing in ... */
 enum cpuacct_stat_index {

diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 7c2fe50fd76d..5252fb191fae 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -5,9 +5,6 @@
  * Copyright (C) 2016, Intel Corporation
  * Author: Rafael J. Wysocki
  */
-#include
-
-#include "sched.h"
 
 DEFINE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 6d65ab6e484e..f68885d049f2 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -6,13 +6,6 @@
  * Author: Rafael J. Wysocki
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include "sched.h"
-
-#include
-#include
-
 #define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8)
 
 struct sugov_tunables {

diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index d583f2aa744e..fa9ce9d83683 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -22,7 +22,6 @@
  * worst case complexity of O(min(101, nr_domcpus)), though the scenario that
  * yields the worst case search is fairly contrived.
  */
-#include "sched.h"
 
 /*
  * p->rt_priority p->prio newpri cpupri

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 102d6f70e84d..bb3d63bdf4ae 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -6,7 +6,6 @@
  *
  * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
  */
-#include "sched.h"
 
 /*
  * This allows printing both to /proc/sched_debug and

diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index b4d10815c45a..373d42c707bc 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -7,7 +7,6 @@
  * Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
  *
  */
-#include "sched.h"
 
 enum hk_flags {
 	HK_FLAG_TIMER = BIT(HK_TYPE_TIMER),

diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 954b229868d9..52c8f8226b0d 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -6,7 +6,6 @@
  * figure. Its a silly number but people think its important. We go through
  * great pains to make it work on big machines and tickless kernels.
  */
-#include "sched.h"
 
 /*
  * Global load-average calculations

diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 3d2825408e3a..0c5be7ebb1dc 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -4,7 +4,6 @@
  *
  * membarrier system call
  */
-#include "sched.h"
 
 /*
  * For documentation purposes, here are some membarrier ordering

diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 8fb08a12f602..a4fa3aadfcba 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -137,21 +137,6 @@
  * sampling of the aggregate task states would be.
  */
-#include "../workqueue_internal.h"
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include "sched.h"
-
 static int psi_bug __read_mostly;
 
 DEFINE_STATIC_KEY_FALSE(psi_disabled);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index eab4a18f71c2..79c7a8a2be65 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -6,7 +6,25 @@
 #define _KERNEL_SCHED_SCHED_H
 
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
 #include
 #include
 #include
@@ -24,6 +42,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -31,6 +50,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -69,8 +89,7 @@
 #include
 #include
 #include
-
-#include
+#include
 
 #ifdef CONFIG_PARAVIRT
 # include
@@ -87,6 +106,40 @@
 # define SCHED_WARN_ON(x) ({ (void)(x), 0; })
 #endif
 
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "../workqueue_internal.h"
+
 struct rq;
 struct cpuidle_state;

diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index 07dde2928c79..857f837f52cb 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -2,7 +2,6 @@
 /*
  * /proc/schedstat implementation
  */
-#include "sched.h"
 
 void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
 			       struct sched_statistics *stats)

diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 0b165a25f22f..d04073a93eb4 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -7,7 +7,6 @@
  *
  * See kernel/stop_machine.c
  */
-#include "sched.h"
 
 #ifdef CONFIG_SMP
 static int

diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index e1c655f928c7..76b9b796e695 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -2,7 +2,6 @@
 /*
  * (simple wait queues ) implementation:
  */
-#include "sched.h"
 
 void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
 			     struct lock_class_key *key)

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 32841c6741d1..e8af72fd70bd 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2,7 +2,6 @@
 /*
  * Scheduler topology setup/handling methods
  */
-#include "sched.h"
 
 DEFINE_MUTEX(sched_domains_mutex);

diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index eca38107b32f..9860bb9a847c 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -4,7 +4,6 @@
  *
  * (C) 2004 Nadia Yvette Chambers, Oracle
  */
-#include "sched.h"
 
 void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
 {

diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index 02ce292b9bc0..d4788f810b55 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
+
 /*
  * The implementation of the wait_bit*() and related waiting APIs:
  */
-#include "sched.h"
 
 #define WAIT_TABLE_BITS 8
 #define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
-- cgit v1.2.3
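The consolidation in the last patch is essentially a "unity build" of the
scheduler's utility code: the expensive shared headers are parsed once per
rebuild instead of once per .c file, and the CONFIG_* switches that used to
select whole object files in the Makefile become #ifdef guards around the
#include of each .c file. A generic, hypothetical illustration of the pattern
(file and option names are made up, not kernel code):

	/* build_all.c - one compilation unit for a set of small .c files. */
	#include "common.h"        /* heavyweight shared headers, parsed once */

	#include "always_on.c"     /* unconditionally built code */

	#ifdef CONFIG_FEATURE_X
	# include "feature_x.c"    /* previously: obj-$(CONFIG_FEATURE_X) += feature_x.o */
	#endif

The individual files can then drop their own #include lines, which is exactly
what the per-file hunks above do.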