aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched/topology.c
diff options
context:
space:
mode:
authorLinus Torvalds2020-10-12 12:56:01 -0700
committerLinus Torvalds2020-10-12 12:56:01 -0700
commitedaa5ddf3833669a25654d42c0fb653dfdd906df (patch)
tree156c29b0375581dfa7c7e578d8d61924b3eb81b2 /kernel/sched/topology.c
parent13cb73490f475f8e7669f9288be0bcfa85399b1f (diff)
parentfeff2e65efd8d84cf831668e182b2ce73c604bbb (diff)
Merge tag 'sched-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: - reorganize & clean up the SD* flags definitions and add a bunch of sanity checks. These new checks caught quite a few bugs or at least inconsistencies, resulting in another set of patches. - rseq updates, add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ - add a new tracepoint to improve CPU capacity tracking - improve overloaded SMP system load-balancing behavior - tweak SMT balancing - energy-aware scheduling updates - NUMA balancing improvements - deadline scheduler fixes and improvements - CPU isolation fixes - misc cleanups, simplifications and smaller optimizations * tag 'sched-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (42 commits) sched/deadline: Unthrottle PI boosted threads while enqueuing sched/debug: Add new tracepoint to track cpu_capacity sched/fair: Tweak pick_next_entity() rseq/selftests: Test MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ rseq/selftests,x86_64: Add rseq_offset_deref_addv() rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ sched/fair: Use dst group while checking imbalance for NUMA balancer sched/fair: Reduce busy load balance interval sched/fair: Minimize concurrent LBs between domain level sched/fair: Reduce minimal imbalance threshold sched/fair: Relax constraint on task's load during load balance sched/fair: Remove the force parameter of update_tg_load_avg() sched/fair: Fix wrong cpu selecting from isolated domain sched: Remove unused inline function uclamp_bucket_base_value() sched/rt: Disable RT_RUNTIME_SHARE by default sched/deadline: Fix stale throttling on de-/boosted tasks sched/numa: Use runnable_avg to classify node sched/topology: Move sd_flag_debug out of #ifdef CONFIG_SYSCTL MAINTAINERS: Add myself as SCHED_DEADLINE reviewer sched/topology: Move SD_DEGENERATE_GROUPS_MASK out of linux/sched/topology.h ...
Diffstat (limited to 'kernel/sched/topology.c')
-rw-r--r--kernel/sched/topology.c69
1 files changed, 40 insertions, 29 deletions
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 1bd7e3af904f..dd7770226086 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -25,10 +25,18 @@ static inline bool sched_debug(void)
return sched_debug_enabled;
}
+#define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name },
+const struct sd_flag_debug sd_flag_debug[] = {
+#include <linux/sched/sd_flags.h>
+};
+#undef SD_FLAG
+
static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
struct cpumask *groupmask)
{
struct sched_group *group = sd->groups;
+ unsigned long flags = sd->flags;
+ unsigned int idx;
cpumask_clear(groupmask);
@@ -43,6 +51,21 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
}
+ for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
+ unsigned int flag = BIT(idx);
+ unsigned int meta_flags = sd_flag_debug[idx].meta_flags;
+
+ if ((meta_flags & SDF_SHARED_CHILD) && sd->child &&
+ !(sd->child->flags & flag))
+ printk(KERN_ERR "ERROR: flag %s set here but not in child\n",
+ sd_flag_debug[idx].name);
+
+ if ((meta_flags & SDF_SHARED_PARENT) && sd->parent &&
+ !(sd->parent->flags & flag))
+ printk(KERN_ERR "ERROR: flag %s set here but not in parent\n",
+ sd_flag_debug[idx].name);
+ }
+
printk(KERN_DEBUG "%*s groups:", level + 1, "");
do {
if (!group) {
@@ -137,22 +160,22 @@ static inline bool sched_debug(void)
}
#endif /* CONFIG_SCHED_DEBUG */
+/* Generate a mask of SD flags with the SDF_NEEDS_GROUPS metaflag */
+#define SD_FLAG(name, mflags) (name * !!((mflags) & SDF_NEEDS_GROUPS)) |
+static const unsigned int SD_DEGENERATE_GROUPS_MASK =
+#include <linux/sched/sd_flags.h>
+0;
+#undef SD_FLAG
+
static int sd_degenerate(struct sched_domain *sd)
{
if (cpumask_weight(sched_domain_span(sd)) == 1)
return 1;
/* Following flags need at least 2 groups */
- if (sd->flags & (SD_BALANCE_NEWIDLE |
- SD_BALANCE_FORK |
- SD_BALANCE_EXEC |
- SD_SHARE_CPUCAPACITY |
- SD_ASYM_CPUCAPACITY |
- SD_SHARE_PKG_RESOURCES |
- SD_SHARE_POWERDOMAIN)) {
- if (sd->groups != sd->groups->next)
- return 0;
- }
+ if ((sd->flags & SD_DEGENERATE_GROUPS_MASK) &&
+ (sd->groups != sd->groups->next))
+ return 0;
/* Following flags don't use groups */
if (sd->flags & (SD_WAKE_AFFINE))
@@ -173,18 +196,9 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
return 0;
/* Flags needing groups don't count if only 1 group in parent */
- if (parent->groups == parent->groups->next) {
- pflags &= ~(SD_BALANCE_NEWIDLE |
- SD_BALANCE_FORK |
- SD_BALANCE_EXEC |
- SD_ASYM_CPUCAPACITY |
- SD_SHARE_CPUCAPACITY |
- SD_SHARE_PKG_RESOURCES |
- SD_PREFER_SIBLING |
- SD_SHARE_POWERDOMAIN);
- if (nr_node_ids == 1)
- pflags &= ~SD_SERIALIZE;
- }
+ if (parent->groups == parent->groups->next)
+ pflags &= ~SD_DEGENERATE_GROUPS_MASK;
+
if (~cflags & pflags)
return 0;
@@ -1292,7 +1306,6 @@ int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
* SD_SHARE_CPUCAPACITY - describes SMT topologies
* SD_SHARE_PKG_RESOURCES - describes shared caches
* SD_NUMA - describes NUMA topologies
- * SD_SHARE_POWERDOMAIN - describes shared power domain
*
* Odd one out, which beside describing the topology has a quirk also
* prescribes the desired behaviour that goes along with it:
@@ -1303,8 +1316,7 @@ int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
(SD_SHARE_CPUCAPACITY | \
SD_SHARE_PKG_RESOURCES | \
SD_NUMA | \
- SD_ASYM_PACKING | \
- SD_SHARE_POWERDOMAIN)
+ SD_ASYM_PACKING)
static struct sched_domain *
sd_init(struct sched_domain_topology_level *tl,
@@ -1336,8 +1348,8 @@ sd_init(struct sched_domain_topology_level *tl,
*sd = (struct sched_domain){
.min_interval = sd_weight,
.max_interval = 2*sd_weight,
- .busy_factor = 32,
- .imbalance_pct = 125,
+ .busy_factor = 16,
+ .imbalance_pct = 117,
.cache_nice_tries = 0,
@@ -1989,11 +2001,10 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
/* Set up domains for CPUs specified by the cpu_map: */
for_each_cpu(i, cpu_map) {
struct sched_domain_topology_level *tl;
+ int dflags = 0;
sd = NULL;
for_each_sd_topology(tl) {
- int dflags = 0;
-
if (tl == tl_asym) {
dflags |= SD_ASYM_CPUCAPACITY;
has_asym = true;