diff options
author | Linus Torvalds | 2020-08-06 11:55:43 -0700 |
---|---|---|
committer | Linus Torvalds | 2020-08-06 11:55:43 -0700 |
commit | 6d2b84a4e5b954bd2587e06c29577256f59e0030 (patch) | |
tree | 52fc4d86be60205541d01802388296d3d4d3b756 /kernel | |
parent | 4cec929370763c475111b1eb307df6759b6733e7 (diff) | |
parent | 4fd5750af02ab7bba7c58a073060cc1da8a69173 (diff) |
Merge tag 'sched-fifo-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull sched/fifo updates from Ingo Molnar:
"This adds the sched_set_fifo*() encapsulation APIs to remove static
priority level knowledge from non-scheduler code.
The three APIs for non-scheduler code to set SCHED_FIFO are:
- sched_set_fifo()
- sched_set_fifo_low()
- sched_set_normal()
These are two FIFO priority levels: default (high), and a 'low'
priority level, plus sched_set_normal() to set the policy back to
non-SCHED_FIFO.
Since the changes affect a lot of non-scheduler code, we kept this in
a separate tree"
* tag 'sched-fifo-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
sched,tracing: Convert to sched_set_fifo()
sched: Remove sched_set_*() return value
sched: Remove sched_setscheduler*() EXPORTs
sched,psi: Convert to sched_set_fifo_low()
sched,rcutorture: Convert to sched_set_fifo_low()
sched,rcuperf: Convert to sched_set_fifo_low()
sched,locktorture: Convert to sched_set_fifo()
sched,irq: Convert to sched_set_fifo()
sched,watchdog: Convert to sched_set_fifo()
sched,serial: Convert to sched_set_fifo()
sched,powerclamp: Convert to sched_set_fifo()
sched,ion: Convert to sched_set_normal()
sched,powercap: Convert to sched_set_fifo*()
sched,spi: Convert to sched_set_fifo*()
sched,mmc: Convert to sched_set_fifo*()
sched,ivtv: Convert to sched_set_fifo*()
sched,drm/scheduler: Convert to sched_set_fifo*()
sched,msm: Convert to sched_set_fifo*()
sched,psci: Convert to sched_set_fifo*()
sched,drbd: Convert to sched_set_fifo*()
...
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/irq/manage.c | 6 | ||||
-rw-r--r-- | kernel/locking/locktorture.c | 10 | ||||
-rw-r--r-- | kernel/rcu/rcuperf.c | 8 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 7 | ||||
-rw-r--r-- | kernel/sched/core.c | 50 | ||||
-rw-r--r-- | kernel/sched/psi.c | 5 | ||||
-rw-r--r-- | kernel/trace/ring_buffer_benchmark.c | 48 |
7 files changed, 77 insertions, 57 deletions
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 48c38e09c673..d55ba625d426 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1308,9 +1308,6 @@ static int setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) { struct task_struct *t; - struct sched_param param = { - .sched_priority = MAX_USER_RT_PRIO/2, - }; if (!secondary) { t = kthread_create(irq_thread, new, "irq/%d-%s", irq, @@ -1318,13 +1315,12 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) } else { t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq, new->name); - param.sched_priority -= 1; } if (IS_ERR(t)) return PTR_ERR(t); - sched_setscheduler_nocheck(t, SCHED_FIFO, ¶m); + sched_set_fifo(t); /* * We keep the reference to the task struct even if diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 8ff6f50e06a0..9cfa5e89cff7 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -436,8 +436,6 @@ static int torture_rtmutex_lock(void) __acquires(torture_rtmutex) static void torture_rtmutex_boost(struct torture_random_state *trsp) { - int policy; - struct sched_param param; const unsigned int factor = 50000; /* yes, quite arbitrary */ if (!rt_task(current)) { @@ -448,8 +446,7 @@ static void torture_rtmutex_boost(struct torture_random_state *trsp) */ if (trsp && !(torture_random(trsp) % (cxt.nrealwriters_stress * factor))) { - policy = SCHED_FIFO; - param.sched_priority = MAX_RT_PRIO - 1; + sched_set_fifo(current); } else /* common case, do nothing */ return; } else { @@ -462,13 +459,10 @@ static void torture_rtmutex_boost(struct torture_random_state *trsp) */ if (!trsp || !(torture_random(trsp) % (cxt.nrealwriters_stress * factor * 2))) { - policy = SCHED_NORMAL; - param.sched_priority = 0; + sched_set_normal(current, 0); } else /* common case, do nothing */ return; } - - sched_setscheduler_nocheck(current, policy, ¶m); } static void torture_rtmutex_delay(struct torture_random_state *trsp) diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index ec903d781778..21448d3374e2 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -361,7 +361,6 @@ rcu_perf_writer(void *arg) int i_max; long me = (long)arg; struct rcu_head *rhp = NULL; - struct sched_param sp; bool started = false, done = false, alldone = false; u64 t; u64 *wdp; @@ -370,8 +369,7 @@ rcu_perf_writer(void *arg) VERBOSE_PERFOUT_STRING("rcu_perf_writer task started"); WARN_ON(!wdpp); set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); - sp.sched_priority = 1; - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); + sched_set_fifo_low(current); if (holdoff) schedule_timeout_uninterruptible(holdoff * HZ); @@ -427,9 +425,7 @@ retry: started = true; if (!done && i >= MIN_MEAS) { done = true; - sp.sched_priority = 0; - sched_setscheduler_nocheck(current, - SCHED_NORMAL, &sp); + sched_set_normal(current, 0); pr_alert("%s%s rcu_perf_writer %ld has %d measurements\n", perf_type, PERF_FLAG, me, MIN_MEAS); if (atomic_inc_return(&n_rcu_perf_writer_finished) >= diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index d0d265304d14..f453bf8d2f1e 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -895,16 +895,11 @@ static int rcu_torture_boost(void *arg) unsigned long endtime; unsigned long oldstarttime; struct rcu_boost_inflight rbi = { .inflight = 0 }; - struct sched_param sp; VERBOSE_TOROUT_STRING("rcu_torture_boost started"); /* Set real-time priority. */ - sp.sched_priority = 1; - if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { - VERBOSE_TOROUT_STRING("rcu_torture_boost RT prio failed!"); - n_rcu_torture_boost_rterror++; - } + sched_set_fifo_low(current); init_rcu_head_on_stack(&rbi.rcu); /* Each pass through the following loop does one boost-test cycle. */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4a0e7b449b88..84758f34cdb0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5496,6 +5496,8 @@ static int _sched_setscheduler(struct task_struct *p, int policy, * @policy: new policy. * @param: structure containing the new RT priority. * + * Use sched_set_fifo(), read its comment. + * * Return: 0 on success. An error code otherwise. * * NOTE that the task may be already dead. @@ -5505,13 +5507,11 @@ int sched_setscheduler(struct task_struct *p, int policy, { return _sched_setscheduler(p, policy, param, true); } -EXPORT_SYMBOL_GPL(sched_setscheduler); int sched_setattr(struct task_struct *p, const struct sched_attr *attr) { return __sched_setscheduler(p, attr, true, true); } -EXPORT_SYMBOL_GPL(sched_setattr); int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) { @@ -5536,7 +5536,51 @@ int sched_setscheduler_nocheck(struct task_struct *p, int policy, { return _sched_setscheduler(p, policy, param, false); } -EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); + +/* + * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally + * incapable of resource management, which is the one thing an OS really should + * be doing. + * + * This is of course the reason it is limited to privileged users only. + * + * Worse still; it is fundamentally impossible to compose static priority + * workloads. You cannot take two correctly working static prio workloads + * and smash them together and still expect them to work. + * + * For this reason 'all' FIFO tasks the kernel creates are basically at: + * + * MAX_RT_PRIO / 2 + * + * The administrator _MUST_ configure the system, the kernel simply doesn't + * know enough information to make a sensible choice. + */ +void sched_set_fifo(struct task_struct *p) +{ + struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; + WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); +} +EXPORT_SYMBOL_GPL(sched_set_fifo); + +/* + * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. + */ +void sched_set_fifo_low(struct task_struct *p) +{ + struct sched_param sp = { .sched_priority = 1 }; + WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); +} +EXPORT_SYMBOL_GPL(sched_set_fifo_low); + +void sched_set_normal(struct task_struct *p, int nice) +{ + struct sched_attr attr = { + .sched_policy = SCHED_NORMAL, + .sched_nice = nice, + }; + WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); +} +EXPORT_SYMBOL_GPL(sched_set_normal); static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index e53b711bd643..967732c0766c 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -616,11 +616,8 @@ out: static int psi_poll_worker(void *data) { struct psi_group *group = (struct psi_group *)data; - struct sched_param param = { - .sched_priority = 1, - }; - sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); + sched_set_fifo_low(current); while (true) { wait_event_interruptible(group->poll_wait, diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 8df0aa810950..78e576575b79 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -45,8 +45,8 @@ MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); static int producer_nice = MAX_NICE; static int consumer_nice = MAX_NICE; -static int producer_fifo = -1; -static int consumer_fifo = -1; +static int producer_fifo; +static int consumer_fifo; module_param(producer_nice, int, 0644); MODULE_PARM_DESC(producer_nice, "nice prio for producer"); @@ -55,10 +55,10 @@ module_param(consumer_nice, int, 0644); MODULE_PARM_DESC(consumer_nice, "nice prio for consumer"); module_param(producer_fifo, int, 0644); -MODULE_PARM_DESC(producer_fifo, "fifo prio for producer"); +MODULE_PARM_DESC(producer_fifo, "use fifo for producer: 0 - disabled, 1 - low prio, 2 - fifo"); module_param(consumer_fifo, int, 0644); -MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer"); +MODULE_PARM_DESC(consumer_fifo, "use fifo for consumer: 0 - disabled, 1 - low prio, 2 - fifo"); static int read_events; @@ -303,22 +303,22 @@ static void ring_buffer_producer(void) trace_printk("ERROR!\n"); if (!disable_reader) { - if (consumer_fifo < 0) + if (consumer_fifo) + trace_printk("Running Consumer at SCHED_FIFO %s\n", + consumer_fifo == 1 ? "low" : "high"); + else trace_printk("Running Consumer at nice: %d\n", consumer_nice); - else - trace_printk("Running Consumer at SCHED_FIFO %d\n", - consumer_fifo); } - if (producer_fifo < 0) + if (producer_fifo) + trace_printk("Running Producer at SCHED_FIFO %s\n", + producer_fifo == 1 ? "low" : "high"); + else trace_printk("Running Producer at nice: %d\n", producer_nice); - else - trace_printk("Running Producer at SCHED_FIFO %d\n", - producer_fifo); /* Let the user know that the test is running at low priority */ - if (producer_fifo < 0 && consumer_fifo < 0 && + if (!producer_fifo && !consumer_fifo && producer_nice == MAX_NICE && consumer_nice == MAX_NICE) trace_printk("WARNING!!! This test is running at lowest priority.\n"); @@ -455,21 +455,19 @@ static int __init ring_buffer_benchmark_init(void) * Run them as low-prio background tasks by default: */ if (!disable_reader) { - if (consumer_fifo >= 0) { - struct sched_param param = { - .sched_priority = consumer_fifo - }; - sched_setscheduler(consumer, SCHED_FIFO, ¶m); - } else + if (consumer_fifo >= 2) + sched_set_fifo(consumer); + else if (consumer_fifo == 1) + sched_set_fifo_low(consumer); + else set_user_nice(consumer, consumer_nice); } - if (producer_fifo >= 0) { - struct sched_param param = { - .sched_priority = producer_fifo - }; - sched_setscheduler(producer, SCHED_FIFO, ¶m); - } else + if (producer_fifo >= 2) + sched_set_fifo(producer); + else if (producer_fifo == 1) + sched_set_fifo_low(producer); + else set_user_nice(producer, producer_nice); return 0; |