aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
authorPeter Zijlstra2020-09-24 13:50:42 +0200
committerPeter Zijlstra2020-11-17 13:15:28 +0100
commitec618b84f6e15281cc3660664d34cd0dd2f2579e (patch)
treeaf7536d66f934bb1979bf9e9dd2052082ee9eaea /kernel/sched/core.c
parentf97bb5272d9e95d400d6c8643ebb146b3e3e7842 (diff)
sched: Fix rq->nr_iowait ordering
schedule() ttwu() deactivate_task(); if (p->on_rq && ...) // false atomic_dec(&task_rq(p)->nr_iowait); if (prev->in_iowait) atomic_inc(&rq->nr_iowait); Allows nr_iowait to be decremented before it gets incremented, resulting in more dodgy IO-wait numbers than usual. Note that because we can now do ttwu_queue_wakelist() before p->on_cpu==0, we lose the natural ordering and have to further delay the decrement. Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") Reported-by: Tejun Heo <tj@kernel.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Mel Gorman <mgorman@techsingularity.net> Link: https://lkml.kernel.org/r/20201117093829.GD3121429@hirez.programming.kicks-ass.net
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c15
1 files changed, 10 insertions, 5 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d2003a7d5ab5..9f0ebfb0d17b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2501,7 +2501,12 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
#ifdef CONFIG_SMP
if (wake_flags & WF_MIGRATED)
en_flags |= ENQUEUE_MIGRATED;
+ else
#endif
+ if (p->in_iowait) {
+ delayacct_blkio_end(p);
+ atomic_dec(&task_rq(p)->nr_iowait);
+ }
activate_task(rq, p, en_flags);
ttwu_do_wakeup(rq, p, wake_flags, rf);
@@ -2888,11 +2893,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
goto unlock;
- if (p->in_iowait) {
- delayacct_blkio_end(p);
- atomic_dec(&task_rq(p)->nr_iowait);
- }
-
#ifdef CONFIG_SMP
/*
* Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
@@ -2963,6 +2963,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
if (task_cpu(p) != cpu) {
+ if (p->in_iowait) {
+ delayacct_blkio_end(p);
+ atomic_dec(&task_rq(p)->nr_iowait);
+ }
+
wake_flags |= WF_MIGRATED;
psi_ttwu_dequeue(p);
set_task_cpu(p, cpu);