sched/core: Fix incorrect utilization accounting when switching to fair class

When a task switches to fair scheduling class, the period between now and the last update of its utilization is accounted as running time whatever happened during this period. This incorrect accounting applies to the task and also to the task group branch. When changing the property of a running task like its list of allowed CPUs or its scheduling class, we follow the sequence: - dequeue task - put task - change the property - set task as current task - enqueue task The end of the sequence doesn't follow the normal sequence (as per __schedule()) which is: - enqueue a task - then set the task as current task. This incorrectordering is the root cause of incorrect utilization accounting. Update the sequence to follow the right one: - dequeue task - put task - change the property - enqueue task - set task as current task Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Morten.Rasmussen@arm.com Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: bsegall@google.com Cc: dietmar.eggemann@arm.com Cc: linaro-kernel@lists.linaro.org Cc: pjt@google.com Cc: yuyang.du@intel.com Link: http://lkml.kernel.org/r/1473666472-13749-8-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Vincent Guittot 2016-09-12 09:47:52 +0200
committer: Ingo Molnar 2016-09-30 11:03:27 +0200
commit: a399d233078edbba7cf7902a6d080100cdf75636 (patch)
tree: c0b2adc93ff94863ae0c52ad293af486e9a1d6f1 /kernel/sched
parent: 1b568f0aabf280555125bc7cefc08321ff0ebaba (diff)
1 files changed, 10 insertions, 10 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94115453c1c4..2b5e150e070b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1109,10 +1109,10 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 
 	p->sched_class->set_cpus_allowed(p, new_mask);
 
-	if (running)
-		p->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, p, ENQUEUE_RESTORE);
+	if (running)
+		p->sched_class->set_curr_task(rq);
 }
 
 /*
@@ -3707,10 +3707,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
 	p->prio = prio;
 
-	if (running)
-		p->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, p, queue_flag);
+	if (running)
+		p->sched_class->set_curr_task(rq);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
@@ -4263,8 +4263,6 @@ change:
 	prev_class = p->sched_class;
 	__setscheduler(rq, p, attr, pi);
 
-	if (running)
-		p->sched_class->set_curr_task(rq);
 	if (queued) {
 		/*
 		 * We enqueue to tail when the priority of a task is
@@ -4275,6 +4273,8 @@ change:
 
 		enqueue_task(rq, p, queue_flags);
 	}
+	if (running)
+		p->sched_class->set_curr_task(rq);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 	preempt_disable(); /* avoid rq from going away on us */
@@ -5439,10 +5439,10 @@ void sched_setnuma(struct task_struct *p, int nid)
 
 	p->numa_preferred_nid = nid;
 
-	if (running)
-		p->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, p, ENQUEUE_RESTORE);
+	if (running)
+		p->sched_class->set_curr_task(rq);
 	task_rq_unlock(rq, p, &rf);
 }
 #endif /* CONFIG_NUMA_BALANCING */
@@ -7949,10 +7949,10 @@ void sched_move_task(struct task_struct *tsk)
 
 	sched_change_group(tsk, TASK_MOVE_GROUP);
 
-	if (unlikely(running))
-		tsk->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
+	if (unlikely(running))
+		tsk->sched_class->set_curr_task(rq);
 
 	task_rq_unlock(rq, tsk, &rf);
 }
author	Vincent Guittot	2016-09-12 09:47:52 +0200
committer	Ingo Molnar	2016-09-30 11:03:27 +0200
commit	a399d233078edbba7cf7902a6d080100cdf75636 (patch)
tree	c0b2adc93ff94863ae0c52ad293af486e9a1d6f1 /kernel/sched
parent	1b568f0aabf280555125bc7cefc08321ff0ebaba (diff)