diff options
author | Li Zefan | 2013-06-13 15:11:44 +0800 |
---|---|---|
committer | Tejun Heo | 2013-06-13 10:51:22 -0700 |
commit | f047cecf2cfc9595b1f39c9aab383bb0682f5a53 (patch) | |
tree | e3cdfd41e39c1a43ab9cba2721efb0aaf000e858 /kernel/cpuset.c | |
parent | 88fa523bff295f1d60244a54833480b02f775152 (diff) |
cpuset: fix to migrate mm correctly in a corner case
Before moving tasks out of empty cpusets, update_tasks_nodemask()
is called, which calls do_migrate_pages(xx, from, to). Then those
tasks are moved to an ancestor, and do_migrate_pages() is called
again.
The first time: from = node_to_be_offlined, to = empty.
The second time: from = empty, to = ancestor's nodemask.
So it looks like no pages will be migrated, because each call has an empty nodemask on one side.
Fix this by:
- Don't call update_tasks_nodemask() on empty cpusets.
- Pass cs->old_mems_allowed to do_migrate_pages().
v4: added comment in cpuset_hotplug_update_tasks() and rephrased comment
in cpuset_attach().
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 25 |
1 files changed, 19 insertions, 6 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3b3fdfdd4d78..4c17d96bd3a5 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1563,9 +1563,18 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs); mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); - if (is_memory_migrate(cs)) - cpuset_migrate_mm(mm, &mems_oldcs->mems_allowed, + + /* + * old_mems_allowed is the same with mems_allowed here, except + * if this task is being moved automatically due to hotplug. + * In that case @mems_allowed has been updated and is empty, + * so @old_mems_allowed is the right nodesets that we migrate + * mm from. + */ + if (is_memory_migrate(cs)) { + cpuset_migrate_mm(mm, &mems_oldcs->old_mems_allowed, &cpuset_attach_nodemask_to); + } mmput(mm); } @@ -2152,10 +2161,12 @@ retry: /* * If sane_behavior flag is set, we need to update tasks' cpumask - * for empty cpuset to take on ancestor's cpumask. + * for empty cpuset to take on ancestor's cpumask. Otherwise, don't + * call update_tasks_cpumask() if the cpuset becomes empty, as + * the tasks in it will be migrated to an ancestor. */ if ((sane && cpumask_empty(cs->cpus_allowed)) || - !cpumask_empty(&off_cpus)) + (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) update_tasks_cpumask(cs, NULL); mutex_lock(&callback_mutex); @@ -2164,10 +2175,12 @@ retry: /* * If sane_behavior flag is set, we need to update tasks' nodemask - * for empty cpuset to take on ancestor's nodemask. + * for empty cpuset to take on ancestor's nodemask. Otherwise, don't + * call update_tasks_nodemask() if the cpuset becomes empty, as + * the tasks in it will be migratd to an ancestor. */ if ((sane && nodes_empty(cs->mems_allowed)) || - !nodes_empty(off_mems)) + (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) update_tasks_nodemask(cs, NULL); is_empty = cpumask_empty(cs->cpus_allowed) || |