diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 109 |
1 files changed, 71 insertions, 38 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9acfb165eb52..fc10620967c7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -903,14 +903,20 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, if (prev && reclaim->generation != iter->generation) goto out_unlock; - do { + while (1) { pos = READ_ONCE(iter->position); + if (!pos || css_tryget(&pos->css)) + break; /* - * A racing update may change the position and - * put the last reference, hence css_tryget(), - * or retry to see the updated position. + * css reference reached zero, so iter->position will + * be cleared by ->css_released. However, we should not + * rely on this happening soon, because ->css_released + * is called from a work queue, and by busy-waiting we + * might block it. So we clear iter->position right + * away. */ - } while (pos && !css_tryget(&pos->css)); + (void)cmpxchg(&iter->position, pos, NULL); + } } if (pos) @@ -956,17 +962,13 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, } if (reclaim) { - if (cmpxchg(&iter->position, pos, memcg) == pos) { - if (memcg) - css_get(&memcg->css); - if (pos) - css_put(&pos->css); - } - /* - * pairs with css_tryget when dereferencing iter->position - * above. + * The position could have already been updated by a competing + * thread, so check that the value hasn't changed since we read + * it to avoid reclaiming from the same cgroup twice. */ + (void)cmpxchg(&iter->position, pos, memcg); + if (pos) css_put(&pos->css); @@ -999,6 +1001,28 @@ void mem_cgroup_iter_break(struct mem_cgroup *root, css_put(&prev->css); } +static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) +{ + struct mem_cgroup *memcg = dead_memcg; + struct mem_cgroup_reclaim_iter *iter; + struct mem_cgroup_per_zone *mz; + int nid, zid; + int i; + + while ((memcg = parent_mem_cgroup(memcg))) { + for_each_node(nid) { + for (zid = 0; zid < MAX_NR_ZONES; zid++) { + mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; + for (i = 0; i <= DEF_PRIORITY; i++) { + iter = &mz->iter[i]; + cmpxchg(&iter->position, + dead_memcg, NULL); + } + } + } + } +} + /* * Iteration constructs for visiting all cgroups (under a tree). If * loops are exited prematurely (break), mem_cgroup_iter_break() must @@ -2128,7 +2152,7 @@ done_restock: */ do { if (page_counter_read(&memcg->memory) > memcg->high) { - current->memcg_nr_pages_over_high += nr_pages; + current->memcg_nr_pages_over_high += batch; set_notify_resume(current); break; } @@ -4324,6 +4348,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) wb_memcg_offline(memcg); } +static void mem_cgroup_css_released(struct cgroup_subsys_state *css) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + invalidate_reclaim_iterators(memcg); +} + static void mem_cgroup_css_free(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); @@ -4779,23 +4810,18 @@ static void mem_cgroup_clear_mc(void) spin_unlock(&mc.lock); } -static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static int mem_cgroup_can_attach(struct cgroup_taskset *tset) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct cgroup_subsys_state *css; + struct mem_cgroup *memcg; struct mem_cgroup *from; struct task_struct *leader, *p; struct mm_struct *mm; unsigned long move_flags; int ret = 0; - /* - * We are now commited to this value whatever it is. Changes in this - * tunable will only affect upcoming migrations, not the current one. - * So we need to save it, and keep it going. - */ - move_flags = READ_ONCE(memcg->move_charge_at_immigrate); - if (!move_flags) + /* charge immigration isn't supported on the default hierarchy */ + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) return 0; /* @@ -4805,13 +4831,23 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, * multiple. */ p = NULL; - cgroup_taskset_for_each_leader(leader, tset) { + cgroup_taskset_for_each_leader(leader, css, tset) { WARN_ON_ONCE(p); p = leader; + memcg = mem_cgroup_from_css(css); } if (!p) return 0; + /* + * We are now commited to this value whatever it is. Changes in this + * tunable will only affect upcoming migrations, not the current one. + * So we need to save it, and keep it going. + */ + move_flags = READ_ONCE(memcg->move_charge_at_immigrate); + if (!move_flags) + return 0; + from = mem_cgroup_from_task(p); VM_BUG_ON(from == memcg); @@ -4842,8 +4878,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, return ret; } -static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset) { if (mc.to) mem_cgroup_clear_mc(); @@ -4985,10 +5020,10 @@ retry: atomic_dec(&mc.from->moving_account); } -static void mem_cgroup_move_task(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static void mem_cgroup_move_task(struct cgroup_taskset *tset) { - struct task_struct *p = cgroup_taskset_first(tset); + struct cgroup_subsys_state *css; + struct task_struct *p = cgroup_taskset_first(tset, &css); struct mm_struct *mm = get_task_mm(p); if (mm) { @@ -5000,17 +5035,14 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css, mem_cgroup_clear_mc(); } #else /* !CONFIG_MMU */ -static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static int mem_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; } -static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset) { } -static void mem_cgroup_move_task(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static void mem_cgroup_move_task(struct cgroup_taskset *tset) { } #endif @@ -5184,6 +5216,7 @@ struct cgroup_subsys memory_cgrp_subsys = { .css_alloc = mem_cgroup_css_alloc, .css_online = mem_cgroup_css_online, .css_offline = mem_cgroup_css_offline, + .css_released = mem_cgroup_css_released, .css_free = mem_cgroup_css_free, .css_reset = mem_cgroup_css_reset, .can_attach = mem_cgroup_can_attach, @@ -5511,11 +5544,11 @@ void mem_cgroup_uncharge_list(struct list_head *page_list) * mem_cgroup_replace_page - migrate a charge to another page * @oldpage: currently charged page * @newpage: page to transfer the charge to - * @lrucare: either or both pages might be on the LRU already * * Migrate the charge from @oldpage to @newpage. * * Both pages must be locked, @newpage->mapping must be set up. + * Either or both pages might be on the LRU already. */ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) { |