diff options
-rw-r--r-- | Documentation/controllers/memory.txt | 27 | ||||
-rw-r--r-- | mm/memcontrol.c | 41 |
2 files changed, 57 insertions, 11 deletions
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 58f32c166fac..54253b7a8db2 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -237,11 +237,30 @@ reclaimed. A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a cgroup might have some charge associated with it, even though all tasks have migrated away from it. -Such charges are moved to its parent as much as possible and freed if parent -is full. Both of RSS and CACHES are moved to parent. -If both of them are busy, rmdir() returns -EBUSY. +Such charges are freed(at default) or moved to its parent. When moved, +both of RSS and CACHES are moved to parent. +If both of them are busy, rmdir() returns -EBUSY. See 5.1 Also. -5. TODO +5. Misc. interfaces. + +5.1 force_empty + memory.force_empty interface is provided to make cgroup's memory usage empty. + You can use this interface only when the cgroup has no tasks. + When writing anything to this + + # echo 0 > memory.force_empty + + Almost all pages tracked by this memcg will be unmapped and freed. Some of + pages cannot be freed because it's locked or in-use. Such pages are moved + to parent and this cgroup will be empty. But this may return -EBUSY in + some too busy case. + + Typical use case of this interface is that calling this before rmdir(). + Because rmdir() moves all pages to parent, some out-of-use page caches can be + moved to the parent. If you want to avoid that, force_empty will be useful. + + +6. TODO 1. Add support for accounting huge pages (as a separate controller) 2. Make per-cgroup scanner reclaim not-shared pages first diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e00f25e6545f..decace3bb57e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1062,21 +1062,27 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, * make mem_cgroup's charge to be 0 if there is no task. * This enables deleting this mem_cgroup. */ -static int mem_cgroup_force_empty(struct mem_cgroup *mem) +static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) { int ret; int node, zid, shrink; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; + struct cgroup *cgrp = mem->css.cgroup; css_get(&mem->css); shrink = 0; + /* should free all ? */ + if (free_all) + goto try_to_free; move_account: while (mem->res.usage > 0) { ret = -EBUSY; - if (atomic_read(&mem->css.cgroup->count) > 0) + if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) + goto out; + ret = -EINTR; + if (signal_pending(current)) goto out; - /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); ret = 0; @@ -1106,19 +1112,29 @@ out: return ret; try_to_free: - /* returns EBUSY if we come here twice. */ - if (shrink) { + /* returns EBUSY if there is a task or if we come here twice. */ + if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) { ret = -EBUSY; goto out; } + /* we call try-to-free pages for make this cgroup empty */ + lru_add_drain_all(); /* try to free all pages in this cgroup */ shrink = 1; while (nr_retries && mem->res.usage > 0) { int progress; + + if (signal_pending(current)) { + ret = -EINTR; + goto out; + } progress = try_to_free_mem_cgroup_pages(mem, GFP_HIGHUSER_MOVABLE); - if (!progress) + if (!progress) { nr_retries--; + /* maybe some writeback is necessary */ + congestion_wait(WRITE, HZ/10); + } } /* try move_account...there may be some *locked* pages. */ @@ -1128,6 +1144,12 @@ try_to_free: goto out; } +int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) +{ + return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); +} + + static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) { return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, @@ -1225,6 +1247,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, return 0; } + static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", @@ -1253,6 +1276,10 @@ static struct cftype mem_cgroup_files[] = { .name = "stat", .read_map = mem_control_stat_show, }, + { + .name = "force_empty", + .trigger = mem_cgroup_force_empty_write, + }, }; static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) @@ -1350,7 +1377,7 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, struct cgroup *cont) { struct mem_cgroup *mem = mem_cgroup_from_cont(cont); - mem_cgroup_force_empty(mem); + mem_cgroup_force_empty(mem, false); } static void mem_cgroup_destroy(struct cgroup_subsys *ss, |