From 1e32e77f95d60b121b6072e3e3a650a7f93068f9 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 4 Jun 2014 16:07:37 -0700 Subject: memcg, slab: do not schedule cache destruction when last page goes away This patchset is a part of preparations for kmemcg re-parenting. It targets at simplifying kmemcg work-flows and synchronization. First, it removes async per memcg cache destruction (see patches 1, 2). Now caches are only destroyed on memcg offline. That means the caches that are not empty on memcg offline will be leaked. However, they are already leaked, because memcg_cache_params::nr_pages normally never drops to 0 so the destruction work is never scheduled except kmem_cache_shrink is called explicitly. In the future I'm planning reaping such dead caches on vmpressure or periodically. Second, it substitutes per memcg slab_caches_mutex's with the global memcg_slab_mutex, which should be taken during the whole per memcg cache creation/destruction path before the slab_mutex (see patch 3). This greatly simplifies synchronization among various per memcg cache creation/destruction paths. I'm still not quite sure about the end picture, in particular I don't know whether we should reap dead memcgs' kmem caches periodically or try to merge them with their parents (see https://lkml.org/lkml/2014/4/20/38 for more details), but whichever way we choose, this set looks like a reasonable change to me, because it greatly simplifies kmemcg work-flows and eases further development. This patch (of 3): After a memcg is offlined, we mark its kmem caches that cannot be deleted right now due to pending objects as dead by setting the memcg_cache_params::dead flag, so that memcg_release_pages will schedule cache destruction (memcg_cache_params::destroy) as soon as the last slab of the cache is freed (memcg_cache_params::nr_pages drops to zero). I guess the idea was to destroy the caches as soon as possible, i.e. immediately after freeing the last object. However, it just doesn't work that way, because kmem caches always preserve some pages for the sake of performance, so that nr_pages never gets to zero unless the cache is shrunk explicitly using kmem_cache_shrink. Of course, we could account the total number of objects on the cache or check if all the slabs allocated for the cache are empty on kmem_cache_free and schedule destruction if so, but that would be too costly. Thus we have a piece of code that works only when we explicitly call kmem_cache_shrink, but complicates the whole picture a lot. Moreover, it's racy in fact. For instance, kmem_cache_shrink may free the last slab and thus schedule cache destruction before it finishes checking that the cache is empty, which can lead to use-after-free. So I propose to remove this async cache destruction from memcg_release_pages, and check if the cache is empty explicitly after calling kmem_cache_shrink instead. This will simplify things a lot w/o introducing any functional changes. And regarding dead memcg caches (i.e. those that are left hanging around after memcg offline for they have objects), I suppose we should reap them either periodically or on vmpressure as Glauber suggested initially. I'm going to implement this later. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Glauber Costa Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 - include/linux/slab.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5155d09e749d..087a45314181 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -509,7 +509,6 @@ __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size); void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size); -void mem_cgroup_destroy_cache(struct kmem_cache *cachep); int __kmem_cache_destroy_memcg_children(struct kmem_cache *s); /** diff --git a/include/linux/slab.h b/include/linux/slab.h index a6aab2c0dfc5..905541dd3778 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -524,7 +524,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) * @memcg: pointer to the memcg this cache belongs to * @list: list_head for the list of all caches in this memcg * @root_cache: pointer to the global, root cache, this cache was derived from - * @dead: set to true after the memcg dies; the cache may still be around. * @nr_pages: number of pages that belongs to this cache. * @destroy: worker to be called whenever we are ready, or believe we may be * ready, to destroy this cache. @@ -540,7 +539,6 @@ struct memcg_cache_params { struct mem_cgroup *memcg; struct list_head list; struct kmem_cache *root_cache; - bool dead; atomic_t nr_pages; struct work_struct destroy; }; -- cgit v1.2.3