From c1a67fefd0546a5552289c65fe31b1d60e64b643 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 4 May 2015 16:52:20 -0700 Subject: mempool: Add mempool_init()/mempool_exit() Allows mempools to be embedded in other structs, getting rid of a pointer indirection from allocation fastpaths. mempool_exit() is safe to call on an uninitialized but zeroed mempool. Signed-off-by: Kent Overstreet Signed-off-by: Jens Axboe --- mm/mempool.c | 108 ++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 81 insertions(+), 27 deletions(-) (limited to 'mm') diff --git a/mm/mempool.c b/mm/mempool.c index 5c9dce34719b..b54f2c20e5e0 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -137,6 +137,28 @@ static void *remove_element(mempool_t *pool, gfp_t flags) return element; } +/** + * mempool_exit - exit a mempool initialized with mempool_init() + * @pool: pointer to the memory pool which was initialized with + * mempool_init(). + * + * Free all reserved elements in @pool and @pool itself. This function + * only sleeps if the free_fn() function sleeps. + * + * May be called on a zeroed but uninitialized mempool (i.e. allocated with + * kzalloc()). + */ +void mempool_exit(mempool_t *pool) +{ + while (pool->curr_nr) { + void *element = remove_element(pool, GFP_KERNEL); + pool->free(element, pool->pool_data); + } + kfree(pool->elements); + pool->elements = NULL; +} +EXPORT_SYMBOL(mempool_exit); + /** * mempool_destroy - deallocate a memory pool * @pool: pointer to the memory pool which was allocated via @@ -150,15 +172,65 @@ void mempool_destroy(mempool_t *pool) if (unlikely(!pool)) return; - while (pool->curr_nr) { - void *element = remove_element(pool, GFP_KERNEL); - pool->free(element, pool->pool_data); - } - kfree(pool->elements); + mempool_exit(pool); kfree(pool); } EXPORT_SYMBOL(mempool_destroy); +int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data, + gfp_t gfp_mask, int node_id) +{ + spin_lock_init(&pool->lock); + pool->min_nr = min_nr; + pool->pool_data = pool_data; + pool->alloc = alloc_fn; + pool->free = free_fn; + init_waitqueue_head(&pool->wait); + + pool->elements = kmalloc_array_node(min_nr, sizeof(void *), + gfp_mask, node_id); + if (!pool->elements) + return -ENOMEM; + + /* + * First pre-allocate the guaranteed number of buffers. + */ + while (pool->curr_nr < pool->min_nr) { + void *element; + + element = pool->alloc(gfp_mask, pool->pool_data); + if (unlikely(!element)) { + mempool_exit(pool); + return -ENOMEM; + } + add_element(pool, element); + } + + return 0; +} +EXPORT_SYMBOL(mempool_init_node); + +/** + * mempool_init - initialize a memory pool + * @min_nr: the minimum number of elements guaranteed to be + * allocated for this pool. + * @alloc_fn: user-defined element-allocation function. + * @free_fn: user-defined element-freeing function. + * @pool_data: optional private data available to the user-defined functions. + * + * Like mempool_create(), but initializes the pool in (i.e. embedded in another + * structure). + */ +int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data) +{ + return mempool_init_node(pool, min_nr, alloc_fn, free_fn, + pool_data, GFP_KERNEL, NUMA_NO_NODE); + +} +EXPORT_SYMBOL(mempool_init); + /** * mempool_create - create a memory pool * @min_nr: the minimum number of elements guaranteed to be @@ -186,35 +258,17 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, gfp_t gfp_mask, int node_id) { mempool_t *pool; + pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id); if (!pool) return NULL; - pool->elements = kmalloc_array_node(min_nr, sizeof(void *), - gfp_mask, node_id); - if (!pool->elements) { + + if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data, + gfp_mask, node_id)) { kfree(pool); return NULL; } - spin_lock_init(&pool->lock); - pool->min_nr = min_nr; - pool->pool_data = pool_data; - init_waitqueue_head(&pool->wait); - pool->alloc = alloc_fn; - pool->free = free_fn; - /* - * First pre-allocate the guaranteed number of buffers. - */ - while (pool->curr_nr < pool->min_nr) { - void *element; - - element = pool->alloc(gfp_mask, pool->pool_data); - if (unlikely(!element)) { - mempool_destroy(pool); - return NULL; - } - add_element(pool, element); - } return pool; } EXPORT_SYMBOL(mempool_create_node); -- cgit v1.2.3 From f183464684190bacbfb14623bd3e4e51b7575b4c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 May 2018 10:56:32 -0700 Subject: bdi: Move cgroup bdi_writeback to a dedicated low concurrency workqueue From 0aa2e9b921d6db71150633ff290199554f0842a8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 May 2018 10:29:00 -0700 cgwb_release() punts the actual release to cgwb_release_workfn() on system_wq. Depending on the number of cgroups or block devices, there can be a lot of cgwb_release_workfn() in flight at the same time. We're periodically seeing close to 256 kworkers getting stuck with the following stack trace and overtime the entire system gets stuck. [] _synchronize_rcu_expedited.constprop.72+0x2fc/0x330 [] synchronize_rcu_expedited+0x24/0x30 [] bdi_unregister+0x53/0x290 [] release_bdi+0x89/0xc0 [] wb_exit+0x85/0xa0 [] cgwb_release_workfn+0x54/0xb0 [] process_one_work+0x150/0x410 [] worker_thread+0x6d/0x520 [] kthread+0x12c/0x160 [] ret_from_fork+0x29/0x40 [] 0xffffffffffffffff The events leading to the lockup are... 1. A lot of cgwb_release_workfn() is queued at the same time and all system_wq kworkers are assigned to execute them. 2. They all end up calling synchronize_rcu_expedited(). One of them wins and tries to perform the expedited synchronization. 3. However, that invovles queueing rcu_exp_work to system_wq and waiting for it. Because #1 is holding all available kworkers on system_wq, rcu_exp_work can't be executed. cgwb_release_workfn() is waiting for synchronize_rcu_expedited() which in turn is waiting for cgwb_release_workfn() to free up some of the kworkers. We shouldn't be scheduling hundreds of cgwb_release_workfn() at the same time. There's nothing to be gained from that. This patch updates cgwb release path to use a dedicated percpu workqueue with @max_active of 1. While this resolves the problem at hand, it might be a good idea to isolate rcu_exp_work to its own workqueue too as it can be used from various paths and is prone to this sort of indirect A-A deadlocks. Signed-off-by: Tejun Heo Cc: "Paul E. McKenney" Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- mm/backing-dev.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7441bd93b732..8fe3ebd6ac00 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -412,6 +412,7 @@ static void wb_exit(struct bdi_writeback *wb) * protected. */ static DEFINE_SPINLOCK(cgwb_lock); +static struct workqueue_struct *cgwb_release_wq; /** * wb_congested_get_create - get or create a wb_congested @@ -522,7 +523,7 @@ static void cgwb_release(struct percpu_ref *refcnt) { struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback, refcnt); - schedule_work(&wb->release_work); + queue_work(cgwb_release_wq, &wb->release_work); } static void cgwb_kill(struct bdi_writeback *wb) @@ -784,6 +785,21 @@ static void cgwb_bdi_register(struct backing_dev_info *bdi) spin_unlock_irq(&cgwb_lock); } +static int __init cgwb_init(void) +{ + /* + * There can be many concurrent release work items overwhelming + * system_wq. Put them in a separate wq and limit concurrency. + * There's no point in executing many of these in parallel. + */ + cgwb_release_wq = alloc_workqueue("cgwb_release", 0, 1); + if (!cgwb_release_wq) + return -ENOMEM; + + return 0; +} +subsys_initcall(cgwb_init); + #else /* CONFIG_CGROUP_WRITEBACK */ static int cgwb_bdi_init(struct backing_dev_info *bdi) -- cgit v1.2.3