From 3480373ebdf7625ee29bee6508c9fc4ae70c00bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 27 Mar 2023 09:49:51 +0900
Subject: btrfs, block: move REQ_CGROUP_PUNT to btrfs

REQ_CGROUP_PUNT is a bit annoying as it is hard to follow and adds a
branch to the bio submission hot path.  To fix this, export
blkcg_punt_bio_submit and let btrfs call it directly.  Add a new
REQ_FS_PRIVATE flag for btrfs to indicate to its own low-level bio
submission code that a punt to the cgroup submission helper is
required.

Reviewed-by: Jens Axboe
Signed-off-by: Christoph Hellwig
Reviewed-by: David Sterba
Signed-off-by: David Sterba
---
 block/blk-cgroup.c | 31 +++++++++++++++++--------------
 block/blk-cgroup.h | 12 ------------
 block/blk-core.c   |  3 ---
 3 files changed, 17 insertions(+), 29 deletions(-)

(limited to 'block')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bd50b55bdb61..9f5f3263c178 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1688,24 +1688,27 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
 
-bool __blkcg_punt_bio_submit(struct bio *bio)
+/*
+ * When a shared kthread issues a bio for a cgroup, doing so synchronously can
+ * lead to priority inversions as the kthread can be trapped waiting for that
+ * cgroup.  Use this helper instead of submit_bio to punt the actual issuing to
+ * a dedicated per-blkcg work item to avoid such priority inversions.
+ */
+void blkcg_punt_bio_submit(struct bio *bio)
 {
         struct blkcg_gq *blkg = bio->bi_blkg;
 
-        /* consume the flag first */
-        bio->bi_opf &= ~REQ_CGROUP_PUNT;
-
-        /* never bounce for the root cgroup */
-        if (!blkg->parent)
-                return false;
-
-        spin_lock_bh(&blkg->async_bio_lock);
-        bio_list_add(&blkg->async_bios, bio);
-        spin_unlock_bh(&blkg->async_bio_lock);
-
-        queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
-        return true;
+        if (blkg->parent) {
+                spin_lock_bh(&blkg->async_bio_lock);
+                bio_list_add(&blkg->async_bios, bio);
+                spin_unlock_bh(&blkg->async_bio_lock);
+                queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+        } else {
+                /* never bounce for the root cgroup */
+                submit_bio(bio);
+        }
 }
+EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
 
 /*
  * Scale the accumulated delay based on how long it has been since we updated
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 9c5078755e5e..64758ab9f1f1 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -375,16 +375,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
         if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css),        \
                                     (p_blkg)->q)))
 
-bool __blkcg_punt_bio_submit(struct bio *bio);
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio)
-{
-        if (bio->bi_opf & REQ_CGROUP_PUNT)
-                return __blkcg_punt_bio_submit(bio);
-        else
-                return false;
-}
-
 static inline void blkcg_bio_issue_init(struct bio *bio)
 {
         bio_issue_init(&bio->bi_issue, bio_sectors(bio));
@@ -506,8 +496,6 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return
 static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
 static inline void blkg_get(struct blkcg_gq *blkg) { }
 static inline void blkg_put(struct blkcg_gq *blkg) { }
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
 static inline void blkcg_bio_issue_init(struct bio *bio) { }
 static inline void blk_cgroup_bio_start(struct bio *bio) { }
 static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
diff --git a/block/blk-core.c b/block/blk-core.c
index 42926e6cb83c..478978dcb2bd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -830,9 +830,6 @@ EXPORT_SYMBOL(submit_bio_noacct);
  */
 void submit_bio(struct bio *bio)
 {
-        if (blkcg_punt_bio_submit(bio))
-                return;
-
         if (bio_op(bio) == REQ_OP_READ) {
                 task_io_account_read(bio->bi_iter.bi_size);
                 count_vm_events(PGPGIN, bio_sectors(bio));
--
cgit v1.2.3
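The btrfs half of the first patch falls outside the "(limited to 'block')"
filter above, so the caller side is not visible here. Roughly, the intent is
that btrfs tags a bio with the new REQ_FS_PRIVATE flag when it was built by a
shared worker on behalf of a cgroup, and its low-level submission path then
picks between the exported punt helper and a plain submit_bio. The sketch
below only illustrates that calling convention; the flag alias and the
function name are assumptions, not code from this series.

  /* Illustrative sketch of the assumed btrfs-side usage. */
  #define REQ_BTRFS_CGROUP_PUNT        REQ_FS_PRIVATE  /* assumed alias */

  static void btrfs_issue_bio(struct bio *bio)          /* hypothetical helper */
  {
          if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT)
                  /* bounce to the per-blkcg worker to avoid priority inversion */
                  blkcg_punt_bio_submit(bio);
          else
                  submit_bio(bio);
  }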
From 12be09fe18f2fd9f882ca0acbe14cf121250bcbe Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 27 Mar 2023 09:49:52 +0900
Subject: block: async_bio_lock does not need to be bh-safe

async_bio_lock is only taken from bio submission and workqueue context,
neither of which runs in bottom halves.

Reviewed-by: Jens Axboe
Signed-off-by: Christoph Hellwig
Signed-off-by: David Sterba
---
 block/blk-cgroup.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'block')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 9f5f3263c178..c524ecab440b 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -198,10 +198,10 @@ static void blkg_async_bio_workfn(struct work_struct *work)
         bool need_plug = false;
 
         /* as long as there are pending bios, @blkg can't go away */
-        spin_lock_bh(&blkg->async_bio_lock);
+        spin_lock(&blkg->async_bio_lock);
         bio_list_merge(&bios, &blkg->async_bios);
         bio_list_init(&blkg->async_bios);
-        spin_unlock_bh(&blkg->async_bio_lock);
+        spin_unlock(&blkg->async_bio_lock);
 
         /* start plug only when bio_list contains at least 2 bios */
         if (bios.head && bios.head->bi_next) {
@@ -1699,9 +1699,9 @@ void blkcg_punt_bio_submit(struct bio *bio)
         struct blkcg_gq *blkg = bio->bi_blkg;
 
         if (blkg->parent) {
-                spin_lock_bh(&blkg->async_bio_lock);
+                spin_lock(&blkg->async_bio_lock);
                 bio_list_add(&blkg->async_bios, bio);
-                spin_unlock_bh(&blkg->async_bio_lock);
+                spin_unlock(&blkg->async_bio_lock);
                 queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
         } else {
                 /* never bounce for the root cgroup */
--
cgit v1.2.3
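The justification in the second patch generalizes to a common locking rule: a
spinlock that is only ever taken in process context (here the bio submission
path and a workqueue callback) never needs the _bh variants, because no bottom
half can interrupt a holder and recurse on the lock. A minimal, self-contained
sketch of the same pattern with made-up names, unrelated to the blkcg code:

  #include <linux/list.h>
  #include <linux/spinlock.h>
  #include <linux/workqueue.h>

  struct pending_item {
          struct list_head node;
  };

  static LIST_HEAD(pending);
  static DEFINE_SPINLOCK(pending_lock);   /* only taken in process context */

  /* called from a submission path, i.e. process context */
  static void queue_pending(struct pending_item *item)
  {
          spin_lock(&pending_lock);       /* no spin_lock_bh() needed */
          list_add_tail(&item->node, &pending);
          spin_unlock(&pending_lock);
  }

  /* workqueue callback, also process context */
  static void drain_pending(struct work_struct *work)
  {
          LIST_HEAD(local);

          spin_lock(&pending_lock);
          list_splice_init(&pending, &local);
          spin_unlock(&pending_lock);
          /* items on 'local' can now be processed without the lock */
  }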
From 2c275afeb61dab732353aae2c7de01b6a87dcefc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 27 Mar 2023 09:49:53 +0900
Subject: block: make blkcg_punt_bio_submit optional

Guard all the code to punt bios to a per-cgroup submission helper by a
new CONFIG_BLK_CGROUP_PUNT_BIO symbol that is selected by btrfs.  This
way non-btrfs kernel builds don't need to have this code.

Reviewed-by: Jens Axboe
Signed-off-by: Christoph Hellwig
Reviewed-by: David Sterba
Signed-off-by: David Sterba
---
 block/Kconfig      |  3 +++
 block/blk-cgroup.c | 77 +++++++++++++++++++++++++++++-------------------
 block/blk-cgroup.h |  3 ++-
 fs/btrfs/Kconfig   |  1 +
 4 files changed, 48 insertions(+), 36 deletions(-)

(limited to 'block')

diff --git a/block/Kconfig b/block/Kconfig
index 941b2dca70db..69ccf7457ae1 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -41,6 +41,9 @@ config BLK_RQ_ALLOC_TIME
 config BLK_CGROUP_RWSTAT
         bool
 
+config BLK_CGROUP_PUNT_BIO
+        bool
+
 config BLK_DEV_BSG_COMMON
         tristate
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c524ecab440b..18c922579719 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -56,7 +56,6 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 static LIST_HEAD(all_blkcgs);           /* protected by blkcg_pol_mutex */
 
 bool blkcg_debug_stats = false;
-static struct workqueue_struct *blkcg_punt_bio_wq;
 
 #define BLKG_DESTROY_BATCH_SIZE  64
 
@@ -166,7 +165,9 @@ static void __blkg_release(struct rcu_head *rcu)
 {
         struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
 
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
         WARN_ON(!bio_list_empty(&blkg->async_bios));
+#endif
 
         /* release the blkcg and parent blkg refs this blkg has been holding */
         css_put(&blkg->blkcg->css);
@@ -188,6 +189,9 @@ static void blkg_release(struct percpu_ref *ref)
         call_rcu(&blkg->rcu_head, __blkg_release);
 }
 
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
+static struct workqueue_struct *blkcg_punt_bio_wq;
+
 static void blkg_async_bio_workfn(struct work_struct *work)
 {
         struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
@@ -214,6 +218,40 @@ static void blkg_async_bio_workfn(struct work_struct *work)
                 blk_finish_plug(&plug);
 }
 
+/*
+ * When a shared kthread issues a bio for a cgroup, doing so synchronously can
+ * lead to priority inversions as the kthread can be trapped waiting for that
+ * cgroup.  Use this helper instead of submit_bio to punt the actual issuing to
+ * a dedicated per-blkcg work item to avoid such priority inversions.
+ */
+void blkcg_punt_bio_submit(struct bio *bio)
+{
+        struct blkcg_gq *blkg = bio->bi_blkg;
+
+        if (blkg->parent) {
+                spin_lock(&blkg->async_bio_lock);
+                bio_list_add(&blkg->async_bios, bio);
+                spin_unlock(&blkg->async_bio_lock);
+                queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+        } else {
+                /* never bounce for the root cgroup */
+                submit_bio(bio);
+        }
+}
+EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
+
+static int __init blkcg_punt_bio_init(void)
+{
+        blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
+                                            WQ_MEM_RECLAIM | WQ_FREEZABLE |
+                                            WQ_UNBOUND | WQ_SYSFS, 0);
+        if (!blkcg_punt_bio_wq)
+                return -ENOMEM;
+        return 0;
+}
+subsys_initcall(blkcg_punt_bio_init);
+#endif /* CONFIG_BLK_CGROUP_PUNT_BIO */
+
 /**
  * bio_blkcg_css - return the blkcg CSS associated with a bio
  * @bio: target bio
@@ -269,10 +307,12 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
 
         blkg->q = disk->queue;
         INIT_LIST_HEAD(&blkg->q_node);
+        blkg->blkcg = blkcg;
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
         spin_lock_init(&blkg->async_bio_lock);
         bio_list_init(&blkg->async_bios);
         INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
-        blkg->blkcg = blkcg;
+#endif
 
         u64_stats_init(&blkg->iostat.sync);
         for_each_possible_cpu(cpu) {
@@ -1688,28 +1728,6 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
 
-/*
- * When a shared kthread issues a bio for a cgroup, doing so synchronously can
- * lead to priority inversions as the kthread can be trapped waiting for that
- * cgroup.  Use this helper instead of submit_bio to punt the actual issuing to
- * a dedicated per-blkcg work item to avoid such priority inversions.
- */
-void blkcg_punt_bio_submit(struct bio *bio)
-{
-        struct blkcg_gq *blkg = bio->bi_blkg;
-
-        if (blkg->parent) {
-                spin_lock(&blkg->async_bio_lock);
-                bio_list_add(&blkg->async_bios, bio);
-                spin_unlock(&blkg->async_bio_lock);
-                queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
-        } else {
-                /* never bounce for the root cgroup */
-                submit_bio(bio);
-        }
-}
-EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
-
 /*
  * Scale the accumulated delay based on how long it has been since we updated
  * the delay.  We only call this when we are adding delay, in case it's been a
@@ -2088,16 +2106,5 @@ bool blk_cgroup_congested(void)
         return ret;
 }
 
-static int __init blkcg_init(void)
-{
-        blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
-                                            WQ_MEM_RECLAIM | WQ_FREEZABLE |
-                                            WQ_UNBOUND | WQ_SYSFS, 0);
-        if (!blkcg_punt_bio_wq)
-                return -ENOMEM;
-        return 0;
-}
-subsys_initcall(blkcg_init);
-
 module_param(blkcg_debug_stats, bool, 0644);
 MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 64758ab9f1f1..e98d2c1be354 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -72,9 +72,10 @@ struct blkcg_gq {
         struct blkg_iostat_set          iostat;
 
         struct blkg_policy_data         *pd[BLKCG_MAX_POLS];
-
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
         spinlock_t                      async_bio_lock;
         struct bio_list                 async_bios;
+#endif
         union {
                 struct work_struct      async_bio_work;
                 struct work_struct      free_work;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 37b6bab90c83..66fa9ab2c046 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -2,6 +2,7 @@
 
 config BTRFS_FS
         tristate "Btrfs filesystem support"
+        select BLK_CGROUP_PUNT_BIO
         select CRYPTO
         select CRYPTO_CRC32C
         select LIBCRC32C
--
cgit v1.2.3
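The include/ side of the third patch is also hidden by the 'block' filter.
For builds that do not enable the new option, callers like btrfs presumably
see an inline fallback that degrades to a plain submit_bio; the snippet below
is a sketch of that assumed interface, not the verbatim header change:

  /* Sketch of the assumed caller-facing interface; not part of this log. */
  #if defined(CONFIG_BLK_CGROUP) && defined(CONFIG_BLK_CGROUP_PUNT_BIO)
  void blkcg_punt_bio_submit(struct bio *bio);
  #else
  static inline void blkcg_punt_bio_submit(struct bio *bio)
  {
          /* no punt machinery configured, just issue the bio directly */
          submit_bio(bio);
  }
  #endif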