aboutsummaryrefslogtreecommitdiff
path: root/block/blk-iolatency.c
diff options
context:
space:
mode:
authorLinus Torvalds2022-06-03 10:14:48 -0700
committerLinus Torvalds2022-06-03 10:14:48 -0700
commit34845d92bca527b5c2cf8b2293b71b9c746c79ca (patch)
treea1fd77581f6275843d0305a966d42d5b1b4748cf /block/blk-iolatency.c
parent5ac8bdb9ad47334a9590e29daf7e4149b0a34729 (diff)
parent41e46b3c2aa24f755b2ae9ec4ce931ba5f0d8532 (diff)
Merge tag 'for-5.19/block-2022-06-02' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "Just a collection of fixes that have been queued up since the initial merge window pull request, the majority of which are targeted for stable as well. One bio_set fix that fixes an issue with the dm adoption of cached bio structs that got introduced in this merge window" * tag 'for-5.19/block-2022-06-02' of git://git.kernel.dk/linux-block: block: Fix potential deadlock in blk_ia_range_sysfs_show() block: fix bio_clone_blkg_association() to associate with proper blkcg_gq block: remove useless BUG_ON() in blk_mq_put_tag() blk-mq: do not update io_ticks with passthrough requests block: make bioset_exit() fully resilient against being called twice block: use bio_queue_enter instead of blk_queue_enter in bio_poll block: document BLK_STS_AGAIN usage block: take destination bvec offsets into account in bio_copy_data_iter blk-iolatency: Fix inflight count imbalances and IO hangs on offline blk-mq: don't touch ->tagset in blk_mq_get_sq_hctx
Diffstat (limited to 'block/blk-iolatency.c')
-rw-r--r--block/blk-iolatency.c122
1 files changed, 64 insertions, 58 deletions
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 5b676c7cf2b6..9568bf8dfe82 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -87,7 +87,17 @@ struct iolatency_grp;
struct blk_iolatency {
struct rq_qos rqos;
struct timer_list timer;
- atomic_t enabled;
+
+ /*
+ * ->enabled is the master enable switch gating the throttling logic and
+ * inflight tracking. The number of cgroups which have iolat enabled is
+ * tracked in ->enable_cnt, and ->enable is flipped on/off accordingly
+ * from ->enable_work with the request_queue frozen. For details, See
+ * blkiolatency_enable_work_fn().
+ */
+ bool enabled;
+ atomic_t enable_cnt;
+ struct work_struct enable_work;
};
static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
@@ -95,11 +105,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
return container_of(rqos, struct blk_iolatency, rqos);
}
-static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
-{
- return atomic_read(&blkiolat->enabled) > 0;
-}
-
struct child_latency_info {
spinlock_t lock;
@@ -464,7 +469,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
struct blkcg_gq *blkg = bio->bi_blkg;
bool issue_as_root = bio_issue_as_root_blkg(bio);
- if (!blk_iolatency_enabled(blkiolat))
+ if (!blkiolat->enabled)
return;
while (blkg && blkg->parent) {
@@ -594,7 +599,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
u64 window_start;
u64 now;
bool issue_as_root = bio_issue_as_root_blkg(bio);
- bool enabled = false;
int inflight = 0;
blkg = bio->bi_blkg;
@@ -605,8 +609,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
if (!iolat)
return;
- enabled = blk_iolatency_enabled(iolat->blkiolat);
- if (!enabled)
+ if (!iolat->blkiolat->enabled)
return;
now = ktime_to_ns(ktime_get());
@@ -645,6 +648,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
del_timer_sync(&blkiolat->timer);
+ flush_work(&blkiolat->enable_work);
blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
kfree(blkiolat);
}
@@ -716,6 +720,44 @@ next:
rcu_read_unlock();
}
+/**
+ * blkiolatency_enable_work_fn - Enable or disable iolatency on the device
+ * @work: enable_work of the blk_iolatency of interest
+ *
+ * iolatency needs to keep track of the number of in-flight IOs per cgroup. This
+ * is relatively expensive as it involves walking up the hierarchy twice for
+ * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we
+ * want to disable the in-flight tracking.
+ *
+ * We have to make sure that the counting is balanced - we don't want to leak
+ * the in-flight counts by disabling accounting in the completion path while IOs
+ * are in flight. This is achieved by ensuring that no IO is in flight by
+ * freezing the queue while flipping ->enabled. As this requires a sleepable
+ * context, ->enabled flipping is punted to this work function.
+ */
+static void blkiolatency_enable_work_fn(struct work_struct *work)
+{
+ struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency,
+ enable_work);
+ bool enabled;
+
+ /*
+ * There can only be one instance of this function running for @blkiolat
+ * and it's guaranteed to be executed at least once after the latest
+ * ->enabled_cnt modification. Acting on the latest ->enable_cnt is
+ * sufficient.
+ *
+ * Also, we know @blkiolat is safe to access as ->enable_work is flushed
+ * in blkcg_iolatency_exit().
+ */
+ enabled = atomic_read(&blkiolat->enable_cnt);
+ if (enabled != blkiolat->enabled) {
+ blk_mq_freeze_queue(blkiolat->rqos.q);
+ blkiolat->enabled = enabled;
+ blk_mq_unfreeze_queue(blkiolat->rqos.q);
+ }
+}
+
int blk_iolatency_init(struct request_queue *q)
{
struct blk_iolatency *blkiolat;
@@ -741,17 +783,15 @@ int blk_iolatency_init(struct request_queue *q)
}
timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
+ INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
return 0;
}
-/*
- * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
- * return 0.
- */
-static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
{
struct iolatency_grp *iolat = blkg_to_lat(blkg);
+ struct blk_iolatency *blkiolat = iolat->blkiolat;
u64 oldval = iolat->min_lat_nsec;
iolat->min_lat_nsec = val;
@@ -759,13 +799,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
BLKIOLATENCY_MAX_WIN_SIZE);
- if (!oldval && val)
- return 1;
+ if (!oldval && val) {
+ if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
+ schedule_work(&blkiolat->enable_work);
+ }
if (oldval && !val) {
blkcg_clear_delay(blkg);
- return -1;
+ if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
+ schedule_work(&blkiolat->enable_work);
}
- return 0;
}
static void iolatency_clear_scaling(struct blkcg_gq *blkg)
@@ -797,7 +839,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
u64 lat_val = 0;
u64 oldval;
int ret;
- int enable = 0;
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
if (ret)
@@ -832,41 +873,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
blkg = ctx.blkg;
oldval = iolat->min_lat_nsec;
- enable = iolatency_set_min_lat_nsec(blkg, lat_val);
- if (enable) {
- if (!blk_get_queue(blkg->q)) {
- ret = -ENODEV;
- goto out;
- }
-
- blkg_get(blkg);
- }
-
- if (oldval != iolat->min_lat_nsec) {
+ iolatency_set_min_lat_nsec(blkg, lat_val);
+ if (oldval != iolat->min_lat_nsec)
iolatency_clear_scaling(blkg);
- }
-
ret = 0;
out:
blkg_conf_finish(&ctx);
- if (ret == 0 && enable) {
- struct iolatency_grp *tmp = blkg_to_lat(blkg);
- struct blk_iolatency *blkiolat = tmp->blkiolat;
-
- blk_mq_freeze_queue(blkg->q);
-
- if (enable == 1)
- atomic_inc(&blkiolat->enabled);
- else if (enable == -1)
- atomic_dec(&blkiolat->enabled);
- else
- WARN_ON_ONCE(1);
-
- blk_mq_unfreeze_queue(blkg->q);
-
- blkg_put(blkg);
- blk_put_queue(blkg->q);
- }
return ret ?: nbytes;
}
@@ -1005,14 +1017,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
struct blkcg_gq *blkg = lat_to_blkg(iolat);
- struct blk_iolatency *blkiolat = iolat->blkiolat;
- int ret;
- ret = iolatency_set_min_lat_nsec(blkg, 0);
- if (ret == 1)
- atomic_inc(&blkiolat->enabled);
- if (ret == -1)
- atomic_dec(&blkiolat->enabled);
+ iolatency_set_min_lat_nsec(blkg, 0);
iolatency_clear_scaling(blkg);
}