diff options
author | Linus Torvalds | 2018-02-04 11:16:35 -0800 |
---|---|---|
committer | Linus Torvalds | 2018-02-04 11:16:35 -0800 |
commit | 64b28683deba132f301d1cecfc25c32e295f53a1 (patch) | |
tree | be38a4e77c530fb129339f983a9d307c60312df8 /block | |
parent | d3658c2266012f270da52e3e0365536e394bd3bd (diff) | |
parent | 1d51877578799bfe0fcfe189d8233c9fccf05931 (diff) |
Merge tag 'for-linus-20180204' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe:
"Most of this is fixes and not new code/features:
- skd fix from Arnd, fixing a build error dependent on sla allocator
type.
- blk-mq scheduler discard merging fixes, one from me and one from
Keith. This fixes a segment miscalculation for blk-mq-sched, where
we mistakenly think two segments are physically contigious even
though the request isn't carrying real data. Also fixes a bio-to-rq
merge case.
- Don't re-set a bit on the buffer_head flags, if it's already set.
This can cause scalability concerns on bigger machines and
workloads. From Kemi Wang.
- Add BLK_STS_DEV_RESOURCE return value to blk-mq, allowing us to
distuingish between a local (device related) resource starvation
and a global one. The latter might happen without IO being in
flight, so it has to be handled a bit differently. From Ming"
* tag 'for-linus-20180204' of git://git.kernel.dk/linux-block:
block: skd: fix incorrect linux/slab_def.h inclusion
buffer: Avoid setting buffer bits that are already set
blk-mq-sched: Enable merging discard bio into request
blk-mq: fix discard merge with scheduler attached
blk-mq: introduce BLK_STS_DEV_RESOURCE
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-core.c | 3 | ||||
-rw-r--r-- | block/blk-merge.c | 29 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 2 | ||||
-rw-r--r-- | block/blk-mq.c | 20 |
4 files changed, 47 insertions, 7 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index a2005a485335..d0d104268f1a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -145,6 +145,7 @@ static const struct { [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" }, [BLK_STS_PROTECTION] = { -EILSEQ, "protection" }, [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, + [BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" }, [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" }, /* device mapper special case, should not leak out: */ @@ -3282,6 +3283,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, { if (bio_has_data(bio)) rq->nr_phys_segments = bio_phys_segments(q, bio); + else if (bio_op(bio) == REQ_OP_DISCARD) + rq->nr_phys_segments = 1; rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; diff --git a/block/blk-merge.c b/block/blk-merge.c index 8452fc7164cc..782940c65d8a 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -550,6 +550,24 @@ static bool req_no_special_merge(struct request *req) return !q->mq_ops && req->special; } +static bool req_attempt_discard_merge(struct request_queue *q, struct request *req, + struct request *next) +{ + unsigned short segments = blk_rq_nr_discard_segments(req); + + if (segments >= queue_max_discard_segments(q)) + goto no_merge; + if (blk_rq_sectors(req) + bio_sectors(next->bio) > + blk_rq_get_max_sectors(req, blk_rq_pos(req))) + goto no_merge; + + req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next); + return true; +no_merge: + req_set_nomerge(q, req); + return false; +} + static int ll_merge_requests_fn(struct request_queue *q, struct request *req, struct request *next) { @@ -683,9 +701,13 @@ static struct request *attempt_merge(struct request_queue *q, * If we are allowed to merge, then append bio list * from next to rq and release next. merge_requests_fn * will have updated segment counts, update sector - * counts here. + * counts here. Handle DISCARDs separately, as they + * have separate settings. */ - if (!ll_merge_requests_fn(q, req, next)) + if (req_op(req) == REQ_OP_DISCARD) { + if (!req_attempt_discard_merge(q, req, next)) + return NULL; + } else if (!ll_merge_requests_fn(q, req, next)) return NULL; /* @@ -715,7 +737,8 @@ static struct request *attempt_merge(struct request_queue *q, req->__data_len += blk_rq_bytes(next); - elv_merge_requests(q, req, next); + if (req_op(req) != REQ_OP_DISCARD) + elv_merge_requests(q, req, next); /* * 'next' is going away, so update stats accordingly diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 55c0a745b427..25c14c58385c 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -259,6 +259,8 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, if (!*merged_request) elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE); return true; + case ELEVATOR_DISCARD_MERGE: + return bio_attempt_discard_merge(q, rq, bio); default: return false; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 01f271d40825..df93102e2149 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1162,6 +1162,8 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx, return true; } +#define BLK_MQ_RESOURCE_DELAY 3 /* ms units */ + bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, bool got_budget) { @@ -1169,6 +1171,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, struct request *rq, *nxt; bool no_tag = false; int errors, queued; + blk_status_t ret = BLK_STS_OK; if (list_empty(list)) return false; @@ -1181,7 +1184,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, errors = queued = 0; do { struct blk_mq_queue_data bd; - blk_status_t ret; rq = list_first_entry(list, struct request, queuelist); if (!blk_mq_get_driver_tag(rq, &hctx, false)) { @@ -1226,7 +1228,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, } ret = q->mq_ops->queue_rq(hctx, &bd); - if (ret == BLK_STS_RESOURCE) { + if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { /* * If an I/O scheduler has been configured and we got a * driver tag for the next request already, free it @@ -1257,6 +1259,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, * that is where we will continue on next queue run. */ if (!list_empty(list)) { + bool needs_restart; + spin_lock(&hctx->lock); list_splice_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); @@ -1280,10 +1284,17 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, * - Some but not all block drivers stop a queue before * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq * and dm-rq. + * + * If driver returns BLK_STS_RESOURCE and SCHED_RESTART + * bit is set, run queue after a delay to avoid IO stalls + * that could otherwise occur if the queue is idle. */ - if (!blk_mq_sched_needs_restart(hctx) || + needs_restart = blk_mq_sched_needs_restart(hctx); + if (!needs_restart || (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) blk_mq_run_hw_queue(hctx, true); + else if (needs_restart && (ret == BLK_STS_RESOURCE)) + blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); } return (queued + errors) != 0; @@ -1764,6 +1775,7 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, *cookie = new_cookie; break; case BLK_STS_RESOURCE: + case BLK_STS_DEV_RESOURCE: __blk_mq_requeue_request(rq); break; default: @@ -1826,7 +1838,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, hctx_lock(hctx, &srcu_idx); ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false); - if (ret == BLK_STS_RESOURCE) + if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) blk_mq_sched_insert_request(rq, false, true, false); else if (ret != BLK_STS_OK) blk_mq_end_request(rq, ret); |